author     Benjamin Herrenschmidt <benh@kernel.crashing.org>    2012-03-06 02:27:59 -0500
committer  Benjamin Herrenschmidt <benh@kernel.crashing.org>    2012-03-08 21:25:06 -0500
commit     7230c5644188cd9e3fb380cc97dde00c464a3ba7 (patch)
tree       8e71a0a2e8167e21b46e96165b7dd53fa7e7b7f2 /arch/powerpc/kernel/irq.c
parent     d9ada91ae2969ae6b6dc3574fd08a6ebda5df766 (diff)
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupt handling has some
issues that this patch tries to address.
We don't perform the various workarounds we need when re-enabling
interrupts in some cases, such as when returning from an interrupt,
and thus we may still lose decrementer or doorbell interrupts, or
get them delayed.
The current scheme also makes it much harder to handle the external
"edge" interrupts provided by some BookE processors when using the
EPR facility (External Proxy) and the Freescale Hypervisor.
Additionally, we tend to keep interrupts hard disabled in a number
of cases, such as decrementer interrupts, external interrupts, or
when a masked decrementer interrupt is pending. This is sub-optimal.
This is an attempt at fixing it all in one go by reworking the way
we do the lazy interrupt disabling from the ground up.
The base idea is to replace the "hard_enabled" field with an
"irq_happened" field in which we store a bit mask of which interrupts
occurred while soft-disabled.
When re-enabling, either via arch_local_irq_restore() or when returning
from an interrupt, we can now decide what to do by testing bits in that
field.
We then implement replaying of the missed interrupts either by
re-using the existing exception frame (in the exception exit case) or
via the creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case).
This removes the need to play with the decrementer to try to create
fake interrupts, among other things.
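
To illustrate the idea, here is a small standalone C model of the
irq_happened scheme. This is an illustration only, not code from the
patch: the bit values and the exact replay order are assumptions; the
flag names and vector numbers simply mirror the ones used in the code
further down.

/* Standalone model of the irq_happened scheme (illustration only). */
#include <stdio.h>

#define PACA_IRQ_HARD_DIS       0x01    /* bit values are assumptions */
#define PACA_IRQ_DBELL          0x02
#define PACA_IRQ_EE             0x04
#define PACA_IRQ_DEC            0x08

static unsigned char irq_happened;

/* A masked handler only records that the interrupt happened. */
static void note_masked_interrupt(unsigned char bit)
{
        irq_happened |= bit;
}

/* On re-enable, clear one bit at a time and return its vector. */
static unsigned int check_irq_replay(void)
{
        irq_happened &= ~PACA_IRQ_HARD_DIS;
        if (irq_happened & PACA_IRQ_DEC) {
                irq_happened &= ~PACA_IRQ_DEC;
                return 0x900;
        }
        if (irq_happened & PACA_IRQ_EE) {
                irq_happened &= ~PACA_IRQ_EE;
                return 0x500;
        }
        if (irq_happened & PACA_IRQ_DBELL) {
                irq_happened &= ~PACA_IRQ_DBELL;
                return 0x280;
        }
        return 0;
}

int main(void)
{
        unsigned int vec;

        /* Pretend an external and a decrementer interrupt were masked. */
        note_masked_interrupt(PACA_IRQ_EE);
        note_masked_interrupt(PACA_IRQ_DEC);

        /* Replay them one at a time, as the real code does. */
        while ((vec = check_irq_replay()) != 0)
                printf("replaying vector 0x%x\n", vec);
        return 0;
}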
In addition, this adds a few refinements:
- We no longer hard disable decrementer interrupts that occur
while soft-disabled. We now simply bump the decrementer back to max
(on Book3S) or leave it stopped (on BookE) and continue with hard
interrupts enabled, which means that we'll potentially get better
sample quality from performance monitor interrupts (see the sketch
after this list).
- Timer, decrementer and doorbell interrupts now hard-enable
shortly after removing the source of the interrupt, which means
they no longer run entirely hard disabled. Again, this will improve
perf sample quality.
- On Book3E 64-bit, we now make the performance monitor interrupt
act as an NMI like on Book3S (the necessary C code for that to work
appears to already be present in the FSL perf code, notably calling
nmi_enter instead of irq_enter). (This also fixes a bug where BookE
perfmon interrupts could clobber r14 ... oops)
- We could make "masked" decrementer interrupts act as NMIs when doing
timer-based perf sampling to improve the sample quality.
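
The sketch referenced in the first bullet above: a standalone C model
of the masked-decrementer behaviour. Again an illustration only, not
the patch itself; DEC_MAX and the variable names are stand-ins for
the real decrementer maximum and PACA state.

/* Model of the masked-decrementer refinement (illustration only). */
#include <stdbool.h>
#include <stdio.h>

#define DEC_MAX 0x7fffffff      /* stand-in for the decrementer maximum */

static bool soft_enabled;       /* models the PACA soft-enable flag */
static bool dec_pending;        /* models PACA_IRQ_DEC */
static long dec_value;          /* models the decrementer register */

static void decrementer_fires(void)
{
        if (!soft_enabled) {
                /* Just note the event and push the countdown far away;
                 * hard interrupts (and thus perf sampling) stay live. */
                dec_pending = true;
                dec_value = DEC_MAX;
                return;
        }
        printf("timer handler runs immediately\n");
}

int main(void)
{
        soft_enabled = false;
        decrementer_fires();
        printf("pending=%d, decrementer re-armed to 0x%lx\n",
               dec_pending, dec_value);
        return 0;
}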
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2:
- Add hard-enable to decrementer, timer and doorbells
- Fix CR clobber in masked irq handling on BookE
- Make embedded perf interrupt act as an NMI
- Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want
to retrigger an interrupt without preventing hard-enable
v3:
- Fix or vs. ori bug on Book3E
- Fix enabling of interrupts for some exceptions on Book3E
v4:
- Fix resend of doorbells on return from interrupt on Book3E
v5:
- Rebased on top of my latest series, which involves some significant
rework of some aspects of the patch.
v6:
- 32-bit compile fix
- more compile fixes with various .config combos
- factor out the asm code to soft-disable interrupts
- remove the C wrapper around preempt_schedule_irq
v7:
- Fix a bug with hard irq state tracking on native power7
Diffstat (limited to 'arch/powerpc/kernel/irq.c')
-rw-r--r--   arch/powerpc/kernel/irq.c   204
1 file changed, 146 insertions(+), 58 deletions(-)
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 9b6e80668cfb..eb804e15b29b 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -95,14 +95,14 @@ extern int tau_interrupts(int);
 
 int distribute_irqs = 1;
 
-static inline notrace unsigned long get_hard_enabled(void)
+static inline notrace unsigned long get_irq_happened(void)
 {
-        unsigned long enabled;
+        unsigned long happened;
 
         __asm__ __volatile__("lbz %0,%1(13)"
-        : "=r" (enabled) : "i" (offsetof(struct paca_struct, hard_enabled)));
+        : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened)));
 
-        return enabled;
+        return happened;
 }
 
 static inline notrace void set_soft_enabled(unsigned long enable)
@@ -111,88 +111,167 @@ static inline notrace void set_soft_enabled(unsigned long enable)
         : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
 }
 
-static inline notrace void decrementer_check_overflow(void)
+static inline notrace int decrementer_check_overflow(void)
 {
         u64 now = get_tb_or_rtc();
-        u64 *next_tb;
+        u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
 
-        preempt_disable();
-        next_tb = &__get_cpu_var(decrementers_next_tb);
-
         if (now >= *next_tb)
                 set_dec(1);
-        preempt_enable();
+        return now >= *next_tb;
 }
 
-notrace void arch_local_irq_restore(unsigned long en)
+/* This is called whenever we are re-enabling interrupts
+ * and returns either 0 (nothing to do) or 500/900 if there's
+ * either an EE or a DEC to generate.
+ *
+ * This is called in two contexts: From arch_local_irq_restore()
+ * before soft-enabling interrupts, and from the exception exit
+ * path when returning from an interrupt from a soft-disabled to
+ * a soft enabled context. In both case we have interrupts hard
+ * disabled.
+ *
+ * We take care of only clearing the bits we handled in the
+ * PACA irq_happened field since we can only re-emit one at a
+ * time and we don't want to "lose" one.
+ */
+notrace unsigned int __check_irq_replay(void)
 {
         /*
-         * get_paca()->soft_enabled = en;
-         * Is it ever valid to use local_irq_restore(0) when soft_enabled is 1?
-         * That was allowed before, and in such a case we do need to take care
-         * that gcc will set soft_enabled directly via r13, not choose to use
-         * an intermediate register, lest we're preempted to a different cpu.
+         * We use local_paca rather than get_paca() to avoid all
+         * the debug_smp_processor_id() business in this low level
+         * function
          */
-        set_soft_enabled(en);
-        if (!en)
-                return;
+        unsigned char happened = local_paca->irq_happened;
 
-#ifdef CONFIG_PPC_STD_MMU_64
-        if (firmware_has_feature(FW_FEATURE_ISERIES)) {
-                /*
-                 * Do we need to disable preemption here? Not really: in the
-                 * unlikely event that we're preempted to a different cpu in
-                 * between getting r13, loading its lppaca_ptr, and loading
-                 * its any_int, we might call iseries_handle_interrupts without
-                 * an interrupt pending on the new cpu, but that's no disaster,
-                 * is it? And the business of preempting us off the old cpu
-                 * would itself involve a local_irq_restore which handles the
-                 * interrupt to that cpu.
-                 *
-                 * But use "local_paca->lppaca_ptr" instead of "get_lppaca()"
-                 * to avoid any preemption checking added into get_paca().
-                 */
-                if (local_paca->lppaca_ptr->int_dword.any_int)
-                        iseries_handle_interrupts();
+        /* Clear bit 0 which we wouldn't clear otherwise */
+        local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
+        /*
+         * Force the delivery of pending soft-disabled interrupts on PS3.
+         * Any HV call will have this side effect.
+         */
+        if (firmware_has_feature(FW_FEATURE_PS3_LV1)) {
+                u64 tmp, tmp2;
+                lv1_get_version_info(&tmp, &tmp2);
         }
-#endif /* CONFIG_PPC_STD_MMU_64 */
 
         /*
-         * if (get_paca()->hard_enabled) return;
-         * But again we need to take care that gcc gets hard_enabled directly
-         * via r13, not choose to use an intermediate register, lest we're
-         * preempted to a different cpu in between the two instructions.
+         * We may have missed a decrementer interrupt. We check the
+         * decrementer itself rather than the paca irq_happened field
+         * in case we also had a rollover while hard disabled
+         */
+        local_paca->irq_happened &= ~PACA_IRQ_DEC;
+        if (decrementer_check_overflow())
+                return 0x900;
+
+        /* Finally check if an external interrupt happened */
+        local_paca->irq_happened &= ~PACA_IRQ_EE;
+        if (happened & PACA_IRQ_EE)
+                return 0x500;
+
+#ifdef CONFIG_PPC_BOOK3E
+        /* Finally check if an EPR external interrupt happened
+         * this bit is typically set if we need to handle another
+         * "edge" interrupt from within the MPIC "EPR" handler
+         */
+        local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
+        if (happened & PACA_IRQ_EE_EDGE)
+                return 0x500;
+
+        local_paca->irq_happened &= ~PACA_IRQ_DBELL;
+        if (happened & PACA_IRQ_DBELL)
+                return 0x280;
+#endif /* CONFIG_PPC_BOOK3E */
+
+        /* There should be nothing left ! */
+        BUG_ON(local_paca->irq_happened != 0);
+
+        return 0;
+}
+
+notrace void arch_local_irq_restore(unsigned long en)
+{
+        unsigned char irq_happened;
+        unsigned int replay;
+
+        /* Write the new soft-enabled value */
+        set_soft_enabled(en);
+        if (!en)
+                return;
+        /*
+         * From this point onward, we can take interrupts, preempt,
+         * etc... unless we got hard-disabled. We check if an event
+         * happened. If none happened, we know we can just return.
+         *
+         * We may have preempted before the check below, in which case
+         * we are checking the "new" CPU instead of the old one. This
+         * is only a problem if an event happened on the "old" CPU.
+         *
+         * External interrupt events on non-iseries will have caused
+         * interrupts to be hard-disabled, so there is no problem, we
+         * cannot have preempted.
+         *
+         * That leaves us with EEs on iSeries or decrementer interrupts,
+         * which I decided to safely ignore. The preemption would have
+         * itself been the result of an interrupt, upon which return we
+         * will have checked for pending events on the old CPU.
          */
-        if (get_hard_enabled())
+        irq_happened = get_irq_happened();
+        if (!irq_happened)
                 return;
 
         /*
-         * Need to hard-enable interrupts here. Since currently disabled,
-         * no need to take further asm precautions against preemption; but
-         * use local_paca instead of get_paca() to avoid preemption checking.
+         * We need to hard disable to get a trusted value from
+         * __check_irq_replay(). We also need to soft-disable
+         * again to avoid warnings in there due to the use of
+         * per-cpu variables.
+         *
+         * We know that if the value in irq_happened is exactly 0x01
+         * then we are already hard disabled (there are other less
+         * common cases that we'll ignore for now), so we skip the
+         * (expensive) mtmsrd.
          */
-        local_paca->hard_enabled = en;
+        if (unlikely(irq_happened != PACA_IRQ_HARD_DIS))
+                __hard_irq_disable();
+        set_soft_enabled(0);
 
         /*
-         * Trigger the decrementer if we have a pending event. Some processors
-         * only trigger on edge transitions of the sign bit. We might also
-         * have disabled interrupts long enough that the decrementer wrapped
-         * to positive.
+         * Check if anything needs to be re-emitted. We haven't
+         * soft-enabled yet to avoid warnings in decrementer_check_overflow
+         * accessing per-cpu variables
          */
-        decrementer_check_overflow();
+        replay = __check_irq_replay();
+
+        /* We can soft-enable now */
+        set_soft_enabled(1);
 
         /*
-         * Force the delivery of pending soft-disabled interrupts on PS3.
-         * Any HV call will have this side effect.
+         * And replay if we have to. This will return with interrupts
+         * hard-enabled.
          */
-        if (firmware_has_feature(FW_FEATURE_PS3_LV1)) {
-                u64 tmp, tmp2;
-                lv1_get_version_info(&tmp, &tmp2);
+        if (replay) {
+                __replay_interrupt(replay);
+                return;
         }
 
+        /* Finally, let's ensure we are hard enabled */
         __hard_irq_enable();
 }
 EXPORT_SYMBOL(arch_local_irq_restore);
+
+/*
+ * This is specifically called by assembly code to re-enable interrupts
+ * if they are currently disabled. This is typically called before
+ * schedule() or do_signal() when returning to userspace. We do it
+ * in C to avoid the burden of dealing with lockdep etc...
+ */
+void restore_interrupts(void)
+{
+        if (irqs_disabled())
+                local_irq_enable();
+}
+
 #endif /* CONFIG_PPC64 */
 
 int arch_show_interrupts(struct seq_file *p, int prec)
@@ -360,8 +439,17 @@ void do_IRQ(struct pt_regs *regs)
 
         check_stack_overflow();
 
+        /*
+         * Query the platform PIC for the interrupt & ack it.
+         *
+         * This will typically lower the interrupt line to the CPU
+         */
         irq = ppc_md.get_irq();
 
+        /* We can hard enable interrupts now */
+        may_hard_irq_enable();
+
+        /* And finally process it */
         if (irq != NO_IRQ && irq != NO_IRQ_IGNORE)
                 handle_one_irq(irq);
         else if (irq != NO_IRQ_IGNORE)