author    Benjamin Herrenschmidt <benh@kernel.crashing.org>  2012-03-06 02:27:59 -0500
committer Benjamin Herrenschmidt <benh@kernel.crashing.org>  2012-03-08 21:25:06 -0500
commit    7230c5644188cd9e3fb380cc97dde00c464a3ba7 (patch)
tree      8e71a0a2e8167e21b46e96165b7dd53fa7e7b7f2 /arch/powerpc/kernel/irq.c
parent    d9ada91ae2969ae6b6dc3574fd08a6ebda5df766 (diff)
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupt handling has some issues
that this tries to address. We don't do the various workarounds we need
to do when re-enabling interrupts in some cases, such as when returning
from an interrupt, and thus we may still lose or get delayed decrementer
or doorbell interrupts.

The current scheme also makes it much harder to handle the external
"edge" interrupts provided by some BookE processors when using the EPR
facility (External Proxy) and the Freescale Hypervisor.

Additionally, we tend to keep interrupts hard disabled in a number of
cases, such as decrementer interrupts, external interrupts, or when a
masked decrementer interrupt is pending. This is sub-optimal.

This is an attempt at fixing it all in one go by reworking the way we
do the lazy interrupt disabling from the ground up.

The base idea is to replace the "hard_enabled" field with an
"irq_happened" field in which we store a bit mask of which interrupts
occurred while soft-disabled. When re-enabling, either via
arch_local_irq_restore() or when returning from an interrupt, we can
now decide what to do by testing bits in that field.

We then implement replaying of the missed interrupts either by re-using
the existing exception frame (in the exception exit case) or via the
creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case). This removes the need to play with the
decrementer to try to create fake interrupts, among others.

In addition, this adds a few refinements:

 - We no longer hard disable decrementer interrupts that occur while
   soft-disabled. We now simply bump the decrementer back to max (on
   BookS) or leave it stopped (on BookE) and continue with hard
   interrupts enabled, which means that we'll potentially get better
   sample quality from performance monitor interrupts.

 - Timer, decrementer and doorbell interrupts now hard-enable shortly
   after removing the source of the interrupt, which means they no
   longer run entirely hard disabled. Again, this will improve perf
   sample quality.

 - On Book3E 64-bit, we now make the performance monitor interrupt act
   as an NMI like Book3S (the necessary C code for that to work appears
   to already be present in the FSL perf code, notably calling
   nmi_enter instead of irq_enter). (This also fixes a bug where BookE
   perfmon interrupts could clobber r14 ... oops.)

 - We could make "masked" decrementer interrupts act as NMIs when doing
   timer-based perf sampling to improve the sample quality.

Signed-off-by-yet: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

v2:
 - Add hard-enable to decrementer, timer and doorbells
 - Fix CR clobber in masked irq handling on BookE
 - Make embedded perf interrupt act as an NMI
 - Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want to retrigger
   an interrupt without preventing hard-enable

v3:
 - Fix or vs. ori bug on Book3E
 - Fix enabling of interrupts for some exceptions on Book3E

v4:
 - Fix resend of doorbells on return from interrupt on Book3E

v5:
 - Rebased on top of my latest series, which involves some significant
   rework of some aspects of the patch.

v6:
 - 32-bit compile fix
 - more compile fixes with various .config combos
 - factor out the asm code to soft-disable interrupts
 - remove the C wrapper around preempt_schedule_irq

v7:
 - Fix a bug with hard irq state tracking on native power7
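To make the new scheme concrete before reading the diff, here is a
minimal, self-contained C sketch of the bookkeeping described above. It
is an illustration only, not the kernel code: the real recording side
lives in the assembly masked-interrupt handlers, the real state lives in
the per-CPU PACA, and the real replay side checks the decrementer
hardware rather than trusting the PACA_IRQ_DEC bit. Only
PACA_IRQ_HARD_DIS == 0x01 is implied by the patch itself (the "exactly
0x01" comment in arch_local_irq_restore()); the other bit values and all
helper names here are invented for the example.

/*
 * Illustrative model of the lazy-interrupt bookkeeping -- NOT kernel
 * code.  Bit values other than HARD_DIS (0x01, per the patch) are
 * invented for this example.
 */
#include <stdio.h>

#define PACA_IRQ_HARD_DIS 0x01  /* MSR:EE really cleared (patch-confirmed) */
#define PACA_IRQ_DBELL    0x02  /* illustrative value */
#define PACA_IRQ_EE       0x04  /* illustrative value */
#define PACA_IRQ_DEC      0x08  /* illustrative value */

static struct { int soft_enabled; unsigned char irq_happened; } paca;

/* What the (real, assembly) masked handlers do: remember the event,
 * mask its source, and return with the event pending only in software. */
static void masked_interrupt(unsigned char event)
{
	paca.irq_happened |= event | PACA_IRQ_HARD_DIS;
}

/* Replay side, modeled on __check_irq_replay(): clear one bit at a
 * time and report the exception vector to re-emit (0 = nothing).
 * The real code checks the decrementer hardware instead of the bit. */
static unsigned int check_irq_replay(void)
{
	unsigned char happened = paca.irq_happened;

	paca.irq_happened &= ~PACA_IRQ_HARD_DIS;
	paca.irq_happened &= ~PACA_IRQ_DEC;
	if (happened & PACA_IRQ_DEC)
		return 0x900;
	paca.irq_happened &= ~PACA_IRQ_EE;
	if (happened & PACA_IRQ_EE)
		return 0x500;
	return 0;
}

int main(void)
{
	paca.soft_enabled = 0;           /* local_irq_disable()      */
	masked_interrupt(PACA_IRQ_EE);   /* device interrupt arrives */
	masked_interrupt(PACA_IRQ_DEC);  /* timer fires too          */

	unsigned int vec;                /* local_irq_enable(): replay */
	while ((vec = check_irq_replay()) != 0)
		printf("replaying exception vector 0x%x\n", vec);
	paca.soft_enabled = 1;
	return 0;
}

Note that the real kernel re-emits at most one interrupt per re-enable,
by reusing the exception frame or the assembly trampoline mentioned
above; the drain loop here merely stands in for that machinery.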
Diffstat (limited to 'arch/powerpc/kernel/irq.c')
-rw-r--r--  arch/powerpc/kernel/irq.c | 204
1 file changed, 146 insertions(+), 58 deletions(-)
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 9b6e80668cfb..eb804e15b29b 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -95,14 +95,14 @@ extern int tau_interrupts(int);
 
 int distribute_irqs = 1;
 
-static inline notrace unsigned long get_hard_enabled(void)
+static inline notrace unsigned long get_irq_happened(void)
 {
-	unsigned long enabled;
+	unsigned long happened;
 
 	__asm__ __volatile__("lbz %0,%1(13)"
-	: "=r" (enabled) : "i" (offsetof(struct paca_struct, hard_enabled)));
+	: "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened)));
 
-	return enabled;
+	return happened;
 }
 
 static inline notrace void set_soft_enabled(unsigned long enable)
@@ -111,88 +111,167 @@ static inline notrace void set_soft_enabled(unsigned long enable)
 	: : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
 }
 
-static inline notrace void decrementer_check_overflow(void)
+static inline notrace int decrementer_check_overflow(void)
 {
 	u64 now = get_tb_or_rtc();
-	u64 *next_tb;
+	u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
 
-	preempt_disable();
-	next_tb = &__get_cpu_var(decrementers_next_tb);
-
 	if (now >= *next_tb)
 		set_dec(1);
-	preempt_enable();
+	return now >= *next_tb;
 }
 
-notrace void arch_local_irq_restore(unsigned long en)
+/* This is called whenever we are re-enabling interrupts
+ * and returns either 0 (nothing to do) or 500/900 if there's
+ * either an EE or a DEC to generate.
+ *
+ * This is called in two contexts: From arch_local_irq_restore()
+ * before soft-enabling interrupts, and from the exception exit
+ * path when returning from an interrupt from a soft-disabled to
+ * a soft enabled context. In both case we have interrupts hard
+ * disabled.
+ *
+ * We take care of only clearing the bits we handled in the
+ * PACA irq_happened field since we can only re-emit one at a
+ * time and we don't want to "lose" one.
+ */
+notrace unsigned int __check_irq_replay(void)
 {
 	/*
-	 * get_paca()->soft_enabled = en;
-	 * Is it ever valid to use local_irq_restore(0) when soft_enabled is 1?
-	 * That was allowed before, and in such a case we do need to take care
-	 * that gcc will set soft_enabled directly via r13, not choose to use
-	 * an intermediate register, lest we're preempted to a different cpu.
+	 * We use local_paca rather than get_paca() to avoid all
+	 * the debug_smp_processor_id() business in this low level
+	 * function
 	 */
-	set_soft_enabled(en);
-	if (!en)
-		return;
+	unsigned char happened = local_paca->irq_happened;
 
-#ifdef CONFIG_PPC_STD_MMU_64
-	if (firmware_has_feature(FW_FEATURE_ISERIES)) {
-		/*
-		 * Do we need to disable preemption here? Not really: in the
-		 * unlikely event that we're preempted to a different cpu in
-		 * between getting r13, loading its lppaca_ptr, and loading
-		 * its any_int, we might call iseries_handle_interrupts without
-		 * an interrupt pending on the new cpu, but that's no disaster,
-		 * is it? And the business of preempting us off the old cpu
-		 * would itself involve a local_irq_restore which handles the
-		 * interrupt to that cpu.
-		 *
-		 * But use "local_paca->lppaca_ptr" instead of "get_lppaca()"
-		 * to avoid any preemption checking added into get_paca().
-		 */
-		if (local_paca->lppaca_ptr->int_dword.any_int)
-			iseries_handle_interrupts();
+	/* Clear bit 0 which we wouldn't clear otherwise */
+	local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
+	/*
+	 * Force the delivery of pending soft-disabled interrupts on PS3.
+	 * Any HV call will have this side effect.
+	 */
+	if (firmware_has_feature(FW_FEATURE_PS3_LV1)) {
+		u64 tmp, tmp2;
+		lv1_get_version_info(&tmp, &tmp2);
 	}
-#endif /* CONFIG_PPC_STD_MMU_64 */
 
 	/*
-	 * if (get_paca()->hard_enabled) return;
-	 * But again we need to take care that gcc gets hard_enabled directly
-	 * via r13, not choose to use an intermediate register, lest we're
-	 * preempted to a different cpu in between the two instructions.
+	 * We may have missed a decrementer interrupt. We check the
+	 * decrementer itself rather than the paca irq_happened field
+	 * in case we also had a rollover while hard disabled
+	 */
+	local_paca->irq_happened &= ~PACA_IRQ_DEC;
+	if (decrementer_check_overflow())
+		return 0x900;
+
+	/* Finally check if an external interrupt happened */
+	local_paca->irq_happened &= ~PACA_IRQ_EE;
+	if (happened & PACA_IRQ_EE)
+		return 0x500;
+
+#ifdef CONFIG_PPC_BOOK3E
+	/* Finally check if an EPR external interrupt happened
+	 * this bit is typically set if we need to handle another
+	 * "edge" interrupt from within the MPIC "EPR" handler
+	 */
+	local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
+	if (happened & PACA_IRQ_EE_EDGE)
+		return 0x500;
+
+	local_paca->irq_happened &= ~PACA_IRQ_DBELL;
+	if (happened & PACA_IRQ_DBELL)
+		return 0x280;
+#endif /* CONFIG_PPC_BOOK3E */
+
+	/* There should be nothing left ! */
+	BUG_ON(local_paca->irq_happened != 0);
+
+	return 0;
+}
+
+notrace void arch_local_irq_restore(unsigned long en)
+{
+	unsigned char irq_happened;
+	unsigned int replay;
+
+	/* Write the new soft-enabled value */
+	set_soft_enabled(en);
+	if (!en)
+		return;
+	/*
+	 * From this point onward, we can take interrupts, preempt,
+	 * etc... unless we got hard-disabled. We check if an event
+	 * happened. If none happened, we know we can just return.
+	 *
+	 * We may have preempted before the check below, in which case
+	 * we are checking the "new" CPU instead of the old one. This
+	 * is only a problem if an event happened on the "old" CPU.
+	 *
+	 * External interrupt events on non-iseries will have caused
+	 * interrupts to be hard-disabled, so there is no problem, we
+	 * cannot have preempted.
+	 *
+	 * That leaves us with EEs on iSeries or decrementer interrupts,
+	 * which I decided to safely ignore. The preemption would have
+	 * itself been the result of an interrupt, upon which return we
+	 * will have checked for pending events on the old CPU.
 	 */
-	if (get_hard_enabled())
+	irq_happened = get_irq_happened();
+	if (!irq_happened)
 		return;
 
 	/*
-	 * Need to hard-enable interrupts here. Since currently disabled,
-	 * no need to take further asm precautions against preemption; but
-	 * use local_paca instead of get_paca() to avoid preemption checking.
+	 * We need to hard disable to get a trusted value from
+	 * __check_irq_replay(). We also need to soft-disable
+	 * again to avoid warnings in there due to the use of
+	 * per-cpu variables.
+	 *
+	 * We know that if the value in irq_happened is exactly 0x01
+	 * then we are already hard disabled (there are other less
+	 * common cases that we'll ignore for now), so we skip the
+	 * (expensive) mtmsrd.
 	 */
-	local_paca->hard_enabled = en;
+	if (unlikely(irq_happened != PACA_IRQ_HARD_DIS))
+		__hard_irq_disable();
+	set_soft_enabled(0);
 
 	/*
-	 * Trigger the decrementer if we have a pending event. Some processors
-	 * only trigger on edge transitions of the sign bit. We might also
-	 * have disabled interrupts long enough that the decrementer wrapped
-	 * to positive.
+	 * Check if anything needs to be re-emitted. We haven't
+	 * soft-enabled yet to avoid warnings in decrementer_check_overflow
+	 * accessing per-cpu variables
 	 */
-	decrementer_check_overflow();
+	replay = __check_irq_replay();
+
+	/* We can soft-enable now */
+	set_soft_enabled(1);
 
 	/*
-	 * Force the delivery of pending soft-disabled interrupts on PS3.
-	 * Any HV call will have this side effect.
+	 * And replay if we have to. This will return with interrupts
+	 * hard-enabled.
 	 */
-	if (firmware_has_feature(FW_FEATURE_PS3_LV1)) {
-		u64 tmp, tmp2;
-		lv1_get_version_info(&tmp, &tmp2);
+	if (replay) {
+		__replay_interrupt(replay);
+		return;
 	}
 
+	/* Finally, let's ensure we are hard enabled */
 	__hard_irq_enable();
 }
 EXPORT_SYMBOL(arch_local_irq_restore);
+
+/*
+ * This is specifically called by assembly code to re-enable interrupts
+ * if they are currently disabled. This is typically called before
+ * schedule() or do_signal() when returning to userspace. We do it
+ * in C to avoid the burden of dealing with lockdep etc...
+ */
+void restore_interrupts(void)
+{
+	if (irqs_disabled())
+		local_irq_enable();
+}
+
 #endif /* CONFIG_PPC64 */
 
 int arch_show_interrupts(struct seq_file *p, int prec)
@@ -360,8 +439,17 @@ void do_IRQ(struct pt_regs *regs)
 
 	check_stack_overflow();
 
+	/*
+	 * Query the platform PIC for the interrupt & ack it.
+	 *
+	 * This will typically lower the interrupt line to the CPU
+	 */
 	irq = ppc_md.get_irq();
 
+	/* We can hard enable interrupts now */
+	may_hard_irq_enable();
+
+	/* And finally process it */
 	if (irq != NO_IRQ && irq != NO_IRQ_IGNORE)
 		handle_one_irq(irq);
 	else if (irq != NO_IRQ_IGNORE)
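The do_IRQ() hunk above calls may_hard_irq_enable() without showing its
definition, which the patch adds elsewhere. Based on the commit message
(timer, decrementer and doorbell interrupts now hard-enable shortly
after removing the source of the interrupt), the following is a
hypothetical sketch of what such a helper plausibly does; every name
and detail below is an assumption, not the patch's actual
implementation.

/*
 * Hypothetical sketch of the idea behind may_hard_irq_enable() in
 * do_IRQ() above -- assumptions only, drawn from the commit message,
 * with stand-in names throughout.
 */
#include <stdio.h>

#define PACA_IRQ_HARD_DIS 0x01          /* patch-confirmed value */

static unsigned char irq_happened = PACA_IRQ_HARD_DIS;

static void hard_irq_enable_hw(void)    /* stand-in for the mtmsrd/wrteei path */
{
	printf("MSR:EE set, hardware interrupts open again\n");
}

static void may_hard_irq_enable(void)
{
	/* The PIC query in do_IRQ() acked the interrupt and lowered the
	 * line, so re-opening hardware interrupts is safe: drop the
	 * hard-disable bookkeeping and re-enable in the MSR. */
	irq_happened &= ~PACA_IRQ_HARD_DIS;
	hard_irq_enable_hw();
}

int main(void)
{
	may_hard_irq_enable();
	return 0;
}

The point of the design is that once ppc_md.get_irq() has acked the
PIC, the bulk of handle_one_irq() runs with hardware interrupts open,
so performance monitor interrupts can sample it.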