diff options
author | Paul Mackerras <paulus@samba.org> | 2010-04-13 16:46:04 -0400 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2010-05-12 00:34:00 -0400 |
commit | 0fe1ac48bef018bed896307cd12f6ca9b5e704ab (patch) | |
tree | 6f5e68619798312ee808f23c1a0cc5799a131545 | |
parent | cea0d767c29669bf89f86e4aee46ef462d2ebae8 (diff) |
powerpc/perf_event: Fix oops due to perf_event_do_pending call
Anton Blanchard found that large POWER systems would occasionally
crash in the exception exit path when profiling with perf_events.
The symptom was that an interrupt would occur late in the exit path
when the MSR[RI] (recoverable interrupt) bit was clear. Interrupts
should be hard-disabled at this point but they were enabled. Because
the interrupt was not recoverable the system panicked.
The reason is that the exception exit path was calling
perf_event_do_pending after hard-disabling interrupts, and
perf_event_do_pending will re-enable interrupts.
The simplest and cleanest fix for this is to use the same mechanism
that 32-bit powerpc does, namely to cause a self-IPI by setting the
decrementer to 1. This means we can remove the tests in the exception
exit path and raw_local_irq_restore.
This also makes sure that the call to perf_event_do_pending from
timer_interrupt() happens within irq_enter/irq_exit. (Note that
calling perf_event_do_pending from timer_interrupt does not mean that
there is a possible 1/HZ latency; setting the decrementer to 1 ensures
that the timer interrupt will happen immediately, i.e. within one
timebase tick, which is a few nanoseconds or 10s of nanoseconds.)
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: stable@kernel.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r-- | arch/powerpc/include/asm/hw_irq.h | 38 | ||||
-rw-r--r-- | arch/powerpc/kernel/asm-offsets.c | 1 | ||||
-rw-r--r-- | arch/powerpc/kernel/entry_64.S | 9 | ||||
-rw-r--r-- | arch/powerpc/kernel/irq.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kernel/time.c | 60 |
5 files changed, 48 insertions, 66 deletions
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 9f4c9d4f5803..bd100fcf40d0 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h | |||
@@ -130,43 +130,5 @@ static inline int irqs_disabled_flags(unsigned long flags) | |||
130 | */ | 130 | */ |
131 | struct irq_chip; | 131 | struct irq_chip; |
132 | 132 | ||
133 | #ifdef CONFIG_PERF_EVENTS | ||
134 | |||
135 | #ifdef CONFIG_PPC64 | ||
136 | static inline unsigned long test_perf_event_pending(void) | ||
137 | { | ||
138 | unsigned long x; | ||
139 | |||
140 | asm volatile("lbz %0,%1(13)" | ||
141 | : "=r" (x) | ||
142 | : "i" (offsetof(struct paca_struct, perf_event_pending))); | ||
143 | return x; | ||
144 | } | ||
145 | |||
146 | static inline void set_perf_event_pending(void) | ||
147 | { | ||
148 | asm volatile("stb %0,%1(13)" : : | ||
149 | "r" (1), | ||
150 | "i" (offsetof(struct paca_struct, perf_event_pending))); | ||
151 | } | ||
152 | |||
153 | static inline void clear_perf_event_pending(void) | ||
154 | { | ||
155 | asm volatile("stb %0,%1(13)" : : | ||
156 | "r" (0), | ||
157 | "i" (offsetof(struct paca_struct, perf_event_pending))); | ||
158 | } | ||
159 | #endif /* CONFIG_PPC64 */ | ||
160 | |||
161 | #else /* CONFIG_PERF_EVENTS */ | ||
162 | |||
163 | static inline unsigned long test_perf_event_pending(void) | ||
164 | { | ||
165 | return 0; | ||
166 | } | ||
167 | |||
168 | static inline void clear_perf_event_pending(void) {} | ||
169 | #endif /* CONFIG_PERF_EVENTS */ | ||
170 | |||
171 | #endif /* __KERNEL__ */ | 133 | #endif /* __KERNEL__ */ |
172 | #endif /* _ASM_POWERPC_HW_IRQ_H */ | 134 | #endif /* _ASM_POWERPC_HW_IRQ_H */ |
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 957ceb7059c5..c09138d150d4 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -133,7 +133,6 @@ int main(void) | |||
133 | DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); | 133 | DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); |
134 | DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); | 134 | DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); |
135 | DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); | 135 | DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); |
136 | DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending)); | ||
137 | DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); | 136 | DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); |
138 | #ifdef CONFIG_PPC_MM_SLICES | 137 | #ifdef CONFIG_PPC_MM_SLICES |
139 | DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, | 138 | DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, |
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 07109d843787..42e9d908914a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S | |||
@@ -556,15 +556,6 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) | |||
556 | 2: | 556 | 2: |
557 | TRACE_AND_RESTORE_IRQ(r5); | 557 | TRACE_AND_RESTORE_IRQ(r5); |
558 | 558 | ||
559 | #ifdef CONFIG_PERF_EVENTS | ||
560 | /* check paca->perf_event_pending if we're enabling ints */ | ||
561 | lbz r3,PACAPERFPEND(r13) | ||
562 | and. r3,r3,r5 | ||
563 | beq 27f | ||
564 | bl .perf_event_do_pending | ||
565 | 27: | ||
566 | #endif /* CONFIG_PERF_EVENTS */ | ||
567 | |||
568 | /* extract EE bit and use it to restore paca->hard_enabled */ | 559 | /* extract EE bit and use it to restore paca->hard_enabled */ |
569 | ld r3,_MSR(r1) | 560 | ld r3,_MSR(r1) |
570 | rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ | 561 | rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ |
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 64f6f2031c22..066bd31551d5 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c | |||
@@ -53,7 +53,6 @@ | |||
53 | #include <linux/bootmem.h> | 53 | #include <linux/bootmem.h> |
54 | #include <linux/pci.h> | 54 | #include <linux/pci.h> |
55 | #include <linux/debugfs.h> | 55 | #include <linux/debugfs.h> |
56 | #include <linux/perf_event.h> | ||
57 | 56 | ||
58 | #include <asm/uaccess.h> | 57 | #include <asm/uaccess.h> |
59 | #include <asm/system.h> | 58 | #include <asm/system.h> |
@@ -145,11 +144,6 @@ notrace void raw_local_irq_restore(unsigned long en) | |||
145 | } | 144 | } |
146 | #endif /* CONFIG_PPC_STD_MMU_64 */ | 145 | #endif /* CONFIG_PPC_STD_MMU_64 */ |
147 | 146 | ||
148 | if (test_perf_event_pending()) { | ||
149 | clear_perf_event_pending(); | ||
150 | perf_event_do_pending(); | ||
151 | } | ||
152 | |||
153 | /* | 147 | /* |
154 | * if (get_paca()->hard_enabled) return; | 148 | * if (get_paca()->hard_enabled) return; |
155 | * But again we need to take care that gcc gets hard_enabled directly | 149 | * But again we need to take care that gcc gets hard_enabled directly |
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 1b16b9a3e49a..0441bbdadbd1 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c | |||
@@ -532,25 +532,60 @@ void __init iSeries_time_init_early(void) | |||
532 | } | 532 | } |
533 | #endif /* CONFIG_PPC_ISERIES */ | 533 | #endif /* CONFIG_PPC_ISERIES */ |
534 | 534 | ||
535 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32) | 535 | #ifdef CONFIG_PERF_EVENTS |
536 | DEFINE_PER_CPU(u8, perf_event_pending); | ||
537 | 536 | ||
538 | void set_perf_event_pending(void) | 537 | /* |
538 | * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... | ||
539 | */ | ||
540 | #ifdef CONFIG_PPC64 | ||
541 | static inline unsigned long test_perf_event_pending(void) | ||
539 | { | 542 | { |
540 | get_cpu_var(perf_event_pending) = 1; | 543 | unsigned long x; |
541 | set_dec(1); | 544 | |
542 | put_cpu_var(perf_event_pending); | 545 | asm volatile("lbz %0,%1(13)" |
546 | : "=r" (x) | ||
547 | : "i" (offsetof(struct paca_struct, perf_event_pending))); | ||
548 | return x; | ||
543 | } | 549 | } |
544 | 550 | ||
551 | static inline void set_perf_event_pending_flag(void) | ||
552 | { | ||
553 | asm volatile("stb %0,%1(13)" : : | ||
554 | "r" (1), | ||
555 | "i" (offsetof(struct paca_struct, perf_event_pending))); | ||
556 | } | ||
557 | |||
558 | static inline void clear_perf_event_pending(void) | ||
559 | { | ||
560 | asm volatile("stb %0,%1(13)" : : | ||
561 | "r" (0), | ||
562 | "i" (offsetof(struct paca_struct, perf_event_pending))); | ||
563 | } | ||
564 | |||
565 | #else /* 32-bit */ | ||
566 | |||
567 | DEFINE_PER_CPU(u8, perf_event_pending); | ||
568 | |||
569 | #define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 | ||
545 | #define test_perf_event_pending() __get_cpu_var(perf_event_pending) | 570 | #define test_perf_event_pending() __get_cpu_var(perf_event_pending) |
546 | #define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 | 571 | #define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 |
547 | 572 | ||
548 | #else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ | 573 | #endif /* 32 vs 64 bit */ |
574 | |||
575 | void set_perf_event_pending(void) | ||
576 | { | ||
577 | preempt_disable(); | ||
578 | set_perf_event_pending_flag(); | ||
579 | set_dec(1); | ||
580 | preempt_enable(); | ||
581 | } | ||
582 | |||
583 | #else /* CONFIG_PERF_EVENTS */ | ||
549 | 584 | ||
550 | #define test_perf_event_pending() 0 | 585 | #define test_perf_event_pending() 0 |
551 | #define clear_perf_event_pending() | 586 | #define clear_perf_event_pending() |
552 | 587 | ||
553 | #endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ | 588 | #endif /* CONFIG_PERF_EVENTS */ |
554 | 589 | ||
555 | /* | 590 | /* |
556 | * For iSeries shared processors, we have to let the hypervisor | 591 | * For iSeries shared processors, we have to let the hypervisor |
@@ -582,10 +617,6 @@ void timer_interrupt(struct pt_regs * regs) | |||
582 | set_dec(DECREMENTER_MAX); | 617 | set_dec(DECREMENTER_MAX); |
583 | 618 | ||
584 | #ifdef CONFIG_PPC32 | 619 | #ifdef CONFIG_PPC32 |
585 | if (test_perf_event_pending()) { | ||
586 | clear_perf_event_pending(); | ||
587 | perf_event_do_pending(); | ||
588 | } | ||
589 | if (atomic_read(&ppc_n_lost_interrupts) != 0) | 620 | if (atomic_read(&ppc_n_lost_interrupts) != 0) |
590 | do_IRQ(regs); | 621 | do_IRQ(regs); |
591 | #endif | 622 | #endif |
@@ -604,6 +635,11 @@ void timer_interrupt(struct pt_regs * regs) | |||
604 | 635 | ||
605 | calculate_steal_time(); | 636 | calculate_steal_time(); |
606 | 637 | ||
638 | if (test_perf_event_pending()) { | ||
639 | clear_perf_event_pending(); | ||
640 | perf_event_do_pending(); | ||
641 | } | ||
642 | |||
607 | #ifdef CONFIG_PPC_ISERIES | 643 | #ifdef CONFIG_PPC_ISERIES |
608 | if (firmware_has_feature(FW_FEATURE_ISERIES)) | 644 | if (firmware_has_feature(FW_FEATURE_ISERIES)) |
609 | get_lppaca()->int_dword.fields.decr_int = 0; | 645 | get_lppaca()->int_dword.fields.decr_int = 0; |