author		Kan Liang <kan.liang@intel.com>	2016-03-03 18:07:28 -0500
committer	Ingo Molnar <mingo@kernel.org>	2016-03-08 06:18:32 -0500
commit		c3d266c8a9838cc141b69548bc3b1b18808ae8c4 (patch)
tree		6ba1a08030d635de03e934559805ad680dbe157f
parent		e72daf3f4d764c47fb71c9bdc7f9c54a503825b1 (diff)
perf/x86/intel: Fix PEBS warning by only restoring active PMU in pmi
This patch fixes a PEBS warning found in my stress test. The following
perf command can easily trigger the PEBS warning or a spurious NMI error
on Skylake/Broadwell/Haswell platforms:

  sudo perf record -e 'cpu/umask=0x04,event=0xc4/pp,cycles,branches,ref-cycles,cache-misses,cache-references' --call-graph fp -b -c1000 -a

Also the NMI watchdog must be enabled.

For this case there are more events than counters, so perf has to
multiplex them. perf_mux_hrtimer_handler() does perf_pmu_disable(),
schedules out the old events, does rotate_ctx, schedules in the new
events and finally does perf_pmu_enable().

If the old events include a precise event, MSR_IA32_PEBS_ENABLE must be
cleared by perf_pmu_disable() and must stay 0 until perf_pmu_enable() is
called and the new event is also a precise event.

However, there is a corner case which can restore PEBS_ENABLE to a stale
value during that window. In perf_pmu_disable(), GLOBAL_CTRL is set to 0
to stop overflows and the PMIs that follow them. But a PMI from an
earlier overflow may still be pending and cannot be stopped, so the
kernel can still take a PMI even after GLOBAL_CTRL has been cleared. At
the end of the PMI handler, __intel_pmu_enable_all() is called, which
restores the stale values if the old events have not been scheduled out
yet.

Once the stale PEBS value has been written, it cannot be corrected if the
new events are non-precise: pebs_enabled is then 0, so
x86_pmu.enable_all() ignores the MSR_IA32_PEBS_ENABLE setting. As a
result, the next NMI arrives with the stale PEBS_ENABLE set and triggers
the PEBS warning.

The pending PMI after enabled=0 is harmless as long as the NMI handler
does not change any state. This patch checks cpuc->enabled in the PMI
handler and only restores the state when the PMU is active.

Here is the dump:

  Call Trace:
   <NMI>  [<ffffffff813c3a2e>] dump_stack+0x63/0x85
   [<ffffffff810a46f2>] warn_slowpath_common+0x82/0xc0
   [<ffffffff810a483a>] warn_slowpath_null+0x1a/0x20
   [<ffffffff8100fe2e>] intel_pmu_drain_pebs_nhm+0x2be/0x320
   [<ffffffff8100caa9>] intel_pmu_handle_irq+0x279/0x460
   [<ffffffff810639b6>] ? native_write_msr_safe+0x6/0x40
   [<ffffffff811f290d>] ? vunmap_page_range+0x20d/0x330
   [<ffffffff811f2f11>] ? unmap_kernel_range_noflush+0x11/0x20
   [<ffffffff8148379f>] ? ghes_copy_tofrom_phys+0x10f/0x2a0
   [<ffffffff814839c8>] ? ghes_read_estatus+0x98/0x170
   [<ffffffff81005a7d>] perf_event_nmi_handler+0x2d/0x50
   [<ffffffff810310b9>] nmi_handle+0x69/0x120
   [<ffffffff810316f6>] default_do_nmi+0xe6/0x100
   [<ffffffff810317f2>] do_nmi+0xe2/0x130
   [<ffffffff817aea71>] end_repeat_nmi+0x1a/0x1e
   [<ffffffff810639b6>] ? native_write_msr_safe+0x6/0x40
   [<ffffffff810639b6>] ? native_write_msr_safe+0x6/0x40
   [<ffffffff810639b6>] ? native_write_msr_safe+0x6/0x40
   <<EOE>>  <IRQ>  [<ffffffff81006df8>] ? x86_perf_event_set_period+0xd8/0x180
   [<ffffffff81006eec>] x86_pmu_start+0x4c/0x100
   [<ffffffff8100722d>] x86_pmu_enable+0x28d/0x300
   [<ffffffff811994d7>] perf_pmu_enable.part.81+0x7/0x10
   [<ffffffff8119cb70>] perf_mux_hrtimer_handler+0x200/0x280
   [<ffffffff8119c970>] ? __perf_install_in_context+0xc0/0xc0
   [<ffffffff8110f92d>] __hrtimer_run_queues+0xfd/0x280
   [<ffffffff811100d8>] hrtimer_interrupt+0xa8/0x190
   [<ffffffff81199080>] ? __perf_read_group_add.part.61+0x1a0/0x1a0
   [<ffffffff81051bd8>] local_apic_timer_interrupt+0x38/0x60
   [<ffffffff817af01d>] smp_apic_timer_interrupt+0x3d/0x50
   [<ffffffff817ad15c>] apic_timer_interrupt+0x8c/0xa0
   <EOI>  [<ffffffff81199080>] ? __perf_read_group_add.part.61+0x1a0/0x1a0
   [<ffffffff81123de5>] ? smp_call_function_single+0xd5/0x130
   [<ffffffff81123ddb>] ? smp_call_function_single+0xcb/0x130
   [<ffffffff81199080>] ? __perf_read_group_add.part.61+0x1a0/0x1a0
   [<ffffffff8119765a>] event_function_call+0x10a/0x120
   [<ffffffff8119c660>] ? ctx_resched+0x90/0x90
   [<ffffffff811971e0>] ? cpu_clock_event_read+0x30/0x30
   [<ffffffff811976d0>] ? _perf_event_disable+0x60/0x60
   [<ffffffff8119772b>] _perf_event_enable+0x5b/0x70
   [<ffffffff81197388>] perf_event_for_each_child+0x38/0xa0
   [<ffffffff811976d0>] ? _perf_event_disable+0x60/0x60
   [<ffffffff811a0ffd>] perf_ioctl+0x12d/0x3c0
   [<ffffffff8134d855>] ? selinux_file_ioctl+0x95/0x1e0
   [<ffffffff8124a3a1>] do_vfs_ioctl+0xa1/0x5a0
   [<ffffffff81036d29>] ? sched_clock+0x9/0x10
   [<ffffffff8124a919>] SyS_ioctl+0x79/0x90
   [<ffffffff817ac4b2>] entry_SYSCALL_64_fastpath+0x1a/0xa4
  ---[ end trace aef202839fe9a71d ]---

  Uhhuh. NMI received for unknown reason 2d on CPU 2.
  Do you have a strange power saving mode enabled?

Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1457046448-6184-1-git-send-email-kan.liang@intel.com
[ Fixed various typos and other small details. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
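To illustrate the fix outside the kernel, here is a minimal userspace C
sketch of the guard described above. Every name in it (fake_cpuc,
pebs_enable_msr, pmi_handler_done) is a hypothetical stand-in for the
real cpuc, MSR_IA32_PEBS_ENABLE and the tail of the PMI handler; it
sketches the idea and is not kernel code.

#include <stdio.h>

/* Hypothetical stand-ins for the kernel's per-CPU PMU bookkeeping. */
struct fake_cpuc {
	int enabled;		/* mirrors cpuc->enabled */
	int pebs_enabled;	/* software copy of the PEBS enable bits */
};

static int pebs_enable_msr;	/* plays the role of MSR_IA32_PEBS_ENABLE */

static void enable_all(struct fake_cpuc *c)
{
	pebs_enable_msr = c->pebs_enabled;	/* restore from the software copy */
}

/* Tail of the PMI handler: with the fix, state is restored only while
 * the PMU is marked active. */
static void pmi_handler_done(struct fake_cpuc *c)
{
	if (c->enabled)		/* the fix: skip the restore after enabled=0 */
		enable_all(c);
}

int main(void)
{
	/* perf_pmu_disable() has run: enabled is 0, but the software copy
	 * still holds the old precise event's enable bit. */
	struct fake_cpuc c = { .enabled = 0, .pebs_enabled = 1 };

	pmi_handler_done(&c);	/* the pending PMI lands here */
	printf("PEBS MSR after late PMI: %d\n", pebs_enable_msr);
	return 0;
}

Without the cpuc->enabled check, the late PMI would write the stale
value 1 back into the MSR; with it, the sketch prints 0, matching the
behaviour the patch wants.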
-rw-r--r--	arch/x86/events/core.c		13
-rw-r--r--	arch/x86/events/intel/core.c	15
-rw-r--r--	arch/x86/events/intel/knc.c	4
3 files changed, 29 insertions(+), 3 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 7402c8182813..5e830d0c95c9 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -597,6 +597,19 @@ void x86_pmu_disable_all(void)
 	}
 }
 
+/*
+ * There may be PMI landing after enabled=0. The PMI hitting could be before or
+ * after disable_all.
+ *
+ * If PMI hits before disable_all, the PMU will be disabled in the NMI handler.
+ * It will not be re-enabled in the NMI handler again, because enabled=0. After
+ * handling the NMI, disable_all will be called, which will not change the
+ * state either. If PMI hits after disable_all, the PMU is already disabled
+ * before entering NMI handler. The NMI handler will not change the state
+ * either.
+ *
+ * So either situation is harmless.
+ */
 static void x86_pmu_disable(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
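The new comment enumerates the two possible orderings of a late PMI
relative to disable_all(). That case analysis can be sanity-checked with
a small assert-based userspace sketch; pmu_enabled and global_ctrl are
hypothetical stand-ins for cpuc->enabled and MSR_CORE_PERF_GLOBAL_CTRL,
not the kernel's actual variables.

#include <assert.h>

static int pmu_enabled;		/* stands in for cpuc->enabled */
static int global_ctrl = 0x3;	/* stands in for MSR_CORE_PERF_GLOBAL_CTRL */

static void disable_all(void)
{
	global_ctrl = 0;	/* writing 0 is idempotent */
}

/* NMI handler: masks the counters, then (with the fix) re-enables them
 * only while the PMU is still marked active. */
static void pmi(void)
{
	global_ctrl = 0;
	if (pmu_enabled)
		global_ctrl = 0x3;
}

int main(void)
{
	/* Case 1: the PMI hits before disable_all(). enabled is already 0,
	 * so the handler does not re-enable, and the later disable_all()
	 * does not change the state either. */
	pmu_enabled = 0;
	pmi();
	disable_all();
	assert(global_ctrl == 0);

	/* Case 2: the PMI hits after disable_all(). The PMU is disabled on
	 * entry and the handler leaves the state unchanged. */
	disable_all();
	pmi();
	assert(global_ctrl == 0);

	return 0;	/* both orderings end with the PMU disabled */
}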
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a7ec685657a5..b3f6349a33b5 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1502,7 +1502,15 @@ static __initconst const u64 knl_hw_cache_extra_regs
 };
 
 /*
- * Use from PMIs where the LBRs are already disabled.
+ * Used from PMIs where the LBRs are already disabled.
+ *
+ * This function could be called consecutively. It is required to remain in
+ * disabled state if called consecutively.
+ *
+ * During consecutive calls, the same disable value will be written to related
+ * registers, so the PMU state remains unchanged. hw.state in
+ * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive
+ * calls.
  */
 static void __intel_pmu_disable_all(void)
 {
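The "called consecutively" requirement in this comment amounts to
idempotence: every call writes the same disable value, so a second call
cannot change the PMU state. A tiny hypothetical sketch of that
property, with disable_state standing in for the registers written by
__intel_pmu_disable_all():

#include <assert.h>

static int disable_state = 0x3;	/* stand-in for GLOBAL_CTRL/PEBS_ENABLE */

/* Writes the same value on every call, so calling it twice in a row
 * leaves the same disabled state as calling it once. */
static void fake_disable_all(void)
{
	disable_state = 0;
}

int main(void)
{
	fake_disable_all();
	assert(disable_state == 0);
	fake_disable_all();	/* consecutive call: state unchanged */
	assert(disable_state == 0);
	return 0;
}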
@@ -1929,7 +1937,10 @@ again:
 		goto again;
 
 done:
-	__intel_pmu_enable_all(0, true);
+	/* Only restore PMU state when it's active. See x86_pmu_disable(). */
+	if (cpuc->enabled)
+		__intel_pmu_enable_all(0, true);
+
 	/*
 	 * Only unmask the NMI after the overflow counters
 	 * have been reset. This avoids spurious NMIs on
diff --git a/arch/x86/events/intel/knc.c b/arch/x86/events/intel/knc.c
index 206226e08f49..548d5f774b07 100644
--- a/arch/x86/events/intel/knc.c
+++ b/arch/x86/events/intel/knc.c
@@ -263,7 +263,9 @@ again:
 		goto again;
 
 done:
-	knc_pmu_enable_all(0);
+	/* Only restore PMU state when it's active. See x86_pmu_disable(). */
+	if (cpuc->enabled)
+		knc_pmu_enable_all(0);
 
 	return handled;
 }