author     Jiri Olsa <jolsa@kernel.org>     2016-12-28 08:31:03 -0500
committer  Ingo Molnar <mingo@kernel.org>  2017-01-14 05:06:49 -0500
commit     475113d937adfd150eb82b5e2c5507125a68e7af (patch)
tree       8c5b0c58d3308738b440e5e1ec8e479ecf6ac11e
parent     321027c1fe77f892f4ea07846aeae08cefbbb290 (diff)
perf/x86/intel: Account interrupts for PEBS errors
It's possible to set up PEBS events to get only errors and not any data,
like on SNB-X (model 45) and IVB-EP (model 62) via 2 perf commands running
simultaneously:

    taskset -c 1 ./perf record -c 4 -e branches:pp -j any -C 10

This leads to a soft lockup, because the error path of
intel_pmu_drain_pebs_nhm() does not account event->hw.interrupts for error
PEBS interrupts, so if you are getting ONLY errors there is no way to stop
the event when it is over the max_samples_per_tick limit:

  NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [perf_fuzzer:5816]
  ...
  RIP: 0010:[<ffffffff81159232>]  [<ffffffff81159232>] smp_call_function_single+0xe2/0x140
  ...
  Call Trace:
   ? trace_hardirqs_on_caller+0xf5/0x1b0
   ? perf_cgroup_attach+0x70/0x70
   perf_install_in_context+0x199/0x1b0
   ? ctx_resched+0x90/0x90
   SYSC_perf_event_open+0x641/0xf90
   SyS_perf_event_open+0x9/0x10
   do_syscall_64+0x6c/0x1f0
   entry_SYSCALL64_slow_path+0x25/0x25

Add perf_event_account_interrupt(), which does the interrupt and frequency
checks, and call it from intel_pmu_drain_pebs_nhm()'s error path. We keep
the pending_kill and pending_wakeup logic only in the __perf_event_overflow()
path, because they make sense only if there is any data to deliver.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vince@deater.net>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1482931866-6018-2-git-send-email-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
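In short, the patch factors the throttle bookkeeping out of
__perf_event_overflow() into a helper that a PMU driver can call when a PMI
produced no deliverable data. A minimal sketch of the resulting call
pattern, condensed from the diff below:

	/* New helper: account one PMI against the event's throttle budget;
	 * returns nonzero once the event has hit max_samples_per_tick and
	 * must be stopped by the caller. */
	extern int perf_event_account_interrupt(struct perf_event *event);

	/* PEBS drain error path (sketch): the samples were dropped, but the
	 * interrupt still has to be counted, or throttling never triggers. */
	if (error[bit]) {
		perf_log_lost_samples(event, error[bit]);
		if (perf_event_account_interrupt(event))
			x86_pmu_stop(event, 0);
	}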
-rw-r--r--  arch/x86/events/intel/ds.c  |  6
-rw-r--r--  include/linux/perf_event.h  |  1
-rw-r--r--  kernel/events/core.c        | 47
3 files changed, 37 insertions, 17 deletions
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index be202390bbd3..9dfeeeca0ea8 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			continue;
 
 		/* log dropped samples number */
-		if (error[bit])
+		if (error[bit]) {
 			perf_log_lost_samples(event, error[bit]);
 
+			if (perf_event_account_interrupt(event))
+				x86_pmu_stop(event, 0);
+		}
+
 		if (counts[bit]) {
 			__intel_pmu_pebs_event(event, iregs, base,
 					       top, bit, counts[bit]);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4741ecdb9817..78ed8105e64d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event);
 extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_disable_inatomic(struct perf_event *event);
 extern void perf_event_task_tick(void);
+extern int perf_event_account_interrupt(struct perf_event *event);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index cbc5937265da..110b38a58493 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7060,25 +7060,12 @@ static void perf_log_itrace_start(struct perf_event *event)
 	perf_output_end(&handle);
 }
 
-/*
- * Generic event overflow handling, sampling.
- */
-
-static int __perf_event_overflow(struct perf_event *event,
-				   int throttle, struct perf_sample_data *data,
-				   struct pt_regs *regs)
+static int
+__perf_event_account_interrupt(struct perf_event *event, int throttle)
 {
-	int events = atomic_read(&event->event_limit);
 	struct hw_perf_event *hwc = &event->hw;
-	u64 seq;
 	int ret = 0;
-
-	/*
-	 * Non-sampling counters might still use the PMI to fold short
-	 * hardware counters, ignore those.
-	 */
-	if (unlikely(!is_sampling_event(event)))
-		return 0;
+	u64 seq;
 
 	seq = __this_cpu_read(perf_throttled_seq);
 	if (seq != hwc->interrupts_seq) {
@@ -7106,6 +7093,34 @@ static int __perf_event_overflow(struct perf_event *event,
 		perf_adjust_period(event, delta, hwc->last_period, true);
 	}
 
+	return ret;
+}
+
+int perf_event_account_interrupt(struct perf_event *event)
+{
+	return __perf_event_account_interrupt(event, 1);
+}
+
+/*
+ * Generic event overflow handling, sampling.
+ */
+
+static int __perf_event_overflow(struct perf_event *event,
+				   int throttle, struct perf_sample_data *data,
+				   struct pt_regs *regs)
+{
+	int events = atomic_read(&event->event_limit);
+	int ret = 0;
+
+	/*
+	 * Non-sampling counters might still use the PMI to fold short
+	 * hardware counters, ignore those.
+	 */
+	if (unlikely(!is_sampling_event(event)))
+		return 0;
+
+	ret = __perf_event_account_interrupt(event, throttle);
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
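The throttle and frequency checks themselves sit between the two core.c
hunks and are unchanged, so the diff elides them. Roughly, and assuming the
kernel implementation of this era (the exact body is not shown in this
patch), the code that now forms __perf_event_account_interrupt() does:

	seq = __this_cpu_read(perf_throttled_seq);
	if (seq != hwc->interrupts_seq) {
		/* first PMI in a new throttle period: restart the count */
		hwc->interrupts_seq = seq;
		hwc->interrupts = 1;
	} else {
		hwc->interrupts++;
		if (unlikely(throttle &&
			     hwc->interrupts >= max_samples_per_tick)) {
			/* budget exhausted: mark throttled, tell caller to stop */
			hwc->interrupts = MAX_INTERRUPTS;
			perf_log_throttle(event, 0);
			ret = 1;
		}
	}

	if (event->attr.freq) {
		/* frequency mode: re-derive the period from the observed rate */
		...
		perf_adjust_period(event, delta, hwc->last_period, true);
	}

This is why calling the new helper from the PEBS error path is enough to
break the lockup: even with no data to deliver, hwc->interrupts keeps
counting, and once it crosses max_samples_per_tick the helper returns 1 and
the driver stops the event.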