diff options
author | Jiri Olsa <jolsa@kernel.org> | 2016-12-28 08:31:03 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2017-01-14 05:06:49 -0500 |
commit | 475113d937adfd150eb82b5e2c5507125a68e7af (patch) | |
tree | 8c5b0c58d3308738b440e5e1ec8e479ecf6ac11e | |
parent | 321027c1fe77f892f4ea07846aeae08cefbbb290 (diff) |
perf/x86/intel: Account interrupts for PEBS errors
It's possible to set up PEBS events to get only errors and not
any data, like on SNB-X (model 45) and IVB-EP (model 62)
via 2 perf commands running simultaneously:
taskset -c 1 ./perf record -c 4 -e branches:pp -j any -C 10
This leads to a soft lockup, because the error path of the
intel_pmu_drain_pebs_nhm() does not account event->hw.interrupt
for error PEBS interrupts, so in case you're getting ONLY
errors you don't have a way to stop the event when it's over
the max_samples_per_tick limit:
NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [perf_fuzzer:5816]
...
RIP: 0010:[<ffffffff81159232>] [<ffffffff81159232>] smp_call_function_single+0xe2/0x140
...
Call Trace:
? trace_hardirqs_on_caller+0xf5/0x1b0
? perf_cgroup_attach+0x70/0x70
perf_install_in_context+0x199/0x1b0
? ctx_resched+0x90/0x90
SYSC_perf_event_open+0x641/0xf90
SyS_perf_event_open+0x9/0x10
do_syscall_64+0x6c/0x1f0
entry_SYSCALL64_slow_path+0x25/0x25
Add perf_event_account_interrupt() which does the interrupt
and frequency checks and call it from intel_pmu_drain_pebs_nhm()'s
error path.
We keep the pending_kill and pending_wakeup logic only in the
__perf_event_overflow() path, because they make sense only if
there's any data to deliver.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vince@deater.net>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1482931866-6018-2-git-send-email-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | arch/x86/events/intel/ds.c | 6 | ||||
-rw-r--r-- | include/linux/perf_event.h | 1 | ||||
-rw-r--r-- | kernel/events/core.c | 47 |
3 files changed, 37 insertions, 17 deletions
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index be202390bbd3..9dfeeeca0ea8 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c | |||
@@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | |||
1389 | continue; | 1389 | continue; |
1390 | 1390 | ||
1391 | /* log dropped samples number */ | 1391 | /* log dropped samples number */ |
1392 | if (error[bit]) | 1392 | if (error[bit]) { |
1393 | perf_log_lost_samples(event, error[bit]); | 1393 | perf_log_lost_samples(event, error[bit]); |
1394 | 1394 | ||
1395 | if (perf_event_account_interrupt(event)) | ||
1396 | x86_pmu_stop(event, 0); | ||
1397 | } | ||
1398 | |||
1395 | if (counts[bit]) { | 1399 | if (counts[bit]) { |
1396 | __intel_pmu_pebs_event(event, iregs, base, | 1400 | __intel_pmu_pebs_event(event, iregs, base, |
1397 | top, bit, counts[bit]); | 1401 | top, bit, counts[bit]); |
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4741ecdb9817..78ed8105e64d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event); | |||
1259 | extern void perf_event_disable_local(struct perf_event *event); | 1259 | extern void perf_event_disable_local(struct perf_event *event); |
1260 | extern void perf_event_disable_inatomic(struct perf_event *event); | 1260 | extern void perf_event_disable_inatomic(struct perf_event *event); |
1261 | extern void perf_event_task_tick(void); | 1261 | extern void perf_event_task_tick(void); |
1262 | extern int perf_event_account_interrupt(struct perf_event *event); | ||
1262 | #else /* !CONFIG_PERF_EVENTS: */ | 1263 | #else /* !CONFIG_PERF_EVENTS: */ |
1263 | static inline void * | 1264 | static inline void * |
1264 | perf_aux_output_begin(struct perf_output_handle *handle, | 1265 | perf_aux_output_begin(struct perf_output_handle *handle, |
diff --git a/kernel/events/core.c b/kernel/events/core.c index cbc5937265da..110b38a58493 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -7060,25 +7060,12 @@ static void perf_log_itrace_start(struct perf_event *event) | |||
7060 | perf_output_end(&handle); | 7060 | perf_output_end(&handle); |
7061 | } | 7061 | } |
7062 | 7062 | ||
7063 | /* | 7063 | static int |
7064 | * Generic event overflow handling, sampling. | 7064 | __perf_event_account_interrupt(struct perf_event *event, int throttle) |
7065 | */ | ||
7066 | |||
7067 | static int __perf_event_overflow(struct perf_event *event, | ||
7068 | int throttle, struct perf_sample_data *data, | ||
7069 | struct pt_regs *regs) | ||
7070 | { | 7065 | { |
7071 | int events = atomic_read(&event->event_limit); | ||
7072 | struct hw_perf_event *hwc = &event->hw; | 7066 | struct hw_perf_event *hwc = &event->hw; |
7073 | u64 seq; | ||
7074 | int ret = 0; | 7067 | int ret = 0; |
7075 | 7068 | u64 seq; | |
7076 | /* | ||
7077 | * Non-sampling counters might still use the PMI to fold short | ||
7078 | * hardware counters, ignore those. | ||
7079 | */ | ||
7080 | if (unlikely(!is_sampling_event(event))) | ||
7081 | return 0; | ||
7082 | 7069 | ||
7083 | seq = __this_cpu_read(perf_throttled_seq); | 7070 | seq = __this_cpu_read(perf_throttled_seq); |
7084 | if (seq != hwc->interrupts_seq) { | 7071 | if (seq != hwc->interrupts_seq) { |
@@ -7106,6 +7093,34 @@ static int __perf_event_overflow(struct perf_event *event, | |||
7106 | perf_adjust_period(event, delta, hwc->last_period, true); | 7093 | perf_adjust_period(event, delta, hwc->last_period, true); |
7107 | } | 7094 | } |
7108 | 7095 | ||
7096 | return ret; | ||
7097 | } | ||
7098 | |||
7099 | int perf_event_account_interrupt(struct perf_event *event) | ||
7100 | { | ||
7101 | return __perf_event_account_interrupt(event, 1); | ||
7102 | } | ||
7103 | |||
7104 | /* | ||
7105 | * Generic event overflow handling, sampling. | ||
7106 | */ | ||
7107 | |||
7108 | static int __perf_event_overflow(struct perf_event *event, | ||
7109 | int throttle, struct perf_sample_data *data, | ||
7110 | struct pt_regs *regs) | ||
7111 | { | ||
7112 | int events = atomic_read(&event->event_limit); | ||
7113 | int ret = 0; | ||
7114 | |||
7115 | /* | ||
7116 | * Non-sampling counters might still use the PMI to fold short | ||
7117 | * hardware counters, ignore those. | ||
7118 | */ | ||
7119 | if (unlikely(!is_sampling_event(event))) | ||
7120 | return 0; | ||
7121 | |||
7122 | ret = __perf_event_account_interrupt(event, throttle); | ||
7123 | |||
7109 | /* | 7124 | /* |
7110 | * XXX event_limit might not quite work as expected on inherited | 7125 | * XXX event_limit might not quite work as expected on inherited |
7111 | * events | 7126 | * events |