aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Song Liu <songliubraving@fb.com> 2018-05-03 15:47:16 -0400
committer Ingo Molnar <mingo@kernel.org> 2018-05-25 02:11:10 -0400
commit a1150c202207cc8501bebc45b63c264f91959260 (patch)
tree 17b8820b35d29a2e82875b8339ca036fe6452db2
parent bd9c67ad9693bacef086d65c1c6744645d4777e7 (diff)
perf/core: Fix group scheduling with mixed hw and sw events
When hw and sw events are mixed in the same group, they are all attached
to the hw perf_event_context. This sometimes requires moving group of
perf_event to a different context.

We found a bug in how the kernel handles this, for example if we do:

   perf stat -e '{faults,ref-cycles,faults}' -I 1000

     1.005591180              1,297      faults
     1.005591180        457,476,576      ref-cycles
     1.005591180    <not supported>      faults

First, sw event "faults" is attached to the sw context, and becomes the
group leader. Then, hw event "ref-cycles" is attached, so both events
are moved to the hw context. Last, another sw "faults" tries to attach,
but it fails because of mismatch between the new target ctx (from sw
pmu) and the group_leader's ctx (hw context, same as ref-cycles).

The broken condition is:
   group_leader is sw event;
   group_leader is on hw context;
   add a sw event to the group.

Fix this scenario by checking group_leader's context (instead of just
event type). If group_leader is on hw context, use the ->pmu of this
context to look up context for the new event.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <kernel-team@fb.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Fixes: b04243ef7006 ("perf: Complete software pmu grouping")
Link: http://lkml.kernel.org/r/20180503194716.162815-1-songliubraving@fb.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- include/linux/perf_event.h 8
-rw-r--r-- kernel/events/core.c 21
2 files changed, 19 insertions, 10 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e71e99eb9a4e..def866f7269b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1016,6 +1016,14 @@ static inline int is_software_event(struct perf_event *event)
1016 return event->event_caps & PERF_EV_CAP_SOFTWARE; 1016 return event->event_caps & PERF_EV_CAP_SOFTWARE;
1017} 1017}
1018 1018
1019/*
1020 * Return 1 for event in sw context, 0 for event in hw context
1021 */
1022static inline int in_software_context(struct perf_event *event)
1023{
1024 return event->ctx->pmu->task_ctx_nr == perf_sw_context;
1025}
1026
1019extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; 1027extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
1020 1028
1021extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64); 1029extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 67612ce359ad..ce6aa5ff3c96 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10521,19 +10521,20 @@ SYSCALL_DEFINE5(perf_event_open,
10521 if (pmu->task_ctx_nr == perf_sw_context) 10521 if (pmu->task_ctx_nr == perf_sw_context)
10522 event->event_caps |= PERF_EV_CAP_SOFTWARE; 10522 event->event_caps |= PERF_EV_CAP_SOFTWARE;
10523 10523
10524 if (group_leader && 10524 if (group_leader) {
10525 (is_software_event(event) != is_software_event(group_leader))) { 10525 if (is_software_event(event) &&
10526 if (is_software_event(event)) { 10526 !in_software_context(group_leader)) {
10527 /* 10527 /*
10528 * If event and group_leader are not both a software 10528 * If the event is a sw event, but the group_leader
10529 * event, and event is, then group leader is not. 10529 * is on hw context.
10530 * 10530 *
10531 * Allow the addition of software events to !software 10531 * Allow the addition of software events to hw
10532 * groups, this is safe because software events never 10532 * groups, this is safe because software events
10533 * fail to schedule. 10533 * never fail to schedule.
10534 */ 10534 */
10535 pmu = group_leader->pmu; 10535 pmu = group_leader->ctx->pmu;
10536 } else if (is_software_event(group_leader) && 10536 } else if (!is_software_event(event) &&
10537 is_software_event(group_leader) &&
10537 (group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) { 10538 (group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
10538 /* 10539 /*
10539 * In case the group is a pure software group, and we 10540 * In case the group is a pure software group, and we