diff options
author | Stephane Eranian <eranian@google.com> | 2012-02-09 17:21:00 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2012-03-05 08:55:42 -0500 |
commit | d010b3326cf06b3406cdd88af16dcf4e4b6fec2e (patch) | |
tree | d0468d78582aeff6a603cb5d29b1a14310106896 /include | |
parent | 2481c5fa6db0237e4f0168f88913178b2b495b7c (diff) |
perf: Add callback to flush branch_stack on context switch
With branch stack sampling, it is possible to filter by privilege level.
In system-wide mode, that means it is possible to capture only
user-level branches. The built-in SW LBR filter needs to disassemble code
based on the addresses captured by the LBR. To do that, it needs to know
which task the addresses are associated with. Because of context switches,
the branch stack buffer may contain addresses from
different tasks.
We need a callback on context switch to either flush the branch stack
or save it. This patch adds a new callback to struct pmu which is called
during context switches. The callback is called only when necessary,
that is, when a system-wide context has at least one event which
uses PERF_SAMPLE_BRANCH_STACK. The callback is never called for
per-thread contexts.
In this version, the Intel x86 code simply flushes (resets) the LBR
on context switches (fills it with zeroes). Those zeroed branches are
then filtered out by the SW filter.
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1328826068-11713-11-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/perf_event.h | 9 |
1 files changed, 8 insertions, 1 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 5fc494f4a094..fbbf5e598368 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -746,6 +746,11 @@ struct pmu { | |||
746 | * if no implementation is provided it will default to: event->hw.idx + 1. | 746 | * if no implementation is provided it will default to: event->hw.idx + 1. |
747 | */ | 747 | */ |
748 | int (*event_idx) (struct perf_event *event); /*optional */ | 748 | int (*event_idx) (struct perf_event *event); /*optional */ |
749 | |||
750 | /* | ||
751 | * flush branch stack on context-switches (needed in cpu-wide mode) | ||
752 | */ | ||
753 | void (*flush_branch_stack) (void); | ||
749 | }; | 754 | }; |
750 | 755 | ||
751 | /** | 756 | /** |
@@ -979,7 +984,8 @@ struct perf_event_context { | |||
979 | u64 parent_gen; | 984 | u64 parent_gen; |
980 | u64 generation; | 985 | u64 generation; |
981 | int pin_count; | 986 | int pin_count; |
982 | int nr_cgroups; /* cgroup events present */ | 987 | int nr_cgroups; /* cgroup evts */ |
988 | int nr_branch_stack; /* branch_stack evt */ | ||
983 | struct rcu_head rcu_head; | 989 | struct rcu_head rcu_head; |
984 | }; | 990 | }; |
985 | 991 | ||
@@ -1044,6 +1050,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, | |||
1044 | extern u64 perf_event_read_value(struct perf_event *event, | 1050 | extern u64 perf_event_read_value(struct perf_event *event, |
1045 | u64 *enabled, u64 *running); | 1051 | u64 *enabled, u64 *running); |
1046 | 1052 | ||
1053 | |||
1047 | struct perf_sample_data { | 1054 | struct perf_sample_data { |
1048 | u64 type; | 1055 | u64 type; |
1049 | 1056 | ||