aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author Stephane Eranian <eranian@google.com> 2012-02-09 17:21:00 -0500
committer Ingo Molnar <mingo@elte.hu> 2012-03-05 08:55:42 -0500
commit d010b3326cf06b3406cdd88af16dcf4e4b6fec2e (patch)
tree d0468d78582aeff6a603cb5d29b1a14310106896 /include
parent 2481c5fa6db0237e4f0168f88913178b2b495b7c (diff)
perf: Add callback to flush branch_stack on context switch
With branch stack sampling, it is possible to filter by priv levels.

In system-wide mode, that means it is possible to capture only user level branches. The builtin SW LBR filter needs to disassemble code based on LBR captured addresses. For that, it needs to know the task the addresses are associated with. Because of context switches, the content of the branch stack buffer may contain addresses from different tasks.

We need a callback on context switch to either flush the branch stack or save it. This patch adds a new callback in struct pmu which is called during context switches. The callback is called only when necessary. That is when a system-wide context has, at least, one event which uses PERF_SAMPLE_BRANCH_STACK. The callback is never called for per-thread context.

In this version, the Intel x86 code simply flushes (resets) the LBR on context switches (fills it with zeroes). Those zeroed branches are then filtered out by the SW filter.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1328826068-11713-11-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'include')
-rw-r--r-- include/linux/perf_event.h 9
1 file changed, 8 insertions, 1 deletion
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5fc494f4a094..fbbf5e598368 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -746,6 +746,11 @@ struct pmu {
746 * if no implementation is provided it will default to: event->hw.idx + 1. 746 * if no implementation is provided it will default to: event->hw.idx + 1.
747 */ 747 */
748 int (*event_idx) (struct perf_event *event); /*optional */ 748 int (*event_idx) (struct perf_event *event); /*optional */
749
750 /*
751 * flush branch stack on context-switches (needed in cpu-wide mode)
752 */
753 void (*flush_branch_stack) (void);
749}; 754};
750 755
751/** 756/**
@@ -979,7 +984,8 @@ struct perf_event_context {
979 u64 parent_gen; 984 u64 parent_gen;
980 u64 generation; 985 u64 generation;
981 int pin_count; 986 int pin_count;
982 int nr_cgroups; /* cgroup events present */ 987 int nr_cgroups; /* cgroup evts */
988 int nr_branch_stack; /* branch_stack evt */
983 struct rcu_head rcu_head; 989 struct rcu_head rcu_head;
984}; 990};
985 991
@@ -1044,6 +1050,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
1044extern u64 perf_event_read_value(struct perf_event *event, 1050extern u64 perf_event_read_value(struct perf_event *event,
1045 u64 *enabled, u64 *running); 1051 u64 *enabled, u64 *running);
1046 1052
1053
1047struct perf_sample_data { 1054struct perf_sample_data {
1048 u64 type; 1055 u64 type;
1049 1056