author     Stephane Eranian <eranian@google.com>      2012-02-09 17:21:00 -0500
committer  Ingo Molnar <mingo@elte.hu>                2012-03-05 08:55:42 -0500
commit     d010b3326cf06b3406cdd88af16dcf4e4b6fec2e (patch)
tree       d0468d78582aeff6a603cb5d29b1a14310106896 /kernel/events
parent     2481c5fa6db0237e4f0168f88913178b2b495b7c (diff)
perf: Add callback to flush branch_stack on context switch
With branch stack sampling, it is possible to filter by privilege level. In
system-wide mode, that means it is possible to capture only user-level
branches. The builtin SW LBR filter needs to disassemble code based on the
LBR-captured addresses; for that, it needs to know which task the addresses
are associated with. Because of context switches, the content of the branch
stack buffer may contain addresses from different tasks.

We need a callback on context switch to either flush the branch stack or
save it. This patch adds a new callback to struct pmu which is called during
context switches. The callback is invoked only when necessary, i.e. when a
system-wide context has at least one event which uses
PERF_SAMPLE_BRANCH_STACK. The callback is never called for per-thread
contexts.

In this version, the Intel x86 code simply flushes (resets) the LBR on
context switches (fills it with zeroes). Those zeroed branches are then
filtered out by the SW filter.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1328826068-11713-11-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
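Editor's note: the callback described above is a hook on struct pmu that takes no
arguments; the core code in the diff below invokes it as pmu->flush_branch_stack().
A minimal sketch of the declaration side, which lives in include/linux/perf_event.h
and is therefore outside this path-filtered view, might look like the following
(member placement and comment wording are illustrative, not the verbatim patch):

struct pmu {
        /* ... existing callbacks (pmu_enable, pmu_disable, add, del, start, stop, read, ...) ... */

        /*
         * Optional context-switch callback: flush the branch stack.
         * Consulted by perf_branch_stack_sched_in() only when a
         * system-wide context has at least one event using
         * PERF_SAMPLE_BRANCH_STACK.
         */
        void    (*flush_branch_stack)   (void);
};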
Diffstat (limited to 'kernel/events')
 kernel/events/core.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+), 0 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 242bb51c67f2..c61234b1a988 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -137,6 +137,7 @@ enum event_type_t {
  */
 struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
+static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -888,6 +889,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
888 if (is_cgroup_event(event)) 889 if (is_cgroup_event(event))
889 ctx->nr_cgroups++; 890 ctx->nr_cgroups++;
890 891
892 if (has_branch_stack(event))
893 ctx->nr_branch_stack++;
894
891 list_add_rcu(&event->event_entry, &ctx->event_list); 895 list_add_rcu(&event->event_entry, &ctx->event_list);
892 if (!ctx->nr_events) 896 if (!ctx->nr_events)
893 perf_pmu_rotate_start(ctx->pmu); 897 perf_pmu_rotate_start(ctx->pmu);
@@ -1027,6 +1031,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
                 cpuctx->cgrp = NULL;
         }
 
+        if (has_branch_stack(event))
+                ctx->nr_branch_stack--;
+
         ctx->nr_events--;
         if (event->attr.inherit_stat)
                 ctx->nr_stat--;
@@ -2202,6 +2209,66 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 }
 
 /*
+ * When sampling the branch stack in system-wide mode, it may be necessary
+ * to flush the stack on context switch. This happens when the branch
+ * stack does not tag its entries with the pid of the current task.
+ * Otherwise it becomes impossible to associate a branch entry with a
+ * task. This ambiguity is more likely to appear when the branch stack
+ * supports priv level filtering and the user sets it to monitor only
+ * at the user level (which could be a useful measurement in system-wide
+ * mode). In that case, the risk is high of having a branch stack with
+ * branches from multiple tasks. Flushing may mean dropping the existing
+ * entries or stashing them somewhere in the PMU specific code layer.
+ *
+ * This function provides the context switch callback to the lower code
+ * layer. It is invoked ONLY when there is at least one system-wide context
+ * with at least one active event using taken branch sampling.
+ */
+static void perf_branch_stack_sched_in(struct task_struct *prev,
+                                       struct task_struct *task)
+{
+        struct perf_cpu_context *cpuctx;
+        struct pmu *pmu;
+        unsigned long flags;
+
+        /* no need to flush branch stack if not changing task */
+        if (prev == task)
+                return;
+
+        local_irq_save(flags);
+
+        rcu_read_lock();
+
+        list_for_each_entry_rcu(pmu, &pmus, entry) {
+                cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+                /*
+                 * check if the context has at least one
+                 * event using PERF_SAMPLE_BRANCH_STACK
+                 */
+                if (cpuctx->ctx.nr_branch_stack > 0
+                    && pmu->flush_branch_stack) {
+
+                        pmu = cpuctx->ctx.pmu;
+
+                        perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+                        perf_pmu_disable(pmu);
+
+                        pmu->flush_branch_stack();
+
+                        perf_pmu_enable(pmu);
+
+                        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+                }
+        }
+
+        rcu_read_unlock();
+
+        local_irq_restore(flags);
+}
+
+/*
  * Called from scheduler to add the events of the current task
  * with interrupts disabled.
  *
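[Editor's note: the changelog states that, in this version, the Intel x86 code simply
resets the LBR from this hook. A sketch of what such a driver-side implementation could
look like is shown below, assuming the pre-existing arch/x86 LBR helpers
intel_pmu_lbr_reset() and the x86_pmu.lbr_nr capability field; the arch side belongs to
the same series but sits outside this kernel/events diffstat, so the exact code may differ.]

static void intel_pmu_flush_branch_stack(void)
{
        /*
         * The Intel LBR does not tag entries with the pid of the
         * current task, so the stack must be flushed on context
         * switch. For now, simply reset it.
         */
        if (x86_pmu.lbr_nr)
                intel_pmu_lbr_reset();
}

/* wired into the Intel PMU descriptor, e.g.:
 *      .flush_branch_stack = intel_pmu_flush_branch_stack,
 */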
@@ -2232,6 +2299,10 @@ void __perf_event_task_sched_in(struct task_struct *prev,
          */
         if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
                 perf_cgroup_sched_in(prev, task);
+
+        /* check for system-wide branch_stack events */
+        if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))
+                perf_branch_stack_sched_in(prev, task);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -2798,6 +2869,14 @@ static void free_event(struct perf_event *event)
                         atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
                         static_key_slow_dec_deferred(&perf_sched_events);
                 }
+
+                if (has_branch_stack(event)) {
+                        static_key_slow_dec_deferred(&perf_sched_events);
+                        /* is system-wide event */
+                        if (!(event->attach_state & PERF_ATTACH_TASK))
+                                atomic_dec(&per_cpu(perf_branch_stack_events,
+                                                    event->cpu));
+                }
         }
 
         if (event->rb) {
@@ -5924,6 +6003,12 @@ done:
                                 return ERR_PTR(err);
                         }
                 }
+                if (has_branch_stack(event)) {
+                        static_key_slow_inc(&perf_sched_events.key);
+                        if (!(event->attach_state & PERF_ATTACH_TASK))
+                                atomic_inc(&per_cpu(perf_branch_stack_events,
+                                                    event->cpu));
+                }
         }
 
         return event;
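[Editor's note: for completeness, below is a hypothetical user-space example of the
scenario the changelog describes: a system-wide (pid == -1, cpu fixed), user-level
branch stack sampling event. Such an event has PERF_SAMPLE_BRANCH_STACK in sample_type
and no PERF_ATTACH_TASK, so perf_event_alloc() above increments perf_branch_stack_events
for that CPU and the new flush callback becomes active on its context switches. This is
roughly the per-CPU event setup behind 'perf record -a -j any,u'; treat the snippet as a
sketch against the uapi of this kernel series, not as tooling guidance.]

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
        struct perf_event_attr attr;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;
        attr.sample_period = 100000;
        attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
        /* user-level branches only: the ambiguous case discussed in the changelog */
        attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_USER;

        /* pid == -1, cpu == 0: system-wide on CPU 0; typically requires privileges */
        fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
        if (fd < 0)
                perror("perf_event_open");
        else
                close(fd);
        return 0;
}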