about summary refs log tree commit diff stats
path: root/arch
diff options
context:
space:
mode:
authorStephane Eranian <eranian@google.com>2012-02-09 17:21:00 -0500
committerIngo Molnar <mingo@elte.hu>2012-03-05 08:55:42 -0500
commitd010b3326cf06b3406cdd88af16dcf4e4b6fec2e (patch)
treed0468d78582aeff6a603cb5d29b1a14310106896 /arch
parent2481c5fa6db0237e4f0168f88913178b2b495b7c (diff)
perf: Add callback to flush branch_stack on context switch
With branch stack sampling, it is possible to filter by priv levels. In system-wide mode, that means it is possible to capture only user level branches. The builtin SW LBR filter needs to disassemble code based on LBR captured addresses. For that, it needs to know the task the addresses are associated with. Because of context switches, the content of the branch stack buffer may contain addresses from different tasks. We need a callback on context switch to either flush the branch stack or save it. This patch adds a new callback in struct pmu which is called during context switches. The callback is called only when necessary. That is when a system-wide context has, at least, one event which uses PERF_SAMPLE_BRANCH_STACK. The callback is never called for per-thread context. In this version, the Intel x86 code simply flushes (resets) the LBR on context switches (fills it with zeroes). Those zeroed branches are then filtered out by the SW filter. Signed-off-by: Stephane Eranian <eranian@google.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1328826068-11713-11-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/kernel/cpu/perf_event.c21
-rw-r--r--arch/x86/kernel/cpu/perf_event.h1
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c13
3 files changed, 28 insertions, 7 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index cea567483274..0a18d16cb58d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1671,25 +1671,32 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
1671 NULL, 1671 NULL,
1672}; 1672};
1673 1673
1674static void x86_pmu_flush_branch_stack(void)
1675{
1676 if (x86_pmu.flush_branch_stack)
1677 x86_pmu.flush_branch_stack();
1678}
1679
1674static struct pmu pmu = { 1680static struct pmu pmu = {
1675 .pmu_enable = x86_pmu_enable, 1681 .pmu_enable = x86_pmu_enable,
1676 .pmu_disable = x86_pmu_disable, 1682 .pmu_disable = x86_pmu_disable,
1677 1683
1678 .attr_groups = x86_pmu_attr_groups, 1684 .attr_groups = x86_pmu_attr_groups,
1679 1685
1680 .event_init = x86_pmu_event_init, 1686 .event_init = x86_pmu_event_init,
1681 1687
1682 .add = x86_pmu_add, 1688 .add = x86_pmu_add,
1683 .del = x86_pmu_del, 1689 .del = x86_pmu_del,
1684 .start = x86_pmu_start, 1690 .start = x86_pmu_start,
1685 .stop = x86_pmu_stop, 1691 .stop = x86_pmu_stop,
1686 .read = x86_pmu_read, 1692 .read = x86_pmu_read,
1687 1693
1688 .start_txn = x86_pmu_start_txn, 1694 .start_txn = x86_pmu_start_txn,
1689 .cancel_txn = x86_pmu_cancel_txn, 1695 .cancel_txn = x86_pmu_cancel_txn,
1690 .commit_txn = x86_pmu_commit_txn, 1696 .commit_txn = x86_pmu_commit_txn,
1691 1697
1692 .event_idx = x86_pmu_event_idx, 1698 .event_idx = x86_pmu_event_idx,
1699 .flush_branch_stack = x86_pmu_flush_branch_stack,
1693}; 1700};
1694 1701
1695void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) 1702void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index f104c054dc5c..74387c12dc72 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -324,6 +324,7 @@ struct x86_pmu {
324 void (*cpu_starting)(int cpu); 324 void (*cpu_starting)(int cpu);
325 void (*cpu_dying)(int cpu); 325 void (*cpu_dying)(int cpu);
326 void (*cpu_dead)(int cpu); 326 void (*cpu_dead)(int cpu);
327 void (*flush_branch_stack)(void);
327 328
328 /* 329 /*
329 * Intel Arch Perfmon v2+ 330 * Intel Arch Perfmon v2+
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 7cc1e2dcc4dd..6627089232a7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1539,6 +1539,18 @@ static void intel_pmu_cpu_dying(int cpu)
1539 fini_debug_store_on_cpu(cpu); 1539 fini_debug_store_on_cpu(cpu);
1540} 1540}
1541 1541
1542static void intel_pmu_flush_branch_stack(void)
1543{
1544 /*
1545 * Intel LBR does not tag entries with the
1546 * PID of the current task, then we need to
1547 * flush it on ctxsw
1548 * For now, we simply reset it
1549 */
1550 if (x86_pmu.lbr_nr)
1551 intel_pmu_lbr_reset();
1552}
1553
1542static __initconst const struct x86_pmu intel_pmu = { 1554static __initconst const struct x86_pmu intel_pmu = {
1543 .name = "Intel", 1555 .name = "Intel",
1544 .handle_irq = intel_pmu_handle_irq, 1556 .handle_irq = intel_pmu_handle_irq,
@@ -1566,6 +1578,7 @@ static __initconst const struct x86_pmu intel_pmu = {
1566 .cpu_starting = intel_pmu_cpu_starting, 1578 .cpu_starting = intel_pmu_cpu_starting,
1567 .cpu_dying = intel_pmu_cpu_dying, 1579 .cpu_dying = intel_pmu_cpu_dying,
1568 .guest_get_msrs = intel_guest_get_msrs, 1580 .guest_get_msrs = intel_guest_get_msrs,
1581 .flush_branch_stack = intel_pmu_flush_branch_stack,
1569}; 1582};
1570 1583
1571static __init void intel_clovertown_quirk(void) 1584static __init void intel_clovertown_quirk(void)