author		Stephane Eranian <eranian@google.com>	2013-01-24 10:10:32 -0500
committer	Arnaldo Carvalho de Melo <acme@redhat.com>	2013-04-01 11:16:31 -0400
commit		f20093eef5f7843a25adfc0512617d4b1ff1aa6e (patch)
tree		1e1d008f98adab4477e3803ed24f3f2a22b34aaf /arch/x86/kernel/cpu/perf_event_intel.c
parent		d6be9ad6c960f43800a6f118932bc8a5a4eadcd1 (diff)
perf/x86: Add memory profiling via PEBS Load Latency
This patch adds support for memory profiling using the PEBS Load Latency facility.

Load accesses are sampled by HW, and the instruction address, data address, load latency, data source, TLB and lock information can be saved in the sampling buffer if using the PERF_SAMPLE_COST (for latency), PERF_SAMPLE_ADDR and PERF_SAMPLE_DATA_SRC sample types.

To enable PEBS Load Latency, users have to use the model-specific event:

 - on NHM/WSM: MEM_INST_RETIRED:LATENCY_ABOVE_THRESHOLD
 - on SNB/IVB: MEM_TRANS_RETIRED:LATENCY_ABOVE_THRESHOLD

To make things easier, this patch also exports a generic alias via sysfs: mem-loads. It exports the right event encoding based on the host CPU and can be used directly by the perf tool.

Loosely based on Intel's Lin Ming patch posted on LKML in July 2011.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: peterz@infradead.org
Cc: ak@linux.intel.com
Cc: acme@redhat.com
Cc: jolsa@redhat.com
Cc: namhyung.kim@lge.com
Link: http://lkml.kernel.org/r/1359040242-8269-9-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
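As a usage illustration (not part of the patch itself), a tool could program the NHM/WSM load-latency event directly via perf_event_open(2). The sketch below assumes headers from a kernel with this series applied, the raw encoding 0x100b (event=0x0b, umask=0x10), that the ldlat threshold travels in attr.config1 (the field the "ldlat" format term maps to), and that the latency sample type is the one mainline names PERF_SAMPLE_WEIGHT (called PERF_SAMPLE_COST above, its name during development):

/*
 * Hypothetical sketch: open MEM_INST_RETIRED:LATENCY_ABOVE_THRESHOLD
 * on NHM/WSM with PEBS Load Latency enabled.
 */
#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_mem_loads(pid_t pid)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size          = sizeof(attr);
	attr.type          = PERF_TYPE_RAW;
	attr.config        = 0x100b;	/* event=0x0b, umask=0x10 */
	attr.config1       = 3;		/* ldlat: min latency, core cycles (assumed field) */
	attr.sample_period = 10000;
	attr.sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR |
			     PERF_SAMPLE_WEIGHT | PERF_SAMPLE_DATA_SRC;
	attr.precise_ip    = 2;		/* PEBS is required for load latency */
	attr.disabled      = 1;

	return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
}

With the alias added by this patch, the same event should be reachable from the perf tool as perf record -e cpu/mem-loads/ -d, without hard-coding the model-specific encoding.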
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel.c')
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c	24
1 file changed, 24 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index df3beaac3397..d5ea5a03cd37 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -81,6 +81,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
 
@@ -136,6 +137,7 @@ static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
 
@@ -155,9 +157,23 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 	EVENT_EXTRA_END
 };
 
+EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
+
+struct attribute *nhm_events_attrs[] = {
+	EVENT_PTR(mem_ld_nhm),
+	NULL,
+};
+
+struct attribute *snb_events_attrs[] = {
+	EVENT_PTR(mem_ld_snb),
+	NULL,
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
 	return intel_perfmon_event_map[hw_event];
@@ -2035,6 +2051,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
+		x86_pmu.cpu_events = nhm_events_attrs;
+
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2078,6 +2096,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 
+		x86_pmu.cpu_events = nhm_events_attrs;
+
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2106,6 +2126,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.cpu_events = snb_events_attrs;
+
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2132,6 +2154,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.cpu_events = snb_events_attrs;
+
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
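
On the sysfs plumbing above: x86_pmu.cpu_events is picked up by the x86 PMU init code and exposed as the cpu PMU's "events" attribute group, so each EVENT_ATTR_STR() line becomes one file under /sys/bus/event_source/devices/cpu/events/. A rough sketch of what the macro expands to around this series (field names assumed from the contemporary arch/x86/kernel/cpu/perf_event.h, not quoted from this patch):

/*
 * Approximate expansion of EVENT_ATTR_STR(mem-loads, mem_ld_nhm, ...);
 * events_sysfs_show() prints .event_str when the sysfs file is read,
 * and EVENT_PTR(mem_ld_nhm) resolves to &event_attr_mem_ld_nhm.attr.attr.
 */
static struct perf_pmu_events_attr event_attr_mem_ld_nhm = {
	.attr		= __ATTR(mem-loads, 0444, events_sysfs_show, NULL),
	.id		= 0,
	.event_str	= "event=0x0b,umask=0x10,ldlat=3",
};

On a Nehalem/Westmere kernel with this patch, cat /sys/bus/event_source/devices/cpu/events/mem-loads should therefore print event=0x0b,umask=0x10,ldlat=3, which the perf tool parses when given -e cpu/mem-loads/.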