author     Stephane Eranian <eranian@google.com>         2013-01-24 10:10:32 -0500
committer  Arnaldo Carvalho de Melo <acme@redhat.com>    2013-04-01 11:16:31 -0400
commit     f20093eef5f7843a25adfc0512617d4b1ff1aa6e
tree       1e1d008f98adab4477e3803ed24f3f2a22b34aaf /arch/x86/kernel/cpu/perf_event_intel.c
parent     d6be9ad6c960f43800a6f118932bc8a5a4eadcd1
perf/x86: Add memory profiling via PEBS Load Latency
This patch adds support for memory profiling using the
PEBS Load Latency facility.
Load accesses are sampled by hardware, and the instruction
address, data address, load latency, data source, TLB and
lock information can be saved in the sampling buffer when
using the PERF_SAMPLE_WEIGHT (for latency), PERF_SAMPLE_ADDR
and PERF_SAMPLE_DATA_SRC sample types.
To enable PEBS Load Latency, users have to use the
model-specific events:
- on NHM/WSM: MEM_INST_RETIRED:LATENCY_ABOVE_THRESHOLD
- on SNB/IVB: MEM_TRANS_RETIRED:LATENCY_ABOVE_THRESHOLD
To make things easier, this patch also exports a generic
alias via sysfs: mem-loads. The alias resolves to the right
event encoding for the host CPU and can be used directly
by the perf tool, as the sketch below illustrates.
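
As an illustration (not part of the patch itself), the following
minimal sketch shows how the event could be programmed through the raw
syscall interface on an SNB/IVB host, where the alias resolves to
event=0xcd,umask=0x1. It assumes the ldlat threshold is carried in
attr.config1, as the alias string suggests, and needs uapi headers
that already define PERF_SAMPLE_WEIGHT and PERF_SAMPLE_DATA_SRC:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/*
 * Open a PEBS Load Latency event on the current task, any CPU.
 * SNB/IVB raw encoding: MEM_TRANS_RETIRED:LATENCY_ABOVE_THRESHOLD
 * (event=0xcd, umask=0x1); the minimum latency of interest goes
 * into config1 (the "ldlat" format attribute).
 */
static int open_mem_loads(unsigned int ldlat)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size           = sizeof(attr);
	attr.type           = PERF_TYPE_RAW;
	attr.config         = 0x1cd;	/* umask=0x1, event=0xcd */
	attr.config1        = ldlat;	/* latency threshold, core cycles */
	attr.sample_period  = 1000;
	attr.sample_type    = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR |
			      PERF_SAMPLE_WEIGHT | PERF_SAMPLE_DATA_SRC;
	attr.precise_ip     = 2;	/* request PEBS */
	attr.exclude_kernel = 1;

	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

int main(void)
{
	int fd = open_mem_loads(3);

	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	/* mmap() the fd and consume PERF_RECORD_SAMPLE records here. */
	close(fd);
	return 0;
}

With the sysfs alias in place, the perf tool can resolve the same
encoding symbolically, so something like
'perf record -W -d -e cpu/mem-loads/pp' should work without
hardcoding model-specific event codes.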
Loosely based on a patch that Intel's Lin Ming posted on LKML
in July 2011.
Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: peterz@infradead.org
Cc: ak@linux.intel.com
Cc: acme@redhat.com
Cc: jolsa@redhat.com
Cc: namhyung.kim@lge.com
Link: http://lkml.kernel.org/r/1359040242-8269-9-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel.c')
 arch/x86/kernel/cpu/perf_event_intel.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index df3beaac3397..d5ea5a03cd37 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -81,6 +81,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
 
@@ -136,6 +137,7 @@ static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
 
@@ -155,9 +157,23 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 	EVENT_EXTRA_END
 };
 
+EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
+
+struct attribute *nhm_events_attrs[] = {
+	EVENT_PTR(mem_ld_nhm),
+	NULL,
+};
+
+struct attribute *snb_events_attrs[] = {
+	EVENT_PTR(mem_ld_snb),
+	NULL,
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
 	return intel_perfmon_event_map[hw_event];
@@ -2035,6 +2051,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
+		x86_pmu.cpu_events = nhm_events_attrs;
+
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2078,6 +2096,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 
+		x86_pmu.cpu_events = nhm_events_attrs;
+
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2106,6 +2126,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.cpu_events = snb_events_attrs;
+
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2132,6 +2154,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.cpu_events = snb_events_attrs;
+
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
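
For completeness, a hedged sketch of how the recorded data source
could be decoded on the consumer side. It relies on the
perf_mem_data_src union and PERF_MEM_* flags that this patch series
adds to the uapi perf_event.h; the helper name and the printing
policy are illustrative only:

#include <linux/perf_event.h>
#include <stdio.h>

/*
 * Classify one data-source word as delivered via PERF_SAMPLE_DATA_SRC.
 * Every field (op, level, snoop, lock, dtlb) is a bitmask, so several
 * bits can be set at once, e.g. PERF_MEM_LVL_HIT | PERF_MEM_LVL_L2.
 */
static void print_data_src(__u64 val)
{
	union perf_mem_data_src src = { .val = val };
	const char *hm = (src.mem_lvl & PERF_MEM_LVL_HIT) ? "hit" : "miss";

	if (src.mem_op & PERF_MEM_OP_LOAD)
		printf("load: ");
	if (src.mem_lvl & PERF_MEM_LVL_L1)
		printf("L1 %s", hm);
	else if (src.mem_lvl & PERF_MEM_LVL_LFB)
		printf("line fill buffer %s", hm);
	else if (src.mem_lvl & PERF_MEM_LVL_L2)
		printf("L2 %s", hm);
	else if (src.mem_lvl & PERF_MEM_LVL_L3)
		printf("L3 %s", hm);
	else if (src.mem_lvl & PERF_MEM_LVL_LOC_RAM)
		printf("local RAM %s", hm);
	if (src.mem_lock & PERF_MEM_LOCK_LOCKED)
		printf(", locked");
	if (src.mem_dtlb & PERF_MEM_TLB_MISS)
		printf(", dTLB miss");
	printf("\n");
}

Decoding like this is what lets a profiler attribute each sampled
load to a cache level, lock status and TLB outcome alongside the
latency carried by PERF_SAMPLE_WEIGHT.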