diff options
author | Robert Richter <robert.richter@amd.com> | 2012-03-12 07:54:32 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2012-05-09 09:23:14 -0400 |
commit | 450bbd493d436f9eadd1b7828158f37559f26674 (patch) | |
tree | ae96802ceb4b1061e70bb2bb705ac8d0126d6ac3 | |
parent | d47e8238cd76f1ffa7c8cd30e08b8e9074fd597e (diff) |
perf/x86-ibs: Precise event sampling with IBS for AMD CPUs
This patch adds support for precise event sampling with IBS. There are
two counting modes to count either cycles or micro-ops. If the
corresponding performance counter events (hw events) are set up with
the precise flag set, the request is redirected to the ibs pmu:
perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
perf record -a -e r076:p ... # same as -e cpu-cycles:p
perf record -a -e r0C1:p ... # use ibs op counting micro-ops
Each ibs sample contains a linear address that points to the
instruction that was causing the sample to trigger. With ibs we have
skid 0. Thus, ibs supports precise levels 1 and 2. Samples are marked
with the PERF_EFLAGS_EXACT flag set. In rare cases the rip is invalid
when IBS was not able to record the rip correctly. Then the
PERF_EFLAGS_EXACT flag is cleared and the rip is taken from pt_regs.
V2:
* don't drop samples in precise level 2 if rip is invalid, instead
support the PERF_EFLAGS_EXACT flag
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120502103309.GP18810@erda.amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_amd.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_amd_ibs.c | 73 |
2 files changed, 76 insertions, 4 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 589286f28877..65652265fffd 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event) | |||
134 | 134 | ||
135 | static int amd_pmu_hw_config(struct perf_event *event) | 135 | static int amd_pmu_hw_config(struct perf_event *event) |
136 | { | 136 | { |
137 | int ret = x86_pmu_hw_config(event); | 137 | int ret; |
138 | 138 | ||
139 | /* pass precise event sampling to ibs: */ | ||
140 | if (event->attr.precise_ip && get_ibs_caps()) | ||
141 | return -ENOENT; | ||
142 | |||
143 | ret = x86_pmu_hw_config(event); | ||
139 | if (ret) | 144 | if (ret) |
140 | return ret; | 145 | return ret; |
141 | 146 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index cc1f3293d6c2..34dfa853f6df 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c | |||
@@ -145,17 +145,80 @@ static struct perf_ibs *get_ibs_pmu(int type) | |||
145 | return NULL; | 145 | return NULL; |
146 | } | 146 | } |
147 | 147 | ||
148 | /* | ||
149 | * Use IBS for precise event sampling: | ||
150 | * | ||
151 | * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count | ||
152 | * perf record -a -e r076:p ... # same as -e cpu-cycles:p | ||
153 | * perf record -a -e r0C1:p ... # use ibs op counting micro-ops | ||
154 | * | ||
155 | * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl, | ||
156 | * MSRC001_1033) is used to select either cycle or micro-ops counting | ||
157 | * mode. | ||
158 | * | ||
159 | * The rip of IBS samples has skid 0. Thus, IBS supports precise | ||
160 | * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the | ||
161 | * rip is invalid when IBS was not able to record the rip correctly. | ||
162 | * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. | ||
163 | * | ||
164 | */ | ||
165 | static int perf_ibs_precise_event(struct perf_event *event, u64 *config) | ||
166 | { | ||
167 | switch (event->attr.precise_ip) { | ||
168 | case 0: | ||
169 | return -ENOENT; | ||
170 | case 1: | ||
171 | case 2: | ||
172 | break; | ||
173 | default: | ||
174 | return -EOPNOTSUPP; | ||
175 | } | ||
176 | |||
177 | switch (event->attr.type) { | ||
178 | case PERF_TYPE_HARDWARE: | ||
179 | switch (event->attr.config) { | ||
180 | case PERF_COUNT_HW_CPU_CYCLES: | ||
181 | *config = 0; | ||
182 | return 0; | ||
183 | } | ||
184 | break; | ||
185 | case PERF_TYPE_RAW: | ||
186 | switch (event->attr.config) { | ||
187 | case 0x0076: | ||
188 | *config = 0; | ||
189 | return 0; | ||
190 | case 0x00C1: | ||
191 | *config = IBS_OP_CNT_CTL; | ||
192 | return 0; | ||
193 | } | ||
194 | break; | ||
195 | default: | ||
196 | return -ENOENT; | ||
197 | } | ||
198 | |||
199 | return -EOPNOTSUPP; | ||
200 | } | ||
201 | |||
148 | static int perf_ibs_init(struct perf_event *event) | 202 | static int perf_ibs_init(struct perf_event *event) |
149 | { | 203 | { |
150 | struct hw_perf_event *hwc = &event->hw; | 204 | struct hw_perf_event *hwc = &event->hw; |
151 | struct perf_ibs *perf_ibs; | 205 | struct perf_ibs *perf_ibs; |
152 | u64 max_cnt, config; | 206 | u64 max_cnt, config; |
207 | int ret; | ||
153 | 208 | ||
154 | perf_ibs = get_ibs_pmu(event->attr.type); | 209 | perf_ibs = get_ibs_pmu(event->attr.type); |
155 | if (!perf_ibs) | 210 | if (perf_ibs) { |
211 | config = event->attr.config; | ||
212 | } else { | ||
213 | perf_ibs = &perf_ibs_op; | ||
214 | ret = perf_ibs_precise_event(event, &config); | ||
215 | if (ret) | ||
216 | return ret; | ||
217 | } | ||
218 | |||
219 | if (event->pmu != &perf_ibs->pmu) | ||
156 | return -ENOENT; | 220 | return -ENOENT; |
157 | 221 | ||
158 | config = event->attr.config; | ||
159 | if (config & ~perf_ibs->config_mask) | 222 | if (config & ~perf_ibs->config_mask) |
160 | return -EINVAL; | 223 | return -EINVAL; |
161 | 224 | ||
@@ -437,8 +500,12 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) | |||
437 | ibs_data.size = sizeof(u64) * size; | 500 | ibs_data.size = sizeof(u64) * size; |
438 | 501 | ||
439 | regs = *iregs; | 502 | regs = *iregs; |
440 | if (!check_rip || !(ibs_data.regs[2] & IBS_RIP_INVALID)) | 503 | if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { |
504 | regs.flags &= ~PERF_EFLAGS_EXACT; | ||
505 | } else { | ||
441 | instruction_pointer_set(&regs, ibs_data.regs[1]); | 506 | instruction_pointer_set(&regs, ibs_data.regs[1]); |
507 | regs.flags |= PERF_EFLAGS_EXACT; | ||
508 | } | ||
442 | 509 | ||
443 | if (event->attr.sample_type & PERF_SAMPLE_RAW) { | 510 | if (event->attr.sample_type & PERF_SAMPLE_RAW) { |
444 | raw.size = sizeof(u32) + ibs_data.size; | 511 | raw.size = sizeof(u32) + ibs_data.size; |