author     Peter Zijlstra <a.p.zijlstra@chello.nl>    2010-03-03 07:12:23 -0500
committer  Ingo Molnar <mingo@elte.hu>                2010-03-10 07:23:32 -0500
commit     ef21f683a045a79b6aa86ad81e5fdfc0d5ddd250 (patch)
tree       ccf39f5051608c1eccac9171259c2d7bc381cc96
parent     caff2befffe899e63df5cc760b7ed01cfd902685 (diff)
perf, x86: use LBR for PEBS IP+1 fixup
Use the LBR to fix up the PEBS IP+1 issue.

As noted, PEBS reports the address of the instruction following the one
that caused the event, so we use the LBR to find the last branch and
reconstruct the actual IP from it. If the reported IP matches the LBR-TO
address, we use LBR-FROM; otherwise we take the LBR-TO address as the
beginning of the last basic block and decode forward. Once we find an
instruction boundary that matches the reported IP, we use the previous
instruction's address.
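In pseudocode, the walk described above looks roughly like the sketch
below. This is only a simplified illustration: insn_len() is a
hypothetical stand-in for the kernel's instruction decoder, and the real
implementation is intel_pmu_pebs_fixup_ip() added further down in this
patch.

	/* simplified sketch, not the actual implementation */
	static unsigned long fixup_ip(unsigned long ip, unsigned long lbr_from,
				      unsigned long lbr_to)
	{
		unsigned long to = lbr_to, prev;

		if (ip < lbr_to)		/* IP precedes the branch target: give up */
			return 0;
		if (ip == lbr_to)		/* we sampled the branch insn itself */
			return lbr_from;

		do {				/* decode forward from the branch target */
			prev = to;
			to += insn_len(to);	/* hypothetical: length of the insn at 'to' */
		} while (to < ip);

		return (to == ip) ? prev : 0;	/* 0 means the fixup failed */
	}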
This patch introduces a new ABI element: PERF_RECORD_MISC_EXACT, which
conveys that the reported IP (PERF_SAMPLE_IP) is the exact instruction
that caused the event (barring CPU errata).
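On the consumer side, a tool reading the sample stream could test this
bit in each sample's header before trusting the IP. A minimal,
hypothetical check (assuming headers that already carry the new define):

	#include <linux/perf_event.h>

	/* hypothetical perf-consumer helper: only trust PERF_SAMPLE_IP when
	 * the kernel marked the sample as exact */
	static int sample_ip_is_exact(const struct perf_event_header *hdr)
	{
		return hdr->type == PERF_RECORD_SAMPLE &&
		       (hdr->misc & PERF_RECORD_MISC_EXACT);
	}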
The fixup can fail for several reasons:
1) the LBR contains invalid data (quite possible)
2) part of the basic block got paged out
3) the reported IP isn't part of the basic block (see 1)
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.619375431@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--  arch/x86/include/asm/perf_event.h          |  19
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c           |  70
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c     |   4
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c  |  84
-rw-r--r--  include/linux/perf_event.h                 |   6
5 files changed, 144 insertions, 39 deletions
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index db6109a885a7..a9038c951619 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -136,6 +136,25 @@ extern void perf_events_lapic_init(void);
 
 #define PERF_EVENT_INDEX_OFFSET		0
 
+/*
+ * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
+ * This flag is otherwise unused and ABI specified to be 0, so nobody should
+ * care what we do with it.
+ */
+#define PERF_EFLAGS_EXACT	(1UL << 3)
+
+#define perf_misc_flags(regs)				\
+({	int misc = 0;					\
+	if (user_mode(regs))				\
+		misc |= PERF_RECORD_MISC_USER;		\
+	else						\
+		misc |= PERF_RECORD_MISC_KERNEL;	\
+	if (regs->flags & PERF_EFLAGS_EXACT)		\
+		misc |= PERF_RECORD_MISC_EXACT;		\
+	misc; })
+
+#define perf_instruction_pointer(regs)	((regs)->ip)
+
 #else
 static inline void init_hw_perf_events(void)	{ }
 static inline void perf_events_lapic_init(void)	{ }
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1badff6b6b28..5cb4e8dcee4b 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -29,6 +29,41 @@
 #include <asm/stacktrace.h>
 #include <asm/nmi.h>
 
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long offset, addr = (unsigned long)from;
+	int type = in_nmi() ? KM_NMI : KM_IRQ0;
+	unsigned long size, len = 0;
+	struct page *page;
+	void *map;
+	int ret;
+
+	do {
+		ret = __get_user_pages_fast(addr, 1, 0, &page);
+		if (!ret)
+			break;
+
+		offset = addr & (PAGE_SIZE - 1);
+		size = min(PAGE_SIZE - offset, n - len);
+
+		map = kmap_atomic(page, type);
+		memcpy(to, map+offset, size);
+		kunmap_atomic(map, type);
+		put_page(page);
+
+		len += size;
+		to += size;
+		addr += size;
+
+	} while (len < n);
+
+	return len;
+}
+
 static u64 perf_event_mask __read_mostly;
 
 struct event_constraint {
@@ -1550,41 +1585,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 	dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
 }
 
-/*
- * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
- */
-static unsigned long
-copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
-{
-	unsigned long offset, addr = (unsigned long)from;
-	int type = in_nmi() ? KM_NMI : KM_IRQ0;
-	unsigned long size, len = 0;
-	struct page *page;
-	void *map;
-	int ret;
-
-	do {
-		ret = __get_user_pages_fast(addr, 1, 0, &page);
-		if (!ret)
-			break;
-
-		offset = addr & (PAGE_SIZE - 1);
-		size = min(PAGE_SIZE - offset, n - len);
-
-		map = kmap_atomic(page, type);
-		memcpy(to, map+offset, size);
-		kunmap_atomic(map, type);
-		put_page(page);
-
-		len += size;
-		to += size;
-		addr += size;
-
-	} while (len < n);
-
-	return len;
-}
-
 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
 {
 	unsigned long bytes;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 44f6ed42a934..7eb78be3b229 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -547,7 +547,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
 	x86_pmu_disable_event(event);
 
 	if (unlikely(event->attr.precise))
-		intel_pmu_pebs_disable(hwc);
+		intel_pmu_pebs_disable(event);
 }
 
 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
@@ -600,7 +600,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
 	}
 
 	if (unlikely(event->attr.precise))
-		intel_pmu_pebs_enable(hwc);
+		intel_pmu_pebs_enable(event);
 
 	__x86_pmu_enable_event(hwc);
 }
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 0d994ef213b9..50e6ff3281fc 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -331,26 +331,32 @@ intel_pebs_constraints(struct perf_event *event)
 	return &emptyconstraint;
 }
 
-static void intel_pmu_pebs_enable(struct hw_perf_event *hwc)
+static void intel_pmu_pebs_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
 	u64 val = cpuc->pebs_enabled;
 
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
 	val |= 1ULL << hwc->idx;
 	wrmsrl(MSR_IA32_PEBS_ENABLE, val);
+
+	intel_pmu_lbr_enable(event);
 }
 
-static void intel_pmu_pebs_disable(struct hw_perf_event *hwc)
+static void intel_pmu_pebs_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
 	u64 val = cpuc->pebs_enabled;
 
 	val &= ~(1ULL << hwc->idx);
 	wrmsrl(MSR_IA32_PEBS_ENABLE, val);
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+
+	intel_pmu_lbr_disable(event);
 }
 
 static void intel_pmu_pebs_enable_all(void)
@@ -369,6 +375,70 @@ static void intel_pmu_pebs_disable_all(void)
 	wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
 }
 
+#include <asm/insn.h>
+
+#define MAX_INSN_SIZE	16
+
+static inline bool kernel_ip(unsigned long ip)
+{
+#ifdef CONFIG_X86_32
+	return ip > PAGE_OFFSET;
+#else
+	return (long)ip < 0;
+#endif
+}
+
+static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	unsigned long from = cpuc->lbr_entries[0].from;
+	unsigned long old_to, to = cpuc->lbr_entries[0].to;
+	unsigned long ip = regs->ip;
+
+	if (!cpuc->lbr_stack.nr || !from || !to)
+		return 0;
+
+	if (ip < to)
+		return 0;
+
+	/*
+	 * We sampled a branch insn, rewind using the LBR stack
+	 */
+	if (ip == to) {
+		regs->ip = from;
+		return 1;
+	}
+
+	do {
+		struct insn insn;
+		u8 buf[MAX_INSN_SIZE];
+		void *kaddr;
+
+		old_to = to;
+		if (!kernel_ip(ip)) {
+			int bytes, size = min_t(int, MAX_INSN_SIZE, ip - to);
+
+			bytes = copy_from_user_nmi(buf, (void __user *)to, size);
+			if (bytes != size)
+				return 0;
+
+			kaddr = buf;
+		} else
+			kaddr = (void *)to;
+
+		kernel_insn_init(&insn, kaddr);
+		insn_get_length(&insn);
+		to += insn.length;
+	} while (to < ip);
+
+	if (to == ip) {
+		regs->ip = old_to;
+		return 1;
+	}
+
+	return 0;
+}
+
 static int intel_pmu_save_and_restart(struct perf_event *event);
 static void intel_pmu_disable_event(struct perf_event *event);
 
@@ -424,6 +494,11 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	regs.bp = at->bp;
 	regs.sp = at->sp;
 
+	if (intel_pmu_pebs_fixup_ip(&regs))
+		regs.flags |= PERF_EFLAGS_EXACT;
+	else
+		regs.flags &= ~PERF_EFLAGS_EXACT;
+
 	if (perf_event_overflow(event, 1, &data, &regs))
 		intel_pmu_disable_event(event);
 
@@ -487,6 +562,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	regs.bp = at->bp;
 	regs.sp = at->sp;
 
+	if (intel_pmu_pebs_fixup_ip(&regs))
+		regs.flags |= PERF_EFLAGS_EXACT;
+	else
+		regs.flags &= ~PERF_EFLAGS_EXACT;
+
 	if (perf_event_overflow(event, 1, &data, &regs))
 		intel_pmu_disable_event(event);
 }
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ab4fd9ede264..be85f7c4a94f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -294,6 +294,12 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_USER		(2 << 0)
 #define PERF_RECORD_MISC_HYPERVISOR	(3 << 0)
 
+#define PERF_RECORD_MISC_EXACT		(1 << 14)
+/*
+ * Reserve the last bit to indicate some extended misc field
+ */
+#define PERF_RECORD_MISC_EXT_RESERVED	(1 << 15)
+
 struct perf_event_header {
 	__u32	type;
 	__u16	misc;