author		Peter Zijlstra <a.p.zijlstra@chello.nl>	2010-03-03 07:12:23 -0500
committer	Ingo Molnar <mingo@elte.hu>	2010-03-10 07:23:32 -0500
commit		ef21f683a045a79b6aa86ad81e5fdfc0d5ddd250 (patch)
tree		ccf39f5051608c1eccac9171259c2d7bc381cc96
parent		caff2befffe899e63df5cc760b7ed01cfd902685 (diff)
perf, x86: use LBR for PEBS IP+1 fixup
Use the LBR to fix up the PEBS IP+1 issue.

As said, PEBS reports the next instruction; here we use the LBR to find
the last branch and, from that, construct the actual IP. If the IP matches
the LBR-TO, we use LBR-FROM, otherwise we use the LBR-TO address as the
beginning of the last basic block and decode forward. Once we find a match
to the current IP, we use the previous location.

This patch introduces a new ABI element: PERF_RECORD_MISC_EXACT, which
conveys that the reported IP (PERF_SAMPLE_IP) is the exact instruction
that caused the event (barring CPU errata).

The fixup can fail for various reasons:

 1) LBR contains invalid data (quite possible)
 2) part of the basic block got paged out
 3) the reported IP isn't part of the basic block (see 1)

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.619375431@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
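For illustration only (not part of this commit): a minimal user-space sketch of how a tool reading the perf mmap ring buffer might consume the new bit. The helper name sample_ip_is_exact and the fallback #define are hypothetical; the bit value mirrors the definition this patch adds to include/linux/perf_event.h, and the sketch assumes the event was opened with attr.precise set so the PEBS/LBR fixup below is active.

	#include <stdbool.h>
	#include <linux/perf_event.h>

	/* Fallback for headers predating this patch; value as added below. */
	#ifndef PERF_RECORD_MISC_EXACT
	#define PERF_RECORD_MISC_EXACT	(1 << 14)
	#endif

	/*
	 * Hypothetical helper: true when the kernel marked the sampled IP
	 * (PERF_SAMPLE_IP) as the exact instruction that raised the event,
	 * i.e. the LBR-based fixup succeeded.
	 */
	static bool sample_ip_is_exact(const struct perf_event_header *hdr)
	{
		return hdr->type == PERF_RECORD_SAMPLE &&
		       (hdr->misc & PERF_RECORD_MISC_EXACT);
	}

Samples without the bit set should still be treated as carrying the traditional skidded IP.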
-rw-r--r--	arch/x86/include/asm/perf_event.h	19
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	70
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c	4
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_ds.c	84
-rw-r--r--	include/linux/perf_event.h	6
5 files changed, 144 insertions(+), 39 deletions(-)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index db6109a885a7..a9038c951619 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -136,6 +136,25 @@ extern void perf_events_lapic_init(void);
 
 #define PERF_EVENT_INDEX_OFFSET			0
 
+/*
+ * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
+ * This flag is otherwise unused and ABI specified to be 0, so nobody should
+ * care what we do with it.
+ */
+#define PERF_EFLAGS_EXACT	(1UL << 3)
+
+#define perf_misc_flags(regs)				\
+({	int misc = 0;					\
+	if (user_mode(regs))				\
+		misc |= PERF_RECORD_MISC_USER;		\
+	else						\
+		misc |= PERF_RECORD_MISC_KERNEL;	\
+	if (regs->flags & PERF_EFLAGS_EXACT)		\
+		misc |= PERF_RECORD_MISC_EXACT;		\
+	misc; })
+
+#define perf_instruction_pointer(regs)	((regs)->ip)
+
 #else
 static inline void init_hw_perf_events(void)		{ }
 static inline void perf_events_lapic_init(void)	{ }
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1badff6b6b28..5cb4e8dcee4b 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -29,6 +29,41 @@
 #include <asm/stacktrace.h>
 #include <asm/nmi.h>
 
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long offset, addr = (unsigned long)from;
+	int type = in_nmi() ? KM_NMI : KM_IRQ0;
+	unsigned long size, len = 0;
+	struct page *page;
+	void *map;
+	int ret;
+
+	do {
+		ret = __get_user_pages_fast(addr, 1, 0, &page);
+		if (!ret)
+			break;
+
+		offset = addr & (PAGE_SIZE - 1);
+		size = min(PAGE_SIZE - offset, n - len);
+
+		map = kmap_atomic(page, type);
+		memcpy(to, map+offset, size);
+		kunmap_atomic(map, type);
+		put_page(page);
+
+		len  += size;
+		to   += size;
+		addr += size;
+
+	} while (len < n);
+
+	return len;
+}
+
 static u64 perf_event_mask __read_mostly;
 
 struct event_constraint {
@@ -1550,41 +1585,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 	dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
 }
 
-/*
- * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
- */
-static unsigned long
-copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
-{
-	unsigned long offset, addr = (unsigned long)from;
-	int type = in_nmi() ? KM_NMI : KM_IRQ0;
-	unsigned long size, len = 0;
-	struct page *page;
-	void *map;
-	int ret;
-
-	do {
-		ret = __get_user_pages_fast(addr, 1, 0, &page);
-		if (!ret)
-			break;
-
-		offset = addr & (PAGE_SIZE - 1);
-		size = min(PAGE_SIZE - offset, n - len);
-
-		map = kmap_atomic(page, type);
-		memcpy(to, map+offset, size);
-		kunmap_atomic(map, type);
-		put_page(page);
-
-		len  += size;
-		to   += size;
-		addr += size;
-
-	} while (len < n);
-
-	return len;
-}
-
 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
 {
 	unsigned long bytes;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 44f6ed42a934..7eb78be3b229 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -547,7 +547,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
 	x86_pmu_disable_event(event);
 
 	if (unlikely(event->attr.precise))
-		intel_pmu_pebs_disable(hwc);
+		intel_pmu_pebs_disable(event);
 }
 
 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
@@ -600,7 +600,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
 	}
 
 	if (unlikely(event->attr.precise))
-		intel_pmu_pebs_enable(hwc);
+		intel_pmu_pebs_enable(event);
 
 	__x86_pmu_enable_event(hwc);
 }
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 0d994ef213b9..50e6ff3281fc 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -331,26 +331,32 @@ intel_pebs_constraints(struct perf_event *event)
 	return &emptyconstraint;
 }
 
-static void intel_pmu_pebs_enable(struct hw_perf_event *hwc)
+static void intel_pmu_pebs_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
 	u64 val = cpuc->pebs_enabled;
 
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
 	val |= 1ULL << hwc->idx;
 	wrmsrl(MSR_IA32_PEBS_ENABLE, val);
+
+	intel_pmu_lbr_enable(event);
 }
 
-static void intel_pmu_pebs_disable(struct hw_perf_event *hwc)
+static void intel_pmu_pebs_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
 	u64 val = cpuc->pebs_enabled;
 
 	val &= ~(1ULL << hwc->idx);
 	wrmsrl(MSR_IA32_PEBS_ENABLE, val);
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+
+	intel_pmu_lbr_disable(event);
 }
 
 static void intel_pmu_pebs_enable_all(void)
@@ -369,6 +375,70 @@ static void intel_pmu_pebs_disable_all(void)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
 }
 
+#include <asm/insn.h>
+
+#define MAX_INSN_SIZE	16
+
+static inline bool kernel_ip(unsigned long ip)
+{
+#ifdef CONFIG_X86_32
+	return ip > PAGE_OFFSET;
+#else
+	return (long)ip < 0;
+#endif
+}
+
+static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	unsigned long from = cpuc->lbr_entries[0].from;
+	unsigned long old_to, to = cpuc->lbr_entries[0].to;
+	unsigned long ip = regs->ip;
+
+	if (!cpuc->lbr_stack.nr || !from || !to)
+		return 0;
+
+	if (ip < to)
+		return 0;
+
+	/*
+	 * We sampled a branch insn, rewind using the LBR stack
+	 */
+	if (ip == to) {
+		regs->ip = from;
+		return 1;
+	}
+
+	do {
+		struct insn insn;
+		u8 buf[MAX_INSN_SIZE];
+		void *kaddr;
+
+		old_to = to;
+		if (!kernel_ip(ip)) {
+			int bytes, size = min_t(int, MAX_INSN_SIZE, ip - to);
+
+			bytes = copy_from_user_nmi(buf, (void __user *)to, size);
+			if (bytes != size)
+				return 0;
+
+			kaddr = buf;
+		} else
+			kaddr = (void *)to;
+
+		kernel_insn_init(&insn, kaddr);
+		insn_get_length(&insn);
+		to += insn.length;
+	} while (to < ip);
+
+	if (to == ip) {
+		regs->ip = old_to;
+		return 1;
+	}
+
+	return 0;
+}
+
 static int intel_pmu_save_and_restart(struct perf_event *event);
 static void intel_pmu_disable_event(struct perf_event *event);
 
@@ -424,6 +494,11 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	regs.bp = at->bp;
 	regs.sp = at->sp;
 
+	if (intel_pmu_pebs_fixup_ip(&regs))
+		regs.flags |= PERF_EFLAGS_EXACT;
+	else
+		regs.flags &= ~PERF_EFLAGS_EXACT;
+
 	if (perf_event_overflow(event, 1, &data, &regs))
 		intel_pmu_disable_event(event);
 
@@ -487,6 +562,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 		regs.bp = at->bp;
 		regs.sp = at->sp;
 
+		if (intel_pmu_pebs_fixup_ip(&regs))
+			regs.flags |= PERF_EFLAGS_EXACT;
+		else
+			regs.flags &= ~PERF_EFLAGS_EXACT;
+
 		if (perf_event_overflow(event, 1, &data, &regs))
 			intel_pmu_disable_event(event);
 	}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ab4fd9ede264..be85f7c4a94f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -294,6 +294,12 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_USER			(2 << 0)
 #define PERF_RECORD_MISC_HYPERVISOR		(3 << 0)
 
+#define PERF_RECORD_MISC_EXACT			(1 << 14)
+/*
+ * Reserve the last bit to indicate some extended misc field
+ */
+#define PERF_RECORD_MISC_EXT_RESERVED		(1 << 15)
+
 struct perf_event_header {
 	__u32	type;
 	__u16	misc;