aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/util/annotate.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-08-31 22:49:05 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-08-31 22:49:05 -0400
commit41d859a83c567a9c9f50a34082cc64aab0abb0cd (patch)
treeab911ea521701401413d041e1b92225f3dbdab41 /tools/perf/util/annotate.c
parent4658000955d1864b54890214434e171949c7f1c5 (diff)
parentbac2e4a96d1c0bcce5e9654dcc902f75576b9b03 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "Main perf kernel side changes: - uprobes updates/fixes. (Oleg Nesterov) - Add PERF_RECORD_SWITCH to indicate context switches and use it in tooling. (Adrian Hunter) - Support BPF programs attached to uprobes and first steps for BPF tooling support. (Wang Nan) - x86 generic x86 MSR-to-perf PMU driver. (Andy Lutomirski) - x86 Intel PT, LBR and BTS updates. (Alexander Shishkin) - x86 Intel Skylake support. (Andi Kleen) - x86 Intel Knights Landing (KNL) RAPL support. (Dasaratharaman Chandramouli) - x86 Intel Broadwell-DE uncore support. (Kan Liang) - x86 hw breakpoints robustization (Andy Lutomirski) Main perf tooling side changes: - Support Intel PT in several tools, enabling the use of the processor trace feature introduced in Intel Broadwell processors: (Adrian Hunter) # dmesg | grep Performance # [0.188477] Performance Events: PEBS fmt2+, 16-deep LBR, Broadwell events, full-width counters, Intel PMU driver. # perf record -e intel_pt//u -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.216 MB perf.data ] # perf script # then navigate in the tool output to some area, like this one: 184 1030 dl_main (/usr/lib64/ld-2.17.so) => 7f21ba661440 dl_main (/usr/lib64/ld-2.17.so) 185 1457 dl_main (/usr/lib64/ld-2.17.so) => 7f21ba669f10 _dl_new_object (/usr/lib64/ld-2.17.so) 186 9f37 _dl_new_object (/usr/lib64/ld-2.17.so) => 7f21ba677b90 strlen (/usr/lib64/ld-2.17.so) 187 7ba3 strlen (/usr/lib64/ld-2.17.so) => 7f21ba677c75 strlen (/usr/lib64/ld-2.17.so) 188 7c78 strlen (/usr/lib64/ld-2.17.so) => 7f21ba669f3c _dl_new_object (/usr/lib64/ld-2.17.so) 189 9f8a _dl_new_object (/usr/lib64/ld-2.17.so) => 7f21ba65fab0 calloc@plt (/usr/lib64/ld-2.17.so) 190 fab0 calloc@plt (/usr/lib64/ld-2.17.so) => 7f21ba675e70 calloc (/usr/lib64/ld-2.17.so) 191 5e87 calloc (/usr/lib64/ld-2.17.so) => 7f21ba65fa90 malloc@plt (/usr/lib64/ld-2.17.so) 192 fa90 malloc@plt (/usr/lib64/ld-2.17.so) => 7f21ba675e60 malloc 
(/usr/lib64/ld-2.17.so) 193 5e68 malloc (/usr/lib64/ld-2.17.so) => 7f21ba65fa80 __libc_memalign@plt (/usr/lib64/ld-2.17.so) 194 fa80 __libc_memalign@plt (/usr/lib64/ld-2.17.so) => 7f21ba675d50 __libc_memalign (/usr/lib64/ld-2.17.so) 195 5d63 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675e20 __libc_memalign (/usr/lib64/ld-2.17.so) 196 5e40 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675d73 __libc_memalign (/usr/lib64/ld-2.17.so) 197 5d97 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675e18 __libc_memalign (/usr/lib64/ld-2.17.so) 198 5e1e __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675df9 __libc_memalign (/usr/lib64/ld-2.17.so) 199 5e10 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba669f8f _dl_new_object (/usr/lib64/ld-2.17.so) 200 9fc2 _dl_new_object (/usr/lib64/ld-2.17.so) => 7f21ba678e70 memcpy (/usr/lib64/ld-2.17.so) 201 8e8c memcpy (/usr/lib64/ld-2.17.so) => 7f21ba678ea0 memcpy (/usr/lib64/ld-2.17.so) - Add support for using several Intel PT features (CYC, MTC packets), the relevant documentation was updated in: tools/perf/Documentation/intel-pt.txt briefly describing those packets, its purposes, how to configure them in the event config terms and relevant external documentation for further reading. (Adrian Hunter) - Introduce support for probing at an absolute address, for user and kernel 'perf probe's, useful when one have the symbol maps on a developer machine but not on an embedded system. (Wang Nan) - Add Intel BTS support, with a call-graph script to show it and PT in use in a GUI using 'perf script' python scripting with postgresql and Qt. (Adrian Hunter) - Allow selecting the type of callchains per event, including disabling callchains in all but one entry in an event list, to save space, and also to ask for the callchains collected in one event to be used in other events. (Kan Liang) - Beautify more syscall arguments in 'perf trace': (Arnaldo Carvalho de Melo) * A bunch more translate file/pathnames from pointers to strings. 
* Convert numbers to strings for the 'keyctl' syscall 'option' arg. * Add missing 'clockid' entries. - Introduce 'srcfile' sort key: (Andi Kleen) # perf record -F 10000 usleep 1 # perf report --stdio --dsos '[kernel.vmlinux]' -s srcfile <SNIP> # Overhead Source File 26.49% copy_page_64.S 5.49% signal.c 0.51% msr.h # It can be combined with other fields, for instance, experiment with '-s srcfile,symbol'. There are some oddities in some distros and with some specific DSOs, being investigated, so your mileage may vary. - Support per-event 'freq' term: (Namhyung Kim) $ perf record -e 'cpu/instructions,freq=1234/',cycles -c 1000 sleep 1 $ perf evlist -F cpu/instructions,freq=1234/: sample_freq=1234 cycles: sample_period=1000 $ - Deref sys_enter pointer args with contents from probe:vfs_getname, showing pathnames instead of pointers in many syscalls in 'perf trace'. (Arnaldo Carvalho de Melo) - Stop collecting /proc/kallsyms in perf.data files, saving about 4.5MB on a typical x86-64 system, use the symbol resolution routines used in all the other tools (report, top, etc) now that we can ask libtraceevent to use perf's symbol resolution code. (Arnaldo Carvalho de Melo) - Allow filtering out of perf's PID via 'perf record --exclude-perf'. (Wang Nan) - 'perf trace' now supports syscall groups, like strace, i.e: $ trace -e file touch file Will expand 'file' into multiple, file related, syscalls. More work needed to add extra groups for other syscall groups, and also to complement what was added for the 'file' group, included as a proof of concept. (Arnaldo Carvalho de Melo) - Add lock_pi stresser to 'perf bench futex', to test the kernel code related to FUTEX_(UN)LOCK_PI. (Davidlohr Bueso) - Let user have timestamps with per-thread recording in 'perf record' (Adrian Hunter) - ... 
and tons of other changes, see the shortlog and the Git log for details" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (240 commits) perf evlist: Add backpointer for perf_env to evlist perf tools: Rename perf_session_env to perf_env perf tools: Do not change lib/api/fs/debugfs directly perf tools: Add tracing_path and remove unneeded functions perf buildid: Introduce sysfs/filename__sprintf_build_id perf evsel: Add a backpointer to the evlist a evsel is in perf trace: Add header with copyright and background info perf scripts python: Add new compaction-times script perf stat: Get correct cpu id for print_aggr tools lib traceeveent: Allow for negative numbers in print format perf script: Add --[no-]-demangle/--[no-]-demangle-kernel tracing/uprobes: Do not print '0x (null)' when offset is 0 perf probe: Support probing at absolute address perf probe: Fix error reported when offset without function perf probe: Fix list result when address is zero perf probe: Fix list result when symbol can't be found tools build: Allow duplicate objects in the object list perf tools: Remove export.h from MANIFEST perf probe: Prevent segfault when reading probe point with absolute address perf tools: Update Intel PT documentation ...
Diffstat (limited to 'tools/perf/util/annotate.c')
-rw-r--r--tools/perf/util/annotate.c128
1 files changed, 125 insertions, 3 deletions
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 03b7bc70eb66..d1eece70b84d 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -473,17 +473,73 @@ int symbol__alloc_hist(struct symbol *sym)
473 return 0; 473 return 0;
474} 474}
475 475
476/* The cycles histogram is lazily allocated. */
477static int symbol__alloc_hist_cycles(struct symbol *sym)
478{
479 struct annotation *notes = symbol__annotation(sym);
480 const size_t size = symbol__size(sym);
481
482 notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist));
483 if (notes->src->cycles_hist == NULL)
484 return -1;
485 return 0;
486}
487
476void symbol__annotate_zero_histograms(struct symbol *sym) 488void symbol__annotate_zero_histograms(struct symbol *sym)
477{ 489{
478 struct annotation *notes = symbol__annotation(sym); 490 struct annotation *notes = symbol__annotation(sym);
479 491
480 pthread_mutex_lock(&notes->lock); 492 pthread_mutex_lock(&notes->lock);
481 if (notes->src != NULL) 493 if (notes->src != NULL) {
482 memset(notes->src->histograms, 0, 494 memset(notes->src->histograms, 0,
483 notes->src->nr_histograms * notes->src->sizeof_sym_hist); 495 notes->src->nr_histograms * notes->src->sizeof_sym_hist);
496 if (notes->src->cycles_hist)
497 memset(notes->src->cycles_hist, 0,
498 symbol__size(sym) * sizeof(struct cyc_hist));
499 }
484 pthread_mutex_unlock(&notes->lock); 500 pthread_mutex_unlock(&notes->lock);
485} 501}
486 502
503static int __symbol__account_cycles(struct annotation *notes,
504 u64 start,
505 unsigned offset, unsigned cycles,
506 unsigned have_start)
507{
508 struct cyc_hist *ch;
509
510 ch = notes->src->cycles_hist;
511 /*
512 * For now we can only account one basic block per
513 * final jump. But multiple could be overlapping.
514 * Always account the longest one. So when
515 * a shorter one has been already seen throw it away.
516 *
517 * We separately always account the full cycles.
518 */
519 ch[offset].num_aggr++;
520 ch[offset].cycles_aggr += cycles;
521
522 if (!have_start && ch[offset].have_start)
523 return 0;
524 if (ch[offset].num) {
525 if (have_start && (!ch[offset].have_start ||
526 ch[offset].start > start)) {
527 ch[offset].have_start = 0;
528 ch[offset].cycles = 0;
529 ch[offset].num = 0;
530 if (ch[offset].reset < 0xffff)
531 ch[offset].reset++;
532 } else if (have_start &&
533 ch[offset].start < start)
534 return 0;
535 }
536 ch[offset].have_start = have_start;
537 ch[offset].start = start;
538 ch[offset].cycles += cycles;
539 ch[offset].num++;
540 return 0;
541}
542
487static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, 543static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
488 struct annotation *notes, int evidx, u64 addr) 544 struct annotation *notes, int evidx, u64 addr)
489{ 545{
@@ -506,7 +562,7 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
506 return 0; 562 return 0;
507} 563}
508 564
509static struct annotation *symbol__get_annotation(struct symbol *sym) 565static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles)
510{ 566{
511 struct annotation *notes = symbol__annotation(sym); 567 struct annotation *notes = symbol__annotation(sym);
512 568
@@ -514,6 +570,10 @@ static struct annotation *symbol__get_annotation(struct symbol *sym)
514 if (symbol__alloc_hist(sym) < 0) 570 if (symbol__alloc_hist(sym) < 0)
515 return NULL; 571 return NULL;
516 } 572 }
573 if (!notes->src->cycles_hist && cycles) {
574 if (symbol__alloc_hist_cycles(sym) < 0)
575 return NULL;
576 }
517 return notes; 577 return notes;
518} 578}
519 579
@@ -524,12 +584,73 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
524 584
525 if (sym == NULL) 585 if (sym == NULL)
526 return 0; 586 return 0;
527 notes = symbol__get_annotation(sym); 587 notes = symbol__get_annotation(sym, false);
528 if (notes == NULL) 588 if (notes == NULL)
529 return -ENOMEM; 589 return -ENOMEM;
530 return __symbol__inc_addr_samples(sym, map, notes, evidx, addr); 590 return __symbol__inc_addr_samples(sym, map, notes, evidx, addr);
531} 591}
532 592
593static int symbol__account_cycles(u64 addr, u64 start,
594 struct symbol *sym, unsigned cycles)
595{
596 struct annotation *notes;
597 unsigned offset;
598
599 if (sym == NULL)
600 return 0;
601 notes = symbol__get_annotation(sym, true);
602 if (notes == NULL)
603 return -ENOMEM;
604 if (addr < sym->start || addr >= sym->end)
605 return -ERANGE;
606
607 if (start) {
608 if (start < sym->start || start >= sym->end)
609 return -ERANGE;
610 if (start >= addr)
611 start = 0;
612 }
613 offset = addr - sym->start;
614 return __symbol__account_cycles(notes,
615 start ? start - sym->start : 0,
616 offset, cycles,
617 !!start);
618}
619
620int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
621 struct addr_map_symbol *start,
622 unsigned cycles)
623{
624 u64 saddr = 0;
625 int err;
626
627 if (!cycles)
628 return 0;
629
630 /*
631 * Only set start when IPC can be computed. We can only
632 * compute it when the basic block is completely in a single
633 * function.
634 * Special case the case when the jump is elsewhere, but
635 * it starts on the function start.
636 */
637 if (start &&
638 (start->sym == ams->sym ||
639 (ams->sym &&
640 start->addr == ams->sym->start + ams->map->start)))
641 saddr = start->al_addr;
642 if (saddr == 0)
643 pr_debug2("BB with bad start: addr %"PRIx64" start %"PRIx64" sym %"PRIx64" saddr %"PRIx64"\n",
644 ams->addr,
645 start ? start->addr : 0,
646 ams->sym ? ams->sym->start + ams->map->start : 0,
647 saddr);
648 err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles);
649 if (err)
650 pr_debug2("account_cycles failed %d\n", err);
651 return err;
652}
653
533int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx) 654int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx)
534{ 655{
535 return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr); 656 return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr);
@@ -1005,6 +1126,7 @@ fallback:
1005 dso->annotate_warned = 1; 1126 dso->annotate_warned = 1;
1006 pr_err("Can't annotate %s:\n\n" 1127 pr_err("Can't annotate %s:\n\n"
1007 "No vmlinux file%s\nwas found in the path.\n\n" 1128 "No vmlinux file%s\nwas found in the path.\n\n"
1129 "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n"
1008 "Please use:\n\n" 1130 "Please use:\n\n"
1009 " perf buildid-cache -vu vmlinux\n\n" 1131 " perf buildid-cache -vu vmlinux\n\n"
1010 "or:\n\n" 1132 "or:\n\n"