aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/util/evlist.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-11-11 20:06:34 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2013-11-11 20:06:34 -0500
commitad5d69899e52792671c1aa6c7360464c7edfe09c (patch)
tree21833c1fdab4b3cf791d4fdc86dd578e4a620514 /tools/perf/util/evlist.c
parentef1417a5a6a400dbc1a2f44da716ab146a29ddc4 (diff)
parentcaea6cf52139116e43e615d87fcbf9823e197fdf (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "As a first remark I'd like to note that the way to build perf tooling has been simplified and sped up, in the future it should be enough for you to build perf via: cd tools/perf/ make install (ie without the -j option.) The build system will figure out the number of CPUs and will do a parallel build+install. The various build system inefficiencies and breakages Linus reported against the v3.12 pull request should now be resolved - please (re-)report any remaining annoyances or bugs. Main changes on the perf kernel side: * Performance optimizations: . perf ring-buffer code optimizations, by Peter Zijlstra . perf ring-buffer code optimizations, by Oleg Nesterov . x86 NMI call-stack processing optimizations, by Peter Zijlstra . perf context-switch optimizations, by Peter Zijlstra . perf sampling speedups, by Peter Zijlstra . x86 Intel PEBS processing speedups, by Peter Zijlstra * Enhanced hardware support: . for Intel Ivy Bridge-EP uncore PMUs, by Zheng Yan . for Haswell transactions, by Andi Kleen, Peter Zijlstra * Core perf events code enhancements and fixes by Oleg Nesterov: . for uprobes, if fork() is called with pending ret-probes . for uprobes platform support code * New ABI details by Andi Kleen: . Report x86 Haswell TSX transaction abort cost as weight Main changes on the perf tooling side (some of these tooling changes utilize the above kernel side changes): * 'perf report/top' enhancements: . Convert callchain children list to rbtree, greatly reducing the time taken for callchain processing, from Namhyung Kim. . Add new COMM infrastructure, further improving histogram processing, from Frédéric Weisbecker, one fix from Namhyung Kim. . Add /proc/kcore based live-annotation improvements, including build-id cache support, multi map 'call' instruction navigation fixes, kcore address validation, objdump workarounds. From Adrian Hunter. . Show progress on histogram collapsing, that can take a long time, from Namhyung Kim. . 
Add --max-stack option to limit callchain stack scan in 'top' and 'report', improving callchain processing when reducing the stack depth is an option, from Waiman Long. . Add new option --ignore-vmlinux for perf top, from Willy Tarreau. * 'perf trace' enhancements: . 'perf trace' now can use 'perf probe' dynamic tracepoints to hook into the userspace -> kernel pathname copy so that it can map fds to pathnames without reading /proc/pid/fd/ symlinks. From Arnaldo Carvalho de Melo. . Show VFS path associated with fd in live sessions, using a 'vfs_getname' 'perf probe' created dynamic tracepoint or by looking at /proc/pid/fd, from Arnaldo Carvalho de Melo. . Add 'trace' beautifiers for lots of syscall arguments, from Arnaldo Carvalho de Melo. . Implement more compact 'trace' output by suppressing zeroed args, from Arnaldo Carvalho de Melo. . Show thread COMM by default in 'trace', from Arnaldo Carvalho de Melo. . Add option to show full timestamp in 'trace', from David Ahern. . Add 'record' command in 'trace', to record raw_syscalls:*, from David Ahern. . Add summary option to dump syscall statistics in 'trace', from David Ahern. . Improve error messages in 'trace', providing hints about system configuration steps needed for using it, from Ramkumar Ramachandra. . 'perf trace' now emits hints as to why tracing is not possible, helping the user to setup the system to allow tracing in the desired permission granularity, telling if the problem is due to debugfs not being mounted or with not enough permission for !root, /proc/sys/kernel/perf_event_paranoid value, etc. From Arnaldo Carvalho de Melo. * 'perf record' enhancements: . Check maximum frequency rate for record/top, emitting better error messages, from Jiri Olsa. . 'perf record' code cleanups, from David Ahern. . Improve write_output error message in 'perf record', from Adrian Hunter. . Allow specifying B/K/M/G unit to the --mmap-pages arguments, from Jiri Olsa. . 
Fix command line callchain attribute tests to handle the new -g/--call-chain semantics, from Arnaldo Carvalho de Melo. * 'perf kvm' enhancements: . Disable live kvm command if timerfd is not supported, from David Ahern. . Fix detection of non-core features, from David Ahern. * 'perf list' enhancements: . Add usage to 'perf list', from David Ahern. . Show error in 'perf list' if tracepoints not available, from Pekka Enberg. * 'perf probe' enhancements: . Support "$vars" meta argument syntax for local variables, allowing asking for all possible variables at a given probe point to be collected when it hits, from Masami Hiramatsu. * 'perf sched' enhancements: . Address the root cause of that 'perf sched' stack initialization build slowdown, by programmatically setting a big array after moving the global variable back to the stack. Fix from Adrian Hunter. * 'perf script' enhancements: . Set up output options for in-stream attributes, from Adrian Hunter. . Print addr by default for BTS in 'perf script', from Adrian Hunter * 'perf stat' enhancements: . Improved messages when doing profiling in all or a subset of CPUs using a workload as the session delimiter, as in: 'perf stat --cpu 0,2 sleep 10s' from Arnaldo Carvalho de Melo. . Add units to nanosec-based counters in 'perf stat', from David Ahern. . Remove bogus info when using 'perf stat' -e cycles/instructions, from Ramkumar Ramachandra. * 'perf lock' enhancements: . 'perf lock' fixes and cleanups, from Davidlohr Bueso. * 'perf test' enhancements: . Fixup PERF_SAMPLE_TRANSACTION handling in sample synthesizing and 'perf test', from Adrian Hunter. . Clarify the "sample parsing" test entry, from Arnaldo Carvalho de Melo. . Consider PERF_SAMPLE_TRANSACTION in the "sample parsing" test, from Arnaldo Carvalho de Melo. . Memory leak fixes in 'perf test', from Felipe Pena. * 'perf bench' enhancements: . Change the procps visible command-name of individual benchmark tests plus cleanups, from Ingo Molnar. 
* Generic perf tooling infrastructure/plumbing changes: . Separating data file properties from session, code reorganization from Jiri Olsa. . Fix version when building out of tree, as when using one of these: $ make help | grep perf perf-tar-src-pkg - Build perf-3.12.0.tar source tarball perf-targz-src-pkg - Build perf-3.12.0.tar.gz source tarball perf-tarbz2-src-pkg - Build perf-3.12.0.tar.bz2 source tarball perf-tarxz-src-pkg - Build perf-3.12.0.tar.xz source tarball $ from David Ahern. . Enhance option parse error message, showing just the help lines of the options affected, from Namhyung Kim. . libtraceevent updates from upstream trace-cmd repo, from Steven Rostedt. . Always use perf_evsel__set_sample_bit to set sample_type, from Adrian Hunter. . Memory and mmap leak fixes from Chenggang Qin. . Assorted build fixes from David Ahern and Jiri Olsa. . Speed up and prettify the build system, from Ingo Molnar. . Implement addr2line directly using libbfd, from Roberto Vitillo. . Separate the GTK support in a separate libperf-gtk.so DSO, that is only loaded when --gtk is specified, from Namhyung Kim. . perf bash completion fixes and improvements from Ramkumar Ramachandra. . Support for Openembedded/Yocto -dbg packages, from Ricardo Ribalda Delgado. And lots and lots of other fixes and code reorganizations that did not make it into the list, see the shortlog, diffstat and the Git log for details!" 
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (300 commits) uprobes: Fix the memory out of bound overwrite in copy_insn() uprobes: Fix the wrong usage of current->utask in uprobe_copy_process() perf tools: Remove unneeded include perf record: Remove post_processing_offset variable perf record: Remove advance_output function perf record: Refactor feature handling into a separate function perf trace: Don't relookup fields by name in each sample perf tools: Fix version when building out of tree perf evsel: Ditch evsel->handler.data field uprobes: Export write_opcode() as uprobe_write_opcode() uprobes: Introduce arch_uprobe->ixol uprobes: Kill module_init() and module_exit() uprobes: Move function declarations out of arch perf/x86/intel: Add Ivy Bridge-EP uncore IRP box support perf/x86/intel/uncore: Add filter support for IvyBridge-EP QPI boxes perf: Factor out strncpy() in perf_event_mmap_event() tools/perf: Add required memory barriers perf: Fix arch_perf_out_copy_user default perf: Update a stale comment perf: Optimize perf_output_begin() -- address calculation ...
Diffstat (limited to 'tools/perf/util/evlist.c')
-rw-r--r--tools/perf/util/evlist.c249
1 files changed, 188 insertions, 61 deletions
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e584cd30b0f2..b939221efd8d 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -18,6 +18,7 @@
18#include <unistd.h> 18#include <unistd.h>
19 19
20#include "parse-events.h" 20#include "parse-events.h"
21#include "parse-options.h"
21 22
22#include <sys/mman.h> 23#include <sys/mman.h>
23 24
@@ -49,6 +50,18 @@ struct perf_evlist *perf_evlist__new(void)
49 return evlist; 50 return evlist;
50} 51}
51 52
53struct perf_evlist *perf_evlist__new_default(void)
54{
55 struct perf_evlist *evlist = perf_evlist__new();
56
57 if (evlist && perf_evlist__add_default(evlist)) {
58 perf_evlist__delete(evlist);
59 evlist = NULL;
60 }
61
62 return evlist;
63}
64
52/** 65/**
53 * perf_evlist__set_id_pos - set the positions of event ids. 66 * perf_evlist__set_id_pos - set the positions of event ids.
54 * @evlist: selected event list 67 * @evlist: selected event list
@@ -242,7 +255,7 @@ int perf_evlist__add_newtp(struct perf_evlist *evlist,
242 if (evsel == NULL) 255 if (evsel == NULL)
243 return -1; 256 return -1;
244 257
245 evsel->handler.func = handler; 258 evsel->handler = handler;
246 perf_evlist__add(evlist, evsel); 259 perf_evlist__add(evlist, evsel);
247 return 0; 260 return 0;
248} 261}
@@ -527,7 +540,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
527 if ((old & md->mask) + size != ((old + size) & md->mask)) { 540 if ((old & md->mask) + size != ((old + size) & md->mask)) {
528 unsigned int offset = old; 541 unsigned int offset = old;
529 unsigned int len = min(sizeof(*event), size), cpy; 542 unsigned int len = min(sizeof(*event), size), cpy;
530 void *dst = &md->event_copy; 543 void *dst = md->event_copy;
531 544
532 do { 545 do {
533 cpy = min(md->mask + 1 - (offset & md->mask), len); 546 cpy = min(md->mask + 1 - (offset & md->mask), len);
@@ -537,7 +550,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
537 len -= cpy; 550 len -= cpy;
538 } while (len); 551 } while (len);
539 552
540 event = &md->event_copy; 553 event = (union perf_event *) md->event_copy;
541 } 554 }
542 555
543 old += size; 556 old += size;
@@ -594,6 +607,8 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist,
594 evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, prot, 607 evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, prot,
595 MAP_SHARED, fd, 0); 608 MAP_SHARED, fd, 0);
596 if (evlist->mmap[idx].base == MAP_FAILED) { 609 if (evlist->mmap[idx].base == MAP_FAILED) {
610 pr_debug2("failed to mmap perf event ring buffer, error %d\n",
611 errno);
597 evlist->mmap[idx].base = NULL; 612 evlist->mmap[idx].base = NULL;
598 return -1; 613 return -1;
599 } 614 }
@@ -602,9 +617,36 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist,
602 return 0; 617 return 0;
603} 618}
604 619
605static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int mask) 620static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
621 int prot, int mask, int cpu, int thread,
622 int *output)
606{ 623{
607 struct perf_evsel *evsel; 624 struct perf_evsel *evsel;
625
626 list_for_each_entry(evsel, &evlist->entries, node) {
627 int fd = FD(evsel, cpu, thread);
628
629 if (*output == -1) {
630 *output = fd;
631 if (__perf_evlist__mmap(evlist, idx, prot, mask,
632 *output) < 0)
633 return -1;
634 } else {
635 if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
636 return -1;
637 }
638
639 if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
640 perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0)
641 return -1;
642 }
643
644 return 0;
645}
646
647static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot,
648 int mask)
649{
608 int cpu, thread; 650 int cpu, thread;
609 int nr_cpus = cpu_map__nr(evlist->cpus); 651 int nr_cpus = cpu_map__nr(evlist->cpus);
610 int nr_threads = thread_map__nr(evlist->threads); 652 int nr_threads = thread_map__nr(evlist->threads);
@@ -614,23 +656,9 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int m
614 int output = -1; 656 int output = -1;
615 657
616 for (thread = 0; thread < nr_threads; thread++) { 658 for (thread = 0; thread < nr_threads; thread++) {
617 list_for_each_entry(evsel, &evlist->entries, node) { 659 if (perf_evlist__mmap_per_evsel(evlist, cpu, prot, mask,
618 int fd = FD(evsel, cpu, thread); 660 cpu, thread, &output))
619 661 goto out_unmap;
620 if (output == -1) {
621 output = fd;
622 if (__perf_evlist__mmap(evlist, cpu,
623 prot, mask, output) < 0)
624 goto out_unmap;
625 } else {
626 if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0)
627 goto out_unmap;
628 }
629
630 if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
631 perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0)
632 goto out_unmap;
633 }
634 } 662 }
635 } 663 }
636 664
@@ -642,9 +670,9 @@ out_unmap:
642 return -1; 670 return -1;
643} 671}
644 672
645static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, int mask) 673static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot,
674 int mask)
646{ 675{
647 struct perf_evsel *evsel;
648 int thread; 676 int thread;
649 int nr_threads = thread_map__nr(evlist->threads); 677 int nr_threads = thread_map__nr(evlist->threads);
650 678
@@ -652,23 +680,9 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, in
652 for (thread = 0; thread < nr_threads; thread++) { 680 for (thread = 0; thread < nr_threads; thread++) {
653 int output = -1; 681 int output = -1;
654 682
655 list_for_each_entry(evsel, &evlist->entries, node) { 683 if (perf_evlist__mmap_per_evsel(evlist, thread, prot, mask, 0,
656 int fd = FD(evsel, 0, thread); 684 thread, &output))
657 685 goto out_unmap;
658 if (output == -1) {
659 output = fd;
660 if (__perf_evlist__mmap(evlist, thread,
661 prot, mask, output) < 0)
662 goto out_unmap;
663 } else {
664 if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0)
665 goto out_unmap;
666 }
667
668 if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
669 perf_evlist__id_add_fd(evlist, evsel, 0, thread, fd) < 0)
670 goto out_unmap;
671 }
672 } 686 }
673 687
674 return 0; 688 return 0;
@@ -679,20 +693,76 @@ out_unmap:
679 return -1; 693 return -1;
680} 694}
681 695
682/** perf_evlist__mmap - Create per cpu maps to receive events 696static size_t perf_evlist__mmap_size(unsigned long pages)
683 * 697{
684 * @evlist - list of events 698 /* 512 kiB: default amount of unprivileged mlocked memory */
685 * @pages - map length in pages 699 if (pages == UINT_MAX)
686 * @overwrite - overwrite older events? 700 pages = (512 * 1024) / page_size;
687 * 701 else if (!is_power_of_2(pages))
688 * If overwrite is false the user needs to signal event consuption using: 702 return 0;
689 * 703
690 * struct perf_mmap *m = &evlist->mmap[cpu]; 704 return (pages + 1) * page_size;
691 * unsigned int head = perf_mmap__read_head(m); 705}
706
707int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
708 int unset __maybe_unused)
709{
710 unsigned int *mmap_pages = opt->value;
711 unsigned long pages, val;
712 size_t size;
713 static struct parse_tag tags[] = {
714 { .tag = 'B', .mult = 1 },
715 { .tag = 'K', .mult = 1 << 10 },
716 { .tag = 'M', .mult = 1 << 20 },
717 { .tag = 'G', .mult = 1 << 30 },
718 { .tag = 0 },
719 };
720
721 val = parse_tag_value(str, tags);
722 if (val != (unsigned long) -1) {
723 /* we got file size value */
724 pages = PERF_ALIGN(val, page_size) / page_size;
725 if (pages < (1UL << 31) && !is_power_of_2(pages)) {
726 pages = next_pow2(pages);
727 pr_info("rounding mmap pages size to %lu (%lu pages)\n",
728 pages * page_size, pages);
729 }
730 } else {
731 /* we got pages count value */
732 char *eptr;
733 pages = strtoul(str, &eptr, 10);
734 if (*eptr != '\0') {
735 pr_err("failed to parse --mmap_pages/-m value\n");
736 return -1;
737 }
738 }
739
740 if (pages > UINT_MAX || pages > SIZE_MAX / page_size) {
741 pr_err("--mmap_pages/-m value too big\n");
742 return -1;
743 }
744
745 size = perf_evlist__mmap_size(pages);
746 if (!size) {
747 pr_err("--mmap_pages/-m value must be a power of two.");
748 return -1;
749 }
750
751 *mmap_pages = pages;
752 return 0;
753}
754
755/**
756 * perf_evlist__mmap - Create mmaps to receive events.
757 * @evlist: list of events
758 * @pages: map length in pages
759 * @overwrite: overwrite older events?
692 * 760 *
693 * perf_mmap__write_tail(m, head) 761 * If @overwrite is %false the user needs to signal event consumption using
762 * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this
763 * automatically.
694 * 764 *
695 * Using perf_evlist__read_on_cpu does this automatically. 765 * Return: %0 on success, negative error code otherwise.
696 */ 766 */
697int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, 767int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
698 bool overwrite) 768 bool overwrite)
@@ -702,14 +772,6 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
702 const struct thread_map *threads = evlist->threads; 772 const struct thread_map *threads = evlist->threads;
703 int prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), mask; 773 int prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), mask;
704 774
705 /* 512 kiB: default amount of unprivileged mlocked memory */
706 if (pages == UINT_MAX)
707 pages = (512 * 1024) / page_size;
708 else if (!is_power_of_2(pages))
709 return -EINVAL;
710
711 mask = pages * page_size - 1;
712
713 if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0) 775 if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
714 return -ENOMEM; 776 return -ENOMEM;
715 777
@@ -717,7 +779,9 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
717 return -ENOMEM; 779 return -ENOMEM;
718 780
719 evlist->overwrite = overwrite; 781 evlist->overwrite = overwrite;
720 evlist->mmap_len = (pages + 1) * page_size; 782 evlist->mmap_len = perf_evlist__mmap_size(pages);
783 pr_debug("mmap size %zuB\n", evlist->mmap_len);
784 mask = evlist->mmap_len - page_size - 1;
721 785
722 list_for_each_entry(evsel, &evlist->entries, node) { 786 list_for_each_entry(evsel, &evlist->entries, node) {
723 if ((evsel->attr.read_format & PERF_FORMAT_ID) && 787 if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
@@ -1073,3 +1137,66 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
1073 1137
1074 return printed + fprintf(fp, "\n");; 1138 return printed + fprintf(fp, "\n");;
1075} 1139}
1140
1141int perf_evlist__strerror_tp(struct perf_evlist *evlist __maybe_unused,
1142 int err, char *buf, size_t size)
1143{
1144 char sbuf[128];
1145
1146 switch (err) {
1147 case ENOENT:
1148 scnprintf(buf, size, "%s",
1149 "Error:\tUnable to find debugfs\n"
1150 "Hint:\tWas your kernel was compiled with debugfs support?\n"
1151 "Hint:\tIs the debugfs filesystem mounted?\n"
1152 "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'");
1153 break;
1154 case EACCES:
1155 scnprintf(buf, size,
1156 "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n"
1157 "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
1158 debugfs_mountpoint, debugfs_mountpoint);
1159 break;
1160 default:
1161 scnprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf)));
1162 break;
1163 }
1164
1165 return 0;
1166}
1167
1168int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused,
1169 int err, char *buf, size_t size)
1170{
1171 int printed, value;
1172 char sbuf[128], *emsg = strerror_r(err, sbuf, sizeof(sbuf));
1173
1174 switch (err) {
1175 case EACCES:
1176 case EPERM:
1177 printed = scnprintf(buf, size,
1178 "Error:\t%s.\n"
1179 "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);
1180
1181 if (filename__read_int("/proc/sys/kernel/perf_event_paranoid", &value))
1182 break;
1183
1184 printed += scnprintf(buf + printed, size - printed, "\nHint:\t");
1185
1186 if (value >= 2) {
1187 printed += scnprintf(buf + printed, size - printed,
1188 "For your workloads it needs to be <= 1\nHint:\t");
1189 }
1190 printed += scnprintf(buf + printed, size - printed,
1191 "For system wide tracing it needs to be set to -1");
1192
1193 printed += scnprintf(buf + printed, size - printed,
1194 ".\nHint:\tThe current value is %d.", value);
1195 break;
1196 default:
1197 scnprintf(buf, size, "%s", emsg);
1198 break;
1199 }
1200
1201 return 0;
1202}