about | summary | refs | log | tree | commit | diff | stats
path: root/tools/perf
diff options
context:
space:
mode:
author Ingo Molnar <mingo@kernel.org> 2015-10-03 02:20:14 -0400
committer Ingo Molnar <mingo@kernel.org> 2015-10-03 02:20:14 -0400
commit e3b0ac1b7a8a590440a2030e7d10d48c59ab8a2a (patch)
tree 7c00b3eb48b51e28c3d80b56bdd9c2e6066a4b1b /tools/perf
parent c2365b9388e8ec19305e3f449c1826e7493d156d (diff)
parent 19afd10410957b1c808c2c49a88e6dd8b23aa894 (diff)
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

  - Do event name substring search as last resort in 'perf list'.
    (Arnaldo Carvalho de Melo)

    E.g.:

      # perf list clock

      List of pre-defined events (to be used in -e):

        cpu-clock                              [Software event]
        task-clock                             [Software event]

        uncore_cbox_0/clockticks/              [Kernel PMU event]
        uncore_cbox_1/clockticks/              [Kernel PMU event]

        kvm:kvm_pvclock_update                 [Tracepoint event]
        kvm:kvm_update_master_clock            [Tracepoint event]
        power:clock_disable                    [Tracepoint event]
        power:clock_enable                     [Tracepoint event]
        power:clock_set_rate                   [Tracepoint event]
        syscalls:sys_enter_clock_adjtime       [Tracepoint event]
        syscalls:sys_enter_clock_getres        [Tracepoint event]
        syscalls:sys_enter_clock_gettime       [Tracepoint event]
        syscalls:sys_enter_clock_nanosleep     [Tracepoint event]
        syscalls:sys_enter_clock_settime       [Tracepoint event]
        syscalls:sys_exit_clock_adjtime        [Tracepoint event]
        syscalls:sys_exit_clock_getres         [Tracepoint event]
        syscalls:sys_exit_clock_gettime        [Tracepoint event]
        syscalls:sys_exit_clock_nanosleep      [Tracepoint event]
        syscalls:sys_exit_clock_settime        [Tracepoint event]

  - Reduce min 'perf stat --interval-print/-I' to 10ms. (Kan Liang)

    perf stat --interval in action:

      # perf stat -e cycles -I 50 -a usleep $((200 * 1000))
      print interval < 100ms. The overhead percentage could be high in some cases. Please proceed with caution.
      #           time             counts unit events
           0.050233636         48,240,396      cycles
           0.100557098         35,492,594      cycles
           0.150804687         39,295,112      cycles
           0.201032269         33,101,961      cycles
           0.201980732            786,379      cycles
      #

  - Allow for max_stack greater than PERF_MAX_STACK_DEPTH, as when
    synthesizing callchains from Intel PT data. (Adrian Hunter)

  - Allow probing on kmodules without DWARF. (Masami Hiramatsu)

  - Fix a segfault when processing a perf.data file with callchains using
    "perf report --call-graph none". (Namhyung Kim)

  - Fix unresolved COMMs in 'perf top' when -s comm is used. (Namhyung Kim)

  - Register idle thread in 'perf top'. (Namhyung Kim)

  - Change 'record.samples' type to unsigned long long, fixing output of
    number of samples in 32-bit architectures. (Yang Shi)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf')
-rw-r--r-- tools/perf/Documentation/perf-list.txt 2
-rw-r--r-- tools/perf/Documentation/perf-stat.txt 5
-rw-r--r-- tools/perf/builtin-list.c 18
-rw-r--r-- tools/perf/builtin-probe.c 8
-rw-r--r-- tools/perf/builtin-record.c 16
-rw-r--r-- tools/perf/builtin-stat.c 13
-rw-r--r-- tools/perf/builtin-top.c 10
-rw-r--r-- tools/perf/util/hist.c 2
-rw-r--r-- tools/perf/util/machine.c 2
-rw-r--r-- tools/perf/util/pmu.c 3
-rw-r--r-- tools/perf/util/probe-event.c 8
-rw-r--r-- tools/perf/util/session.c 2
-rw-r--r-- tools/perf/util/session.h 2
13 files changed, 65 insertions, 26 deletions
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index bada8933fdd4..ad60c6ea1997 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -125,6 +125,8 @@ To limit the list use:
125. If none of the above is matched, it will apply the supplied glob to all 125. If none of the above is matched, it will apply the supplied glob to all
126 events, printing the ones that match. 126 events, printing the ones that match.
127 127
128. As a last resort, it will do a substring search in all event names.
129
128One or more types can be used at the same time, listing the events for the 130One or more types can be used at the same time, listing the events for the
129types specified. 131types specified.
130 132
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 47469abdcc1c..4e074a660826 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -128,8 +128,9 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m
128 128
129-I msecs:: 129-I msecs::
130--interval-print msecs:: 130--interval-print msecs::
131 Print count deltas every N milliseconds (minimum: 100ms) 131Print count deltas every N milliseconds (minimum: 10ms)
132 example: perf stat -I 1000 -e cycles -a sleep 5 132The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution.
133 example: 'perf stat -I 1000 -e cycles -a sleep 5'
133 134
134--per-socket:: 135--per-socket::
135Aggregate counts per processor socket for system-wide mode measurements. This 136Aggregate counts per processor socket for system-wide mode measurements. This
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 602414040344..bf679e2c978b 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -45,6 +45,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
45 } 45 }
46 46
47 for (i = 0; i < argc; ++i) { 47 for (i = 0; i < argc; ++i) {
48 char *sep, *s;
49
48 if (strcmp(argv[i], "tracepoint") == 0) 50 if (strcmp(argv[i], "tracepoint") == 0)
49 print_tracepoint_events(NULL, NULL, raw_dump); 51 print_tracepoint_events(NULL, NULL, raw_dump);
50 else if (strcmp(argv[i], "hw") == 0 || 52 else if (strcmp(argv[i], "hw") == 0 ||
@@ -60,8 +62,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
60 print_hwcache_events(NULL, raw_dump); 62 print_hwcache_events(NULL, raw_dump);
61 else if (strcmp(argv[i], "pmu") == 0) 63 else if (strcmp(argv[i], "pmu") == 0)
62 print_pmu_events(NULL, raw_dump); 64 print_pmu_events(NULL, raw_dump);
63 else { 65 else if ((sep = strchr(argv[i], ':')) != NULL) {
64 char *sep = strchr(argv[i], ':'), *s;
65 int sep_idx; 66 int sep_idx;
66 67
67 if (sep == NULL) { 68 if (sep == NULL) {
@@ -76,6 +77,19 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
76 s[sep_idx] = '\0'; 77 s[sep_idx] = '\0';
77 print_tracepoint_events(s, s + sep_idx + 1, raw_dump); 78 print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
78 free(s); 79 free(s);
80 } else {
81 if (asprintf(&s, "*%s*", argv[i]) < 0) {
82 printf("Critical: Not enough memory! Trying to continue...\n");
83 continue;
84 }
85 print_symbol_events(s, PERF_TYPE_HARDWARE,
86 event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump);
87 print_symbol_events(s, PERF_TYPE_SOFTWARE,
88 event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump);
89 print_hwcache_events(s, raw_dump);
90 print_pmu_events(s, raw_dump);
91 print_tracepoint_events(NULL, s, raw_dump);
92 free(s);
79 } 93 }
80 } 94 }
81 return 0; 95 return 0;
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index f7882ae9ebc6..530c3a28a58c 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -182,10 +182,8 @@ static int opt_set_target(const struct option *opt, const char *str,
182 if (str) { 182 if (str) {
183 if (!strcmp(opt->long_name, "exec")) 183 if (!strcmp(opt->long_name, "exec"))
184 params.uprobes = true; 184 params.uprobes = true;
185#ifdef HAVE_DWARF_SUPPORT
186 else if (!strcmp(opt->long_name, "module")) 185 else if (!strcmp(opt->long_name, "module"))
187 params.uprobes = false; 186 params.uprobes = false;
188#endif
189 else 187 else
190 return ret; 188 return ret;
191 189
@@ -490,9 +488,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
490 "file", "vmlinux pathname"), 488 "file", "vmlinux pathname"),
491 OPT_STRING('s', "source", &symbol_conf.source_prefix, 489 OPT_STRING('s', "source", &symbol_conf.source_prefix,
492 "directory", "path to kernel source"), 490 "directory", "path to kernel source"),
493 OPT_CALLBACK('m', "module", NULL, "modname|path",
494 "target module name (for online) or path (for offline)",
495 opt_set_target),
496 OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines, 491 OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines,
497 "Don't search inlined functions"), 492 "Don't search inlined functions"),
498#endif 493#endif
@@ -509,6 +504,9 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
509 opt_set_filter), 504 opt_set_filter),
510 OPT_CALLBACK('x', "exec", NULL, "executable|path", 505 OPT_CALLBACK('x', "exec", NULL, "executable|path",
511 "target executable name or path", opt_set_target), 506 "target executable name or path", opt_set_target),
507 OPT_CALLBACK('m', "module", NULL, "modname|path",
508 "target module name (for online) or path (for offline)",
509 opt_set_target),
512 OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, 510 OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
513 "Enable symbol demangling"), 511 "Enable symbol demangling"),
514 OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, 512 OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a01c8ae1ee07..24ace2f318c1 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -49,7 +49,7 @@ struct record {
49 int realtime_prio; 49 int realtime_prio;
50 bool no_buildid; 50 bool no_buildid;
51 bool no_buildid_cache; 51 bool no_buildid_cache;
52 long samples; 52 unsigned long long samples;
53}; 53};
54 54
55static int record__write(struct record *rec, void *bf, size_t size) 55static int record__write(struct record *rec, void *bf, size_t size)
@@ -637,17 +637,25 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
637 * Let the child rip 637 * Let the child rip
638 */ 638 */
639 if (forks) { 639 if (forks) {
640 union perf_event event; 640 union perf_event *event;
641
642 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
643 if (event == NULL) {
644 err = -ENOMEM;
645 goto out_child;
646 }
647
641 /* 648 /*
642 * Some H/W events are generated before COMM event 649 * Some H/W events are generated before COMM event
643 * which is emitted during exec(), so perf script 650 * which is emitted during exec(), so perf script
644 * cannot see a correct process name for those events. 651 * cannot see a correct process name for those events.
645 * Synthesize COMM event to prevent it. 652 * Synthesize COMM event to prevent it.
646 */ 653 */
647 perf_event__synthesize_comm(tool, &event, 654 perf_event__synthesize_comm(tool, event,
648 rec->evlist->workload.pid, 655 rec->evlist->workload.pid,
649 process_synthesized_event, 656 process_synthesized_event,
650 machine); 657 machine);
658 free(event);
651 659
652 perf_evlist__start_workload(rec->evlist); 660 perf_evlist__start_workload(rec->evlist);
653 } 661 }
@@ -659,7 +667,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
659 667
660 auxtrace_snapshot_enabled = 1; 668 auxtrace_snapshot_enabled = 1;
661 for (;;) { 669 for (;;) {
662 int hits = rec->samples; 670 unsigned long long hits = rec->samples;
663 671
664 if (record__mmap_read_all(rec) < 0) { 672 if (record__mmap_read_all(rec) < 0) {
665 auxtrace_snapshot_enabled = 0; 673 auxtrace_snapshot_enabled = 0;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a96fb5c3bedb..5ef88f760b12 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1179,7 +1179,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1179 OPT_STRING(0, "post", &post_cmd, "command", 1179 OPT_STRING(0, "post", &post_cmd, "command",
1180 "command to run after to the measured command"), 1180 "command to run after to the measured command"),
1181 OPT_UINTEGER('I', "interval-print", &stat_config.interval, 1181 OPT_UINTEGER('I', "interval-print", &stat_config.interval,
1182 "print counts at regular interval in ms (>= 100)"), 1182 "print counts at regular interval in ms (>= 10)"),
1183 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 1183 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
1184 "aggregate counts per processor socket", AGGR_SOCKET), 1184 "aggregate counts per processor socket", AGGR_SOCKET),
1185 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 1185 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
@@ -1332,9 +1332,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1332 thread_map__read_comms(evsel_list->threads); 1332 thread_map__read_comms(evsel_list->threads);
1333 1333
1334 if (interval && interval < 100) { 1334 if (interval && interval < 100) {
1335 pr_err("print interval must be >= 100ms\n"); 1335 if (interval < 10) {
1336 parse_options_usage(stat_usage, options, "I", 1); 1336 pr_err("print interval must be >= 10ms\n");
1337 goto out; 1337 parse_options_usage(stat_usage, options, "I", 1);
1338 goto out;
1339 } else
1340 pr_warning("print interval < 100ms. "
1341 "The overhead percentage could be high in some cases. "
1342 "Please proceed with caution.\n");
1338 } 1343 }
1339 1344
1340 if (perf_evlist__alloc_stats(evsel_list, interval)) 1345 if (perf_evlist__alloc_stats(evsel_list, interval))
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 38d4d6cac823..6f641fd68296 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -857,9 +857,12 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
857 * TODO: we don't process guest user from host side 857 * TODO: we don't process guest user from host side
858 * except simple counting. 858 * except simple counting.
859 */ 859 */
860 /* Fall thru */
861 default:
862 goto next_event; 860 goto next_event;
861 default:
862 if (event->header.type == PERF_RECORD_SAMPLE)
863 goto next_event;
864 machine = &session->machines.host;
865 break;
863 } 866 }
864 867
865 868
@@ -961,6 +964,9 @@ static int __cmd_top(struct perf_top *top)
961 if (ret) 964 if (ret)
962 goto out_delete; 965 goto out_delete;
963 966
967 if (perf_session__register_idle_thread(top->session) == NULL)
968 goto out_delete;
969
964 machine__synthesize_threads(&top->session->machines.host, &opts->target, 970 machine__synthesize_threads(&top->session->machines.host, &opts->target,
965 top->evlist->threads, false, opts->proc_map_timeout); 971 top->evlist->threads, false, opts->proc_map_timeout);
966 972
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 0cad9e07c5b4..c346b331b892 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1151,7 +1151,7 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
1151 struct perf_evsel *evsel = hists_to_evsel(hists); 1151 struct perf_evsel *evsel = hists_to_evsel(hists);
1152 bool use_callchain; 1152 bool use_callchain;
1153 1153
1154 if (evsel && !symbol_conf.show_ref_callgraph) 1154 if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph)
1155 use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; 1155 use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
1156 else 1156 else
1157 use_callchain = symbol_conf.use_callchain; 1157 use_callchain = symbol_conf.use_callchain;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 76fe167c359e..5ef90be2a249 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1831,7 +1831,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
1831 } 1831 }
1832 1832
1833check_calls: 1833check_calls:
1834 if (chain->nr > PERF_MAX_STACK_DEPTH) { 1834 if (chain->nr > PERF_MAX_STACK_DEPTH && (int)chain->nr > max_stack) {
1835 pr_warning("corrupted callchain. skipping...\n"); 1835 pr_warning("corrupted callchain. skipping...\n");
1836 return 0; 1836 return 0;
1837 } 1837 }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 41a356ba3cfe..e4b173dec4b9 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1008,7 +1008,8 @@ void print_pmu_events(const char *event_glob, bool name_only)
1008 goto out_enomem; 1008 goto out_enomem;
1009 j++; 1009 j++;
1010 } 1010 }
1011 if (pmu->selectable) { 1011 if (pmu->selectable &&
1012 (event_glob == NULL || strglobmatch(pmu->name, event_glob))) {
1012 char *s; 1013 char *s;
1013 if (asprintf(&s, "%s//", pmu->name) < 0) 1014 if (asprintf(&s, "%s//", pmu->name) < 0)
1014 goto out_enomem; 1015 goto out_enomem;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 3010abc071ff..b51a8bfb40f9 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2543,7 +2543,8 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
2543 goto out; 2543 goto out;
2544 } 2544 }
2545 2545
2546 if (!pev->uprobes && !pp->retprobe) { 2546 /* Note that the symbols in the kmodule are not relocated */
2547 if (!pev->uprobes && !pp->retprobe && !pev->target) {
2547 reloc_sym = kernel_get_ref_reloc_sym(); 2548 reloc_sym = kernel_get_ref_reloc_sym();
2548 if (!reloc_sym) { 2549 if (!reloc_sym) {
2549 pr_warning("Relocated base symbol is not found!\n"); 2550 pr_warning("Relocated base symbol is not found!\n");
@@ -2580,8 +2581,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
2580 } 2581 }
2581 /* Add one probe point */ 2582 /* Add one probe point */
2582 tp->address = map->unmap_ip(map, sym->start) + pp->offset; 2583 tp->address = map->unmap_ip(map, sym->start) + pp->offset;
2583 /* If we found a wrong one, mark it by NULL symbol */ 2584
2584 if (!pev->uprobes && 2585 /* Check the kprobe (not in module) is within .text */
2586 if (!pev->uprobes && !pev->target &&
2585 kprobe_warn_out_range(sym->name, tp->address)) { 2587 kprobe_warn_out_range(sym->name, tp->address)) {
2586 tp->symbol = NULL; /* Skip it */ 2588 tp->symbol = NULL; /* Skip it */
2587 skipped++; 2589 skipped++;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 84a02eae4394..428149bc64d2 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1311,7 +1311,7 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
1311 return machine__findnew_thread(&session->machines.host, -1, pid); 1311 return machine__findnew_thread(&session->machines.host, -1, pid);
1312} 1312}
1313 1313
1314static struct thread *perf_session__register_idle_thread(struct perf_session *session) 1314struct thread *perf_session__register_idle_thread(struct perf_session *session)
1315{ 1315{
1316 struct thread *thread; 1316 struct thread *thread;
1317 1317
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index b44afc75d1cc..3e900c0efc73 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -89,6 +89,8 @@ struct machine *perf_session__findnew_machine(struct perf_session *session, pid_
89} 89}
90 90
91struct thread *perf_session__findnew(struct perf_session *session, pid_t pid); 91struct thread *perf_session__findnew(struct perf_session *session, pid_t pid);
92struct thread *perf_session__register_idle_thread(struct perf_session *session);
93
92size_t perf_session__fprintf(struct perf_session *session, FILE *fp); 94size_t perf_session__fprintf(struct perf_session *session, FILE *fp);
93 95
94size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp); 96size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp);