aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-script.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-03-20 13:29:15 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-03-20 13:29:15 -0400
commit9c2b957db1772ebf942ae7a9346b14eba6c8ca66 (patch)
tree0dbb83e57260ea7fc0dc421f214d5f1b26262005 /tools/perf/builtin-script.c
parent0bbfcaff9b2a69c71a95e6902253487ab30cb498 (diff)
parentbea95c152dee1791dd02cbc708afbb115bb00f9a (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events changes for v3.4 from Ingo Molnar: - New "hardware based branch profiling" feature both on the kernel and the tooling side, on CPUs that support it. (modern x86 Intel CPUs with the 'LBR' hardware feature currently.) This new feature is basically a sophisticated 'magnifying glass' for branch execution - something that is pretty difficult to extract from regular, function histogram centric profiles. The simplest mode is activated via 'perf record -b', and the result looks like this in perf report: $ perf record -b any_call,u -e cycles:u branchy $ perf report -b --sort=symbol 52.34% [.] main [.] f1 24.04% [.] f1 [.] f3 23.60% [.] f1 [.] f2 0.01% [k] _IO_new_file_xsputn [k] _IO_file_overflow 0.01% [k] _IO_vfprintf_internal [k] _IO_new_file_xsputn 0.01% [k] _IO_vfprintf_internal [k] strchrnul 0.01% [k] __printf [k] _IO_vfprintf_internal 0.01% [k] main [k] __printf This output shows from/to branch columns and shows the highest percentage (from,to) jump combinations - i.e. the most likely taken branches in the system. "branches" can also include function calls and any other synchronous and asynchronous transitions of the instruction pointer that are not 'next instruction' - such as system calls, traps, interrupts, etc. This feature comes with (hopefully intuitive) flat ascii and TUI support in perf report. - Various 'perf annotate' visual improvements for us assembly junkies. It will now recognize function calls in the TUI and by hitting enter you can follow the call (recursively) and back, amongst other improvements. - Multiple threads/processes recording support in perf record, perf stat, perf top - which is activated via a comma-list of PIDs: perf top -p 21483,21485 perf stat -p 21483,21485 -ddd perf record -p 21483,21485 - Support for per UID views, via the --uid paramter to perf top, perf report, etc. For example 'perf top --uid mingo' will only show the tasks that I am running, excluding other users, root, etc. - Jump label restructurings and improvements - this includes the factoring out of the (hopefully much clearer) include/linux/static_key.h generic facility: struct static_key key = STATIC_KEY_INIT_FALSE; ... if (static_key_false(&key)) do unlikely code else do likely code ... static_key_slow_inc(); ... static_key_slow_inc(); ... The static_key_false() branch will be generated into the code with as little impact to the likely code path as possible. the static_key_slow_*() APIs flip the branch via live kernel code patching. This facility can now be used more widely within the kernel to micro-optimize hot branches whose likelihood matches the static-key usage and fast/slow cost patterns. - SW function tracer improvements: perf support and filtering support. - Various hardenings of the perf.data ABI, to make older perf.data's smoother on newer tool versions, to make new features integrate more smoothly, to support cross-endian recording/analyzing workflows better, etc. - Restructuring of the kprobes code, the splitting out of 'optprobes', and a corner case bugfix. - Allow the tracing of kernel console output (printk). - Improvements/fixes to user-space RDPMC support, allowing user-space self-profiling code to extract PMU counts without performing any system calls, while playing nice with the kernel side. - 'perf bench' improvements - ... and lots of internal restructurings, cleanups and fixes that made these features possible. And, as usual this list is incomplete as there were also lots of other improvements * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (120 commits) perf report: Fix annotate double quit issue in branch view mode perf report: Remove duplicate annotate choice in branch view mode perf/x86: Prettify pmu config literals perf report: Enable TUI in branch view mode perf report: Auto-detect branch stack sampling mode perf record: Add HEADER_BRANCH_STACK tag perf record: Provide default branch stack sampling mode option perf tools: Make perf able to read files from older ABIs perf tools: Fix ABI compatibility bug in print_event_desc() perf tools: Enable reading of perf.data files from different ABI rev perf: Add ABI reference sizes perf report: Add support for taken branch sampling perf record: Add support for sampling taken branch perf tools: Add code to support PERF_SAMPLE_BRANCH_STACK x86/kprobes: Split out optprobe related code to kprobes-opt.c x86/kprobes: Fix a bug which can modify kernel code permanently x86/kprobes: Fix instruction recovery on optimized path perf: Add callback to flush branch_stack on context switch perf: Disable PERF_SAMPLE_BRANCH_* when not supported perf/x86: Add LBR software filter support for Intel CPUs ...
Diffstat (limited to 'tools/perf/builtin-script.c')
-rw-r--r--tools/perf/builtin-script.c80
1 files changed, 66 insertions, 14 deletions
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index bb68ddf257b7..d4ce733b9eba 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -40,6 +40,7 @@ enum perf_output_field {
40 PERF_OUTPUT_SYM = 1U << 8, 40 PERF_OUTPUT_SYM = 1U << 8,
41 PERF_OUTPUT_DSO = 1U << 9, 41 PERF_OUTPUT_DSO = 1U << 9,
42 PERF_OUTPUT_ADDR = 1U << 10, 42 PERF_OUTPUT_ADDR = 1U << 10,
43 PERF_OUTPUT_SYMOFFSET = 1U << 11,
43}; 44};
44 45
45struct output_option { 46struct output_option {
@@ -57,6 +58,7 @@ struct output_option {
57 {.str = "sym", .field = PERF_OUTPUT_SYM}, 58 {.str = "sym", .field = PERF_OUTPUT_SYM},
58 {.str = "dso", .field = PERF_OUTPUT_DSO}, 59 {.str = "dso", .field = PERF_OUTPUT_DSO},
59 {.str = "addr", .field = PERF_OUTPUT_ADDR}, 60 {.str = "addr", .field = PERF_OUTPUT_ADDR},
61 {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET},
60}; 62};
61 63
62/* default set to maintain compatibility with current format */ 64/* default set to maintain compatibility with current format */
@@ -193,6 +195,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
193 "to symbols.\n"); 195 "to symbols.\n");
194 return -EINVAL; 196 return -EINVAL;
195 } 197 }
198 if (PRINT_FIELD(SYMOFFSET) && !PRINT_FIELD(SYM)) {
199 pr_err("Display of offsets requested but symbol is not"
200 "selected.\n");
201 return -EINVAL;
202 }
196 if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { 203 if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
197 pr_err("Display of DSO requested but neither sample IP nor " 204 pr_err("Display of DSO requested but neither sample IP nor "
198 "sample address\nis selected. Hence, no addresses to convert " 205 "sample address\nis selected. Hence, no addresses to convert "
@@ -300,10 +307,17 @@ static void print_sample_start(struct perf_sample *sample,
300 } else 307 } else
301 evname = __event_name(attr->type, attr->config); 308 evname = __event_name(attr->type, attr->config);
302 309
303 printf("%s: ", evname ? evname : "(unknown)"); 310 printf("%s: ", evname ? evname : "[unknown]");
304 } 311 }
305} 312}
306 313
314static bool is_bts_event(struct perf_event_attr *attr)
315{
316 return ((attr->type == PERF_TYPE_HARDWARE) &&
317 (attr->config & PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
318 (attr->sample_period == 1));
319}
320
307static bool sample_addr_correlates_sym(struct perf_event_attr *attr) 321static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
308{ 322{
309 if ((attr->type == PERF_TYPE_SOFTWARE) && 323 if ((attr->type == PERF_TYPE_SOFTWARE) &&
@@ -312,6 +326,9 @@ static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
312 (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ))) 326 (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)))
313 return true; 327 return true;
314 328
329 if (is_bts_event(attr))
330 return true;
331
315 return false; 332 return false;
316} 333}
317 334
@@ -323,7 +340,6 @@ static void print_sample_addr(union perf_event *event,
323{ 340{
324 struct addr_location al; 341 struct addr_location al;
325 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 342 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
326 const char *symname, *dsoname;
327 343
328 printf("%16" PRIx64, sample->addr); 344 printf("%16" PRIx64, sample->addr);
329 345
@@ -343,22 +359,46 @@ static void print_sample_addr(union perf_event *event,
343 al.sym = map__find_symbol(al.map, al.addr, NULL); 359 al.sym = map__find_symbol(al.map, al.addr, NULL);
344 360
345 if (PRINT_FIELD(SYM)) { 361 if (PRINT_FIELD(SYM)) {
346 if (al.sym && al.sym->name) 362 printf(" ");
347 symname = al.sym->name; 363 if (PRINT_FIELD(SYMOFFSET))
364 symbol__fprintf_symname_offs(al.sym, &al, stdout);
348 else 365 else
349 symname = ""; 366 symbol__fprintf_symname(al.sym, stdout);
350
351 printf(" %16s", symname);
352 } 367 }
353 368
354 if (PRINT_FIELD(DSO)) { 369 if (PRINT_FIELD(DSO)) {
355 if (al.map && al.map->dso && al.map->dso->name) 370 printf(" (");
356 dsoname = al.map->dso->name; 371 map__fprintf_dsoname(al.map, stdout);
357 else 372 printf(")");
358 dsoname = ""; 373 }
374}
359 375
360 printf(" (%s)", dsoname); 376static void print_sample_bts(union perf_event *event,
377 struct perf_sample *sample,
378 struct perf_evsel *evsel,
379 struct machine *machine,
380 struct thread *thread)
381{
382 struct perf_event_attr *attr = &evsel->attr;
383
384 /* print branch_from information */
385 if (PRINT_FIELD(IP)) {
386 if (!symbol_conf.use_callchain)
387 printf(" ");
388 else
389 printf("\n");
390 perf_event__print_ip(event, sample, machine, evsel,
391 PRINT_FIELD(SYM), PRINT_FIELD(DSO),
392 PRINT_FIELD(SYMOFFSET));
361 } 393 }
394
395 printf(" => ");
396
397 /* print branch_to information */
398 if (PRINT_FIELD(ADDR))
399 print_sample_addr(event, sample, machine, thread, attr);
400
401 printf("\n");
362} 402}
363 403
364static void process_event(union perf_event *event __unused, 404static void process_event(union perf_event *event __unused,
@@ -374,6 +414,11 @@ static void process_event(union perf_event *event __unused,
374 414
375 print_sample_start(sample, thread, attr); 415 print_sample_start(sample, thread, attr);
376 416
417 if (is_bts_event(attr)) {
418 print_sample_bts(event, sample, evsel, machine, thread);
419 return;
420 }
421
377 if (PRINT_FIELD(TRACE)) 422 if (PRINT_FIELD(TRACE))
378 print_trace_event(sample->cpu, sample->raw_data, 423 print_trace_event(sample->cpu, sample->raw_data,
379 sample->raw_size); 424 sample->raw_size);
@@ -387,7 +432,8 @@ static void process_event(union perf_event *event __unused,
387 else 432 else
388 printf("\n"); 433 printf("\n");
389 perf_event__print_ip(event, sample, machine, evsel, 434 perf_event__print_ip(event, sample, machine, evsel,
390 PRINT_FIELD(SYM), PRINT_FIELD(DSO)); 435 PRINT_FIELD(SYM), PRINT_FIELD(DSO),
436 PRINT_FIELD(SYMOFFSET));
391 } 437 }
392 438
393 printf("\n"); 439 printf("\n");
@@ -1097,7 +1143,10 @@ static const struct option options[] = {
1097 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", 1143 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
1098 "Look for files with symbols relative to this directory"), 1144 "Look for files with symbols relative to this directory"),
1099 OPT_CALLBACK('f', "fields", NULL, "str", 1145 OPT_CALLBACK('f', "fields", NULL, "str",
1100 "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr", 1146 "comma separated output fields prepend with 'type:'. "
1147 "Valid types: hw,sw,trace,raw. "
1148 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
1149 "addr,symoff",
1101 parse_output_fields), 1150 parse_output_fields),
1102 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1151 OPT_BOOLEAN('a', "all-cpus", &system_wide,
1103 "system-wide collection from all CPUs"), 1152 "system-wide collection from all CPUs"),
@@ -1106,6 +1155,9 @@ static const struct option options[] = {
1106 "only display events for these comms"), 1155 "only display events for these comms"),
1107 OPT_BOOLEAN('I', "show-info", &show_full_info, 1156 OPT_BOOLEAN('I', "show-info", &show_full_info,
1108 "display extended information from perf.data file"), 1157 "display extended information from perf.data file"),
1158 OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
1159 "Show the path of [kernel.kallsyms]"),
1160
1109 OPT_END() 1161 OPT_END()
1110}; 1162};
1111 1163