about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Adrian Hunter <adrian.hunter@intel.com> 2019-05-20 07:37:14 -0400
committer: Arnaldo Carvalho de Melo <acme@redhat.com> 2019-06-05 08:47:56 -0400
commit: 68fb45bf175e702aec6668c776050e5dbd2a6f1f (patch)
tree: 5a5be0f0e6cb6d3edb406dae6e3170fcfcd5c401
parent: 5b1dc0fd1da06d6e89f1ca8736cfe0ee84e34cc7 (diff)
perf script: Add output of IPC ratio
Add field 'ipc' to display instructions-per-cycle.

Example:

  perf record -e intel_pt/cyc/u ls
  perf script --insn-trace --xed -F+ipc,-dso,-cpu,-tid
  ls 2670177.697113434: 7f0dfdbcd090 _start+0x0 mov %rsp, %rdi IPC: 0.00 (1/877)
  ls 2670177.697113434: 7f0dfdbcd093 _start+0x3 callq 0x7f0dfdbce030
  ls 2670177.697113434: 7f0dfdbce030 _dl_start+0x0 pushq %rbp
  ls 2670177.697113434: 7f0dfdbce031 _dl_start+0x1 mov %rsp, %rbp
  ls 2670177.697113434: 7f0dfdbce034 _dl_start+0x4 pushq %r15
  ls 2670177.697113434: 7f0dfdbce036 _dl_start+0x6 pushq %r14
  ls 2670177.697113434: 7f0dfdbce038 _dl_start+0x8 pushq %r13
  ls 2670177.697113434: 7f0dfdbce03a _dl_start+0xa pushq %r12
  ls 2670177.697113434: 7f0dfdbce03c _dl_start+0xc mov %rdi, %r12
  ls 2670177.697113434: 7f0dfdbce03f _dl_start+0xf pushq %rbx
  ls 2670177.697113434: 7f0dfdbce040 _dl_start+0x10 sub $0x38, %rsp
  ls 2670177.697113434: 7f0dfdbce044 _dl_start+0x14 rdtsc
  ls 2670177.697113434: 7f0dfdbce046 _dl_start+0x16 mov %eax, %eax
  ls 2670177.697113434: 7f0dfdbce048 _dl_start+0x18 shl $0x20, %rdx
  ls 2670177.697113434: 7f0dfdbce04c _dl_start+0x1c or %rax, %rdx
  ls 2670177.697114471: 7f0dfdbce04f _dl_start+0x1f movq 0x27e22(%rip), %rax IPC: 0.00 (15/1685)
  ls 2670177.697116177: 7f0dfdbce056 _dl_start+0x26 movq %rdx, 0x27683(%rip) IPC: 0.00 (1/881)

Note, the IPC values are low due to page faults at the beginning of execution. The additional cycles are due to the time to enter the kernel, not the actual kernel page fault handler.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20190520113728.14389-9-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- tools/perf/Documentation/perf-script.txt | 5
-rw-r--r-- tools/perf/builtin-script.c | 23
2 files changed, 26 insertions, 2 deletions
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index af8282782911..c59fd52e9e91 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -117,7 +117,7 @@ OPTIONS
117 Comma separated list of fields to print. Options are: 117 Comma separated list of fields to print. Options are:
118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, 118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
119 srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, 119 srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
120 brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode. 120 brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode, ipc.
121 Field list can be prepended with the type, trace, sw or hw, 121 Field list can be prepended with the type, trace, sw or hw,
122 to indicate to which event type the field list applies. 122 to indicate to which event type the field list applies.
123 e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace 123 e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
@@ -203,6 +203,9 @@ OPTIONS
203 The synth field is used by synthesized events which may be created when 203 The synth field is used by synthesized events which may be created when
204 Instruction Trace decoding. 204 Instruction Trace decoding.
205 205
206 The ipc (instructions per cycle) field is synthesized and may have a value when
207 Instruction Trace decoding.
208
206 Finally, a user may not set fields to none for all event types. 209 Finally, a user may not set fields to none for all event types.
207 i.e., -F "" is not allowed. 210 i.e., -F "" is not allowed.
208 211
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3a48a2627670..80c722ade852 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -102,6 +102,7 @@ enum perf_output_field {
102 PERF_OUTPUT_METRIC = 1U << 28, 102 PERF_OUTPUT_METRIC = 1U << 28,
103 PERF_OUTPUT_MISC = 1U << 29, 103 PERF_OUTPUT_MISC = 1U << 29,
104 PERF_OUTPUT_SRCCODE = 1U << 30, 104 PERF_OUTPUT_SRCCODE = 1U << 30,
105 PERF_OUTPUT_IPC = 1U << 31,
105}; 106};
106 107
107struct output_option { 108struct output_option {
@@ -139,6 +140,7 @@ struct output_option {
139 {.str = "metric", .field = PERF_OUTPUT_METRIC}, 140 {.str = "metric", .field = PERF_OUTPUT_METRIC},
140 {.str = "misc", .field = PERF_OUTPUT_MISC}, 141 {.str = "misc", .field = PERF_OUTPUT_MISC},
141 {.str = "srccode", .field = PERF_OUTPUT_SRCCODE}, 142 {.str = "srccode", .field = PERF_OUTPUT_SRCCODE},
143 {.str = "ipc", .field = PERF_OUTPUT_IPC},
142}; 144};
143 145
144enum { 146enum {
@@ -1268,6 +1270,20 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
1268 return printed; 1270 return printed;
1269} 1271}
1270 1272
1273static int perf_sample__fprintf_ipc(struct perf_sample *sample,
1274 struct perf_event_attr *attr, FILE *fp)
1275{
1276 unsigned int ipc;
1277
1278 if (!PRINT_FIELD(IPC) || !sample->cyc_cnt || !sample->insn_cnt)
1279 return 0;
1280
1281 ipc = (sample->insn_cnt * 100) / sample->cyc_cnt;
1282
1283 return fprintf(fp, " \t IPC: %u.%02u (%" PRIu64 "/%" PRIu64 ") ",
1284 ipc / 100, ipc % 100, sample->insn_cnt, sample->cyc_cnt);
1285}
1286
1271static int perf_sample__fprintf_bts(struct perf_sample *sample, 1287static int perf_sample__fprintf_bts(struct perf_sample *sample,
1272 struct perf_evsel *evsel, 1288 struct perf_evsel *evsel,
1273 struct thread *thread, 1289 struct thread *thread,
@@ -1312,6 +1328,8 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
1312 printed += perf_sample__fprintf_addr(sample, thread, attr, fp); 1328 printed += perf_sample__fprintf_addr(sample, thread, attr, fp);
1313 } 1329 }
1314 1330
1331 printed += perf_sample__fprintf_ipc(sample, attr, fp);
1332
1315 if (print_srcline_last) 1333 if (print_srcline_last)
1316 printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp); 1334 printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp);
1317 1335
@@ -1859,6 +1877,9 @@ static void process_event(struct perf_script *script,
1859 1877
1860 if (PRINT_FIELD(PHYS_ADDR)) 1878 if (PRINT_FIELD(PHYS_ADDR))
1861 fprintf(fp, "%16" PRIx64, sample->phys_addr); 1879 fprintf(fp, "%16" PRIx64, sample->phys_addr);
1880
1881 perf_sample__fprintf_ipc(sample, attr, fp);
1882
1862 fprintf(fp, "\n"); 1883 fprintf(fp, "\n");
1863 1884
1864 if (PRINT_FIELD(SRCCODE)) { 1885 if (PRINT_FIELD(SRCCODE)) {
@@ -3433,7 +3454,7 @@ int cmd_script(int argc, const char **argv)
3433 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," 3454 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
3434 "addr,symoff,srcline,period,iregs,uregs,brstack," 3455 "addr,symoff,srcline,period,iregs,uregs,brstack,"
3435 "brstacksym,flags,bpf-output,brstackinsn,brstackoff," 3456 "brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
3436 "callindent,insn,insnlen,synth,phys_addr,metric,misc", 3457 "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc",
3437 parse_output_fields), 3458 parse_output_fields),
3438 OPT_BOOLEAN('a', "all-cpus", &system_wide, 3459 OPT_BOOLEAN('a', "all-cpus", &system_wide,
3439 "system-wide collection from all CPUs"), 3460 "system-wide collection from all CPUs"),