aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jiri Olsa <jolsa@kernel.org> 2017-08-03 07:21:14 -0400
committer: Arnaldo Carvalho de Melo <acme@redhat.com> 2017-11-17 10:16:04 -0500
commit: 93d10af26bb7159349158b721ba2e258291d53c3 (patch)
tree: e476300cb6e6f32e23e79c453bf259b036829cd3
parent: dc83e1394083d6e12625a3158bf88396dfaec633 (diff)
perf tools: Optimize sample parsing for ordered events
Currently, when using ordered events, we parse the sample twice (the
perf_evlist__parse_sample function). Once before we queue the sample
for sorting:

  perf_session__process_event
    perf_evlist__parse_sample(sample)
    perf_session__queue_event(sample.time)

And then again when we deliver the sorted sample:

  ordered_events__deliver_event
    perf_evlist__parse_sample
    perf_session__deliver_event

We can skip the initial full sample parsing by using the
perf_evlist__parse_sample_timestamp function, which was introduced
earlier. The new path looks like:

  perf_session__process_event
    perf_evlist__parse_sample_timestamp
    perf_session__queue_event

  ordered_events__deliver_event
    perf_session__deliver_event
      perf_evlist__parse_sample

It saves some instructions and is slightly faster:

Before:

  Performance counter stats for './perf.old report --stdio' (5 runs):

     64,396,007,225  cycles:u                                  ( +- 0.97% )
    105,882,112,735  instructions:u  # 1.64 insn per cycle     ( +- 0.00% )

       21.618103465 seconds time elapsed                       ( +- 1.12% )

After:

  Performance counter stats for './perf report --stdio' (5 runs):

     60,567,807,182  cycles:u                                  ( +- 0.40% )
    104,853,333,514  instructions:u  # 1.73 insn per cycle     ( +- 0.00% )

       20.168895243 seconds time elapsed                       ( +- 0.32% )

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-cjp2tuk0qkjs9dxzlpmm34ua@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- tools/perf/builtin-kvm.c 8
-rw-r--r-- tools/perf/util/session.c 41
2 files changed, 22 insertions, 27 deletions
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index cd253db6917f..597c7de9bec9 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -741,20 +741,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
741 u64 *mmap_time) 741 u64 *mmap_time)
742{ 742{
743 union perf_event *event; 743 union perf_event *event;
744 struct perf_sample sample; 744 u64 timestamp;
745 s64 n = 0; 745 s64 n = 0;
746 int err; 746 int err;
747 747
748 *mmap_time = ULLONG_MAX; 748 *mmap_time = ULLONG_MAX;
749 while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) { 749 while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) {
750 err = perf_evlist__parse_sample(kvm->evlist, event, &sample); 750 err = perf_evlist__parse_sample_timestamp(kvm->evlist, event, &timestamp);
751 if (err) { 751 if (err) {
752 perf_evlist__mmap_consume(kvm->evlist, idx); 752 perf_evlist__mmap_consume(kvm->evlist, idx);
753 pr_err("Failed to parse sample\n"); 753 pr_err("Failed to parse sample\n");
754 return -1; 754 return -1;
755 } 755 }
756 756
757 err = perf_session__queue_event(kvm->session, event, sample.time, 0); 757 err = perf_session__queue_event(kvm->session, event, timestamp, 0);
758 /* 758 /*
759 * FIXME: Here we can't consume the event, as perf_session__queue_event will 759 * FIXME: Here we can't consume the event, as perf_session__queue_event will
760 * point to it, and it'll get possibly overwritten by the kernel. 760 * point to it, and it'll get possibly overwritten by the kernel.
@@ -768,7 +768,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
768 768
769 /* save time stamp of our first sample for this mmap */ 769 /* save time stamp of our first sample for this mmap */
770 if (n == 0) 770 if (n == 0)
771 *mmap_time = sample.time; 771 *mmap_time = timestamp;
772 772
773 /* limit events per mmap handled all at once */ 773 /* limit events per mmap handled all at once */
774 n++; 774 n++;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 8976e417eab2..df2857137908 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -27,7 +27,6 @@
27 27
28static int perf_session__deliver_event(struct perf_session *session, 28static int perf_session__deliver_event(struct perf_session *session,
29 union perf_event *event, 29 union perf_event *event,
30 struct perf_sample *sample,
31 struct perf_tool *tool, 30 struct perf_tool *tool,
32 u64 file_offset); 31 u64 file_offset);
33 32
@@ -107,17 +106,10 @@ static void perf_session__set_comm_exec(struct perf_session *session)
107static int ordered_events__deliver_event(struct ordered_events *oe, 106static int ordered_events__deliver_event(struct ordered_events *oe,
108 struct ordered_event *event) 107 struct ordered_event *event)
109{ 108{
110 struct perf_sample sample;
111 struct perf_session *session = container_of(oe, struct perf_session, 109 struct perf_session *session = container_of(oe, struct perf_session,
112 ordered_events); 110 ordered_events);
113 int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample);
114
115 if (ret) {
116 pr_err("Can't parse sample, err = %d\n", ret);
117 return ret;
118 }
119 111
120 return perf_session__deliver_event(session, event->event, &sample, 112 return perf_session__deliver_event(session, event->event,
121 session->tool, event->file_offset); 113 session->tool, event->file_offset);
122} 114}
123 115
@@ -1328,20 +1320,26 @@ static int machines__deliver_event(struct machines *machines,
1328 1320
1329static int perf_session__deliver_event(struct perf_session *session, 1321static int perf_session__deliver_event(struct perf_session *session,
1330 union perf_event *event, 1322 union perf_event *event,
1331 struct perf_sample *sample,
1332 struct perf_tool *tool, 1323 struct perf_tool *tool,
1333 u64 file_offset) 1324 u64 file_offset)
1334{ 1325{
1326 struct perf_sample sample;
1335 int ret; 1327 int ret;
1336 1328
1337 ret = auxtrace__process_event(session, event, sample, tool); 1329 ret = perf_evlist__parse_sample(session->evlist, event, &sample);
1330 if (ret) {
1331 pr_err("Can't parse sample, err = %d\n", ret);
1332 return ret;
1333 }
1334
1335 ret = auxtrace__process_event(session, event, &sample, tool);
1338 if (ret < 0) 1336 if (ret < 0)
1339 return ret; 1337 return ret;
1340 if (ret > 0) 1338 if (ret > 0)
1341 return 0; 1339 return 0;
1342 1340
1343 return machines__deliver_event(&session->machines, session->evlist, 1341 return machines__deliver_event(&session->machines, session->evlist,
1344 event, sample, tool, file_offset); 1342 event, &sample, tool, file_offset);
1345} 1343}
1346 1344
1347static s64 perf_session__process_user_event(struct perf_session *session, 1345static s64 perf_session__process_user_event(struct perf_session *session,
@@ -1495,7 +1493,6 @@ static s64 perf_session__process_event(struct perf_session *session,
1495{ 1493{
1496 struct perf_evlist *evlist = session->evlist; 1494 struct perf_evlist *evlist = session->evlist;
1497 struct perf_tool *tool = session->tool; 1495 struct perf_tool *tool = session->tool;
1498 struct perf_sample sample;
1499 int ret; 1496 int ret;
1500 1497
1501 if (session->header.needs_swap) 1498 if (session->header.needs_swap)
@@ -1509,21 +1506,19 @@ static s64 perf_session__process_event(struct perf_session *session,
1509 if (event->header.type >= PERF_RECORD_USER_TYPE_START) 1506 if (event->header.type >= PERF_RECORD_USER_TYPE_START)
1510 return perf_session__process_user_event(session, event, file_offset); 1507 return perf_session__process_user_event(session, event, file_offset);
1511 1508
1512 /*
1513 * For all kernel events we get the sample data
1514 */
1515 ret = perf_evlist__parse_sample(evlist, event, &sample);
1516 if (ret)
1517 return ret;
1518
1519 if (tool->ordered_events) { 1509 if (tool->ordered_events) {
1520 ret = perf_session__queue_event(session, event, sample.time, file_offset); 1510 u64 timestamp;
1511
1512 ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
1513 if (ret)
1514 return ret;
1515
1516 ret = perf_session__queue_event(session, event, timestamp, file_offset);
1521 if (ret != -ETIME) 1517 if (ret != -ETIME)
1522 return ret; 1518 return ret;
1523 } 1519 }
1524 1520
1525 return perf_session__deliver_event(session, event, &sample, tool, 1521 return perf_session__deliver_event(session, event, tool, file_offset);
1526 file_offset);
1527} 1522}
1528 1523
1529void perf_event_header__bswap(struct perf_event_header *hdr) 1524void perf_event_header__bswap(struct perf_event_header *hdr)