diff options
| author | Jiri Olsa <jolsa@kernel.org> | 2017-08-03 07:21:14 -0400 |
|---|---|---|
| committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2017-11-17 10:16:04 -0500 |
| commit | 93d10af26bb7159349158b721ba2e258291d53c3 (patch) | |
| tree | e476300cb6e6f32e23e79c453bf259b036829cd3 | |
| parent | dc83e1394083d6e12625a3158bf88396dfaec633 (diff) | |
perf tools: Optimize sample parsing for ordered events
Currently when using ordered events we parse the sample twice (the
perf_evlist__parse_sample function). Once before we queue the sample for
sorting:
perf_session__process_event
perf_evlist__parse_sample(sample)
perf_session__queue_event(sample.time)
And then when we deliver the sorted sample:
ordered_events__deliver_event
perf_evlist__parse_sample
perf_session__deliver_event
We can skip the initial full sample parsing by using
perf_evlist__parse_sample_timestamp function, which got introduced
earlier. The new path looks like:
perf_session__process_event
perf_evlist__parse_sample_timestamp
perf_session__queue_event
ordered_events__deliver_event
perf_session__deliver_event
perf_evlist__parse_sample
It saves some instructions and is slightly faster:
Before:
Performance counter stats for './perf.old report --stdio' (5 runs):
64,396,007,225 cycles:u ( +- 0.97% )
105,882,112,735 instructions:u # 1.64 insn per cycle ( +- 0.00% )
21.618103465 seconds time elapsed ( +- 1.12% )
After:
Performance counter stats for './perf report --stdio' (5 runs):
60,567,807,182 cycles:u ( +- 0.40% )
104,853,333,514 instructions:u # 1.73 insn per cycle ( +- 0.00% )
20.168895243 seconds time elapsed ( +- 0.32% )
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-cjp2tuk0qkjs9dxzlpmm34ua@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
| -rw-r--r-- | tools/perf/builtin-kvm.c | 8 | ||||
| -rw-r--r-- | tools/perf/util/session.c | 41 |
2 files changed, 22 insertions, 27 deletions
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index cd253db6917f..597c7de9bec9 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c | |||
| @@ -741,20 +741,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, | |||
| 741 | u64 *mmap_time) | 741 | u64 *mmap_time) |
| 742 | { | 742 | { |
| 743 | union perf_event *event; | 743 | union perf_event *event; |
| 744 | struct perf_sample sample; | 744 | u64 timestamp; |
| 745 | s64 n = 0; | 745 | s64 n = 0; |
| 746 | int err; | 746 | int err; |
| 747 | 747 | ||
| 748 | *mmap_time = ULLONG_MAX; | 748 | *mmap_time = ULLONG_MAX; |
| 749 | while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) { | 749 | while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) { |
| 750 | err = perf_evlist__parse_sample(kvm->evlist, event, &sample); | 750 | err = perf_evlist__parse_sample_timestamp(kvm->evlist, event, &timestamp); |
| 751 | if (err) { | 751 | if (err) { |
| 752 | perf_evlist__mmap_consume(kvm->evlist, idx); | 752 | perf_evlist__mmap_consume(kvm->evlist, idx); |
| 753 | pr_err("Failed to parse sample\n"); | 753 | pr_err("Failed to parse sample\n"); |
| 754 | return -1; | 754 | return -1; |
| 755 | } | 755 | } |
| 756 | 756 | ||
| 757 | err = perf_session__queue_event(kvm->session, event, sample.time, 0); | 757 | err = perf_session__queue_event(kvm->session, event, timestamp, 0); |
| 758 | /* | 758 | /* |
| 759 | * FIXME: Here we can't consume the event, as perf_session__queue_event will | 759 | * FIXME: Here we can't consume the event, as perf_session__queue_event will |
| 760 | * point to it, and it'll get possibly overwritten by the kernel. | 760 | * point to it, and it'll get possibly overwritten by the kernel. |
| @@ -768,7 +768,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, | |||
| 768 | 768 | ||
| 769 | /* save time stamp of our first sample for this mmap */ | 769 | /* save time stamp of our first sample for this mmap */ |
| 770 | if (n == 0) | 770 | if (n == 0) |
| 771 | *mmap_time = sample.time; | 771 | *mmap_time = timestamp; |
| 772 | 772 | ||
| 773 | /* limit events per mmap handled all at once */ | 773 | /* limit events per mmap handled all at once */ |
| 774 | n++; | 774 | n++; |
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 8976e417eab2..df2857137908 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c | |||
| @@ -27,7 +27,6 @@ | |||
| 27 | 27 | ||
| 28 | static int perf_session__deliver_event(struct perf_session *session, | 28 | static int perf_session__deliver_event(struct perf_session *session, |
| 29 | union perf_event *event, | 29 | union perf_event *event, |
| 30 | struct perf_sample *sample, | ||
| 31 | struct perf_tool *tool, | 30 | struct perf_tool *tool, |
| 32 | u64 file_offset); | 31 | u64 file_offset); |
| 33 | 32 | ||
| @@ -107,17 +106,10 @@ static void perf_session__set_comm_exec(struct perf_session *session) | |||
| 107 | static int ordered_events__deliver_event(struct ordered_events *oe, | 106 | static int ordered_events__deliver_event(struct ordered_events *oe, |
| 108 | struct ordered_event *event) | 107 | struct ordered_event *event) |
| 109 | { | 108 | { |
| 110 | struct perf_sample sample; | ||
| 111 | struct perf_session *session = container_of(oe, struct perf_session, | 109 | struct perf_session *session = container_of(oe, struct perf_session, |
| 112 | ordered_events); | 110 | ordered_events); |
| 113 | int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample); | ||
| 114 | |||
| 115 | if (ret) { | ||
| 116 | pr_err("Can't parse sample, err = %d\n", ret); | ||
| 117 | return ret; | ||
| 118 | } | ||
| 119 | 111 | ||
| 120 | return perf_session__deliver_event(session, event->event, &sample, | 112 | return perf_session__deliver_event(session, event->event, |
| 121 | session->tool, event->file_offset); | 113 | session->tool, event->file_offset); |
| 122 | } | 114 | } |
| 123 | 115 | ||
| @@ -1328,20 +1320,26 @@ static int machines__deliver_event(struct machines *machines, | |||
| 1328 | 1320 | ||
| 1329 | static int perf_session__deliver_event(struct perf_session *session, | 1321 | static int perf_session__deliver_event(struct perf_session *session, |
| 1330 | union perf_event *event, | 1322 | union perf_event *event, |
| 1331 | struct perf_sample *sample, | ||
| 1332 | struct perf_tool *tool, | 1323 | struct perf_tool *tool, |
| 1333 | u64 file_offset) | 1324 | u64 file_offset) |
| 1334 | { | 1325 | { |
| 1326 | struct perf_sample sample; | ||
| 1335 | int ret; | 1327 | int ret; |
| 1336 | 1328 | ||
| 1337 | ret = auxtrace__process_event(session, event, sample, tool); | 1329 | ret = perf_evlist__parse_sample(session->evlist, event, &sample); |
| 1330 | if (ret) { | ||
| 1331 | pr_err("Can't parse sample, err = %d\n", ret); | ||
| 1332 | return ret; | ||
| 1333 | } | ||
| 1334 | |||
| 1335 | ret = auxtrace__process_event(session, event, &sample, tool); | ||
| 1338 | if (ret < 0) | 1336 | if (ret < 0) |
| 1339 | return ret; | 1337 | return ret; |
| 1340 | if (ret > 0) | 1338 | if (ret > 0) |
| 1341 | return 0; | 1339 | return 0; |
| 1342 | 1340 | ||
| 1343 | return machines__deliver_event(&session->machines, session->evlist, | 1341 | return machines__deliver_event(&session->machines, session->evlist, |
| 1344 | event, sample, tool, file_offset); | 1342 | event, &sample, tool, file_offset); |
| 1345 | } | 1343 | } |
| 1346 | 1344 | ||
| 1347 | static s64 perf_session__process_user_event(struct perf_session *session, | 1345 | static s64 perf_session__process_user_event(struct perf_session *session, |
| @@ -1495,7 +1493,6 @@ static s64 perf_session__process_event(struct perf_session *session, | |||
| 1495 | { | 1493 | { |
| 1496 | struct perf_evlist *evlist = session->evlist; | 1494 | struct perf_evlist *evlist = session->evlist; |
| 1497 | struct perf_tool *tool = session->tool; | 1495 | struct perf_tool *tool = session->tool; |
| 1498 | struct perf_sample sample; | ||
| 1499 | int ret; | 1496 | int ret; |
| 1500 | 1497 | ||
| 1501 | if (session->header.needs_swap) | 1498 | if (session->header.needs_swap) |
| @@ -1509,21 +1506,19 @@ static s64 perf_session__process_event(struct perf_session *session, | |||
| 1509 | if (event->header.type >= PERF_RECORD_USER_TYPE_START) | 1506 | if (event->header.type >= PERF_RECORD_USER_TYPE_START) |
| 1510 | return perf_session__process_user_event(session, event, file_offset); | 1507 | return perf_session__process_user_event(session, event, file_offset); |
| 1511 | 1508 | ||
| 1512 | /* | ||
| 1513 | * For all kernel events we get the sample data | ||
| 1514 | */ | ||
| 1515 | ret = perf_evlist__parse_sample(evlist, event, &sample); | ||
| 1516 | if (ret) | ||
| 1517 | return ret; | ||
| 1518 | |||
| 1519 | if (tool->ordered_events) { | 1509 | if (tool->ordered_events) { |
| 1520 | ret = perf_session__queue_event(session, event, sample.time, file_offset); | 1510 | u64 timestamp; |
| 1511 | |||
| | 1512 | ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp); | ||
| 1513 | if (ret) | ||
| 1514 | return ret; | ||
| 1515 | |||
| 1516 | ret = perf_session__queue_event(session, event, timestamp, file_offset); | ||
| 1521 | if (ret != -ETIME) | 1517 | if (ret != -ETIME) |
| 1522 | return ret; | 1518 | return ret; |
| 1523 | } | 1519 | } |
| 1524 | 1520 | ||
| 1525 | return perf_session__deliver_event(session, event, &sample, tool, | 1521 | return perf_session__deliver_event(session, event, tool, file_offset); |
| 1526 | file_offset); | ||
| 1527 | } | 1522 | } |
| 1528 | 1523 | ||
| 1529 | void perf_event_header__bswap(struct perf_event_header *hdr) | 1524 | void perf_event_header__bswap(struct perf_event_header *hdr) |
