diff options
author | Jiri Olsa <jolsa@kernel.org> | 2014-04-17 13:39:10 -0400 |
---|---|---|
committer | Jiri Olsa <jolsa@kernel.org> | 2014-06-12 10:53:19 -0400 |
commit | 0c4e774fad0202b91dea8d99c04e9bdf2c2c6647 (patch) | |
tree | 9a94e5f06914e70d8093e43ea4f85f0843aaba1c | |
parent | 17314e2385c6627fcab4b8f97bd6668bb63495c0 (diff) |
perf tools: Cache register accesses for unwind processing
Cache register values in an array. This gives about a 4% speed-up
of the perf_reg_value function when the report command processes
DWARF unwind stacks.
Output from report over 1.5 GB data with DWARF unwind stacks:
(TODO fix perf diff)
current code:
5.84% perf perf [.] perf_reg_value
change:
1.94% perf perf [.] perf_reg_value
And a little bit of overall speed-up:
(perf stat -r 5 -e '{cycles,instructions}:u' ...)
current code:
310,298,611,754 cycles ( +- 0.33% )
439,669,689,341 instructions ( +- 0.03% )
188.656753166 seconds time elapsed ( +- 0.82% )
change:
291,315,329,878 cycles ( +- 0.22% )
391,763,485,304 instructions ( +- 0.03% )
180.742249687 seconds time elapsed ( +- 0.64% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jean Pihet <jean.pihet@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1401892622-30848-2-git-send-email-jolsa@kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
-rw-r--r-- | tools/perf/util/event.h | 5 | ||||
-rw-r--r-- | tools/perf/util/perf_regs.c | 10 | ||||
-rw-r--r-- | tools/perf/util/perf_regs.h | 4 |
3 files changed, 17 insertions, 2 deletions
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 9ba2eb3bdcfd..e5dd40addb30 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h | |||
@@ -7,6 +7,7 @@ | |||
7 | #include "../perf.h" | 7 | #include "../perf.h" |
8 | #include "map.h" | 8 | #include "map.h" |
9 | #include "build-id.h" | 9 | #include "build-id.h" |
10 | #include "perf_regs.h" | ||
10 | 11 | ||
11 | struct mmap_event { | 12 | struct mmap_event { |
12 | struct perf_event_header header; | 13 | struct perf_event_header header; |
@@ -89,6 +90,10 @@ struct regs_dump { | |||
89 | u64 abi; | 90 | u64 abi; |
90 | u64 mask; | 91 | u64 mask; |
91 | u64 *regs; | 92 | u64 *regs; |
93 | |||
94 | /* Cached values/mask filled by first register access. */ | ||
95 | u64 cache_regs[PERF_REGS_MAX]; | ||
96 | u64 cache_mask; | ||
92 | }; | 97 | }; |
93 | 98 | ||
94 | struct stack_dump { | 99 | struct stack_dump { |
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index a3539ef30b15..43168fb0d9a2 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c | |||
@@ -1,11 +1,15 @@ | |||
1 | #include <errno.h> | 1 | #include <errno.h> |
2 | #include "perf_regs.h" | 2 | #include "perf_regs.h" |
3 | #include "event.h" | ||
3 | 4 | ||
4 | int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) | 5 | int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) |
5 | { | 6 | { |
6 | int i, idx = 0; | 7 | int i, idx = 0; |
7 | u64 mask = regs->mask; | 8 | u64 mask = regs->mask; |
8 | 9 | ||
10 | if (regs->cache_mask & (1 << id)) | ||
11 | goto out; | ||
12 | |||
9 | if (!(mask & (1 << id))) | 13 | if (!(mask & (1 << id))) |
10 | return -EINVAL; | 14 | return -EINVAL; |
11 | 15 | ||
@@ -14,6 +18,10 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) | |||
14 | idx++; | 18 | idx++; |
15 | } | 19 | } |
16 | 20 | ||
17 | *valp = regs->regs[idx]; | 21 | regs->cache_mask |= (1 << id); |
22 | regs->cache_regs[id] = regs->regs[idx]; | ||
23 | |||
24 | out: | ||
25 | *valp = regs->cache_regs[id]; | ||
18 | return 0; | 26 | return 0; |
19 | } | 27 | } |
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 79c78f74e0cf..980dbf76bc98 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h | |||
@@ -2,7 +2,8 @@ | |||
2 | #define __PERF_REGS_H | 2 | #define __PERF_REGS_H |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | #include "event.h" | 5 | |
6 | struct regs_dump; | ||
6 | 7 | ||
7 | #ifdef HAVE_PERF_REGS_SUPPORT | 8 | #ifdef HAVE_PERF_REGS_SUPPORT |
8 | #include <perf_regs.h> | 9 | #include <perf_regs.h> |
@@ -11,6 +12,7 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); | |||
11 | 12 | ||
12 | #else | 13 | #else |
13 | #define PERF_REGS_MASK 0 | 14 | #define PERF_REGS_MASK 0 |
15 | #define PERF_REGS_MAX 0 | ||
14 | 16 | ||
15 | static inline const char *perf_reg_name(int id __maybe_unused) | 17 | static inline const char *perf_reg_name(int id __maybe_unused) |
16 | { | 18 | { |