diff options
| author | Jiri Olsa <jolsa@kernel.org> | 2014-04-17 13:39:10 -0400 |
|---|---|---|
| committer | Jiri Olsa <jolsa@kernel.org> | 2014-06-12 10:53:19 -0400 |
| commit | 0c4e774fad0202b91dea8d99c04e9bdf2c2c6647 (patch) | |
| tree | 9a94e5f06914e70d8093e43ea4f85f0843aaba1c /tools/perf/util | |
| parent | 17314e2385c6627fcab4b8f97bd6668bb63495c0 (diff) | |
perf tools: Cache register accesses for unwind processing
Cache register values in an array. This gives about a 4% speedup
of the perf_reg_value function when the report command processes
DWARF unwind stacks.
Output from report over 1.5 GB of data with DWARF unwind stacks:
(TODO fix perf diff)
current code:
5.84% perf perf [.] perf_reg_value
change:
1.94% perf perf [.] perf_reg_value
And a little bit of overall speedup:
(perf stat -r 5 -e '{cycles,instructions}:u' ...)
current code:
310,298,611,754 cycles ( +- 0.33% )
439,669,689,341 instructions ( +- 0.03% )
188.656753166 seconds time elapsed ( +- 0.82% )
change:
291,315,329,878 cycles ( +- 0.22% )
391,763,485,304 instructions ( +- 0.03% )
180.742249687 seconds time elapsed ( +- 0.64% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jean Pihet <jean.pihet@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1401892622-30848-2-git-send-email-jolsa@kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Diffstat (limited to 'tools/perf/util')
| -rw-r--r-- | tools/perf/util/event.h | 5 | ||||
| -rw-r--r-- | tools/perf/util/perf_regs.c | 10 | ||||
| -rw-r--r-- | tools/perf/util/perf_regs.h | 4 |
3 files changed, 17 insertions, 2 deletions
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 9ba2eb3bdcfd..e5dd40addb30 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include "../perf.h" | 7 | #include "../perf.h" |
| 8 | #include "map.h" | 8 | #include "map.h" |
| 9 | #include "build-id.h" | 9 | #include "build-id.h" |
| 10 | #include "perf_regs.h" | ||
| 10 | 11 | ||
| 11 | struct mmap_event { | 12 | struct mmap_event { |
| 12 | struct perf_event_header header; | 13 | struct perf_event_header header; |
| @@ -89,6 +90,10 @@ struct regs_dump { | |||
| 89 | u64 abi; | 90 | u64 abi; |
| 90 | u64 mask; | 91 | u64 mask; |
| 91 | u64 *regs; | 92 | u64 *regs; |
| 93 | |||
| 94 | /* Cached values/mask filled by first register access. */ | ||
| 95 | u64 cache_regs[PERF_REGS_MAX]; | ||
| 96 | u64 cache_mask; | ||
| 92 | }; | 97 | }; |
| 93 | 98 | ||
| 94 | struct stack_dump { | 99 | struct stack_dump { |
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index a3539ef30b15..43168fb0d9a2 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c | |||
| @@ -1,11 +1,15 @@ | |||
| 1 | #include <errno.h> | 1 | #include <errno.h> |
| 2 | #include "perf_regs.h" | 2 | #include "perf_regs.h" |
| 3 | #include "event.h" | ||
| 3 | 4 | ||
| 4 | int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) | 5 | int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) |
| 5 | { | 6 | { |
| 6 | int i, idx = 0; | 7 | int i, idx = 0; |
| 7 | u64 mask = regs->mask; | 8 | u64 mask = regs->mask; |
| 8 | 9 | ||
| 10 | if (regs->cache_mask & (1 << id)) | ||
| 11 | goto out; | ||
| 12 | |||
| 9 | if (!(mask & (1 << id))) | 13 | if (!(mask & (1 << id))) |
| 10 | return -EINVAL; | 14 | return -EINVAL; |
| 11 | 15 | ||
| @@ -14,6 +18,10 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) | |||
| 14 | idx++; | 18 | idx++; |
| 15 | } | 19 | } |
| 16 | 20 | ||
| 17 | *valp = regs->regs[idx]; | 21 | regs->cache_mask |= (1 << id); |
| 22 | regs->cache_regs[id] = regs->regs[idx]; | ||
| 23 | |||
| 24 | out: | ||
| 25 | *valp = regs->cache_regs[id]; | ||
| 18 | return 0; | 26 | return 0; |
| 19 | } | 27 | } |
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 79c78f74e0cf..980dbf76bc98 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h | |||
| @@ -2,7 +2,8 @@ | |||
| 2 | #define __PERF_REGS_H | 2 | #define __PERF_REGS_H |
| 3 | 3 | ||
| 4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
| 5 | #include "event.h" | 5 | |
| 6 | struct regs_dump; | ||
| 6 | 7 | ||
| 7 | #ifdef HAVE_PERF_REGS_SUPPORT | 8 | #ifdef HAVE_PERF_REGS_SUPPORT |
| 8 | #include <perf_regs.h> | 9 | #include <perf_regs.h> |
| @@ -11,6 +12,7 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); | |||
| 11 | 12 | ||
| 12 | #else | 13 | #else |
| 13 | #define PERF_REGS_MASK 0 | 14 | #define PERF_REGS_MASK 0 |
| 15 | #define PERF_REGS_MAX 0 | ||
| 14 | 16 | ||
| 15 | static inline const char *perf_reg_name(int id __maybe_unused) | 17 | static inline const char *perf_reg_name(int id __maybe_unused) |
| 16 | { | 18 | { |
