diff options
author | Ingo Molnar <mingo@kernel.org> | 2015-06-09 05:46:04 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2015-06-09 05:46:04 -0400 |
commit | 028c63b56795206464263fa3bc47094704c2a840 (patch) | |
tree | 5b89c379e629ed3c23f22102be66a99025c496da | |
parent | a3d86542de8850be52e8589da22b24002941dfb7 (diff) | |
parent | d3a7c489c7fd2463e3b2c3a2179c7be879dd9cb4 (diff) |
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
User visible changes:
- Fix perf.data size reporting in 'perf record' in no-buildid mode (He Kuang)
Infrastructure changes:
- Protect accesses to the DSO rbtrees/lists with a rw lock and reference
count struct dso instances (Arnaldo Carvalho de Melo)
- Export dynamic symbols used by traceevent plugins (He Kuang)
- Add libtrace-dynamic-list file to libtraceevent's .gitignore (He Kuang)
- Refactor shadow stats code in 'perf stat', prep work for further
patchkits (Jiri Olsa)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | tools/lib/traceevent/.gitignore | 1 | ||||
-rw-r--r-- | tools/lib/traceevent/Makefile | 14 | ||||
-rw-r--r-- | tools/perf/Makefile.perf | 14 | ||||
-rw-r--r-- | tools/perf/builtin-record.c | 6 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 506 | ||||
-rw-r--r-- | tools/perf/tests/dso-data.c | 4 | ||||
-rw-r--r-- | tools/perf/tests/hists_common.c | 6 | ||||
-rw-r--r-- | tools/perf/util/Build | 1 | ||||
-rw-r--r-- | tools/perf/util/dso.c | 87 | ||||
-rw-r--r-- | tools/perf/util/dso.h | 24 | ||||
-rw-r--r-- | tools/perf/util/header.c | 1 | ||||
-rw-r--r-- | tools/perf/util/machine.c | 58 | ||||
-rw-r--r-- | tools/perf/util/machine.h | 4 | ||||
-rw-r--r-- | tools/perf/util/map.c | 11 | ||||
-rw-r--r-- | tools/perf/util/probe-event.c | 2 | ||||
-rw-r--r-- | tools/perf/util/probe-finder.c | 2 | ||||
-rw-r--r-- | tools/perf/util/stat-shadow.c | 434 | ||||
-rw-r--r-- | tools/perf/util/stat.c | 35 | ||||
-rw-r--r-- | tools/perf/util/stat.h | 40 | ||||
-rw-r--r-- | tools/perf/util/symbol-elf.c | 2 | ||||
-rw-r--r-- | tools/perf/util/symbol.c | 4 | ||||
-rw-r--r-- | tools/perf/util/vdso.c | 54 |
22 files changed, 737 insertions, 573 deletions
diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore index 35f56be5a4cd..3c60335fe7be 100644 --- a/tools/lib/traceevent/.gitignore +++ b/tools/lib/traceevent/.gitignore | |||
@@ -1 +1,2 @@ | |||
1 | TRACEEVENT-CFLAGS | 1 | TRACEEVENT-CFLAGS |
2 | libtraceevent-dynamic-list | ||
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 84640394ebf9..6daaff652aff 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile | |||
@@ -23,6 +23,7 @@ endef | |||
23 | # Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. | 23 | # Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. |
24 | $(call allow-override,CC,$(CROSS_COMPILE)gcc) | 24 | $(call allow-override,CC,$(CROSS_COMPILE)gcc) |
25 | $(call allow-override,AR,$(CROSS_COMPILE)ar) | 25 | $(call allow-override,AR,$(CROSS_COMPILE)ar) |
26 | $(call allow-override,NM,$(CROSS_COMPILE)nm) | ||
26 | 27 | ||
27 | EXT = -std=gnu99 | 28 | EXT = -std=gnu99 |
28 | INSTALL = install | 29 | INSTALL = install |
@@ -157,8 +158,9 @@ PLUGINS_IN := $(PLUGINS:.so=-in.o) | |||
157 | 158 | ||
158 | TE_IN := $(OUTPUT)libtraceevent-in.o | 159 | TE_IN := $(OUTPUT)libtraceevent-in.o |
159 | LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) | 160 | LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) |
161 | DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list | ||
160 | 162 | ||
161 | CMD_TARGETS = $(LIB_FILE) $(PLUGINS) | 163 | CMD_TARGETS = $(LIB_FILE) $(PLUGINS) $(DYNAMIC_LIST_FILE) |
162 | 164 | ||
163 | TARGETS = $(CMD_TARGETS) | 165 | TARGETS = $(CMD_TARGETS) |
164 | 166 | ||
@@ -175,6 +177,9 @@ $(OUTPUT)libtraceevent.so: $(TE_IN) | |||
175 | $(OUTPUT)libtraceevent.a: $(TE_IN) | 177 | $(OUTPUT)libtraceevent.a: $(TE_IN) |
176 | $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ | 178 | $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ |
177 | 179 | ||
180 | $(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS) | ||
181 | $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@) | ||
182 | |||
178 | plugins: $(PLUGINS) | 183 | plugins: $(PLUGINS) |
179 | 184 | ||
180 | __plugin_obj = $(notdir $@) | 185 | __plugin_obj = $(notdir $@) |
@@ -244,6 +249,13 @@ define do_install_plugins | |||
244 | done | 249 | done |
245 | endef | 250 | endef |
246 | 251 | ||
252 | define do_generate_dynamic_list_file | ||
253 | (echo '{'; \ | ||
254 | $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u; \ | ||
255 | echo '};'; \ | ||
256 | ) > $2 | ||
257 | endef | ||
258 | |||
247 | install_lib: all_cmd install_plugins | 259 | install_lib: all_cmd install_plugins |
248 | $(call QUIET_INSTALL, $(LIB_FILE)) \ | 260 | $(call QUIET_INSTALL, $(LIB_FILE)) \ |
249 | $(call do_install,$(LIB_FILE),$(libdir_SQ)) | 261 | $(call do_install,$(LIB_FILE),$(libdir_SQ)) |
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5816a3bb7e9f..b1dfcd8e93e3 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf | |||
@@ -173,6 +173,9 @@ endif | |||
173 | LIBTRACEEVENT = $(TE_PATH)libtraceevent.a | 173 | LIBTRACEEVENT = $(TE_PATH)libtraceevent.a |
174 | export LIBTRACEEVENT | 174 | export LIBTRACEEVENT |
175 | 175 | ||
176 | LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list | ||
177 | LDFLAGS += -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) | ||
178 | |||
176 | LIBAPI = $(LIB_PATH)libapi.a | 179 | LIBAPI = $(LIB_PATH)libapi.a |
177 | export LIBAPI | 180 | export LIBAPI |
178 | 181 | ||
@@ -278,7 +281,7 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj | |||
278 | $(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE | 281 | $(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE |
279 | $(Q)$(MAKE) $(build)=perf | 282 | $(Q)$(MAKE) $(build)=perf |
280 | 283 | ||
281 | $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) | 284 | $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) |
282 | $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(PERF_IN) $(LIBS) -o $@ | 285 | $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(PERF_IN) $(LIBS) -o $@ |
283 | 286 | ||
284 | $(GTK_IN): FORCE | 287 | $(GTK_IN): FORCE |
@@ -373,7 +376,13 @@ $(LIB_FILE): $(LIBPERF_IN) | |||
373 | LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) | 376 | LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) |
374 | 377 | ||
375 | $(LIBTRACEEVENT): FORCE | 378 | $(LIBTRACEEVENT): FORCE |
376 | $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a plugins | 379 | $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a |
380 | |||
381 | libtraceevent_plugins: FORCE | ||
382 | $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins | ||
383 | |||
384 | $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins | ||
385 | $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list | ||
377 | 386 | ||
378 | $(LIBTRACEEVENT)-clean: | 387 | $(LIBTRACEEVENT)-clean: |
379 | $(call QUIET_CLEAN, libtraceevent) | 388 | $(call QUIET_CLEAN, libtraceevent) |
@@ -555,4 +564,5 @@ FORCE: | |||
555 | .PHONY: all install clean config-clean strip install-gtk | 564 | .PHONY: all install clean config-clean strip install-gtk |
556 | .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell | 565 | .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell |
557 | .PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep | 566 | .PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep |
567 | .PHONY: libtraceevent_plugins | ||
558 | 568 | ||
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 91aa2a3dcf19..d3731cce7c1c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -345,12 +345,9 @@ static int process_buildids(struct record *rec) | |||
345 | struct perf_data_file *file = &rec->file; | 345 | struct perf_data_file *file = &rec->file; |
346 | struct perf_session *session = rec->session; | 346 | struct perf_session *session = rec->session; |
347 | 347 | ||
348 | u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); | 348 | if (file->size == 0) |
349 | if (size == 0) | ||
350 | return 0; | 349 | return 0; |
351 | 350 | ||
352 | file->size = size; | ||
353 | |||
354 | /* | 351 | /* |
355 | * During this process, it'll load kernel map and replace the | 352 | * During this process, it'll load kernel map and replace the |
356 | * dso->long_name to a real pathname it found. In this case | 353 | * dso->long_name to a real pathname it found. In this case |
@@ -719,6 +716,7 @@ out_child: | |||
719 | 716 | ||
720 | if (!err && !file->is_pipe) { | 717 | if (!err && !file->is_pipe) { |
721 | rec->session->header.data_size += rec->bytes_written; | 718 | rec->session->header.data_size += rec->bytes_written; |
719 | file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); | ||
722 | 720 | ||
723 | if (!rec->no_buildid) { | 721 | if (!rec->no_buildid) { |
724 | process_buildids(rec); | 722 | process_buildids(rec); |
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index fd577f725d23..ff3d25803400 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -73,8 +73,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix); | |||
73 | static void print_aggr(char *prefix); | 73 | static void print_aggr(char *prefix); |
74 | 74 | ||
75 | /* Default events used for perf stat -T */ | 75 | /* Default events used for perf stat -T */ |
76 | static const char * const transaction_attrs[] = { | 76 | static const char *transaction_attrs = { |
77 | "task-clock", | 77 | "task-clock," |
78 | "{" | 78 | "{" |
79 | "instructions," | 79 | "instructions," |
80 | "cycles," | 80 | "cycles," |
@@ -86,8 +86,8 @@ static const char * const transaction_attrs[] = { | |||
86 | }; | 86 | }; |
87 | 87 | ||
88 | /* More limited version when the CPU does not have all events. */ | 88 | /* More limited version when the CPU does not have all events. */ |
89 | static const char * const transaction_limited_attrs[] = { | 89 | static const char * transaction_limited_attrs = { |
90 | "task-clock", | 90 | "task-clock," |
91 | "{" | 91 | "{" |
92 | "instructions," | 92 | "instructions," |
93 | "cycles," | 93 | "cycles," |
@@ -96,30 +96,12 @@ static const char * const transaction_limited_attrs[] = { | |||
96 | "}" | 96 | "}" |
97 | }; | 97 | }; |
98 | 98 | ||
99 | /* must match transaction_attrs and the beginning limited_attrs */ | ||
100 | enum { | ||
101 | T_TASK_CLOCK, | ||
102 | T_INSTRUCTIONS, | ||
103 | T_CYCLES, | ||
104 | T_CYCLES_IN_TX, | ||
105 | T_TRANSACTION_START, | ||
106 | T_ELISION_START, | ||
107 | T_CYCLES_IN_TX_CP, | ||
108 | }; | ||
109 | |||
110 | static struct perf_evlist *evsel_list; | 99 | static struct perf_evlist *evsel_list; |
111 | 100 | ||
112 | static struct target target = { | 101 | static struct target target = { |
113 | .uid = UINT_MAX, | 102 | .uid = UINT_MAX, |
114 | }; | 103 | }; |
115 | 104 | ||
116 | enum aggr_mode { | ||
117 | AGGR_NONE, | ||
118 | AGGR_GLOBAL, | ||
119 | AGGR_SOCKET, | ||
120 | AGGR_CORE, | ||
121 | }; | ||
122 | |||
123 | static int run_count = 1; | 105 | static int run_count = 1; |
124 | static bool no_inherit = false; | 106 | static bool no_inherit = false; |
125 | static bool scale = true; | 107 | static bool scale = true; |
@@ -147,10 +129,6 @@ static int (*aggr_get_id)(struct cpu_map *m, int cpu); | |||
147 | 129 | ||
148 | static volatile int done = 0; | 130 | static volatile int done = 0; |
149 | 131 | ||
150 | struct perf_stat { | ||
151 | struct stats res_stats[3]; | ||
152 | }; | ||
153 | |||
154 | static inline void diff_timespec(struct timespec *r, struct timespec *a, | 132 | static inline void diff_timespec(struct timespec *r, struct timespec *a, |
155 | struct timespec *b) | 133 | struct timespec *b) |
156 | { | 134 | { |
@@ -180,6 +158,8 @@ static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) | |||
180 | 158 | ||
181 | for (i = 0; i < 3; i++) | 159 | for (i = 0; i < 3; i++) |
182 | init_stats(&ps->res_stats[i]); | 160 | init_stats(&ps->res_stats[i]); |
161 | |||
162 | perf_stat_evsel_id_init(evsel); | ||
183 | } | 163 | } |
184 | 164 | ||
185 | static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) | 165 | static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) |
@@ -247,51 +227,6 @@ out_free: | |||
247 | return -1; | 227 | return -1; |
248 | } | 228 | } |
249 | 229 | ||
250 | enum { | ||
251 | CTX_BIT_USER = 1 << 0, | ||
252 | CTX_BIT_KERNEL = 1 << 1, | ||
253 | CTX_BIT_HV = 1 << 2, | ||
254 | CTX_BIT_HOST = 1 << 3, | ||
255 | CTX_BIT_IDLE = 1 << 4, | ||
256 | CTX_BIT_MAX = 1 << 5, | ||
257 | }; | ||
258 | |||
259 | #define NUM_CTX CTX_BIT_MAX | ||
260 | |||
261 | static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; | ||
262 | static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; | ||
263 | static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; | ||
264 | static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; | ||
265 | static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; | ||
266 | static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; | ||
267 | static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
268 | static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
269 | static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
270 | static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
271 | static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
272 | static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; | ||
273 | static struct stats walltime_nsecs_stats; | ||
274 | static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; | ||
275 | static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; | ||
276 | |||
277 | static int evsel_context(struct perf_evsel *evsel) | ||
278 | { | ||
279 | int ctx = 0; | ||
280 | |||
281 | if (evsel->attr.exclude_kernel) | ||
282 | ctx |= CTX_BIT_KERNEL; | ||
283 | if (evsel->attr.exclude_user) | ||
284 | ctx |= CTX_BIT_USER; | ||
285 | if (evsel->attr.exclude_hv) | ||
286 | ctx |= CTX_BIT_HV; | ||
287 | if (evsel->attr.exclude_host) | ||
288 | ctx |= CTX_BIT_HOST; | ||
289 | if (evsel->attr.exclude_idle) | ||
290 | ctx |= CTX_BIT_IDLE; | ||
291 | |||
292 | return ctx; | ||
293 | } | ||
294 | |||
295 | static void perf_stat__reset_stats(struct perf_evlist *evlist) | 230 | static void perf_stat__reset_stats(struct perf_evlist *evlist) |
296 | { | 231 | { |
297 | struct perf_evsel *evsel; | 232 | struct perf_evsel *evsel; |
@@ -301,23 +236,7 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist) | |||
301 | perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); | 236 | perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); |
302 | } | 237 | } |
303 | 238 | ||
304 | memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); | 239 | perf_stat__reset_shadow_stats(); |
305 | memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); | ||
306 | memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); | ||
307 | memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); | ||
308 | memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); | ||
309 | memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); | ||
310 | memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); | ||
311 | memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); | ||
312 | memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); | ||
313 | memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); | ||
314 | memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); | ||
315 | memset(runtime_cycles_in_tx_stats, 0, | ||
316 | sizeof(runtime_cycles_in_tx_stats)); | ||
317 | memset(runtime_transaction_stats, 0, | ||
318 | sizeof(runtime_transaction_stats)); | ||
319 | memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); | ||
320 | memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); | ||
321 | } | 240 | } |
322 | 241 | ||
323 | static int create_perf_stat_counter(struct perf_evsel *evsel) | 242 | static int create_perf_stat_counter(struct perf_evsel *evsel) |
@@ -354,72 +273,6 @@ static inline int nsec_counter(struct perf_evsel *evsel) | |||
354 | return 0; | 273 | return 0; |
355 | } | 274 | } |
356 | 275 | ||
357 | static struct perf_evsel *nth_evsel(int n) | ||
358 | { | ||
359 | static struct perf_evsel **array; | ||
360 | static int array_len; | ||
361 | struct perf_evsel *ev; | ||
362 | int j; | ||
363 | |||
364 | /* Assumes this only called when evsel_list does not change anymore. */ | ||
365 | if (!array) { | ||
366 | evlist__for_each(evsel_list, ev) | ||
367 | array_len++; | ||
368 | array = malloc(array_len * sizeof(void *)); | ||
369 | if (!array) | ||
370 | exit(ENOMEM); | ||
371 | j = 0; | ||
372 | evlist__for_each(evsel_list, ev) | ||
373 | array[j++] = ev; | ||
374 | } | ||
375 | if (n < array_len) | ||
376 | return array[n]; | ||
377 | return NULL; | ||
378 | } | ||
379 | |||
380 | /* | ||
381 | * Update various tracking values we maintain to print | ||
382 | * more semantic information such as miss/hit ratios, | ||
383 | * instruction rates, etc: | ||
384 | */ | ||
385 | static void update_shadow_stats(struct perf_evsel *counter, u64 *count, | ||
386 | int cpu) | ||
387 | { | ||
388 | int ctx = evsel_context(counter); | ||
389 | |||
390 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) | ||
391 | update_stats(&runtime_nsecs_stats[cpu], count[0]); | ||
392 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | ||
393 | update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); | ||
394 | else if (transaction_run && | ||
395 | perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX))) | ||
396 | update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); | ||
397 | else if (transaction_run && | ||
398 | perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START))) | ||
399 | update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); | ||
400 | else if (transaction_run && | ||
401 | perf_evsel__cmp(counter, nth_evsel(T_ELISION_START))) | ||
402 | update_stats(&runtime_elision_stats[ctx][cpu], count[0]); | ||
403 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) | ||
404 | update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); | ||
405 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) | ||
406 | update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]); | ||
407 | else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | ||
408 | update_stats(&runtime_branches_stats[ctx][cpu], count[0]); | ||
409 | else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) | ||
410 | update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]); | ||
411 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) | ||
412 | update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]); | ||
413 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) | ||
414 | update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); | ||
415 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) | ||
416 | update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); | ||
417 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) | ||
418 | update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); | ||
419 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) | ||
420 | update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); | ||
421 | } | ||
422 | |||
423 | static void zero_per_pkg(struct perf_evsel *counter) | 276 | static void zero_per_pkg(struct perf_evsel *counter) |
424 | { | 277 | { |
425 | if (counter->per_pkg_mask) | 278 | if (counter->per_pkg_mask) |
@@ -480,7 +333,7 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, | |||
480 | perf_counts_values__scale(count, scale, NULL); | 333 | perf_counts_values__scale(count, scale, NULL); |
481 | evsel->counts->cpu[cpu] = *count; | 334 | evsel->counts->cpu[cpu] = *count; |
482 | if (aggr_mode == AGGR_NONE) | 335 | if (aggr_mode == AGGR_NONE) |
483 | update_shadow_stats(evsel, count->values, cpu); | 336 | perf_stat__update_shadow_stats(evsel, count->values, cpu); |
484 | break; | 337 | break; |
485 | case AGGR_GLOBAL: | 338 | case AGGR_GLOBAL: |
486 | aggr->val += count->val; | 339 | aggr->val += count->val; |
@@ -528,7 +381,7 @@ static int read_counter_aggr(struct perf_evsel *counter) | |||
528 | /* | 381 | /* |
529 | * Save the full runtime - to allow normalization during printout: | 382 | * Save the full runtime - to allow normalization during printout: |
530 | */ | 383 | */ |
531 | update_shadow_stats(counter, count, 0); | 384 | perf_stat__update_shadow_stats(counter, count, 0); |
532 | 385 | ||
533 | return 0; | 386 | return 0; |
534 | } | 387 | } |
@@ -906,200 +759,11 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) | |||
906 | fprintf(output, " "); | 759 | fprintf(output, " "); |
907 | } | 760 | } |
908 | 761 | ||
909 | /* used for get_ratio_color() */ | ||
910 | enum grc_type { | ||
911 | GRC_STALLED_CYCLES_FE, | ||
912 | GRC_STALLED_CYCLES_BE, | ||
913 | GRC_CACHE_MISSES, | ||
914 | GRC_MAX_NR | ||
915 | }; | ||
916 | |||
917 | static const char *get_ratio_color(enum grc_type type, double ratio) | ||
918 | { | ||
919 | static const double grc_table[GRC_MAX_NR][3] = { | ||
920 | [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, | ||
921 | [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, | ||
922 | [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, | ||
923 | }; | ||
924 | const char *color = PERF_COLOR_NORMAL; | ||
925 | |||
926 | if (ratio > grc_table[type][0]) | ||
927 | color = PERF_COLOR_RED; | ||
928 | else if (ratio > grc_table[type][1]) | ||
929 | color = PERF_COLOR_MAGENTA; | ||
930 | else if (ratio > grc_table[type][2]) | ||
931 | color = PERF_COLOR_YELLOW; | ||
932 | |||
933 | return color; | ||
934 | } | ||
935 | |||
936 | static void print_stalled_cycles_frontend(int cpu, | ||
937 | struct perf_evsel *evsel | ||
938 | __maybe_unused, double avg) | ||
939 | { | ||
940 | double total, ratio = 0.0; | ||
941 | const char *color; | ||
942 | int ctx = evsel_context(evsel); | ||
943 | |||
944 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
945 | |||
946 | if (total) | ||
947 | ratio = avg / total * 100.0; | ||
948 | |||
949 | color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); | ||
950 | |||
951 | fprintf(output, " # "); | ||
952 | color_fprintf(output, color, "%6.2f%%", ratio); | ||
953 | fprintf(output, " frontend cycles idle "); | ||
954 | } | ||
955 | |||
956 | static void print_stalled_cycles_backend(int cpu, | ||
957 | struct perf_evsel *evsel | ||
958 | __maybe_unused, double avg) | ||
959 | { | ||
960 | double total, ratio = 0.0; | ||
961 | const char *color; | ||
962 | int ctx = evsel_context(evsel); | ||
963 | |||
964 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
965 | |||
966 | if (total) | ||
967 | ratio = avg / total * 100.0; | ||
968 | |||
969 | color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); | ||
970 | |||
971 | fprintf(output, " # "); | ||
972 | color_fprintf(output, color, "%6.2f%%", ratio); | ||
973 | fprintf(output, " backend cycles idle "); | ||
974 | } | ||
975 | |||
976 | static void print_branch_misses(int cpu, | ||
977 | struct perf_evsel *evsel __maybe_unused, | ||
978 | double avg) | ||
979 | { | ||
980 | double total, ratio = 0.0; | ||
981 | const char *color; | ||
982 | int ctx = evsel_context(evsel); | ||
983 | |||
984 | total = avg_stats(&runtime_branches_stats[ctx][cpu]); | ||
985 | |||
986 | if (total) | ||
987 | ratio = avg / total * 100.0; | ||
988 | |||
989 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
990 | |||
991 | fprintf(output, " # "); | ||
992 | color_fprintf(output, color, "%6.2f%%", ratio); | ||
993 | fprintf(output, " of all branches "); | ||
994 | } | ||
995 | |||
996 | static void print_l1_dcache_misses(int cpu, | ||
997 | struct perf_evsel *evsel __maybe_unused, | ||
998 | double avg) | ||
999 | { | ||
1000 | double total, ratio = 0.0; | ||
1001 | const char *color; | ||
1002 | int ctx = evsel_context(evsel); | ||
1003 | |||
1004 | total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); | ||
1005 | |||
1006 | if (total) | ||
1007 | ratio = avg / total * 100.0; | ||
1008 | |||
1009 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
1010 | |||
1011 | fprintf(output, " # "); | ||
1012 | color_fprintf(output, color, "%6.2f%%", ratio); | ||
1013 | fprintf(output, " of all L1-dcache hits "); | ||
1014 | } | ||
1015 | |||
1016 | static void print_l1_icache_misses(int cpu, | ||
1017 | struct perf_evsel *evsel __maybe_unused, | ||
1018 | double avg) | ||
1019 | { | ||
1020 | double total, ratio = 0.0; | ||
1021 | const char *color; | ||
1022 | int ctx = evsel_context(evsel); | ||
1023 | |||
1024 | total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); | ||
1025 | |||
1026 | if (total) | ||
1027 | ratio = avg / total * 100.0; | ||
1028 | |||
1029 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
1030 | |||
1031 | fprintf(output, " # "); | ||
1032 | color_fprintf(output, color, "%6.2f%%", ratio); | ||
1033 | fprintf(output, " of all L1-icache hits "); | ||
1034 | } | ||
1035 | |||
1036 | static void print_dtlb_cache_misses(int cpu, | ||
1037 | struct perf_evsel *evsel __maybe_unused, | ||
1038 | double avg) | ||
1039 | { | ||
1040 | double total, ratio = 0.0; | ||
1041 | const char *color; | ||
1042 | int ctx = evsel_context(evsel); | ||
1043 | |||
1044 | total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); | ||
1045 | |||
1046 | if (total) | ||
1047 | ratio = avg / total * 100.0; | ||
1048 | |||
1049 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
1050 | |||
1051 | fprintf(output, " # "); | ||
1052 | color_fprintf(output, color, "%6.2f%%", ratio); | ||
1053 | fprintf(output, " of all dTLB cache hits "); | ||
1054 | } | ||
1055 | |||
1056 | static void print_itlb_cache_misses(int cpu, | ||
1057 | struct perf_evsel *evsel __maybe_unused, | ||
1058 | double avg) | ||
1059 | { | ||
1060 | double total, ratio = 0.0; | ||
1061 | const char *color; | ||
1062 | int ctx = evsel_context(evsel); | ||
1063 | |||
1064 | total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); | ||
1065 | |||
1066 | if (total) | ||
1067 | ratio = avg / total * 100.0; | ||
1068 | |||
1069 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
1070 | |||
1071 | fprintf(output, " # "); | ||
1072 | color_fprintf(output, color, "%6.2f%%", ratio); | ||
1073 | fprintf(output, " of all iTLB cache hits "); | ||
1074 | } | ||
1075 | |||
1076 | static void print_ll_cache_misses(int cpu, | ||
1077 | struct perf_evsel *evsel __maybe_unused, | ||
1078 | double avg) | ||
1079 | { | ||
1080 | double total, ratio = 0.0; | ||
1081 | const char *color; | ||
1082 | int ctx = evsel_context(evsel); | ||
1083 | |||
1084 | total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); | ||
1085 | |||
1086 | if (total) | ||
1087 | ratio = avg / total * 100.0; | ||
1088 | |||
1089 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
1090 | |||
1091 | fprintf(output, " # "); | ||
1092 | color_fprintf(output, color, "%6.2f%%", ratio); | ||
1093 | fprintf(output, " of all LL-cache hits "); | ||
1094 | } | ||
1095 | |||
1096 | static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) | 762 | static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) |
1097 | { | 763 | { |
1098 | double total, ratio = 0.0, total2; | ||
1099 | double sc = evsel->scale; | 764 | double sc = evsel->scale; |
1100 | const char *fmt; | 765 | const char *fmt; |
1101 | int cpu = cpu_map__id_to_cpu(id); | 766 | int cpu = cpu_map__id_to_cpu(id); |
1102 | int ctx = evsel_context(evsel); | ||
1103 | 767 | ||
1104 | if (csv_output) { | 768 | if (csv_output) { |
1105 | fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s"; | 769 | fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s"; |
@@ -1130,138 +794,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) | |||
1130 | if (csv_output || interval) | 794 | if (csv_output || interval) |
1131 | return; | 795 | return; |
1132 | 796 | ||
1133 | if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { | 797 | perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode); |
1134 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
1135 | if (total) { | ||
1136 | ratio = avg / total; | ||
1137 | fprintf(output, " # %5.2f insns per cycle ", ratio); | ||
1138 | } else { | ||
1139 | fprintf(output, " "); | ||
1140 | } | ||
1141 | total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); | ||
1142 | total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); | ||
1143 | |||
1144 | if (total && avg) { | ||
1145 | ratio = total / avg; | ||
1146 | fprintf(output, "\n"); | ||
1147 | if (aggr_mode == AGGR_NONE) | ||
1148 | fprintf(output, " "); | ||
1149 | fprintf(output, " # %5.2f stalled cycles per insn", ratio); | ||
1150 | } | ||
1151 | |||
1152 | } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && | ||
1153 | runtime_branches_stats[ctx][cpu].n != 0) { | ||
1154 | print_branch_misses(cpu, evsel, avg); | ||
1155 | } else if ( | ||
1156 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
1157 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | | ||
1158 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
1159 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
1160 | runtime_l1_dcache_stats[ctx][cpu].n != 0) { | ||
1161 | print_l1_dcache_misses(cpu, evsel, avg); | ||
1162 | } else if ( | ||
1163 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
1164 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | | ||
1165 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
1166 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
1167 | runtime_l1_icache_stats[ctx][cpu].n != 0) { | ||
1168 | print_l1_icache_misses(cpu, evsel, avg); | ||
1169 | } else if ( | ||
1170 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
1171 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | | ||
1172 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
1173 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
1174 | runtime_dtlb_cache_stats[ctx][cpu].n != 0) { | ||
1175 | print_dtlb_cache_misses(cpu, evsel, avg); | ||
1176 | } else if ( | ||
1177 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
1178 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | | ||
1179 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
1180 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
1181 | runtime_itlb_cache_stats[ctx][cpu].n != 0) { | ||
1182 | print_itlb_cache_misses(cpu, evsel, avg); | ||
1183 | } else if ( | ||
1184 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
1185 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | | ||
1186 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
1187 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
1188 | runtime_ll_cache_stats[ctx][cpu].n != 0) { | ||
1189 | print_ll_cache_misses(cpu, evsel, avg); | ||
1190 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && | ||
1191 | runtime_cacherefs_stats[ctx][cpu].n != 0) { | ||
1192 | total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); | ||
1193 | |||
1194 | if (total) | ||
1195 | ratio = avg * 100 / total; | ||
1196 | |||
1197 | fprintf(output, " # %8.3f %% of all cache refs ", ratio); | ||
1198 | |||
1199 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { | ||
1200 | print_stalled_cycles_frontend(cpu, evsel, avg); | ||
1201 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { | ||
1202 | print_stalled_cycles_backend(cpu, evsel, avg); | ||
1203 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { | ||
1204 | total = avg_stats(&runtime_nsecs_stats[cpu]); | ||
1205 | |||
1206 | if (total) { | ||
1207 | ratio = avg / total; | ||
1208 | fprintf(output, " # %8.3f GHz ", ratio); | ||
1209 | } else { | ||
1210 | fprintf(output, " "); | ||
1211 | } | ||
1212 | } else if (transaction_run && | ||
1213 | perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) { | ||
1214 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
1215 | if (total) | ||
1216 | fprintf(output, | ||
1217 | " # %5.2f%% transactional cycles ", | ||
1218 | 100.0 * (avg / total)); | ||
1219 | } else if (transaction_run && | ||
1220 | perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) { | ||
1221 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
1222 | total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | ||
1223 | if (total2 < avg) | ||
1224 | total2 = avg; | ||
1225 | if (total) | ||
1226 | fprintf(output, | ||
1227 | " # %5.2f%% aborted cycles ", | ||
1228 | 100.0 * ((total2-avg) / total)); | ||
1229 | } else if (transaction_run && | ||
1230 | perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) && | ||
1231 | avg > 0 && | ||
1232 | runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { | ||
1233 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | ||
1234 | |||
1235 | if (total) | ||
1236 | ratio = total / avg; | ||
1237 | |||
1238 | fprintf(output, " # %8.0f cycles / transaction ", ratio); | ||
1239 | } else if (transaction_run && | ||
1240 | perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) && | ||
1241 | avg > 0 && | ||
1242 | runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { | ||
1243 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | ||
1244 | |||
1245 | if (total) | ||
1246 | ratio = total / avg; | ||
1247 | |||
1248 | fprintf(output, " # %8.0f cycles / elision ", ratio); | ||
1249 | } else if (runtime_nsecs_stats[cpu].n != 0) { | ||
1250 | char unit = 'M'; | ||
1251 | |||
1252 | total = avg_stats(&runtime_nsecs_stats[cpu]); | ||
1253 | |||
1254 | if (total) | ||
1255 | ratio = 1000.0 * avg / total; | ||
1256 | if (ratio < 0.001) { | ||
1257 | ratio *= 1000; | ||
1258 | unit = 'K'; | ||
1259 | } | ||
1260 | |||
1261 | fprintf(output, " # %8.3f %c/sec ", ratio, unit); | ||
1262 | } else { | ||
1263 | fprintf(output, " "); | ||
1264 | } | ||
1265 | } | 798 | } |
1266 | 799 | ||
1267 | static void print_aggr(char *prefix) | 800 | static void print_aggr(char *prefix) |
@@ -1576,17 +1109,6 @@ static int perf_stat_init_aggr_mode(void) | |||
1576 | return 0; | 1109 | return 0; |
1577 | } | 1110 | } |
1578 | 1111 | ||
1579 | static int setup_events(const char * const *attrs, unsigned len) | ||
1580 | { | ||
1581 | unsigned i; | ||
1582 | |||
1583 | for (i = 0; i < len; i++) { | ||
1584 | if (parse_events(evsel_list, attrs[i], NULL)) | ||
1585 | return -1; | ||
1586 | } | ||
1587 | return 0; | ||
1588 | } | ||
1589 | |||
1590 | /* | 1112 | /* |
1591 | * Add default attributes, if there were no attributes specified or | 1113 | * Add default attributes, if there were no attributes specified or |
1592 | * if -d/--detailed, -d -d or -d -d -d is used: | 1114 | * if -d/--detailed, -d -d or -d -d -d is used: |
@@ -1708,12 +1230,10 @@ static int add_default_attributes(void) | |||
1708 | int err; | 1230 | int err; |
1709 | if (pmu_have_event("cpu", "cycles-ct") && | 1231 | if (pmu_have_event("cpu", "cycles-ct") && |
1710 | pmu_have_event("cpu", "el-start")) | 1232 | pmu_have_event("cpu", "el-start")) |
1711 | err = setup_events(transaction_attrs, | 1233 | err = parse_events(evsel_list, transaction_attrs, NULL); |
1712 | ARRAY_SIZE(transaction_attrs)); | ||
1713 | else | 1234 | else |
1714 | err = setup_events(transaction_limited_attrs, | 1235 | err = parse_events(evsel_list, transaction_limited_attrs, NULL); |
1715 | ARRAY_SIZE(transaction_limited_attrs)); | 1236 | if (err) { |
1716 | if (err < 0) { | ||
1717 | fprintf(stderr, "Cannot set up transaction events\n"); | 1237 | fprintf(stderr, "Cannot set up transaction events\n"); |
1718 | return -1; | 1238 | return -1; |
1719 | } | 1239 | } |
diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index 3e41c61bd861..a218aeaf56a0 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c | |||
@@ -166,7 +166,7 @@ int test__dso_data(void) | |||
166 | free(buf); | 166 | free(buf); |
167 | } | 167 | } |
168 | 168 | ||
169 | dso__delete(dso); | 169 | dso__put(dso); |
170 | unlink(file); | 170 | unlink(file); |
171 | return 0; | 171 | return 0; |
172 | } | 172 | } |
@@ -226,7 +226,7 @@ static void dsos__delete(int cnt) | |||
226 | struct dso *dso = dsos[i]; | 226 | struct dso *dso = dsos[i]; |
227 | 227 | ||
228 | unlink(dso->name); | 228 | unlink(dso->name); |
229 | dso__delete(dso); | 229 | dso__put(dso); |
230 | } | 230 | } |
231 | 231 | ||
232 | free(dsos); | 232 | free(dsos); |
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index 915f60af6a0e..ce80b274b097 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c | |||
@@ -134,11 +134,15 @@ struct machine *setup_fake_machine(struct machines *machines) | |||
134 | 134 | ||
135 | sym = symbol__new(fsym->start, fsym->length, | 135 | sym = symbol__new(fsym->start, fsym->length, |
136 | STB_GLOBAL, fsym->name); | 136 | STB_GLOBAL, fsym->name); |
137 | if (sym == NULL) | 137 | if (sym == NULL) { |
138 | dso__put(dso); | ||
138 | goto out; | 139 | goto out; |
140 | } | ||
139 | 141 | ||
140 | symbols__insert(&dso->symbols[MAP__FUNCTION], sym); | 142 | symbols__insert(&dso->symbols[MAP__FUNCTION], sym); |
141 | } | 143 | } |
144 | |||
145 | dso__put(dso); | ||
142 | } | 146 | } |
143 | 147 | ||
144 | return machine; | 148 | return machine; |
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index e4b676de2f64..586a59d46022 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build | |||
@@ -68,6 +68,7 @@ libperf-y += rblist.o | |||
68 | libperf-y += intlist.o | 68 | libperf-y += intlist.o |
69 | libperf-y += vdso.o | 69 | libperf-y += vdso.o |
70 | libperf-y += stat.o | 70 | libperf-y += stat.o |
71 | libperf-y += stat-shadow.o | ||
71 | libperf-y += record.o | 72 | libperf-y += record.o |
72 | libperf-y += srcline.o | 73 | libperf-y += srcline.o |
73 | libperf-y += data.o | 74 | libperf-y += data.o |
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 5ec9e892c89b..7c0c08386a1d 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c | |||
@@ -889,8 +889,8 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name, | |||
889 | * Either one of the dso or name parameter must be non-NULL or the | 889 | * Either one of the dso or name parameter must be non-NULL or the |
890 | * function will not work. | 890 | * function will not work. |
891 | */ | 891 | */ |
892 | static struct dso *dso__findlink_by_longname(struct rb_root *root, | 892 | static struct dso *__dso__findlink_by_longname(struct rb_root *root, |
893 | struct dso *dso, const char *name) | 893 | struct dso *dso, const char *name) |
894 | { | 894 | { |
895 | struct rb_node **p = &root->rb_node; | 895 | struct rb_node **p = &root->rb_node; |
896 | struct rb_node *parent = NULL; | 896 | struct rb_node *parent = NULL; |
@@ -937,10 +937,10 @@ static struct dso *dso__findlink_by_longname(struct rb_root *root, | |||
937 | return NULL; | 937 | return NULL; |
938 | } | 938 | } |
939 | 939 | ||
940 | static inline struct dso * | 940 | static inline struct dso *__dso__find_by_longname(struct rb_root *root, |
941 | dso__find_by_longname(const struct rb_root *root, const char *name) | 941 | const char *name) |
942 | { | 942 | { |
943 | return dso__findlink_by_longname((struct rb_root *)root, NULL, name); | 943 | return __dso__findlink_by_longname(root, NULL, name); |
944 | } | 944 | } |
945 | 945 | ||
946 | void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) | 946 | void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) |
@@ -1049,6 +1049,7 @@ struct dso *dso__new(const char *name) | |||
1049 | INIT_LIST_HEAD(&dso->node); | 1049 | INIT_LIST_HEAD(&dso->node); |
1050 | INIT_LIST_HEAD(&dso->data.open_entry); | 1050 | INIT_LIST_HEAD(&dso->data.open_entry); |
1051 | pthread_mutex_init(&dso->lock, NULL); | 1051 | pthread_mutex_init(&dso->lock, NULL); |
1052 | atomic_set(&dso->refcnt, 1); | ||
1052 | } | 1053 | } |
1053 | 1054 | ||
1054 | return dso; | 1055 | return dso; |
@@ -1083,6 +1084,19 @@ void dso__delete(struct dso *dso) | |||
1083 | free(dso); | 1084 | free(dso); |
1084 | } | 1085 | } |
1085 | 1086 | ||
1087 | struct dso *dso__get(struct dso *dso) | ||
1088 | { | ||
1089 | if (dso) | ||
1090 | atomic_inc(&dso->refcnt); | ||
1091 | return dso; | ||
1092 | } | ||
1093 | |||
1094 | void dso__put(struct dso *dso) | ||
1095 | { | ||
1096 | if (dso && atomic_dec_and_test(&dso->refcnt)) | ||
1097 | dso__delete(dso); | ||
1098 | } | ||
1099 | |||
1086 | void dso__set_build_id(struct dso *dso, void *build_id) | 1100 | void dso__set_build_id(struct dso *dso, void *build_id) |
1087 | { | 1101 | { |
1088 | memcpy(dso->build_id, build_id, sizeof(dso->build_id)); | 1102 | memcpy(dso->build_id, build_id, sizeof(dso->build_id)); |
@@ -1149,14 +1163,41 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) | |||
1149 | return have_build_id; | 1163 | return have_build_id; |
1150 | } | 1164 | } |
1151 | 1165 | ||
1152 | void dsos__add(struct dsos *dsos, struct dso *dso) | 1166 | void __dsos__add(struct dsos *dsos, struct dso *dso) |
1153 | { | 1167 | { |
1154 | list_add_tail(&dso->node, &dsos->head); | 1168 | list_add_tail(&dso->node, &dsos->head); |
1155 | dso__findlink_by_longname(&dsos->root, dso, NULL); | 1169 | __dso__findlink_by_longname(&dsos->root, dso, NULL); |
1170 | /* | ||
1171 | * It is now in the linked list, grab a reference, then garbage collect | ||
1172 | * this when needing memory, by looking at LRU dso instances in the | ||
1173 | * list with atomic_read(&dso->refcnt) == 1, i.e. no references | ||
1174 | * anywhere besides the one for the list, do, under a lock for the | ||
1175 | * list: remove it from the list, then a dso__put(), that probably will | ||
1176 | * be the last and will then call dso__delete(), end of life. | ||
1177 | * | ||
1178 | * That, or at the end of the 'struct machine' lifetime, when all | ||
1179 | * 'struct dso' instances will be removed from the list, in | ||
1180 | * dsos__exit(), if they have no other reference from some other data | ||
1181 | * structure. | ||
1182 | * | ||
1183 | * E.g.: after processing a 'perf.data' file and storing references | ||
1184 | * to objects instantiated while processing events, we will have | ||
1185 | * references to the 'thread', 'map', 'dso' structs all from 'struct | ||
1186 | * hist_entry' instances, but we may not need anything not referenced, | ||
1187 | * so we might as well call machines__exit()/machines__delete() and | ||
1188 | * garbage collect it. | ||
1189 | */ | ||
1190 | dso__get(dso); | ||
1156 | } | 1191 | } |
1157 | 1192 | ||
1158 | struct dso *dsos__find(const struct dsos *dsos, const char *name, | 1193 | void dsos__add(struct dsos *dsos, struct dso *dso) |
1159 | bool cmp_short) | 1194 | { |
1195 | pthread_rwlock_wrlock(&dsos->lock); | ||
1196 | __dsos__add(dsos, dso); | ||
1197 | pthread_rwlock_unlock(&dsos->lock); | ||
1198 | } | ||
1199 | |||
1200 | struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) | ||
1160 | { | 1201 | { |
1161 | struct dso *pos; | 1202 | struct dso *pos; |
1162 | 1203 | ||
@@ -1166,15 +1207,24 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name, | |||
1166 | return pos; | 1207 | return pos; |
1167 | return NULL; | 1208 | return NULL; |
1168 | } | 1209 | } |
1169 | return dso__find_by_longname(&dsos->root, name); | 1210 | return __dso__find_by_longname(&dsos->root, name); |
1170 | } | 1211 | } |
1171 | 1212 | ||
1172 | struct dso *dsos__addnew(struct dsos *dsos, const char *name) | 1213 | struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short) |
1214 | { | ||
1215 | struct dso *dso; | ||
1216 | pthread_rwlock_rdlock(&dsos->lock); | ||
1217 | dso = __dsos__find(dsos, name, cmp_short); | ||
1218 | pthread_rwlock_unlock(&dsos->lock); | ||
1219 | return dso; | ||
1220 | } | ||
1221 | |||
1222 | struct dso *__dsos__addnew(struct dsos *dsos, const char *name) | ||
1173 | { | 1223 | { |
1174 | struct dso *dso = dso__new(name); | 1224 | struct dso *dso = dso__new(name); |
1175 | 1225 | ||
1176 | if (dso != NULL) { | 1226 | if (dso != NULL) { |
1177 | dsos__add(dsos, dso); | 1227 | __dsos__add(dsos, dso); |
1178 | dso__set_basename(dso); | 1228 | dso__set_basename(dso); |
1179 | } | 1229 | } |
1180 | return dso; | 1230 | return dso; |
@@ -1182,9 +1232,18 @@ struct dso *dsos__addnew(struct dsos *dsos, const char *name) | |||
1182 | 1232 | ||
1183 | struct dso *__dsos__findnew(struct dsos *dsos, const char *name) | 1233 | struct dso *__dsos__findnew(struct dsos *dsos, const char *name) |
1184 | { | 1234 | { |
1185 | struct dso *dso = dsos__find(dsos, name, false); | 1235 | struct dso *dso = __dsos__find(dsos, name, false); |
1236 | |||
1237 | return dso ? dso : __dsos__addnew(dsos, name); | ||
1238 | } | ||
1186 | 1239 | ||
1187 | return dso ? dso : dsos__addnew(dsos, name); | 1240 | struct dso *dsos__findnew(struct dsos *dsos, const char *name) |
1241 | { | ||
1242 | struct dso *dso; | ||
1243 | pthread_rwlock_wrlock(&dsos->lock); | ||
1244 | dso = dso__get(__dsos__findnew(dsos, name)); | ||
1245 | pthread_rwlock_unlock(&dsos->lock); | ||
1246 | return dso; | ||
1188 | } | 1247 | } |
1189 | 1248 | ||
1190 | size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, | 1249 | size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, |
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index ba2d90ed881f..2fe98bb0e95b 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h | |||
@@ -1,9 +1,11 @@ | |||
1 | #ifndef __PERF_DSO | 1 | #ifndef __PERF_DSO |
2 | #define __PERF_DSO | 2 | #define __PERF_DSO |
3 | 3 | ||
4 | #include <linux/atomic.h> | ||
4 | #include <linux/types.h> | 5 | #include <linux/types.h> |
5 | #include <linux/rbtree.h> | 6 | #include <linux/rbtree.h> |
6 | #include <stdbool.h> | 7 | #include <stdbool.h> |
8 | #include <pthread.h> | ||
7 | #include <linux/types.h> | 9 | #include <linux/types.h> |
8 | #include <linux/bitops.h> | 10 | #include <linux/bitops.h> |
9 | #include "map.h" | 11 | #include "map.h" |
@@ -124,6 +126,7 @@ struct dso_cache { | |||
124 | struct dsos { | 126 | struct dsos { |
125 | struct list_head head; | 127 | struct list_head head; |
126 | struct rb_root root; /* rbtree root sorted by long name */ | 128 | struct rb_root root; /* rbtree root sorted by long name */ |
129 | pthread_rwlock_t lock; | ||
127 | }; | 130 | }; |
128 | 131 | ||
129 | struct auxtrace_cache; | 132 | struct auxtrace_cache; |
@@ -177,7 +180,7 @@ struct dso { | |||
177 | void *priv; | 180 | void *priv; |
178 | u64 db_id; | 181 | u64 db_id; |
179 | }; | 182 | }; |
180 | 183 | atomic_t refcnt; | |
181 | char name[0]; | 184 | char name[0]; |
182 | }; | 185 | }; |
183 | 186 | ||
@@ -204,6 +207,17 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated); | |||
204 | 207 | ||
205 | int dso__name_len(const struct dso *dso); | 208 | int dso__name_len(const struct dso *dso); |
206 | 209 | ||
210 | struct dso *dso__get(struct dso *dso); | ||
211 | void dso__put(struct dso *dso); | ||
212 | |||
213 | static inline void __dso__zput(struct dso **dso) | ||
214 | { | ||
215 | dso__put(*dso); | ||
216 | *dso = NULL; | ||
217 | } | ||
218 | |||
219 | #define dso__zput(dso) __dso__zput(&dso) | ||
220 | |||
207 | bool dso__loaded(const struct dso *dso, enum map_type type); | 221 | bool dso__loaded(const struct dso *dso, enum map_type type); |
208 | 222 | ||
209 | bool dso__sorted_by_name(const struct dso *dso, enum map_type type); | 223 | bool dso__sorted_by_name(const struct dso *dso, enum map_type type); |
@@ -297,11 +311,13 @@ struct map *dso__new_map(const char *name); | |||
297 | struct dso *machine__findnew_kernel(struct machine *machine, const char *name, | 311 | struct dso *machine__findnew_kernel(struct machine *machine, const char *name, |
298 | const char *short_name, int dso_type); | 312 | const char *short_name, int dso_type); |
299 | 313 | ||
314 | void __dsos__add(struct dsos *dsos, struct dso *dso); | ||
300 | void dsos__add(struct dsos *dsos, struct dso *dso); | 315 | void dsos__add(struct dsos *dsos, struct dso *dso); |
301 | struct dso *dsos__addnew(struct dsos *dsos, const char *name); | 316 | struct dso *__dsos__addnew(struct dsos *dsos, const char *name); |
302 | struct dso *dsos__find(const struct dsos *dsos, const char *name, | 317 | struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short); |
303 | bool cmp_short); | 318 | struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short); |
304 | struct dso *__dsos__findnew(struct dsos *dsos, const char *name); | 319 | struct dso *__dsos__findnew(struct dsos *dsos, const char *name); |
320 | struct dso *dsos__findnew(struct dsos *dsos, const char *name); | ||
305 | bool __dsos__read_build_ids(struct list_head *head, bool with_hits); | 321 | bool __dsos__read_build_ids(struct list_head *head, bool with_hits); |
306 | 322 | ||
307 | size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, | 323 | size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, |
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index ac5aaaeed7ff..21a77e7a171e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c | |||
@@ -1277,6 +1277,7 @@ static int __event_process_build_id(struct build_id_event *bev, | |||
1277 | sbuild_id); | 1277 | sbuild_id); |
1278 | pr_debug("build id event received for %s: %s\n", | 1278 | pr_debug("build id event received for %s: %s\n", |
1279 | dso->long_name, sbuild_id); | 1279 | dso->long_name, sbuild_id); |
1280 | dso__put(dso); | ||
1280 | } | 1281 | } |
1281 | 1282 | ||
1282 | err = 0; | 1283 | err = 0; |
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index f15ed24a22ac..132e35765101 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c | |||
@@ -20,6 +20,7 @@ static void dsos__init(struct dsos *dsos) | |||
20 | { | 20 | { |
21 | INIT_LIST_HEAD(&dsos->head); | 21 | INIT_LIST_HEAD(&dsos->head); |
22 | dsos->root = RB_ROOT; | 22 | dsos->root = RB_ROOT; |
23 | pthread_rwlock_init(&dsos->lock, NULL); | ||
23 | } | 24 | } |
24 | 25 | ||
25 | int machine__init(struct machine *machine, const char *root_dir, pid_t pid) | 26 | int machine__init(struct machine *machine, const char *root_dir, pid_t pid) |
@@ -81,15 +82,25 @@ out_delete: | |||
81 | return NULL; | 82 | return NULL; |
82 | } | 83 | } |
83 | 84 | ||
84 | static void dsos__delete(struct dsos *dsos) | 85 | static void dsos__purge(struct dsos *dsos) |
85 | { | 86 | { |
86 | struct dso *pos, *n; | 87 | struct dso *pos, *n; |
87 | 88 | ||
89 | pthread_rwlock_wrlock(&dsos->lock); | ||
90 | |||
88 | list_for_each_entry_safe(pos, n, &dsos->head, node) { | 91 | list_for_each_entry_safe(pos, n, &dsos->head, node) { |
89 | RB_CLEAR_NODE(&pos->rb_node); | 92 | RB_CLEAR_NODE(&pos->rb_node); |
90 | list_del(&pos->node); | 93 | list_del_init(&pos->node); |
91 | dso__delete(pos); | 94 | dso__put(pos); |
92 | } | 95 | } |
96 | |||
97 | pthread_rwlock_unlock(&dsos->lock); | ||
98 | } | ||
99 | |||
100 | static void dsos__exit(struct dsos *dsos) | ||
101 | { | ||
102 | dsos__purge(dsos); | ||
103 | pthread_rwlock_destroy(&dsos->lock); | ||
93 | } | 104 | } |
94 | 105 | ||
95 | void machine__delete_threads(struct machine *machine) | 106 | void machine__delete_threads(struct machine *machine) |
@@ -110,7 +121,7 @@ void machine__delete_threads(struct machine *machine) | |||
110 | void machine__exit(struct machine *machine) | 121 | void machine__exit(struct machine *machine) |
111 | { | 122 | { |
112 | map_groups__exit(&machine->kmaps); | 123 | map_groups__exit(&machine->kmaps); |
113 | dsos__delete(&machine->dsos); | 124 | dsos__exit(&machine->dsos); |
114 | machine__exit_vdso(machine); | 125 | machine__exit_vdso(machine); |
115 | zfree(&machine->root_dir); | 126 | zfree(&machine->root_dir); |
116 | zfree(&machine->current_tid); | 127 | zfree(&machine->current_tid); |
@@ -490,17 +501,19 @@ int machine__process_lost_samples_event(struct machine *machine __maybe_unused, | |||
490 | return 0; | 501 | return 0; |
491 | } | 502 | } |
492 | 503 | ||
493 | static struct dso* | 504 | static struct dso *machine__findnew_module_dso(struct machine *machine, |
494 | machine__module_dso(struct machine *machine, struct kmod_path *m, | 505 | struct kmod_path *m, |
495 | const char *filename) | 506 | const char *filename) |
496 | { | 507 | { |
497 | struct dso *dso; | 508 | struct dso *dso; |
498 | 509 | ||
499 | dso = dsos__find(&machine->dsos, m->name, true); | 510 | pthread_rwlock_wrlock(&machine->dsos.lock); |
511 | |||
512 | dso = __dsos__find(&machine->dsos, m->name, true); | ||
500 | if (!dso) { | 513 | if (!dso) { |
501 | dso = dsos__addnew(&machine->dsos, m->name); | 514 | dso = __dsos__addnew(&machine->dsos, m->name); |
502 | if (dso == NULL) | 515 | if (dso == NULL) |
503 | return NULL; | 516 | goto out_unlock; |
504 | 517 | ||
505 | if (machine__is_host(machine)) | 518 | if (machine__is_host(machine)) |
506 | dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; | 519 | dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; |
@@ -515,6 +528,9 @@ machine__module_dso(struct machine *machine, struct kmod_path *m, | |||
515 | dso__set_long_name(dso, strdup(filename), true); | 528 | dso__set_long_name(dso, strdup(filename), true); |
516 | } | 529 | } |
517 | 530 | ||
531 | dso__get(dso); | ||
532 | out_unlock: | ||
533 | pthread_rwlock_unlock(&machine->dsos.lock); | ||
518 | return dso; | 534 | return dso; |
519 | } | 535 | } |
520 | 536 | ||
@@ -534,8 +550,8 @@ int machine__process_itrace_start_event(struct machine *machine __maybe_unused, | |||
534 | return 0; | 550 | return 0; |
535 | } | 551 | } |
536 | 552 | ||
537 | struct map *machine__new_module(struct machine *machine, u64 start, | 553 | struct map *machine__findnew_module_map(struct machine *machine, u64 start, |
538 | const char *filename) | 554 | const char *filename) |
539 | { | 555 | { |
540 | struct map *map = NULL; | 556 | struct map *map = NULL; |
541 | struct dso *dso; | 557 | struct dso *dso; |
@@ -549,7 +565,7 @@ struct map *machine__new_module(struct machine *machine, u64 start, | |||
549 | if (map) | 565 | if (map) |
550 | goto out; | 566 | goto out; |
551 | 567 | ||
552 | dso = machine__module_dso(machine, &m, filename); | 568 | dso = machine__findnew_module_dso(machine, &m, filename); |
553 | if (dso == NULL) | 569 | if (dso == NULL) |
554 | goto out; | 570 | goto out; |
555 | 571 | ||
@@ -1017,7 +1033,7 @@ static int machine__create_module(void *arg, const char *name, u64 start) | |||
1017 | struct machine *machine = arg; | 1033 | struct machine *machine = arg; |
1018 | struct map *map; | 1034 | struct map *map; |
1019 | 1035 | ||
1020 | map = machine__new_module(machine, start, name); | 1036 | map = machine__findnew_module_map(machine, start, name); |
1021 | if (map == NULL) | 1037 | if (map == NULL) |
1022 | return -1; | 1038 | return -1; |
1023 | 1039 | ||
@@ -1140,8 +1156,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine, | |||
1140 | strlen(kmmap_prefix) - 1) == 0; | 1156 | strlen(kmmap_prefix) - 1) == 0; |
1141 | if (event->mmap.filename[0] == '/' || | 1157 | if (event->mmap.filename[0] == '/' || |
1142 | (!is_kernel_mmap && event->mmap.filename[0] == '[')) { | 1158 | (!is_kernel_mmap && event->mmap.filename[0] == '[')) { |
1143 | map = machine__new_module(machine, event->mmap.start, | 1159 | map = machine__findnew_module_map(machine, event->mmap.start, |
1144 | event->mmap.filename); | 1160 | event->mmap.filename); |
1145 | if (map == NULL) | 1161 | if (map == NULL) |
1146 | goto out_problem; | 1162 | goto out_problem; |
1147 | 1163 | ||
@@ -1156,6 +1172,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine, | |||
1156 | struct dso *kernel = NULL; | 1172 | struct dso *kernel = NULL; |
1157 | struct dso *dso; | 1173 | struct dso *dso; |
1158 | 1174 | ||
1175 | pthread_rwlock_rdlock(&machine->dsos.lock); | ||
1176 | |||
1159 | list_for_each_entry(dso, &machine->dsos.head, node) { | 1177 | list_for_each_entry(dso, &machine->dsos.head, node) { |
1160 | 1178 | ||
1161 | /* | 1179 | /* |
@@ -1184,14 +1202,18 @@ static int machine__process_kernel_mmap_event(struct machine *machine, | |||
1184 | break; | 1202 | break; |
1185 | } | 1203 | } |
1186 | 1204 | ||
1205 | pthread_rwlock_unlock(&machine->dsos.lock); | ||
1206 | |||
1187 | if (kernel == NULL) | 1207 | if (kernel == NULL) |
1188 | kernel = machine__findnew_dso(machine, kmmap_prefix); | 1208 | kernel = machine__findnew_dso(machine, kmmap_prefix); |
1189 | if (kernel == NULL) | 1209 | if (kernel == NULL) |
1190 | goto out_problem; | 1210 | goto out_problem; |
1191 | 1211 | ||
1192 | kernel->kernel = kernel_type; | 1212 | kernel->kernel = kernel_type; |
1193 | if (__machine__create_kernel_maps(machine, kernel) < 0) | 1213 | if (__machine__create_kernel_maps(machine, kernel) < 0) { |
1214 | dso__put(kernel); | ||
1194 | goto out_problem; | 1215 | goto out_problem; |
1216 | } | ||
1195 | 1217 | ||
1196 | if (strstr(kernel->long_name, "vmlinux")) | 1218 | if (strstr(kernel->long_name, "vmlinux")) |
1197 | dso__set_short_name(kernel, "[kernel.vmlinux]", false); | 1219 | dso__set_short_name(kernel, "[kernel.vmlinux]", false); |
@@ -1948,5 +1970,5 @@ int machine__get_kernel_start(struct machine *machine) | |||
1948 | 1970 | ||
1949 | struct dso *machine__findnew_dso(struct machine *machine, const char *filename) | 1971 | struct dso *machine__findnew_dso(struct machine *machine, const char *filename) |
1950 | { | 1972 | { |
1951 | return __dsos__findnew(&machine->dsos, filename); | 1973 | return dsos__findnew(&machine->dsos, filename); |
1952 | } | 1974 | } |
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 8e1f796fd137..ca267c41f28d 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h | |||
@@ -189,8 +189,8 @@ struct symbol *machine__find_kernel_function_by_name(struct machine *machine, | |||
189 | filter); | 189 | filter); |
190 | } | 190 | } |
191 | 191 | ||
192 | struct map *machine__new_module(struct machine *machine, u64 start, | 192 | struct map *machine__findnew_module_map(struct machine *machine, u64 start, |
193 | const char *filename); | 193 | const char *filename); |
194 | 194 | ||
195 | int machine__load_kallsyms(struct machine *machine, const char *filename, | 195 | int machine__load_kallsyms(struct machine *machine, const char *filename, |
196 | enum map_type type, symbol_filter_t filter); | 196 | enum map_type type, symbol_filter_t filter); |
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 365011c233a6..1241ab989cf5 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c | |||
@@ -132,7 +132,7 @@ void map__init(struct map *map, enum map_type type, | |||
132 | map->end = end; | 132 | map->end = end; |
133 | map->pgoff = pgoff; | 133 | map->pgoff = pgoff; |
134 | map->reloc = 0; | 134 | map->reloc = 0; |
135 | map->dso = dso; | 135 | map->dso = dso__get(dso); |
136 | map->map_ip = map__map_ip; | 136 | map->map_ip = map__map_ip; |
137 | map->unmap_ip = map__unmap_ip; | 137 | map->unmap_ip = map__unmap_ip; |
138 | RB_CLEAR_NODE(&map->rb_node); | 138 | RB_CLEAR_NODE(&map->rb_node); |
@@ -198,6 +198,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, | |||
198 | if (type != MAP__FUNCTION) | 198 | if (type != MAP__FUNCTION) |
199 | dso__set_loaded(dso, map->type); | 199 | dso__set_loaded(dso, map->type); |
200 | } | 200 | } |
201 | dso__put(dso); | ||
201 | } | 202 | } |
202 | return map; | 203 | return map; |
203 | out_delete: | 204 | out_delete: |
@@ -224,9 +225,15 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type) | |||
224 | return map; | 225 | return map; |
225 | } | 226 | } |
226 | 227 | ||
227 | void map__delete(struct map *map) | 228 | static void map__exit(struct map *map) |
228 | { | 229 | { |
229 | BUG_ON(!RB_EMPTY_NODE(&map->rb_node)); | 230 | BUG_ON(!RB_EMPTY_NODE(&map->rb_node)); |
231 | dso__zput(map->dso); | ||
232 | } | ||
233 | |||
234 | void map__delete(struct map *map) | ||
235 | { | ||
236 | map__exit(map); | ||
230 | free(map); | 237 | free(map); |
231 | } | 238 | } |
232 | 239 | ||
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index e6f215b7a052..d4cf50b91839 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c | |||
@@ -168,7 +168,7 @@ static struct map *kernel_get_module_map(const char *module) | |||
168 | 168 | ||
169 | /* A file path -- this is an offline module */ | 169 | /* A file path -- this is an offline module */ |
170 | if (module && strchr(module, '/')) | 170 | if (module && strchr(module, '/')) |
171 | return machine__new_module(host_machine, 0, module); | 171 | return machine__findnew_module_map(host_machine, 0, module); |
172 | 172 | ||
173 | if (!module) | 173 | if (!module) |
174 | module = "kernel"; | 174 | module = "kernel"; |
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index c50da392e256..2da65a710893 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c | |||
@@ -130,7 +130,7 @@ struct debuginfo *debuginfo__new(const char *path) | |||
130 | continue; | 130 | continue; |
131 | dinfo = __debuginfo__new(buf); | 131 | dinfo = __debuginfo__new(buf); |
132 | } | 132 | } |
133 | dso__delete(dso); | 133 | dso__put(dso); |
134 | 134 | ||
135 | out: | 135 | out: |
136 | /* if failed to open all distro debuginfo, open given binary */ | 136 | /* if failed to open all distro debuginfo, open given binary */ |
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c new file mode 100644 index 000000000000..53e8bb7bc852 --- /dev/null +++ b/tools/perf/util/stat-shadow.c | |||
@@ -0,0 +1,434 @@ | |||
1 | #include <stdio.h> | ||
2 | #include "evsel.h" | ||
3 | #include "stat.h" | ||
4 | #include "color.h" | ||
5 | |||
6 | enum { | ||
7 | CTX_BIT_USER = 1 << 0, | ||
8 | CTX_BIT_KERNEL = 1 << 1, | ||
9 | CTX_BIT_HV = 1 << 2, | ||
10 | CTX_BIT_HOST = 1 << 3, | ||
11 | CTX_BIT_IDLE = 1 << 4, | ||
12 | CTX_BIT_MAX = 1 << 5, | ||
13 | }; | ||
14 | |||
15 | #define NUM_CTX CTX_BIT_MAX | ||
16 | |||
17 | static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; | ||
18 | static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; | ||
19 | static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; | ||
20 | static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; | ||
21 | static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; | ||
22 | static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; | ||
23 | static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
24 | static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
25 | static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
26 | static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
27 | static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
28 | static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; | ||
29 | static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; | ||
30 | static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; | ||
31 | |||
32 | struct stats walltime_nsecs_stats; | ||
33 | |||
34 | static int evsel_context(struct perf_evsel *evsel) | ||
35 | { | ||
36 | int ctx = 0; | ||
37 | |||
38 | if (evsel->attr.exclude_kernel) | ||
39 | ctx |= CTX_BIT_KERNEL; | ||
40 | if (evsel->attr.exclude_user) | ||
41 | ctx |= CTX_BIT_USER; | ||
42 | if (evsel->attr.exclude_hv) | ||
43 | ctx |= CTX_BIT_HV; | ||
44 | if (evsel->attr.exclude_host) | ||
45 | ctx |= CTX_BIT_HOST; | ||
46 | if (evsel->attr.exclude_idle) | ||
47 | ctx |= CTX_BIT_IDLE; | ||
48 | |||
49 | return ctx; | ||
50 | } | ||
51 | |||
52 | void perf_stat__reset_shadow_stats(void) | ||
53 | { | ||
54 | memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); | ||
55 | memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); | ||
56 | memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); | ||
57 | memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); | ||
58 | memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); | ||
59 | memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); | ||
60 | memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); | ||
61 | memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); | ||
62 | memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); | ||
63 | memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); | ||
64 | memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); | ||
65 | memset(runtime_cycles_in_tx_stats, 0, | ||
66 | sizeof(runtime_cycles_in_tx_stats)); | ||
67 | memset(runtime_transaction_stats, 0, | ||
68 | sizeof(runtime_transaction_stats)); | ||
69 | memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); | ||
70 | memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); | ||
71 | } | ||
72 | |||
73 | /* | ||
74 | * Update various tracking values we maintain to print | ||
75 | * more semantic information such as miss/hit ratios, | ||
76 | * instruction rates, etc: | ||
77 | */ | ||
78 | void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, | ||
79 | int cpu) | ||
80 | { | ||
81 | int ctx = evsel_context(counter); | ||
82 | |||
83 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) | ||
84 | update_stats(&runtime_nsecs_stats[cpu], count[0]); | ||
85 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | ||
86 | update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); | ||
87 | else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) | ||
88 | update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); | ||
89 | else if (perf_stat_evsel__is(counter, TRANSACTION_START)) | ||
90 | update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); | ||
91 | else if (perf_stat_evsel__is(counter, ELISION_START)) | ||
92 | update_stats(&runtime_elision_stats[ctx][cpu], count[0]); | ||
93 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) | ||
94 | update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); | ||
95 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) | ||
96 | update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]); | ||
97 | else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | ||
98 | update_stats(&runtime_branches_stats[ctx][cpu], count[0]); | ||
99 | else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) | ||
100 | update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]); | ||
101 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) | ||
102 | update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]); | ||
103 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) | ||
104 | update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); | ||
105 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) | ||
106 | update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); | ||
107 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) | ||
108 | update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); | ||
109 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) | ||
110 | update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); | ||
111 | } | ||
112 | |||
113 | /* used for get_ratio_color() */ | ||
114 | enum grc_type { | ||
115 | GRC_STALLED_CYCLES_FE, | ||
116 | GRC_STALLED_CYCLES_BE, | ||
117 | GRC_CACHE_MISSES, | ||
118 | GRC_MAX_NR | ||
119 | }; | ||
120 | |||
121 | static const char *get_ratio_color(enum grc_type type, double ratio) | ||
122 | { | ||
123 | static const double grc_table[GRC_MAX_NR][3] = { | ||
124 | [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, | ||
125 | [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, | ||
126 | [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, | ||
127 | }; | ||
128 | const char *color = PERF_COLOR_NORMAL; | ||
129 | |||
130 | if (ratio > grc_table[type][0]) | ||
131 | color = PERF_COLOR_RED; | ||
132 | else if (ratio > grc_table[type][1]) | ||
133 | color = PERF_COLOR_MAGENTA; | ||
134 | else if (ratio > grc_table[type][2]) | ||
135 | color = PERF_COLOR_YELLOW; | ||
136 | |||
137 | return color; | ||
138 | } | ||
139 | |||
140 | static void print_stalled_cycles_frontend(FILE *out, int cpu, | ||
141 | struct perf_evsel *evsel | ||
142 | __maybe_unused, double avg) | ||
143 | { | ||
144 | double total, ratio = 0.0; | ||
145 | const char *color; | ||
146 | int ctx = evsel_context(evsel); | ||
147 | |||
148 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
149 | |||
150 | if (total) | ||
151 | ratio = avg / total * 100.0; | ||
152 | |||
153 | color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); | ||
154 | |||
155 | fprintf(out, " # "); | ||
156 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
157 | fprintf(out, " frontend cycles idle "); | ||
158 | } | ||
159 | |||
160 | static void print_stalled_cycles_backend(FILE *out, int cpu, | ||
161 | struct perf_evsel *evsel | ||
162 | __maybe_unused, double avg) | ||
163 | { | ||
164 | double total, ratio = 0.0; | ||
165 | const char *color; | ||
166 | int ctx = evsel_context(evsel); | ||
167 | |||
168 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
169 | |||
170 | if (total) | ||
171 | ratio = avg / total * 100.0; | ||
172 | |||
173 | color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); | ||
174 | |||
175 | fprintf(out, " # "); | ||
176 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
177 | fprintf(out, " backend cycles idle "); | ||
178 | } | ||
179 | |||
180 | static void print_branch_misses(FILE *out, int cpu, | ||
181 | struct perf_evsel *evsel __maybe_unused, | ||
182 | double avg) | ||
183 | { | ||
184 | double total, ratio = 0.0; | ||
185 | const char *color; | ||
186 | int ctx = evsel_context(evsel); | ||
187 | |||
188 | total = avg_stats(&runtime_branches_stats[ctx][cpu]); | ||
189 | |||
190 | if (total) | ||
191 | ratio = avg / total * 100.0; | ||
192 | |||
193 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
194 | |||
195 | fprintf(out, " # "); | ||
196 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
197 | fprintf(out, " of all branches "); | ||
198 | } | ||
199 | |||
200 | static void print_l1_dcache_misses(FILE *out, int cpu, | ||
201 | struct perf_evsel *evsel __maybe_unused, | ||
202 | double avg) | ||
203 | { | ||
204 | double total, ratio = 0.0; | ||
205 | const char *color; | ||
206 | int ctx = evsel_context(evsel); | ||
207 | |||
208 | total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); | ||
209 | |||
210 | if (total) | ||
211 | ratio = avg / total * 100.0; | ||
212 | |||
213 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
214 | |||
215 | fprintf(out, " # "); | ||
216 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
217 | fprintf(out, " of all L1-dcache hits "); | ||
218 | } | ||
219 | |||
220 | static void print_l1_icache_misses(FILE *out, int cpu, | ||
221 | struct perf_evsel *evsel __maybe_unused, | ||
222 | double avg) | ||
223 | { | ||
224 | double total, ratio = 0.0; | ||
225 | const char *color; | ||
226 | int ctx = evsel_context(evsel); | ||
227 | |||
228 | total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); | ||
229 | |||
230 | if (total) | ||
231 | ratio = avg / total * 100.0; | ||
232 | |||
233 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
234 | |||
235 | fprintf(out, " # "); | ||
236 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
237 | fprintf(out, " of all L1-icache hits "); | ||
238 | } | ||
239 | |||
240 | static void print_dtlb_cache_misses(FILE *out, int cpu, | ||
241 | struct perf_evsel *evsel __maybe_unused, | ||
242 | double avg) | ||
243 | { | ||
244 | double total, ratio = 0.0; | ||
245 | const char *color; | ||
246 | int ctx = evsel_context(evsel); | ||
247 | |||
248 | total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); | ||
249 | |||
250 | if (total) | ||
251 | ratio = avg / total * 100.0; | ||
252 | |||
253 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
254 | |||
255 | fprintf(out, " # "); | ||
256 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
257 | fprintf(out, " of all dTLB cache hits "); | ||
258 | } | ||
259 | |||
260 | static void print_itlb_cache_misses(FILE *out, int cpu, | ||
261 | struct perf_evsel *evsel __maybe_unused, | ||
262 | double avg) | ||
263 | { | ||
264 | double total, ratio = 0.0; | ||
265 | const char *color; | ||
266 | int ctx = evsel_context(evsel); | ||
267 | |||
268 | total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); | ||
269 | |||
270 | if (total) | ||
271 | ratio = avg / total * 100.0; | ||
272 | |||
273 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
274 | |||
275 | fprintf(out, " # "); | ||
276 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
277 | fprintf(out, " of all iTLB cache hits "); | ||
278 | } | ||
279 | |||
280 | static void print_ll_cache_misses(FILE *out, int cpu, | ||
281 | struct perf_evsel *evsel __maybe_unused, | ||
282 | double avg) | ||
283 | { | ||
284 | double total, ratio = 0.0; | ||
285 | const char *color; | ||
286 | int ctx = evsel_context(evsel); | ||
287 | |||
288 | total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); | ||
289 | |||
290 | if (total) | ||
291 | ratio = avg / total * 100.0; | ||
292 | |||
293 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
294 | |||
295 | fprintf(out, " # "); | ||
296 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
297 | fprintf(out, " of all LL-cache hits "); | ||
298 | } | ||
299 | |||
300 | void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, | ||
301 | double avg, int cpu, enum aggr_mode aggr) | ||
302 | { | ||
303 | double total, ratio = 0.0, total2; | ||
304 | int ctx = evsel_context(evsel); | ||
305 | |||
306 | if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { | ||
307 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
308 | if (total) { | ||
309 | ratio = avg / total; | ||
310 | fprintf(out, " # %5.2f insns per cycle ", ratio); | ||
311 | } else { | ||
312 | fprintf(out, " "); | ||
313 | } | ||
314 | total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); | ||
315 | total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); | ||
316 | |||
317 | if (total && avg) { | ||
318 | ratio = total / avg; | ||
319 | fprintf(out, "\n"); | ||
320 | if (aggr == AGGR_NONE) | ||
321 | fprintf(out, " "); | ||
322 | fprintf(out, " # %5.2f stalled cycles per insn", ratio); | ||
323 | } | ||
324 | |||
325 | } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && | ||
326 | runtime_branches_stats[ctx][cpu].n != 0) { | ||
327 | print_branch_misses(out, cpu, evsel, avg); | ||
328 | } else if ( | ||
329 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
330 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | | ||
331 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
332 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
333 | runtime_l1_dcache_stats[ctx][cpu].n != 0) { | ||
334 | print_l1_dcache_misses(out, cpu, evsel, avg); | ||
335 | } else if ( | ||
336 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
337 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | | ||
338 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
339 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
340 | runtime_l1_icache_stats[ctx][cpu].n != 0) { | ||
341 | print_l1_icache_misses(out, cpu, evsel, avg); | ||
342 | } else if ( | ||
343 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
344 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | | ||
345 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
346 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
347 | runtime_dtlb_cache_stats[ctx][cpu].n != 0) { | ||
348 | print_dtlb_cache_misses(out, cpu, evsel, avg); | ||
349 | } else if ( | ||
350 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
351 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | | ||
352 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
353 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
354 | runtime_itlb_cache_stats[ctx][cpu].n != 0) { | ||
355 | print_itlb_cache_misses(out, cpu, evsel, avg); | ||
356 | } else if ( | ||
357 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
358 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | | ||
359 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
360 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
361 | runtime_ll_cache_stats[ctx][cpu].n != 0) { | ||
362 | print_ll_cache_misses(out, cpu, evsel, avg); | ||
363 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && | ||
364 | runtime_cacherefs_stats[ctx][cpu].n != 0) { | ||
365 | total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); | ||
366 | |||
367 | if (total) | ||
368 | ratio = avg * 100 / total; | ||
369 | |||
370 | fprintf(out, " # %8.3f %% of all cache refs ", ratio); | ||
371 | |||
372 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { | ||
373 | print_stalled_cycles_frontend(out, cpu, evsel, avg); | ||
374 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { | ||
375 | print_stalled_cycles_backend(out, cpu, evsel, avg); | ||
376 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { | ||
377 | total = avg_stats(&runtime_nsecs_stats[cpu]); | ||
378 | |||
379 | if (total) { | ||
380 | ratio = avg / total; | ||
381 | fprintf(out, " # %8.3f GHz ", ratio); | ||
382 | } else { | ||
383 | fprintf(out, " "); | ||
384 | } | ||
385 | } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { | ||
386 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
387 | if (total) | ||
388 | fprintf(out, | ||
389 | " # %5.2f%% transactional cycles ", | ||
390 | 100.0 * (avg / total)); | ||
391 | } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { | ||
392 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
393 | total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | ||
394 | if (total2 < avg) | ||
395 | total2 = avg; | ||
396 | if (total) | ||
397 | fprintf(out, | ||
398 | " # %5.2f%% aborted cycles ", | ||
399 | 100.0 * ((total2-avg) / total)); | ||
400 | } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && | ||
401 | avg > 0 && | ||
402 | runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { | ||
403 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | ||
404 | |||
405 | if (total) | ||
406 | ratio = total / avg; | ||
407 | |||
408 | fprintf(out, " # %8.0f cycles / transaction ", ratio); | ||
409 | } else if (perf_stat_evsel__is(evsel, ELISION_START) && | ||
410 | avg > 0 && | ||
411 | runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { | ||
412 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | ||
413 | |||
414 | if (total) | ||
415 | ratio = total / avg; | ||
416 | |||
417 | fprintf(out, " # %8.0f cycles / elision ", ratio); | ||
418 | } else if (runtime_nsecs_stats[cpu].n != 0) { | ||
419 | char unit = 'M'; | ||
420 | |||
421 | total = avg_stats(&runtime_nsecs_stats[cpu]); | ||
422 | |||
423 | if (total) | ||
424 | ratio = 1000.0 * avg / total; | ||
425 | if (ratio < 0.001) { | ||
426 | ratio *= 1000; | ||
427 | unit = 'K'; | ||
428 | } | ||
429 | |||
430 | fprintf(out, " # %8.3f %c/sec ", ratio, unit); | ||
431 | } else { | ||
432 | fprintf(out, " "); | ||
433 | } | ||
434 | } | ||
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 6506b3dfb605..60b92822f655 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c | |||
@@ -1,6 +1,6 @@ | |||
1 | #include <math.h> | 1 | #include <math.h> |
2 | |||
3 | #include "stat.h" | 2 | #include "stat.h" |
3 | #include "evsel.h" | ||
4 | 4 | ||
5 | void update_stats(struct stats *stats, u64 val) | 5 | void update_stats(struct stats *stats, u64 val) |
6 | { | 6 | { |
@@ -61,3 +61,36 @@ double rel_stddev_stats(double stddev, double avg) | |||
61 | 61 | ||
62 | return pct; | 62 | return pct; |
63 | } | 63 | } |
64 | |||
65 | bool __perf_evsel_stat__is(struct perf_evsel *evsel, | ||
66 | enum perf_stat_evsel_id id) | ||
67 | { | ||
68 | struct perf_stat *ps = evsel->priv; | ||
69 | |||
70 | return ps->id == id; | ||
71 | } | ||
72 | |||
73 | #define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name | ||
74 | static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { | ||
75 | ID(NONE, x), | ||
76 | ID(CYCLES_IN_TX, cpu/cycles-t/), | ||
77 | ID(TRANSACTION_START, cpu/tx-start/), | ||
78 | ID(ELISION_START, cpu/el-start/), | ||
79 | ID(CYCLES_IN_TX_CP, cpu/cycles-ct/), | ||
80 | }; | ||
81 | #undef ID | ||
82 | |||
83 | void perf_stat_evsel_id_init(struct perf_evsel *evsel) | ||
84 | { | ||
85 | struct perf_stat *ps = evsel->priv; | ||
86 | int i; | ||
87 | |||
88 | /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */ | ||
89 | |||
90 | for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) { | ||
91 | if (!strcmp(perf_evsel__name(evsel), id_str[i])) { | ||
92 | ps->id = i; | ||
93 | break; | ||
94 | } | ||
95 | } | ||
96 | } | ||
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 5667fc3e39cf..615c779eb42a 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define __PERF_STATS_H | 2 | #define __PERF_STATS_H |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | #include <stdio.h> | ||
5 | 6 | ||
6 | struct stats | 7 | struct stats |
7 | { | 8 | { |
@@ -9,6 +10,27 @@ struct stats | |||
9 | u64 max, min; | 10 | u64 max, min; |
10 | }; | 11 | }; |
11 | 12 | ||
13 | enum perf_stat_evsel_id { | ||
14 | PERF_STAT_EVSEL_ID__NONE = 0, | ||
15 | PERF_STAT_EVSEL_ID__CYCLES_IN_TX, | ||
16 | PERF_STAT_EVSEL_ID__TRANSACTION_START, | ||
17 | PERF_STAT_EVSEL_ID__ELISION_START, | ||
18 | PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP, | ||
19 | PERF_STAT_EVSEL_ID__MAX, | ||
20 | }; | ||
21 | |||
22 | struct perf_stat { | ||
23 | struct stats res_stats[3]; | ||
24 | enum perf_stat_evsel_id id; | ||
25 | }; | ||
26 | |||
27 | enum aggr_mode { | ||
28 | AGGR_NONE, | ||
29 | AGGR_GLOBAL, | ||
30 | AGGR_SOCKET, | ||
31 | AGGR_CORE, | ||
32 | }; | ||
33 | |||
12 | void update_stats(struct stats *stats, u64 val); | 34 | void update_stats(struct stats *stats, u64 val); |
13 | double avg_stats(struct stats *stats); | 35 | double avg_stats(struct stats *stats); |
14 | double stddev_stats(struct stats *stats); | 36 | double stddev_stats(struct stats *stats); |
@@ -22,4 +44,22 @@ static inline void init_stats(struct stats *stats) | |||
22 | stats->min = (u64) -1; | 44 | stats->min = (u64) -1; |
23 | stats->max = 0; | 45 | stats->max = 0; |
24 | } | 46 | } |
47 | |||
48 | struct perf_evsel; | ||
49 | bool __perf_evsel_stat__is(struct perf_evsel *evsel, | ||
50 | enum perf_stat_evsel_id id); | ||
51 | |||
52 | #define perf_stat_evsel__is(evsel, id) \ | ||
53 | __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id) | ||
54 | |||
55 | void perf_stat_evsel_id_init(struct perf_evsel *evsel); | ||
56 | |||
57 | extern struct stats walltime_nsecs_stats; | ||
58 | |||
59 | void perf_stat__reset_shadow_stats(void); | ||
60 | void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, | ||
61 | int cpu); | ||
62 | void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, | ||
63 | double avg, int cpu, enum aggr_mode aggr); | ||
64 | |||
25 | #endif | 65 | #endif |
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a93ba85509b2..65f7e389ae09 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c | |||
@@ -1016,7 +1016,7 @@ int dso__load_sym(struct dso *dso, struct map *map, | |||
1016 | curr_map = map__new2(start, curr_dso, | 1016 | curr_map = map__new2(start, curr_dso, |
1017 | map->type); | 1017 | map->type); |
1018 | if (curr_map == NULL) { | 1018 | if (curr_map == NULL) { |
1019 | dso__delete(curr_dso); | 1019 | dso__put(curr_dso); |
1020 | goto out_elf_end; | 1020 | goto out_elf_end; |
1021 | } | 1021 | } |
1022 | if (adjust_kernel_syms) { | 1022 | if (adjust_kernel_syms) { |
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a3e80d6ad70a..504f2d73b7ee 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c | |||
@@ -786,7 +786,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, | |||
786 | 786 | ||
787 | curr_map = map__new2(pos->start, ndso, map->type); | 787 | curr_map = map__new2(pos->start, ndso, map->type); |
788 | if (curr_map == NULL) { | 788 | if (curr_map == NULL) { |
789 | dso__delete(ndso); | 789 | dso__put(ndso); |
790 | return -1; | 790 | return -1; |
791 | } | 791 | } |
792 | 792 | ||
@@ -1364,7 +1364,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, | |||
1364 | case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: | 1364 | case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: |
1365 | /* | 1365 | /* |
1366 | * kernel modules know their symtab type - it's set when | 1366 | * kernel modules know their symtab type - it's set when |
1367 | * creating a module dso in machine__new_module(). | 1367 | * creating a module dso in machine__findnew_module_map(). |
1368 | */ | 1368 | */ |
1369 | return kmod && dso->symtab_type == type; | 1369 | return kmod && dso->symtab_type == type; |
1370 | 1370 | ||
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 2e8f6886ca72..4b89118f158d 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c | |||
@@ -120,14 +120,14 @@ void machine__exit_vdso(struct machine *machine) | |||
120 | zfree(&machine->vdso_info); | 120 | zfree(&machine->vdso_info); |
121 | } | 121 | } |
122 | 122 | ||
123 | static struct dso *machine__addnew_vdso(struct machine *machine, const char *short_name, | 123 | static struct dso *__machine__addnew_vdso(struct machine *machine, const char *short_name, |
124 | const char *long_name) | 124 | const char *long_name) |
125 | { | 125 | { |
126 | struct dso *dso; | 126 | struct dso *dso; |
127 | 127 | ||
128 | dso = dso__new(short_name); | 128 | dso = dso__new(short_name); |
129 | if (dso != NULL) { | 129 | if (dso != NULL) { |
130 | dsos__add(&machine->dsos, dso); | 130 | __dsos__add(&machine->dsos, dso); |
131 | dso__set_long_name(dso, long_name, false); | 131 | dso__set_long_name(dso, long_name, false); |
132 | } | 132 | } |
133 | 133 | ||
@@ -230,27 +230,31 @@ static const char *vdso__get_compat_file(struct vdso_file *vdso_file) | |||
230 | return vdso_file->temp_file_name; | 230 | return vdso_file->temp_file_name; |
231 | } | 231 | } |
232 | 232 | ||
233 | static struct dso *vdso__findnew_compat(struct machine *machine, | 233 | static struct dso *__machine__findnew_compat(struct machine *machine, |
234 | struct vdso_file *vdso_file) | 234 | struct vdso_file *vdso_file) |
235 | { | 235 | { |
236 | const char *file_name; | 236 | const char *file_name; |
237 | struct dso *dso; | 237 | struct dso *dso; |
238 | 238 | ||
239 | dso = dsos__find(&machine->dsos, vdso_file->dso_name, true); | 239 | pthread_rwlock_wrlock(&machine->dsos.lock); |
240 | dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true); | ||
240 | if (dso) | 241 | if (dso) |
241 | return dso; | 242 | goto out_unlock; |
242 | 243 | ||
243 | file_name = vdso__get_compat_file(vdso_file); | 244 | file_name = vdso__get_compat_file(vdso_file); |
244 | if (!file_name) | 245 | if (!file_name) |
245 | return NULL; | 246 | goto out_unlock; |
246 | 247 | ||
247 | return machine__addnew_vdso(machine, vdso_file->dso_name, file_name); | 248 | dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name); |
249 | out_unlock: | ||
250 | pthread_rwlock_unlock(&machine->dsos.lock); | ||
251 | return dso; | ||
248 | } | 252 | } |
249 | 253 | ||
250 | static int machine__findnew_vdso_compat(struct machine *machine, | 254 | static int __machine__findnew_vdso_compat(struct machine *machine, |
251 | struct thread *thread, | 255 | struct thread *thread, |
252 | struct vdso_info *vdso_info, | 256 | struct vdso_info *vdso_info, |
253 | struct dso **dso) | 257 | struct dso **dso) |
254 | { | 258 | { |
255 | enum dso_type dso_type; | 259 | enum dso_type dso_type; |
256 | 260 | ||
@@ -267,10 +271,10 @@ static int machine__findnew_vdso_compat(struct machine *machine, | |||
267 | 271 | ||
268 | switch (dso_type) { | 272 | switch (dso_type) { |
269 | case DSO__TYPE_32BIT: | 273 | case DSO__TYPE_32BIT: |
270 | *dso = vdso__findnew_compat(machine, &vdso_info->vdso32); | 274 | *dso = __machine__findnew_compat(machine, &vdso_info->vdso32); |
271 | return 1; | 275 | return 1; |
272 | case DSO__TYPE_X32BIT: | 276 | case DSO__TYPE_X32BIT: |
273 | *dso = vdso__findnew_compat(machine, &vdso_info->vdsox32); | 277 | *dso = __machine__findnew_compat(machine, &vdso_info->vdsox32); |
274 | return 1; | 278 | return 1; |
275 | case DSO__TYPE_UNKNOWN: | 279 | case DSO__TYPE_UNKNOWN: |
276 | case DSO__TYPE_64BIT: | 280 | case DSO__TYPE_64BIT: |
@@ -285,31 +289,33 @@ struct dso *machine__findnew_vdso(struct machine *machine, | |||
285 | struct thread *thread __maybe_unused) | 289 | struct thread *thread __maybe_unused) |
286 | { | 290 | { |
287 | struct vdso_info *vdso_info; | 291 | struct vdso_info *vdso_info; |
288 | struct dso *dso; | 292 | struct dso *dso = NULL; |
289 | 293 | ||
294 | pthread_rwlock_wrlock(&machine->dsos.lock); | ||
290 | if (!machine->vdso_info) | 295 | if (!machine->vdso_info) |
291 | machine->vdso_info = vdso_info__new(); | 296 | machine->vdso_info = vdso_info__new(); |
292 | 297 | ||
293 | vdso_info = machine->vdso_info; | 298 | vdso_info = machine->vdso_info; |
294 | if (!vdso_info) | 299 | if (!vdso_info) |
295 | return NULL; | 300 | goto out_unlock; |
296 | 301 | ||
297 | #if BITS_PER_LONG == 64 | 302 | #if BITS_PER_LONG == 64 |
298 | if (machine__findnew_vdso_compat(machine, thread, vdso_info, &dso)) | 303 | if (__machine__findnew_vdso_compat(machine, thread, vdso_info, &dso)) |
299 | return dso; | 304 | goto out_unlock; |
300 | #endif | 305 | #endif |
301 | 306 | ||
302 | dso = dsos__find(&machine->dsos, DSO__NAME_VDSO, true); | 307 | dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true); |
303 | if (!dso) { | 308 | if (!dso) { |
304 | char *file; | 309 | char *file; |
305 | 310 | ||
306 | file = get_file(&vdso_info->vdso); | 311 | file = get_file(&vdso_info->vdso); |
307 | if (!file) | 312 | if (file) |
308 | return NULL; | 313 | dso = __machine__addnew_vdso(machine, DSO__NAME_VDSO, file); |
309 | |||
310 | dso = machine__addnew_vdso(machine, DSO__NAME_VDSO, file); | ||
311 | } | 314 | } |
312 | 315 | ||
316 | out_unlock: | ||
317 | dso__get(dso); | ||
318 | pthread_rwlock_unlock(&machine->dsos.lock); | ||
313 | return dso; | 319 | return dso; |
314 | } | 320 | } |
315 | 321 | ||