diff options
author | Jin Yao <yao.jin@linux.intel.com> | 2017-12-05 09:03:04 -0500 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2017-12-27 10:15:43 -0500 |
commit | 1fcd03946b52b8a57a6692fedd4406b45baedfe6 (patch) | |
tree | 6055638961dc9cd5adef9b65e301bd5c18555b8a | |
parent | 8efb2df1288bc1bcc3711a97028620717319f138 (diff) |
perf stat: Update per-thread shadow stats
The function perf_stat__update_shadow_stats() is called to update the
shadow stats on a set of static variables.
But these static variables are a limitation when extending the code to
support per-thread shadow stats.
This patch lets perf_stat__update_shadow_stats() update the shadow
stats on an input parameter 'st' and uses update_runtime_stat() to
update the stats. It will no longer directly update the static
variables as before.
Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1512482591-4646-5-git-send-email-yao.jin@linux.intel.com
[ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- | tools/perf/builtin-script.c | 3 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 3 | ||||
-rw-r--r-- | tools/perf/util/stat-shadow.c | 86 | ||||
-rw-r--r-- | tools/perf/util/stat.c | 8 | ||||
-rw-r--r-- | tools/perf/util/stat.h | 2 |
5 files changed, 68 insertions, 34 deletions
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 39d8b55f0db3..81b395040298 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c | |||
@@ -1548,7 +1548,8 @@ static void perf_sample__fprint_metric(struct perf_script *script, | |||
1548 | val = sample->period * evsel->scale; | 1548 | val = sample->period * evsel->scale; |
1549 | perf_stat__update_shadow_stats(evsel, | 1549 | perf_stat__update_shadow_stats(evsel, |
1550 | val, | 1550 | val, |
1551 | sample->cpu); | 1551 | sample->cpu, |
1552 | &rt_stat); | ||
1552 | evsel_script(evsel)->val = val; | 1553 | evsel_script(evsel)->val = val; |
1553 | if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) { | 1554 | if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) { |
1554 | for_each_group_member (ev2, evsel->leader) { | 1555 | for_each_group_member (ev2, evsel->leader) { |
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a027b4712e48..3f4a2c21b824 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -1214,7 +1214,8 @@ static void aggr_update_shadow(void) | |||
1214 | val += perf_counts(counter->counts, cpu, 0)->val; | 1214 | val += perf_counts(counter->counts, cpu, 0)->val; |
1215 | } | 1215 | } |
1216 | perf_stat__update_shadow_stats(counter, val, | 1216 | perf_stat__update_shadow_stats(counter, val, |
1217 | first_shadow_cpu(counter, id)); | 1217 | first_shadow_cpu(counter, id), |
1218 | &rt_stat); | ||
1218 | } | 1219 | } |
1219 | } | 1220 | } |
1220 | } | 1221 | } |
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 07cfbf613bdc..4b28c40de927 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c | |||
@@ -116,19 +116,29 @@ static void saved_value_delete(struct rblist *rblist __maybe_unused, | |||
116 | 116 | ||
117 | static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, | 117 | static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, |
118 | int cpu, | 118 | int cpu, |
119 | bool create) | 119 | bool create, |
120 | enum stat_type type, | ||
121 | int ctx, | ||
122 | struct runtime_stat *st) | ||
120 | { | 123 | { |
124 | struct rblist *rblist; | ||
121 | struct rb_node *nd; | 125 | struct rb_node *nd; |
122 | struct saved_value dm = { | 126 | struct saved_value dm = { |
123 | .cpu = cpu, | 127 | .cpu = cpu, |
124 | .evsel = evsel, | 128 | .evsel = evsel, |
129 | .type = type, | ||
130 | .ctx = ctx, | ||
131 | .stat = st, | ||
125 | }; | 132 | }; |
126 | nd = rblist__find(&runtime_saved_values, &dm); | 133 | |
134 | rblist = &st->value_list; | ||
135 | |||
136 | nd = rblist__find(rblist, &dm); | ||
127 | if (nd) | 137 | if (nd) |
128 | return container_of(nd, struct saved_value, rb_node); | 138 | return container_of(nd, struct saved_value, rb_node); |
129 | if (create) { | 139 | if (create) { |
130 | rblist__add_node(&runtime_saved_values, &dm); | 140 | rblist__add_node(rblist, &dm); |
131 | nd = rblist__find(&runtime_saved_values, &dm); | 141 | nd = rblist__find(rblist, &dm); |
132 | if (nd) | 142 | if (nd) |
133 | return container_of(nd, struct saved_value, rb_node); | 143 | return container_of(nd, struct saved_value, rb_node); |
134 | } | 144 | } |
@@ -217,13 +227,24 @@ void perf_stat__reset_shadow_stats(void) | |||
217 | } | 227 | } |
218 | } | 228 | } |
219 | 229 | ||
230 | static void update_runtime_stat(struct runtime_stat *st, | ||
231 | enum stat_type type, | ||
232 | int ctx, int cpu, u64 count) | ||
233 | { | ||
234 | struct saved_value *v = saved_value_lookup(NULL, cpu, true, | ||
235 | type, ctx, st); | ||
236 | |||
237 | if (v) | ||
238 | update_stats(&v->stats, count); | ||
239 | } | ||
240 | |||
220 | /* | 241 | /* |
221 | * Update various tracking values we maintain to print | 242 | * Update various tracking values we maintain to print |
222 | * more semantic information such as miss/hit ratios, | 243 | * more semantic information such as miss/hit ratios, |
223 | * instruction rates, etc: | 244 | * instruction rates, etc: |
224 | */ | 245 | */ |
225 | void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, | 246 | void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, |
226 | int cpu) | 247 | int cpu, struct runtime_stat *st) |
227 | { | 248 | { |
228 | int ctx = evsel_context(counter); | 249 | int ctx = evsel_context(counter); |
229 | 250 | ||
@@ -231,50 +252,58 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, | |||
231 | 252 | ||
232 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || | 253 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || |
233 | perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) | 254 | perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) |
234 | update_stats(&runtime_nsecs_stats[cpu], count); | 255 | update_runtime_stat(st, STAT_NSECS, 0, cpu, count); |
235 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | 256 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) |
236 | update_stats(&runtime_cycles_stats[ctx][cpu], count); | 257 | update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); |
237 | else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) | 258 | else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) |
238 | update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count); | 259 | update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); |
239 | else if (perf_stat_evsel__is(counter, TRANSACTION_START)) | 260 | else if (perf_stat_evsel__is(counter, TRANSACTION_START)) |
240 | update_stats(&runtime_transaction_stats[ctx][cpu], count); | 261 | update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count); |
241 | else if (perf_stat_evsel__is(counter, ELISION_START)) | 262 | else if (perf_stat_evsel__is(counter, ELISION_START)) |
242 | update_stats(&runtime_elision_stats[ctx][cpu], count); | 263 | update_runtime_stat(st, STAT_ELISION, ctx, cpu, count); |
243 | else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) | 264 | else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) |
244 | update_stats(&runtime_topdown_total_slots[ctx][cpu], count); | 265 | update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, |
266 | ctx, cpu, count); | ||
245 | else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) | 267 | else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) |
246 | update_stats(&runtime_topdown_slots_issued[ctx][cpu], count); | 268 | update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, |
269 | ctx, cpu, count); | ||
247 | else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) | 270 | else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) |
248 | update_stats(&runtime_topdown_slots_retired[ctx][cpu], count); | 271 | update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, |
272 | ctx, cpu, count); | ||
249 | else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) | 273 | else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) |
250 | update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count); | 274 | update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, |
275 | ctx, cpu, count); | ||
251 | else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) | 276 | else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) |
252 | update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count); | 277 | update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, |
278 | ctx, cpu, count); | ||
253 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) | 279 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) |
254 | update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count); | 280 | update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, |
281 | ctx, cpu, count); | ||
255 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) | 282 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) |
256 | update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count); | 283 | update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, |
284 | ctx, cpu, count); | ||
257 | else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | 285 | else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) |
258 | update_stats(&runtime_branches_stats[ctx][cpu], count); | 286 | update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); |
259 | else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) | 287 | else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) |
260 | update_stats(&runtime_cacherefs_stats[ctx][cpu], count); | 288 | update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); |
261 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) | 289 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) |
262 | update_stats(&runtime_l1_dcache_stats[ctx][cpu], count); | 290 | update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); |
263 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) | 291 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) |
264 | update_stats(&runtime_ll_cache_stats[ctx][cpu], count); | 292 | update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); |
265 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) | 293 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) |
266 | update_stats(&runtime_ll_cache_stats[ctx][cpu], count); | 294 | update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); |
267 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) | 295 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) |
268 | update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count); | 296 | update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); |
269 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) | 297 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) |
270 | update_stats(&runtime_itlb_cache_stats[ctx][cpu], count); | 298 | update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); |
271 | else if (perf_stat_evsel__is(counter, SMI_NUM)) | 299 | else if (perf_stat_evsel__is(counter, SMI_NUM)) |
272 | update_stats(&runtime_smi_num_stats[ctx][cpu], count); | 300 | update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); |
273 | else if (perf_stat_evsel__is(counter, APERF)) | 301 | else if (perf_stat_evsel__is(counter, APERF)) |
274 | update_stats(&runtime_aperf_stats[ctx][cpu], count); | 302 | update_runtime_stat(st, STAT_APERF, ctx, cpu, count); |
275 | 303 | ||
276 | if (counter->collect_stat) { | 304 | if (counter->collect_stat) { |
277 | struct saved_value *v = saved_value_lookup(counter, cpu, true); | 305 | struct saved_value *v = saved_value_lookup(counter, cpu, true, |
306 | STAT_NONE, 0, st); | ||
278 | update_stats(&v->stats, count); | 307 | update_stats(&v->stats, count); |
279 | } | 308 | } |
280 | } | 309 | } |
@@ -694,7 +723,8 @@ static void generic_metric(const char *metric_expr, | |||
694 | stats = &walltime_nsecs_stats; | 723 | stats = &walltime_nsecs_stats; |
695 | scale = 1e-9; | 724 | scale = 1e-9; |
696 | } else { | 725 | } else { |
697 | v = saved_value_lookup(metric_events[i], cpu, false); | 726 | v = saved_value_lookup(metric_events[i], cpu, false, |
727 | STAT_NONE, 0, &rt_stat); | ||
698 | if (!v) | 728 | if (!v) |
699 | break; | 729 | break; |
700 | stats = &v->stats; | 730 | stats = &v->stats; |
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 151e9efd7286..78abfd40b135 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c | |||
@@ -278,9 +278,11 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel | |||
278 | perf_evsel__compute_deltas(evsel, cpu, thread, count); | 278 | perf_evsel__compute_deltas(evsel, cpu, thread, count); |
279 | perf_counts_values__scale(count, config->scale, NULL); | 279 | perf_counts_values__scale(count, config->scale, NULL); |
280 | if (config->aggr_mode == AGGR_NONE) | 280 | if (config->aggr_mode == AGGR_NONE) |
281 | perf_stat__update_shadow_stats(evsel, count->val, cpu); | 281 | perf_stat__update_shadow_stats(evsel, count->val, cpu, |
282 | &rt_stat); | ||
282 | if (config->aggr_mode == AGGR_THREAD) | 283 | if (config->aggr_mode == AGGR_THREAD) |
283 | perf_stat__update_shadow_stats(evsel, count->val, 0); | 284 | perf_stat__update_shadow_stats(evsel, count->val, 0, |
285 | &rt_stat); | ||
284 | break; | 286 | break; |
285 | case AGGR_GLOBAL: | 287 | case AGGR_GLOBAL: |
286 | aggr->val += count->val; | 288 | aggr->val += count->val; |
@@ -362,7 +364,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, | |||
362 | /* | 364 | /* |
363 | * Save the full runtime - to allow normalization during printout: | 365 | * Save the full runtime - to allow normalization during printout: |
364 | */ | 366 | */ |
365 | perf_stat__update_shadow_stats(counter, *count, 0); | 367 | perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat); |
366 | 368 | ||
367 | return 0; | 369 | return 0; |
368 | } | 370 | } |
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index f20240037377..bb9902ad3a79 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h | |||
@@ -129,7 +129,7 @@ void runtime_stat__exit(struct runtime_stat *st); | |||
129 | void perf_stat__init_shadow_stats(void); | 129 | void perf_stat__init_shadow_stats(void); |
130 | void perf_stat__reset_shadow_stats(void); | 130 | void perf_stat__reset_shadow_stats(void); |
131 | void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, | 131 | void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, |
132 | int cpu); | 132 | int cpu, struct runtime_stat *st); |
133 | struct perf_stat_output_ctx { | 133 | struct perf_stat_output_ctx { |
134 | void *ctx; | 134 | void *ctx; |
135 | print_metric_t print_metric; | 135 | print_metric_t print_metric; |