Diffstat (limited to 'tools/perf/util/stat-shadow.c')

-rw-r--r--   tools/perf/util/stat-shadow.c | 426
1 file changed, 258 insertions(+), 168 deletions(-)

diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 855e35cbb1dc..594d14a02b67 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -9,17 +9,6 @@
 #include "expr.h"
 #include "metricgroup.h"
 
-enum {
-        CTX_BIT_USER    = 1 << 0,
-        CTX_BIT_KERNEL  = 1 << 1,
-        CTX_BIT_HV      = 1 << 2,
-        CTX_BIT_HOST    = 1 << 3,
-        CTX_BIT_IDLE    = 1 << 4,
-        CTX_BIT_MAX     = 1 << 5,
-};
-
-#define NUM_CTX CTX_BIT_MAX
-
 /*
  * AGGR_GLOBAL: Use CPU 0
  * AGGR_SOCKET: Use first CPU of socket
@@ -27,36 +16,18 @@ enum {
  * AGGR_NONE: Use matching CPU
  * AGGR_THREAD: Not supported?
  */
-static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
-static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
-static struct rblist runtime_saved_values;
 static bool have_frontend_stalled;
 
+struct runtime_stat rt_stat;
 struct stats walltime_nsecs_stats;
 
 struct saved_value {
         struct rb_node rb_node;
         struct perf_evsel *evsel;
+        enum stat_type type;
+        int ctx;
         int cpu;
+        struct runtime_stat *stat;
         struct stats stats;
 };
 
@@ -69,6 +40,30 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
 
         if (a->cpu != b->cpu)
                 return a->cpu - b->cpu;
+
+        /*
+         * Previously the rbtree was used to link generic metrics.
+         * The keys were evsel/cpu. Now the rbtree is extended to support
+         * per-thread shadow stats. For shadow stats case, the keys
+         * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
+         * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
+         */
+        if (a->type != b->type)
+                return a->type - b->type;
+
+        if (a->ctx != b->ctx)
+                return a->ctx - b->ctx;
+
+        if (a->evsel == NULL && b->evsel == NULL) {
+                if (a->stat == b->stat)
+                        return 0;
+
+                if ((char *)a->stat < (char *)b->stat)
+                        return -1;
+
+                return 1;
+        }
+
         if (a->evsel == b->evsel)
                 return 0;
         if ((char *)a->evsel < (char *)b->evsel)
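A note on the comparator above: it now defines a total order over the composite key (cpu, then type, then ctx, then stat or evsel), so one rbtree can hold both shadow-stat entries and generic-metric entries without collisions. A self-contained sketch of the same ordering, using hypothetical stand-in types for the perf internals:

        #include <stddef.h>

        /* Hypothetical mirror of the saved_value key fields. */
        struct key {
                int cpu;
                int type;               /* enum stat_type in the real code */
                int ctx;
                const void *evsel;      /* NULL for shadow stats */
                const void *stat;       /* owning struct runtime_stat */
        };

        /* Same precedence as the patched saved_value_cmp(). */
        static int key_cmp(const struct key *a, const struct key *b)
        {
                if (a->cpu != b->cpu)
                        return a->cpu - b->cpu;
                if (a->type != b->type)
                        return a->type - b->type;
                if (a->ctx != b->ctx)
                        return a->ctx - b->ctx;
                if (!a->evsel && !b->evsel)     /* shadow stats: compare owners */
                        return a->stat == b->stat ? 0 :
                               ((const char *)a->stat < (const char *)b->stat ? -1 : 1);
                return a->evsel == b->evsel ? 0 :
                       ((const char *)a->evsel < (const char *)b->evsel ? -1 : 1);
        }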
@@ -87,34 +82,66 @@ static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
         return &nd->rb_node;
 }
 
+static void saved_value_delete(struct rblist *rblist __maybe_unused,
+                               struct rb_node *rb_node)
+{
+        struct saved_value *v;
+
+        BUG_ON(!rb_node);
+        v = container_of(rb_node, struct saved_value, rb_node);
+        free(v);
+}
+
 static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
                                               int cpu,
-                                              bool create)
+                                              bool create,
+                                              enum stat_type type,
+                                              int ctx,
+                                              struct runtime_stat *st)
 {
+        struct rblist *rblist;
         struct rb_node *nd;
         struct saved_value dm = {
                 .cpu = cpu,
                 .evsel = evsel,
+                .type = type,
+                .ctx = ctx,
+                .stat = st,
         };
-        nd = rblist__find(&runtime_saved_values, &dm);
+
+        rblist = &st->value_list;
+
+        nd = rblist__find(rblist, &dm);
         if (nd)
                 return container_of(nd, struct saved_value, rb_node);
         if (create) {
-                rblist__add_node(&runtime_saved_values, &dm);
-                nd = rblist__find(&runtime_saved_values, &dm);
+                rblist__add_node(rblist, &dm);
+                nd = rblist__find(rblist, &dm);
                 if (nd)
                         return container_of(nd, struct saved_value, rb_node);
         }
         return NULL;
 }
 
+void runtime_stat__init(struct runtime_stat *st)
+{
+        struct rblist *rblist = &st->value_list;
+
+        rblist__init(rblist);
+        rblist->node_cmp = saved_value_cmp;
+        rblist->node_new = saved_value_new;
+        rblist->node_delete = saved_value_delete;
+}
+
+void runtime_stat__exit(struct runtime_stat *st)
+{
+        rblist__exit(&st->value_list);
+}
+
 void perf_stat__init_shadow_stats(void)
 {
         have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
-        rblist__init(&runtime_saved_values);
-        runtime_saved_values.node_cmp = saved_value_cmp;
-        runtime_saved_values.node_new = saved_value_new;
-        /* No delete for now */
+        runtime_stat__init(&rt_stat);
 }
 
 static int evsel_context(struct perf_evsel *evsel)
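runtime_stat__init()/runtime_stat__exit() let a caller own any number of shadow-stat containers, while the global rt_stat keeps serving the default path; note that saved_value_delete is now wired in, so exit actually frees the nodes. A hedged sketch of a per-thread caller — the helper functions and thread count are illustrative, not part of this patch:

        #include <stdlib.h>

        /* Illustrative only: one runtime_stat per measured thread. */
        static struct runtime_stat *alloc_thread_stats(int nthreads)
        {
                struct runtime_stat *stats = calloc(nthreads, sizeof(*stats));
                int i;

                if (!stats)
                        return NULL;
                for (i = 0; i < nthreads; i++)
                        runtime_stat__init(&stats[i]);
                return stats;
        }

        static void free_thread_stats(struct runtime_stat *stats, int nthreads)
        {
                int i;

                for (i = 0; i < nthreads; i++)
                        runtime_stat__exit(&stats[i]);  /* frees every saved_value */
                free(stats);
        }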
@@ -135,36 +162,13 @@ static int evsel_context(struct perf_evsel *evsel)
         return ctx;
 }
 
-void perf_stat__reset_shadow_stats(void)
+static void reset_stat(struct runtime_stat *st)
 {
+        struct rblist *rblist;
         struct rb_node *pos, *next;
 
-        memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
-        memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
-        memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
-        memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
-        memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
-        memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
-        memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
-        memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
-        memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
-        memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
-        memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
-        memset(runtime_cycles_in_tx_stats, 0,
-               sizeof(runtime_cycles_in_tx_stats));
-        memset(runtime_transaction_stats, 0,
-               sizeof(runtime_transaction_stats));
-        memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
-        memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
-        memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots));
-        memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired));
-        memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
-        memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
-        memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
-        memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
-        memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));
-
-        next = rb_first(&runtime_saved_values.entries);
+        rblist = &st->value_list;
+        next = rb_first(&rblist->entries);
         while (next) {
                 pos = next;
                 next = rb_next(pos);
@@ -174,13 +178,35 @@ void perf_stat__reset_shadow_stats(void)
         }
 }
 
+void perf_stat__reset_shadow_stats(void)
+{
+        reset_stat(&rt_stat);
+        memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+}
+
+void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
+{
+        reset_stat(st);
+}
+
+static void update_runtime_stat(struct runtime_stat *st,
+                                enum stat_type type,
+                                int ctx, int cpu, u64 count)
+{
+        struct saved_value *v = saved_value_lookup(NULL, cpu, true,
+                                                   type, ctx, st);
+
+        if (v)
+                update_stats(&v->stats, count);
+}
+
 /*
  * Update various tracking values we maintain to print
  * more semantic information such as miss/hit ratios,
  * instruction rates, etc:
  */
 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
-                                    int cpu)
+                                    int cpu, struct runtime_stat *st)
 {
         int ctx = evsel_context(counter);
 
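update_runtime_stat() funnels every counter value through saved_value_lookup() with evsel == NULL, keyed by an enum stat_type that lives on the header side of this patch (util/stat.h) and is therefore not visible in this diff. The sketch below is reconstructed purely from the constants the call sites use; the ordering inside the real enum is an assumption, except that STAT_NONE must be 0 (the comparator comment says type is 0 for generic metrics):

        /* Assumed shape; names taken from the call sites in this diff. */
        enum stat_type {
                STAT_NONE = 0,
                STAT_NSECS,
                STAT_CYCLES,
                STAT_STALLED_CYCLES_FRONT,
                STAT_STALLED_CYCLES_BACK,
                STAT_BRANCHES,
                STAT_CACHEREFS,
                STAT_L1_DCACHE,
                STAT_L1_ICACHE,
                STAT_LL_CACHE,
                STAT_ITLB_CACHE,
                STAT_DTLB_CACHE,
                STAT_CYCLES_IN_TX,
                STAT_TRANSACTION,
                STAT_ELISION,
                STAT_TOPDOWN_TOTAL_SLOTS,
                STAT_TOPDOWN_SLOTS_ISSUED,
                STAT_TOPDOWN_SLOTS_RETIRED,
                STAT_TOPDOWN_FETCH_BUBBLES,
                STAT_TOPDOWN_RECOVERY_BUBBLES,
                STAT_SMI_NUM,
                STAT_APERF
        };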
@@ -188,50 +214,58 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
 
         if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) ||
             perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK))
-                update_stats(&runtime_nsecs_stats[cpu], count);
+                update_runtime_stat(st, STAT_NSECS, 0, cpu, count);
         else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-                update_stats(&runtime_cycles_stats[ctx][cpu], count);
+                update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
         else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
-                update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count);
+                update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
         else if (perf_stat_evsel__is(counter, TRANSACTION_START))
-                update_stats(&runtime_transaction_stats[ctx][cpu], count);
+                update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
         else if (perf_stat_evsel__is(counter, ELISION_START))
-                update_stats(&runtime_elision_stats[ctx][cpu], count);
+                update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
         else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
-                update_stats(&runtime_topdown_total_slots[ctx][cpu], count);
+                update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
+                                    ctx, cpu, count);
         else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
-                update_stats(&runtime_topdown_slots_issued[ctx][cpu], count);
+                update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
+                                    ctx, cpu, count);
         else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
-                update_stats(&runtime_topdown_slots_retired[ctx][cpu], count);
+                update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
+                                    ctx, cpu, count);
         else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
-                update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count);
+                update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
+                                    ctx, cpu, count);
         else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
-                update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count);
+                update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
+                                    ctx, cpu, count);
         else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
-                update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count);
+                update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
+                                    ctx, cpu, count);
         else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
-                update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count);
+                update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
+                                    ctx, cpu, count);
         else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
-                update_stats(&runtime_branches_stats[ctx][cpu], count);
+                update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
         else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
-                update_stats(&runtime_cacherefs_stats[ctx][cpu], count);
+                update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
-                update_stats(&runtime_l1_dcache_stats[ctx][cpu], count);
+                update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
-                update_stats(&runtime_ll_cache_stats[ctx][cpu], count);
+                update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
-                update_stats(&runtime_ll_cache_stats[ctx][cpu], count);
+                update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
-                update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count);
+                update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
-                update_stats(&runtime_itlb_cache_stats[ctx][cpu], count);
+                update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
         else if (perf_stat_evsel__is(counter, SMI_NUM))
-                update_stats(&runtime_smi_num_stats[ctx][cpu], count);
+                update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
         else if (perf_stat_evsel__is(counter, APERF))
-                update_stats(&runtime_aperf_stats[ctx][cpu], count);
+                update_runtime_stat(st, STAT_APERF, ctx, cpu, count);
 
         if (counter->collect_stat) {
-                struct saved_value *v = saved_value_lookup(counter, cpu, true);
+                struct saved_value *v = saved_value_lookup(counter, cpu, true,
+                                                           STAT_NONE, 0, st);
                 update_stats(&v->stats, count);
         }
 }
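With the container now an explicit argument, the caller decides which runtime_stat receives each sample; the global rt_stat remains the default. A hedged sketch of a call site — record_sample() and its per_thread parameter are illustrative, not code from this patch:

        /* Illustrative: route a freshly read value into a per-thread
         * container when one exists, else into the global one.
         */
        static void record_sample(struct perf_evsel *counter, u64 count,
                                  int cpu, struct runtime_stat *per_thread)
        {
                struct runtime_stat *st = per_thread ? per_thread : &rt_stat;

                perf_stat__update_shadow_stats(counter, count, cpu, st);
        }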
@@ -352,15 +386,40 @@ void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
         }
 }
 
+static double runtime_stat_avg(struct runtime_stat *st,
+                               enum stat_type type, int ctx, int cpu)
+{
+        struct saved_value *v;
+
+        v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
+        if (!v)
+                return 0.0;
+
+        return avg_stats(&v->stats);
+}
+
+static double runtime_stat_n(struct runtime_stat *st,
+                             enum stat_type type, int ctx, int cpu)
+{
+        struct saved_value *v;
+
+        v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
+        if (!v)
+                return 0.0;
+
+        return v->stats.n;
+}
+
 static void print_stalled_cycles_frontend(int cpu,
                                           struct perf_evsel *evsel, double avg,
-                                          struct perf_stat_output_ctx *out)
+                                          struct perf_stat_output_ctx *out,
+                                          struct runtime_stat *st)
 {
         double total, ratio = 0.0;
         const char *color;
         int ctx = evsel_context(evsel);
 
-        total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+        total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
 
         if (total)
                 ratio = avg / total * 100.0;
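Both accessors do a non-creating lookup and return 0.0 when the key was never inserted. Since a zero average is indistinguishable from "never counted" through runtime_stat_avg() alone, presence tests use runtime_stat_n(), as the printing code below does. A minimal hedged example of the idiom:

        /* Illustrative: presence check via n, magnitude via avg. */
        if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) {
                double tx = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);
                /* ... print a ratio built from tx ... */
        }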
@@ -376,13 +435,14 @@ static void print_stalled_cycles_frontend(int cpu,
 
 static void print_stalled_cycles_backend(int cpu,
                                          struct perf_evsel *evsel, double avg,
-                                         struct perf_stat_output_ctx *out)
+                                         struct perf_stat_output_ctx *out,
+                                         struct runtime_stat *st)
 {
         double total, ratio = 0.0;
         const char *color;
         int ctx = evsel_context(evsel);
 
-        total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+        total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
 
         if (total)
                 ratio = avg / total * 100.0;
@@ -395,13 +455,14 @@ static void print_stalled_cycles_backend(int cpu,
 static void print_branch_misses(int cpu,
                                 struct perf_evsel *evsel,
                                 double avg,
-                                struct perf_stat_output_ctx *out)
+                                struct perf_stat_output_ctx *out,
+                                struct runtime_stat *st)
 {
         double total, ratio = 0.0;
         const char *color;
         int ctx = evsel_context(evsel);
 
-        total = avg_stats(&runtime_branches_stats[ctx][cpu]);
+        total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);
 
         if (total)
                 ratio = avg / total * 100.0;
@@ -414,13 +475,15 @@ static void print_branch_misses(int cpu,
 static void print_l1_dcache_misses(int cpu,
                                    struct perf_evsel *evsel,
                                    double avg,
-                                   struct perf_stat_output_ctx *out)
+                                   struct perf_stat_output_ctx *out,
+                                   struct runtime_stat *st)
+
 {
         double total, ratio = 0.0;
         const char *color;
         int ctx = evsel_context(evsel);
 
-        total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
+        total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);
 
         if (total)
                 ratio = avg / total * 100.0;
@@ -433,13 +496,15 @@ static void print_l1_dcache_misses(int cpu,
 static void print_l1_icache_misses(int cpu,
                                    struct perf_evsel *evsel,
                                    double avg,
-                                   struct perf_stat_output_ctx *out)
+                                   struct perf_stat_output_ctx *out,
+                                   struct runtime_stat *st)
+
 {
         double total, ratio = 0.0;
         const char *color;
         int ctx = evsel_context(evsel);
 
-        total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
+        total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);
 
         if (total)
                 ratio = avg / total * 100.0;
@@ -451,13 +516,14 @@ static void print_l1_icache_misses(int cpu,
 static void print_dtlb_cache_misses(int cpu,
                                     struct perf_evsel *evsel,
                                     double avg,
-                                    struct perf_stat_output_ctx *out)
+                                    struct perf_stat_output_ctx *out,
+                                    struct runtime_stat *st)
 {
         double total, ratio = 0.0;
         const char *color;
         int ctx = evsel_context(evsel);
 
-        total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
+        total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);
 
         if (total)
                 ratio = avg / total * 100.0;
@@ -469,13 +535,14 @@ static void print_dtlb_cache_misses(int cpu,
 static void print_itlb_cache_misses(int cpu,
                                     struct perf_evsel *evsel,
                                     double avg,
-                                    struct perf_stat_output_ctx *out)
+                                    struct perf_stat_output_ctx *out,
+                                    struct runtime_stat *st)
 {
         double total, ratio = 0.0;
         const char *color;
         int ctx = evsel_context(evsel);
 
-        total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
+        total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);
 
         if (total)
                 ratio = avg / total * 100.0;
@@ -487,13 +554,14 @@ static void print_itlb_cache_misses(int cpu,
 static void print_ll_cache_misses(int cpu,
                                   struct perf_evsel *evsel,
                                   double avg,
-                                  struct perf_stat_output_ctx *out)
+                                  struct perf_stat_output_ctx *out,
+                                  struct runtime_stat *st)
 {
         double total, ratio = 0.0;
         const char *color;
         int ctx = evsel_context(evsel);
 
-        total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
+        total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);
 
         if (total)
                 ratio = avg / total * 100.0;
@@ -551,68 +619,72 @@ static double sanitize_val(double x)
         return x;
 }
 
-static double td_total_slots(int ctx, int cpu)
+static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
 {
-        return avg_stats(&runtime_topdown_total_slots[ctx][cpu]);
+        return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
 }
 
-static double td_bad_spec(int ctx, int cpu)
+static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
 {
         double bad_spec = 0;
         double total_slots;
         double total;
 
-        total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) -
-                avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) +
-                avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]);
-        total_slots = td_total_slots(ctx, cpu);
+        total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
+                runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
+                runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);
+
+        total_slots = td_total_slots(ctx, cpu, st);
         if (total_slots)
                 bad_spec = total / total_slots;
         return sanitize_val(bad_spec);
 }
 
-static double td_retiring(int ctx, int cpu)
+static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
 {
         double retiring = 0;
-        double total_slots = td_total_slots(ctx, cpu);
-        double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);
+        double total_slots = td_total_slots(ctx, cpu, st);
+        double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
+                                            ctx, cpu);
 
         if (total_slots)
                 retiring = ret_slots / total_slots;
         return retiring;
 }
 
-static double td_fe_bound(int ctx, int cpu)
+static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
 {
         double fe_bound = 0;
-        double total_slots = td_total_slots(ctx, cpu);
-        double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]);
+        double total_slots = td_total_slots(ctx, cpu, st);
+        double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
+                                            ctx, cpu);
 
         if (total_slots)
                 fe_bound = fetch_bub / total_slots;
         return fe_bound;
 }
 
-static double td_be_bound(int ctx, int cpu)
+static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
 {
-        double sum = (td_fe_bound(ctx, cpu) +
-                      td_bad_spec(ctx, cpu) +
-                      td_retiring(ctx, cpu));
+        double sum = (td_fe_bound(ctx, cpu, st) +
+                      td_bad_spec(ctx, cpu, st) +
+                      td_retiring(ctx, cpu, st));
         if (sum == 0)
                 return 0;
         return sanitize_val(1.0 - sum);
 }
 
 static void print_smi_cost(int cpu, struct perf_evsel *evsel,
-                           struct perf_stat_output_ctx *out)
+                           struct perf_stat_output_ctx *out,
+                           struct runtime_stat *st)
 {
         double smi_num, aperf, cycles, cost = 0.0;
         int ctx = evsel_context(evsel);
         const char *color = NULL;
 
-        smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
-        aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
-        cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+        smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
+        aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
+        cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
 
         if ((cycles == 0) || (aperf == 0))
                 return;
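The td_* helpers above implement the level-1 topdown decomposition: retiring = slots_retired / total_slots, fe_bound = fetch_bubbles / total_slots, bad_spec = (slots_issued - slots_retired + recovery_bubbles) / total_slots, and be_bound takes the remainder 1 - (retiring + fe_bound + bad_spec), with sanitize_val() smoothing small measurement artifacts. A quick check of the arithmetic with made-up numbers:

        /* Made-up sample values, only to illustrate the arithmetic. */
        double total_slots = 1000.0, issued = 700.0, retired = 600.0;
        double recovery = 50.0, fetch_bub = 200.0;

        double retiring = retired / total_slots;                        /* 0.60 */
        double fe_bound = fetch_bub / total_slots;                      /* 0.20 */
        double bad_spec = (issued - retired + recovery) / total_slots;  /* 0.15 */
        double be_bound = 1.0 - (retiring + fe_bound + bad_spec);       /* 0.05 */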
@@ -632,7 +704,8 @@ static void generic_metric(const char *metric_expr,
                            const char *metric_name,
                            double avg,
                            int cpu,
-                           struct perf_stat_output_ctx *out)
+                           struct perf_stat_output_ctx *out,
+                           struct runtime_stat *st)
 {
         print_metric_t print_metric = out->print_metric;
         struct parse_ctx pctx;
@@ -651,7 +724,8 @@ static void generic_metric(const char *metric_expr,
                         stats = &walltime_nsecs_stats;
                         scale = 1e-9;
                 } else {
-                        v = saved_value_lookup(metric_events[i], cpu, false);
+                        v = saved_value_lookup(metric_events[i], cpu, false,
+                                               STAT_NONE, 0, st);
                         if (!v)
                                 break;
                         stats = &v->stats;
@@ -679,7 +753,8 @@ static void generic_metric(const char *metric_expr,
 void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                                    double avg, int cpu,
                                    struct perf_stat_output_ctx *out,
-                                   struct rblist *metric_events)
+                                   struct rblist *metric_events,
+                                   struct runtime_stat *st)
 {
         void *ctxp = out->ctx;
         print_metric_t print_metric = out->print_metric;
@@ -690,7 +765,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
         int num = 1;
 
         if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
-                total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+                total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+
                 if (total) {
                         ratio = avg / total;
                         print_metric(ctxp, NULL, "%7.2f ",
@@ -698,8 +774,13 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                 } else {
                         print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
                 }
-                total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
-                total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
+
+                total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
+                                         ctx, cpu);
+
+                total = max(total, runtime_stat_avg(st,
+                                                    STAT_STALLED_CYCLES_BACK,
+                                                    ctx, cpu));
 
                 if (total && avg) {
                         out->new_line(ctxp);
@@ -712,8 +793,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                                     "stalled cycles per insn", 0);
                 }
         } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
-                if (runtime_branches_stats[ctx][cpu].n != 0)
-                        print_branch_misses(cpu, evsel, avg, out);
+                if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
+                        print_branch_misses(cpu, evsel, avg, out, st);
                 else
                         print_metric(ctxp, NULL, NULL, "of all branches", 0);
         } else if (
@@ -721,8 +802,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
                                 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                  ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
-                if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
-                        print_l1_dcache_misses(cpu, evsel, avg, out);
+
+                if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
+                        print_l1_dcache_misses(cpu, evsel, avg, out, st);
                 else
                         print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
         } else if (
@@ -730,8 +812,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
                                 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                  ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
-                if (runtime_l1_icache_stats[ctx][cpu].n != 0)
-                        print_l1_icache_misses(cpu, evsel, avg, out);
+
+                if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
+                        print_l1_icache_misses(cpu, evsel, avg, out, st);
                 else
                         print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
         } else if (
@@ -739,8 +822,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
                                 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                  ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
-                if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
-                        print_dtlb_cache_misses(cpu, evsel, avg, out);
+
+                if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
+                        print_dtlb_cache_misses(cpu, evsel, avg, out, st);
                 else
                         print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
         } else if (
@@ -748,8 +832,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
                                 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                  ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
-                if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
-                        print_itlb_cache_misses(cpu, evsel, avg, out);
+
+                if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
+                        print_itlb_cache_misses(cpu, evsel, avg, out, st);
                 else
                         print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
         } else if (
@@ -757,27 +842,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
                                 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                  ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
-                if (runtime_ll_cache_stats[ctx][cpu].n != 0)
-                        print_ll_cache_misses(cpu, evsel, avg, out);
+
+                if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
+                        print_ll_cache_misses(cpu, evsel, avg, out, st);
                 else
                         print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
         } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
-                total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
+                total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
 
                 if (total)
                         ratio = avg * 100 / total;
 
-                if (runtime_cacherefs_stats[ctx][cpu].n != 0)
+                if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
                         print_metric(ctxp, NULL, "%8.3f %%",
                                      "of all cache refs", ratio);
                 else
                         print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
         } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
-                print_stalled_cycles_frontend(cpu, evsel, avg, out);
+                print_stalled_cycles_frontend(cpu, evsel, avg, out, st);
         } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
-                print_stalled_cycles_backend(cpu, evsel, avg, out);
+                print_stalled_cycles_backend(cpu, evsel, avg, out, st);
         } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
-                total = avg_stats(&runtime_nsecs_stats[cpu]);
+                total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
 
                 if (total) {
                         ratio = avg / total;
@@ -786,7 +872,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                         print_metric(ctxp, NULL, NULL, "Ghz", 0);
                 }
         } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
-                total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+                total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+
                 if (total)
                         print_metric(ctxp, NULL,
                                 "%7.2f%%", "transactional cycles",
@@ -795,8 +882,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                 print_metric(ctxp, NULL, NULL, "transactional cycles",
                              0);
         } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
-                total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
-                total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+                total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+                total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);
+
                 if (total2 < avg)
                         total2 = avg;
                 if (total)
@@ -805,19 +893,21 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                 else
                         print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
         } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
-                total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+                total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
+                                         ctx, cpu);
 
                 if (avg)
                         ratio = total / avg;
 
-                if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
+                if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
                         print_metric(ctxp, NULL, "%8.0f",
                                      "cycles / transaction", ratio);
                 else
                         print_metric(ctxp, NULL, NULL, "cycles / transaction",
                                       0);
         } else if (perf_stat_evsel__is(evsel, ELISION_START)) {
-                total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+                total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
+                                         ctx, cpu);
 
                 if (avg)
                         ratio = total / avg;
@@ -831,28 +921,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                 else
                         print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
         } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
-                double fe_bound = td_fe_bound(ctx, cpu);
+                double fe_bound = td_fe_bound(ctx, cpu, st);
 
                 if (fe_bound > 0.2)
                         color = PERF_COLOR_RED;
                 print_metric(ctxp, color, "%8.1f%%", "frontend bound",
                                 fe_bound * 100.);
         } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
-                double retiring = td_retiring(ctx, cpu);
+                double retiring = td_retiring(ctx, cpu, st);
 
                 if (retiring > 0.7)
                         color = PERF_COLOR_GREEN;
                 print_metric(ctxp, color, "%8.1f%%", "retiring",
                                 retiring * 100.);
         } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
-                double bad_spec = td_bad_spec(ctx, cpu);
+                double bad_spec = td_bad_spec(ctx, cpu, st);
 
                 if (bad_spec > 0.1)
                         color = PERF_COLOR_RED;
                 print_metric(ctxp, color, "%8.1f%%", "bad speculation",
                                 bad_spec * 100.);
         } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
-                double be_bound = td_be_bound(ctx, cpu);
+                double be_bound = td_be_bound(ctx, cpu, st);
                 const char *name = "backend bound";
                 static int have_recovery_bubbles = -1;
 
@@ -865,19 +955,19 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 
                 if (be_bound > 0.2)
                         color = PERF_COLOR_RED;
-                if (td_total_slots(ctx, cpu) > 0)
+                if (td_total_slots(ctx, cpu, st) > 0)
                         print_metric(ctxp, color, "%8.1f%%", name,
                                         be_bound * 100.);
                 else
                         print_metric(ctxp, NULL, NULL, name, 0);
         } else if (evsel->metric_expr) {
                 generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name,
-                                evsel->metric_name, avg, cpu, out);
-        } else if (runtime_nsecs_stats[cpu].n != 0) {
+                                evsel->metric_name, avg, cpu, out, st);
+        } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
                 char unit = 'M';
                 char unit_buf[10];
 
-                total = avg_stats(&runtime_nsecs_stats[cpu]);
+                total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
 
                 if (total)
                         ratio = 1000.0 * avg / total;
@@ -888,7 +978,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
                 print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
         } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
-                print_smi_cost(cpu, evsel, out);
+                print_smi_cost(cpu, evsel, out, st);
         } else {
                 num = 0;
         }
@@ -901,7 +991,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                 out->new_line(ctxp);
                 generic_metric(mexp->metric_expr, mexp->metric_events,
                                evsel->name, mexp->metric_name,
                               avg, cpu, out);
+                               avg, cpu, out, st);
         }
         }
         if (num == 0)
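For reference, the declarations this file now relies on sit on the header side of the patch (util/stat.h) and are not part of this diff. Reconstructed from usage here; the real header may carry more:

        /* Assumed companion declarations (util/stat.h side of the patch). */
        struct runtime_stat {
                struct rblist value_list;
        };

        extern struct runtime_stat rt_stat;

        void runtime_stat__init(struct runtime_stat *st);
        void runtime_stat__exit(struct runtime_stat *st);
        void perf_stat__reset_shadow_per_stat(struct runtime_stat *st);
        void perf_stat__update_shadow_stats(struct perf_evsel *counter,
                                            u64 count, int cpu,
                                            struct runtime_stat *st);
        void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                                           double avg, int cpu,
                                           struct perf_stat_output_ctx *out,
                                           struct rblist *metric_events,
                                           struct runtime_stat *st);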