diff options
Diffstat (limited to 'tools/perf/util/stat-shadow.c')
| -rw-r--r-- | tools/perf/util/stat-shadow.c | 197 |
1 files changed, 197 insertions, 0 deletions
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 8a2bbd2a4d82..ac10cc675d39 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c | |||
| @@ -3,6 +3,9 @@ | |||
| 3 | #include "stat.h" | 3 | #include "stat.h" |
| 4 | #include "color.h" | 4 | #include "color.h" |
| 5 | #include "pmu.h" | 5 | #include "pmu.h" |
| 6 | #include "rblist.h" | ||
| 7 | #include "evlist.h" | ||
| 8 | #include "expr.h" | ||
| 6 | 9 | ||
| 7 | enum { | 10 | enum { |
| 8 | CTX_BIT_USER = 1 << 0, | 11 | CTX_BIT_USER = 1 << 0, |
| @@ -41,13 +44,73 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; | |||
| 41 | static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; | 44 | static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; |
| 42 | static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; | 45 | static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; |
| 43 | static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; | 46 | static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; |
| 47 | static struct rblist runtime_saved_values; | ||
| 44 | static bool have_frontend_stalled; | 48 | static bool have_frontend_stalled; |
| 45 | 49 | ||
| 46 | struct stats walltime_nsecs_stats; | 50 | struct stats walltime_nsecs_stats; |
| 47 | 51 | ||
| 52 | struct saved_value { | ||
| 53 | struct rb_node rb_node; | ||
| 54 | struct perf_evsel *evsel; | ||
| 55 | int cpu; | ||
| 56 | int ctx; | ||
| 57 | struct stats stats; | ||
| 58 | }; | ||
| 59 | |||
| 60 | static int saved_value_cmp(struct rb_node *rb_node, const void *entry) | ||
| 61 | { | ||
| 62 | struct saved_value *a = container_of(rb_node, | ||
| 63 | struct saved_value, | ||
| 64 | rb_node); | ||
| 65 | const struct saved_value *b = entry; | ||
| 66 | |||
| 67 | if (a->ctx != b->ctx) | ||
| 68 | return a->ctx - b->ctx; | ||
| 69 | if (a->cpu != b->cpu) | ||
| 70 | return a->cpu - b->cpu; | ||
| 71 | return a->evsel - b->evsel; | ||
| 72 | } | ||
| 73 | |||
| 74 | static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, | ||
| 75 | const void *entry) | ||
| 76 | { | ||
| 77 | struct saved_value *nd = malloc(sizeof(struct saved_value)); | ||
| 78 | |||
| 79 | if (!nd) | ||
| 80 | return NULL; | ||
| 81 | memcpy(nd, entry, sizeof(struct saved_value)); | ||
| 82 | return &nd->rb_node; | ||
| 83 | } | ||
| 84 | |||
| 85 | static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, | ||
| 86 | int cpu, int ctx, | ||
| 87 | bool create) | ||
| 88 | { | ||
| 89 | struct rb_node *nd; | ||
| 90 | struct saved_value dm = { | ||
| 91 | .cpu = cpu, | ||
| 92 | .ctx = ctx, | ||
| 93 | .evsel = evsel, | ||
| 94 | }; | ||
| 95 | nd = rblist__find(&runtime_saved_values, &dm); | ||
| 96 | if (nd) | ||
| 97 | return container_of(nd, struct saved_value, rb_node); | ||
| 98 | if (create) { | ||
| 99 | rblist__add_node(&runtime_saved_values, &dm); | ||
| 100 | nd = rblist__find(&runtime_saved_values, &dm); | ||
| 101 | if (nd) | ||
| 102 | return container_of(nd, struct saved_value, rb_node); | ||
| 103 | } | ||
| 104 | return NULL; | ||
| 105 | } | ||
| 106 | |||
| 48 | void perf_stat__init_shadow_stats(void) | 107 | void perf_stat__init_shadow_stats(void) |
| 49 | { | 108 | { |
| 50 | have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); | 109 | have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); |
| 110 | rblist__init(&runtime_saved_values); | ||
| 111 | runtime_saved_values.node_cmp = saved_value_cmp; | ||
| 112 | runtime_saved_values.node_new = saved_value_new; | ||
| 113 | /* No delete for now */ | ||
| 51 | } | 114 | } |
| 52 | 115 | ||
| 53 | static int evsel_context(struct perf_evsel *evsel) | 116 | static int evsel_context(struct perf_evsel *evsel) |
| @@ -70,6 +133,8 @@ static int evsel_context(struct perf_evsel *evsel) | |||
| 70 | 133 | ||
| 71 | void perf_stat__reset_shadow_stats(void) | 134 | void perf_stat__reset_shadow_stats(void) |
| 72 | { | 135 | { |
| 136 | struct rb_node *pos, *next; | ||
| 137 | |||
| 73 | memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); | 138 | memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); |
| 74 | memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); | 139 | memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); |
| 75 | memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); | 140 | memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); |
| @@ -92,6 +157,15 @@ void perf_stat__reset_shadow_stats(void) | |||
| 92 | memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); | 157 | memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); |
| 93 | memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); | 158 | memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); |
| 94 | memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); | 159 | memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); |
| 160 | |||
| 161 | next = rb_first(&runtime_saved_values.entries); | ||
| 162 | while (next) { | ||
| 163 | pos = next; | ||
| 164 | next = rb_next(pos); | ||
| 165 | memset(&container_of(pos, struct saved_value, rb_node)->stats, | ||
| 166 | 0, | ||
| 167 | sizeof(struct stats)); | ||
| 168 | } | ||
| 95 | } | 169 | } |
| 96 | 170 | ||
| 97 | /* | 171 | /* |
| @@ -143,6 +217,12 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, | |||
| 143 | update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); | 217 | update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); |
| 144 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) | 218 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) |
| 145 | update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); | 219 | update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); |
| 220 | |||
| 221 | if (counter->collect_stat) { | ||
| 222 | struct saved_value *v = saved_value_lookup(counter, cpu, ctx, | ||
| 223 | true); | ||
| 224 | update_stats(&v->stats, count[0]); | ||
| 225 | } | ||
| 146 | } | 226 | } |
| 147 | 227 | ||
| 148 | /* used for get_ratio_color() */ | 228 | /* used for get_ratio_color() */ |
| @@ -172,6 +252,95 @@ static const char *get_ratio_color(enum grc_type type, double ratio) | |||
| 172 | return color; | 252 | return color; |
| 173 | } | 253 | } |
| 174 | 254 | ||
| 255 | static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list, | ||
| 256 | const char *name) | ||
| 257 | { | ||
| 258 | struct perf_evsel *c2; | ||
| 259 | |||
| 260 | evlist__for_each_entry (evsel_list, c2) { | ||
| 261 | if (!strcasecmp(c2->name, name)) | ||
| 262 | return c2; | ||
| 263 | } | ||
| 264 | return NULL; | ||
| 265 | } | ||
| 266 | |||
| 267 | /* Mark MetricExpr target events and link events using them to them. */ | ||
| 268 | void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list) | ||
| 269 | { | ||
| 270 | struct perf_evsel *counter, *leader, **metric_events, *oc; | ||
| 271 | bool found; | ||
| 272 | const char **metric_names; | ||
| 273 | int i; | ||
| 274 | int num_metric_names; | ||
| 275 | |||
| 276 | evlist__for_each_entry(evsel_list, counter) { | ||
| 277 | bool invalid = false; | ||
| 278 | |||
| 279 | leader = counter->leader; | ||
| 280 | if (!counter->metric_expr) | ||
| 281 | continue; | ||
| 282 | metric_events = counter->metric_events; | ||
| 283 | if (!metric_events) { | ||
| 284 | if (expr__find_other(counter->metric_expr, counter->name, | ||
| 285 | &metric_names, &num_metric_names) < 0) | ||
| 286 | continue; | ||
| 287 | |||
| 288 | metric_events = calloc(sizeof(struct perf_evsel *), | ||
| 289 | num_metric_names + 1); | ||
| 290 | if (!metric_events) | ||
| 291 | return; | ||
| 292 | counter->metric_events = metric_events; | ||
| 293 | } | ||
| 294 | |||
| 295 | for (i = 0; i < num_metric_names; i++) { | ||
| 296 | found = false; | ||
| 297 | if (leader) { | ||
| 298 | /* Search in group */ | ||
| 299 | for_each_group_member (oc, leader) { | ||
| 300 | if (!strcasecmp(oc->name, metric_names[i])) { | ||
| 301 | found = true; | ||
| 302 | break; | ||
| 303 | } | ||
| 304 | } | ||
| 305 | } | ||
| 306 | if (!found) { | ||
| 307 | /* Search ignoring groups */ | ||
| 308 | oc = perf_stat__find_event(evsel_list, metric_names[i]); | ||
| 309 | } | ||
| 310 | if (!oc) { | ||
| 311 | /* Deduping one is good enough to handle duplicated PMUs. */ | ||
| 312 | static char *printed; | ||
| 313 | |||
| 314 | /* | ||
| 315 | * Adding events automatically would be difficult, because | ||
| 316 | * it would risk creating groups that are not schedulable. | ||
| 317 | * perf stat doesn't understand all the scheduling constraints | ||
| 318 | * of events. So we ask the user instead to add the missing | ||
| 319 | * events. | ||
| 320 | */ | ||
| 321 | if (!printed || strcasecmp(printed, metric_names[i])) { | ||
| 322 | fprintf(stderr, | ||
| 323 | "Add %s event to groups to get metric expression for %s\n", | ||
| 324 | metric_names[i], | ||
| 325 | counter->name); | ||
| 326 | printed = strdup(metric_names[i]); | ||
| 327 | } | ||
| 328 | invalid = true; | ||
| 329 | continue; | ||
| 330 | } | ||
| 331 | metric_events[i] = oc; | ||
| 332 | oc->collect_stat = true; | ||
| 333 | } | ||
| 334 | metric_events[i] = NULL; | ||
| 335 | free(metric_names); | ||
| 336 | if (invalid) { | ||
| 337 | free(metric_events); | ||
| 338 | counter->metric_events = NULL; | ||
| 339 | counter->metric_expr = NULL; | ||
| 340 | } | ||
| 341 | } | ||
| 342 | } | ||
| 343 | |||
| 175 | static void print_stalled_cycles_frontend(int cpu, | 344 | static void print_stalled_cycles_frontend(int cpu, |
| 176 | struct perf_evsel *evsel, double avg, | 345 | struct perf_evsel *evsel, double avg, |
| 177 | struct perf_stat_output_ctx *out) | 346 | struct perf_stat_output_ctx *out) |
| @@ -614,6 +783,34 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, | |||
| 614 | be_bound * 100.); | 783 | be_bound * 100.); |
| 615 | else | 784 | else |
| 616 | print_metric(ctxp, NULL, NULL, name, 0); | 785 | print_metric(ctxp, NULL, NULL, name, 0); |
| 786 | } else if (evsel->metric_expr) { | ||
| 787 | struct parse_ctx pctx; | ||
| 788 | int i; | ||
| 789 | |||
| 790 | expr__ctx_init(&pctx); | ||
| 791 | expr__add_id(&pctx, evsel->name, avg); | ||
| 792 | for (i = 0; evsel->metric_events[i]; i++) { | ||
| 793 | struct saved_value *v; | ||
| 794 | |||
| 795 | v = saved_value_lookup(evsel->metric_events[i], cpu, ctx, false); | ||
| 796 | if (!v) | ||
| 797 | break; | ||
| 798 | expr__add_id(&pctx, evsel->metric_events[i]->name, | ||
| 799 | avg_stats(&v->stats)); | ||
| 800 | } | ||
| 801 | if (!evsel->metric_events[i]) { | ||
| 802 | const char *p = evsel->metric_expr; | ||
| 803 | |||
| 804 | if (expr__parse(&ratio, &pctx, &p) == 0) | ||
| 805 | print_metric(ctxp, NULL, "%8.1f", | ||
| 806 | evsel->metric_name ? | ||
| 807 | evsel->metric_name : | ||
| 808 | out->force_header ? evsel->name : "", | ||
| 809 | ratio); | ||
| 810 | else | ||
| 811 | print_metric(ctxp, NULL, NULL, "", 0); | ||
| 812 | } else | ||
| 813 | print_metric(ctxp, NULL, NULL, "", 0); | ||
| 617 | } else if (runtime_nsecs_stats[cpu].n != 0) { | 814 | } else if (runtime_nsecs_stats[cpu].n != 0) { |
| 618 | char unit = 'M'; | 815 | char unit = 'M'; |
| 619 | char unit_buf[10]; | 816 | char unit_buf[10]; |
