aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-script.c
diff options
context:
space:
mode:
authorAndi Kleen <ak@linux.intel.com>2017-11-17 16:43:00 -0500
committerArnaldo Carvalho de Melo <acme@redhat.com>2017-11-29 16:18:01 -0500
commit4bd1bef8bba2f99ff472ae3617864dda301f81bd (patch)
treeab5ca9f0d5c4da82f9ef15915aa202f8419a4e43 /tools/perf/builtin-script.c
parent373565d285e8d2113f1b6c0a2e461b9c8d0da1c9 (diff)
perf script: Allow computing 'perf stat' style metrics
Add support for computing 'perf stat' style metrics in 'perf script'. When using leader sampling we can get metrics for each sampling period by computing formulas over the values of the different group members. This allows things like fine grained IPC tracking through sampling, much more fine grained than with 'perf stat'. The metric is still averaged over the sampling period, it is not just for the sampling point. This patch adds a new metric output field for 'perf script' that uses the existing 'perf stat' metrics infrastructure to compute any metrics supported by 'perf stat'. For example to sample IPC: $ perf record -e '{ref-cycles,cycles,instructions}:S' -a sleep 1 $ perf script -F metric,ip,sym,time,cpu,comm ... alsa-sink-ALC32 [000] 42815.856074: 7fd65937d6cc [unknown] alsa-sink-ALC32 [000] 42815.856074: 7fd65937d6cc [unknown] alsa-sink-ALC32 [000] 42815.856074: 7fd65937d6cc [unknown] alsa-sink-ALC32 [000] 42815.856074: metric: 0.13 insn per cycle swapper [000] 42815.857961: ffffffff81655df0 __schedule swapper [000] 42815.857961: ffffffff81655df0 __schedule swapper [000] 42815.857961: ffffffff81655df0 __schedule swapper [000] 42815.857961: metric: 0.23 insn per cycle qemu-system-x86 [000] 42815.858130: ffffffff8165ad0e _raw_spin_unlock_irqrestore qemu-system-x86 [000] 42815.858130: ffffffff8165ad0e _raw_spin_unlock_irqrestore qemu-system-x86 [000] 42815.858130: ffffffff8165ad0e _raw_spin_unlock_irqrestore qemu-system-x86 [000] 42815.858130: metric: 0.46 insn per cycle :4972 [000] 42815.858312: ffffffffa080e5f2 vmx_vcpu_run :4972 [000] 42815.858312: ffffffffa080e5f2 vmx_vcpu_run :4972 [000] 42815.858312: ffffffffa080e5f2 vmx_vcpu_run :4972 [000] 42815.858312: metric: 0.45 insn per cycle TopDown: This requires disabling SMT if you have it enabled, because SMT would require sampling per core, which is not supported. 
$ perf record -e '{ref-cycles,topdown-fetch-bubbles,\ topdown-recovery-bubbles,\ topdown-slots-retired,topdown-total-slots,\ topdown-slots-issued}:S' -a sleep 1 $ perf script --header -I -F cpu,ip,sym,event,metric,period ... [000] 121108 ref-cycles: ffffffff8165222e copy_user_enhanced_fast_string [000] 190350 topdown-fetch-bubbles: ffffffff8165222e copy_user_enhanced_fast_string [000] 2055 topdown-recovery-bubbles: ffffffff8165222e copy_user_enhanced_fast_string [000] 148729 topdown-slots-retired: ffffffff8165222e copy_user_enhanced_fast_string [000] 144324 topdown-total-slots: ffffffff8165222e copy_user_enhanced_fast_string [000] 160852 topdown-slots-issued: ffffffff8165222e copy_user_enhanced_fast_string [000] metric: 33.0% frontend bound [000] metric: 3.5% bad speculation [000] metric: 25.8% retiring [000] metric: 37.7% backend bound [000] 112112 ref-cycles: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 357222 topdown-fetch-bubbles: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 3325 topdown-recovery-bubbles: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 323553 topdown-slots-retired: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 270507 topdown-total-slots: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 341226 topdown-slots-issued: ffffffff8165aec8 _raw_spin_lock_irqsave [000] metric: 33.0% frontend bound [000] metric: 2.9% bad speculation [000] metric: 29.9% retiring [000] metric: 34.2% backend bound ... v2: Use evsel->priv for new fields Port to new base line, support fp output. Handle stats in ->stats, not ->priv Minor cleanups Extra explanation about the use of the term 'averaging', from Andi in the thread in the Link: tag below: <quote Andi> The current samples contains the sum of event counts for a sampling period. 
EventA-1 EventA-2 EventA-3 EventA-4 EventB-1 EventB-2 EventB-3 gap with no events overflow |-----------------------------------------------------------------| period-start period-end ^ ^ | | previous sample current sample So EventA = 4 and EventB = 3 at the sample point. I generate a metric, let's say EventA / EventB. It applies to the whole period. But the metric is over a longer time which does not have the same behavior. For example the gap above doesn't have any events, while they are clustered at the beginning and end of the sample period. But we're summing everything together. The metric doesn't know that the gap is different than the busy period. That's what I'm trying to express with averaging. </quote> Signed-off-by: Andi Kleen <ak@linux.intel.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Link: http://lkml.kernel.org/r/20171117214300.32746-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/builtin-script.c')
-rw-r--r--tools/perf/builtin-script.c97
1 files changed, 95 insertions, 2 deletions
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ee7c7aaaae72..39d8b55f0db3 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -22,6 +22,7 @@
22#include "util/cpumap.h" 22#include "util/cpumap.h"
23#include "util/thread_map.h" 23#include "util/thread_map.h"
24#include "util/stat.h" 24#include "util/stat.h"
25#include "util/color.h"
25#include "util/string2.h" 26#include "util/string2.h"
26#include "util/thread-stack.h" 27#include "util/thread-stack.h"
27#include "util/time-utils.h" 28#include "util/time-utils.h"
@@ -90,6 +91,7 @@ enum perf_output_field {
90 PERF_OUTPUT_SYNTH = 1U << 25, 91 PERF_OUTPUT_SYNTH = 1U << 25,
91 PERF_OUTPUT_PHYS_ADDR = 1U << 26, 92 PERF_OUTPUT_PHYS_ADDR = 1U << 26,
92 PERF_OUTPUT_UREGS = 1U << 27, 93 PERF_OUTPUT_UREGS = 1U << 27,
94 PERF_OUTPUT_METRIC = 1U << 28,
93}; 95};
94 96
95struct output_option { 97struct output_option {
@@ -124,6 +126,7 @@ struct output_option {
124 {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF}, 126 {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
125 {.str = "synth", .field = PERF_OUTPUT_SYNTH}, 127 {.str = "synth", .field = PERF_OUTPUT_SYNTH},
126 {.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR}, 128 {.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
129 {.str = "metric", .field = PERF_OUTPUT_METRIC},
127}; 130};
128 131
129enum { 132enum {
@@ -215,12 +218,20 @@ struct perf_evsel_script {
215 char *filename; 218 char *filename;
216 FILE *fp; 219 FILE *fp;
217 u64 samples; 220 u64 samples;
221 /* For metric output */
222 u64 val;
223 int gnum;
218}; 224};
219 225
226static inline struct perf_evsel_script *evsel_script(struct perf_evsel *evsel)
227{
228 return (struct perf_evsel_script *)evsel->priv;
229}
230
220static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel, 231static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel,
221 struct perf_data *data) 232 struct perf_data *data)
222{ 233{
223 struct perf_evsel_script *es = malloc(sizeof(*es)); 234 struct perf_evsel_script *es = zalloc(sizeof(*es));
224 235
225 if (es != NULL) { 236 if (es != NULL) {
226 if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0) 237 if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0)
@@ -228,7 +239,6 @@ static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel
228 es->fp = fopen(es->filename, "w"); 239 es->fp = fopen(es->filename, "w");
229 if (es->fp == NULL) 240 if (es->fp == NULL)
230 goto out_free_filename; 241 goto out_free_filename;
231 es->samples = 0;
232 } 242 }
233 243
234 return es; 244 return es;
@@ -1472,6 +1482,86 @@ static int data_src__fprintf(u64 data_src, FILE *fp)
1472 return fprintf(fp, "%-*s", maxlen, out); 1482 return fprintf(fp, "%-*s", maxlen, out);
1473} 1483}
1474 1484
/*
 * Context passed (as an opaque void *) to the metric print callbacks so they
 * can prefix each metric line with the originating sample's information.
 */
struct metric_ctx {
	struct perf_sample	*sample;	/* sample the metric belongs to */
	struct thread		*thread;	/* thread passed along with the sample */
	struct perf_evsel	*evsel;		/* event the sample was taken for */
	FILE			*fp;		/* stream the metric line is written to */
};
1491
1492static void script_print_metric(void *ctx, const char *color,
1493 const char *fmt,
1494 const char *unit, double val)
1495{
1496 struct metric_ctx *mctx = ctx;
1497
1498 if (!fmt)
1499 return;
1500 perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
1501 mctx->fp);
1502 fputs("\tmetric: ", mctx->fp);
1503 if (color)
1504 color_fprintf(mctx->fp, color, fmt, val);
1505 else
1506 printf(fmt, val);
1507 fprintf(mctx->fp, " %s\n", unit);
1508}
1509
1510static void script_new_line(void *ctx)
1511{
1512 struct metric_ctx *mctx = ctx;
1513
1514 perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
1515 mctx->fp);
1516 fputs("\tmetric: ", mctx->fp);
1517}
1518
1519static void perf_sample__fprint_metric(struct perf_script *script,
1520 struct thread *thread,
1521 struct perf_evsel *evsel,
1522 struct perf_sample *sample,
1523 FILE *fp)
1524{
1525 struct perf_stat_output_ctx ctx = {
1526 .print_metric = script_print_metric,
1527 .new_line = script_new_line,
1528 .ctx = &(struct metric_ctx) {
1529 .sample = sample,
1530 .thread = thread,
1531 .evsel = evsel,
1532 .fp = fp,
1533 },
1534 .force_header = false,
1535 };
1536 struct perf_evsel *ev2;
1537 static bool init;
1538 u64 val;
1539
1540 if (!init) {
1541 perf_stat__init_shadow_stats();
1542 init = true;
1543 }
1544 if (!evsel->stats)
1545 perf_evlist__alloc_stats(script->session->evlist, false);
1546 if (evsel_script(evsel->leader)->gnum++ == 0)
1547 perf_stat__reset_shadow_stats();
1548 val = sample->period * evsel->scale;
1549 perf_stat__update_shadow_stats(evsel,
1550 val,
1551 sample->cpu);
1552 evsel_script(evsel)->val = val;
1553 if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) {
1554 for_each_group_member (ev2, evsel->leader) {
1555 perf_stat__print_shadow_stats(ev2,
1556 evsel_script(ev2)->val,
1557 sample->cpu,
1558 &ctx,
1559 NULL);
1560 }
1561 evsel_script(evsel->leader)->gnum = 0;
1562 }
1563}
1564
1475static void process_event(struct perf_script *script, 1565static void process_event(struct perf_script *script,
1476 struct perf_sample *sample, struct perf_evsel *evsel, 1566 struct perf_sample *sample, struct perf_evsel *evsel,
1477 struct addr_location *al, 1567 struct addr_location *al,
@@ -1559,6 +1649,9 @@ static void process_event(struct perf_script *script,
1559 if (PRINT_FIELD(PHYS_ADDR)) 1649 if (PRINT_FIELD(PHYS_ADDR))
1560 fprintf(fp, "%16" PRIx64, sample->phys_addr); 1650 fprintf(fp, "%16" PRIx64, sample->phys_addr);
1561 fprintf(fp, "\n"); 1651 fprintf(fp, "\n");
1652
1653 if (PRINT_FIELD(METRIC))
1654 perf_sample__fprint_metric(script, thread, evsel, sample, fp);
1562} 1655}
1563 1656
1564static struct scripting_ops *scripting_ops; 1657static struct scripting_ops *scripting_ops;