perf script: Allow computing 'perf stat' style metrics

Add support for computing 'perf stat' style metrics in 'perf script'. When using leader sampling we can get metrics for each sampling period by computing formulas over the values of the different group members. This allows things like fine grained IPC tracking through sampling, much more fine grained than with 'perf stat'. The metric is still averaged over the sampling period, it is not just for the sampling point.

This patch adds a new metric output field for 'perf script' that uses the existing 'perf stat' metrics infrastructure to compute any metrics supported by 'perf stat'.

For example to sample IPC:

  $ perf record -e '{ref-cycles,cycles,instructions}:S' -a sleep 1
  $ perf script -F metric,ip,sym,time,cpu,comm
  ...
  alsa-sink-ALC32 [000] 42815.856074: 7fd65937d6cc [unknown]
  alsa-sink-ALC32 [000] 42815.856074: 7fd65937d6cc [unknown]
  alsa-sink-ALC32 [000] 42815.856074: 7fd65937d6cc [unknown]
  alsa-sink-ALC32 [000] 42815.856074: metric: 0.13 insn per cycle
  swapper [000] 42815.857961: ffffffff81655df0 __schedule
  swapper [000] 42815.857961: ffffffff81655df0 __schedule
  swapper [000] 42815.857961: ffffffff81655df0 __schedule
  swapper [000] 42815.857961: metric: 0.23 insn per cycle
  qemu-system-x86 [000] 42815.858130: ffffffff8165ad0e _raw_spin_unlock_irqrestore
  qemu-system-x86 [000] 42815.858130: ffffffff8165ad0e _raw_spin_unlock_irqrestore
  qemu-system-x86 [000] 42815.858130: ffffffff8165ad0e _raw_spin_unlock_irqrestore
  qemu-system-x86 [000] 42815.858130: metric: 0.46 insn per cycle
  :4972 [000] 42815.858312: ffffffffa080e5f2 vmx_vcpu_run
  :4972 [000] 42815.858312: ffffffffa080e5f2 vmx_vcpu_run
  :4972 [000] 42815.858312: ffffffffa080e5f2 vmx_vcpu_run
  :4972 [000] 42815.858312: metric: 0.45 insn per cycle

TopDown:

This requires disabling SMT if you have it enabled, because SMT would require sampling per core, which is not supported.
  $ perf record -e '{ref-cycles,topdown-fetch-bubbles,\
                     topdown-recovery-bubbles,\
                     topdown-slots-retired,topdown-total-slots,\
                     topdown-slots-issued}:S' -a sleep 1
  $ perf script --header -I -F cpu,ip,sym,event,metric,period
  ...
  [000] 121108 ref-cycles: ffffffff8165222e copy_user_enhanced_fast_string
  [000] 190350 topdown-fetch-bubbles: ffffffff8165222e copy_user_enhanced_fast_string
  [000] 2055 topdown-recovery-bubbles: ffffffff8165222e copy_user_enhanced_fast_string
  [000] 148729 topdown-slots-retired: ffffffff8165222e copy_user_enhanced_fast_string
  [000] 144324 topdown-total-slots: ffffffff8165222e copy_user_enhanced_fast_string
  [000] 160852 topdown-slots-issued: ffffffff8165222e copy_user_enhanced_fast_string
  [000] metric: 33.0% frontend bound
  [000] metric: 3.5% bad speculation
  [000] metric: 25.8% retiring
  [000] metric: 37.7% backend bound
  [000] 112112 ref-cycles: ffffffff8165aec8 _raw_spin_lock_irqsave
  [000] 357222 topdown-fetch-bubbles: ffffffff8165aec8 _raw_spin_lock_irqsave
  [000] 3325 topdown-recovery-bubbles: ffffffff8165aec8 _raw_spin_lock_irqsave
  [000] 323553 topdown-slots-retired: ffffffff8165aec8 _raw_spin_lock_irqsave
  [000] 270507 topdown-total-slots: ffffffff8165aec8 _raw_spin_lock_irqsave
  [000] 341226 topdown-slots-issued: ffffffff8165aec8 _raw_spin_lock_irqsave
  [000] metric: 33.0% frontend bound
  [000] metric: 2.9% bad speculation
  [000] metric: 29.9% retiring
  [000] metric: 34.2% backend bound
  ...

v2:
Use evsel->priv for new fields
Port to new base line, support fp output.
Handle stats in ->stats, not ->priv
Minor cleanups

Extra explanation about the use of the term 'averaging', from Andi in the thread in the Link: tag below:

<quote Andi>
The current samples contains the sum of event counts for a sampling period.
  EventA-1  EventA-2                               EventA-3  EventA-4
  EventB-1  EventB-2                                         EventC-3

                          gap with no events                 overflow
  |-----------------------------------------------------------------|
  period-start                                             period-end
  ^                                                                 ^
  |                                                                 |
  previous sample                                      current sample

So EventA = 4 and EventB = 3 at the sample point

I generate a metric, let's say EventA / EventB. It applies to the whole period.

But the metric is over a longer time which does not have the same behavior. For example the gap above doesn't have any events, while they are clustered at the beginning and end of the sample period.

But we're summing everything together. The metric doesn't know that the gap is different than the busy period.

That's what I'm trying to express with averaging.
</quote>

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20171117214300.32746-4-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
author: Andi Kleen <ak@linux.intel.com> 2017-11-17 16:43:00 -0500
committer: Arnaldo Carvalho de Melo <acme@redhat.com> 2017-11-29 16:18:01 -0500
commit: 4bd1bef8bba2f99ff472ae3617864dda301f81bd (patch)
tree: ab5ca9f0d5c4da82f9ef15915aa202f8419a4e43 /tools/perf/builtin-script.c
parent: 373565d285e8d2113f1b6c0a2e461b9c8d0da1c9 (diff)
1 files changed, 95 insertions, 2 deletions
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ee7c7aaaae72..39d8b55f0db3 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -22,6 +22,7 @@
 #include "util/cpumap.h"
 #include "util/thread_map.h"
 #include "util/stat.h"
+#include "util/color.h"
 #include "util/string2.h"
 #include "util/thread-stack.h"
 #include "util/time-utils.h"
@@ -90,6 +91,7 @@ enum perf_output_field {
        PERF_OUTPUT_SYNTH           = 1U << 25,
        PERF_OUTPUT_PHYS_ADDR       = 1U << 26,
        PERF_OUTPUT_UREGS           = 1U << 27,
+        PERF_OUTPUT_METRIC          = 1U << 28,
 };
 struct output_option {
@@ -124,6 +126,7 @@ struct output_option {
        {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
        {.str = "synth", .field = PERF_OUTPUT_SYNTH},
        {.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
+        {.str = "metric", .field = PERF_OUTPUT_METRIC},
 };
 enum {
@@ -215,12 +218,20 @@ struct perf_evsel_script {
       char *filename;
       FILE *fp;
       u64  samples;
+       /* For metric output */
+       u64  val;
+       int  gnum;
 };
+/* Return evsel->priv viewed as the script's per-event state. */
+static inline struct perf_evsel_script *evsel_script(struct perf_evsel *evsel)
+{
+        struct perf_evsel_script *es = (struct perf_evsel_script *)evsel->priv;
+        return es;
+}
 static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel,
                                                        struct perf_data *data)
 {
-        struct perf_evsel_script *es = malloc(sizeof(*es));
+        struct perf_evsel_script *es = zalloc(sizeof(*es));
        if (es != NULL) {
                if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0)
@@ -228,7 +239,6 @@ static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel
                es->fp = fopen(es->filename, "w");
                if (es->fp == NULL)
                        goto out_free_filename;
-                es->samples = 0;
        }
        return es;
@@ -1472,6 +1482,86 @@ static int data_src__fprintf(u64 data_src, FILE *fp)
        return fprintf(fp, "%-*s", maxlen, out);
 }
+/* Context handed to the metric output callbacks through their void *ctx. */
+struct metric_ctx {
+        struct perf_sample *sample;     /* passed to perf_sample__fprintf_start() */
+        struct thread *thread;          /* passed to perf_sample__fprintf_start() */
+        struct perf_evsel *evsel;       /* passed to perf_sample__fprintf_start() */
+        FILE *fp;                       /* stream the metric lines are written to */
+};
+/*
+ * print_metric callback: write one "metric: <value> <unit>" line, preceded
+ * by whatever perf_sample__fprintf_start() prints for the sample.
+ *
+ * @ctx:   struct metric_ctx describing the sample and the output stream
+ * @color: color for the value, or NULL to print it uncolored
+ * @fmt:   printf-style format applied to @val; NULL means print nothing
+ * @unit:  text printed after the value
+ * @val:   metric value
+ */
+static void script_print_metric(void *ctx, const char *color,
+                                const char *fmt,
+                                const char *unit, double val)
+{
+        struct metric_ctx *mctx = ctx;
+        if (!fmt)
+                return;
+        perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+                                   mctx->fp);
+        fputs("\tmetric: ", mctx->fp);
+        if (color)
+                color_fprintf(mctx->fp, color, fmt, val);
+        else
+                /* Write to the same stream as the rest of the line, not stdout. */
+                fprintf(mctx->fp, fmt, val);
+        fprintf(mctx->fp, " %s\n", unit);
+}
+/*
+ * new_line callback: open another metric line by repeating the output of
+ * perf_sample__fprintf_start() followed by the "metric: " tag.
+ */
+static void script_new_line(void *ctx)
+{
+        struct metric_ctx *m = ctx;
+        FILE *out = m->fp;
+        perf_sample__fprintf_start(m->sample, m->thread, m->evsel, out);
+        fputs("\tmetric: ", out);
+}
+/*
+ * Feed one sample into the 'perf stat' shadow stats and, once a sample has
+ * been counted for every member of the event group, print the metrics for
+ * all group members.
+ *
+ * @script: used to reach the session's evlist when stats must be allocated
+ * @thread, @evsel, @sample, @fp: handed to the output callbacks via
+ *          struct metric_ctx
+ *
+ * NOTE(review): relies on evsel->priv and evsel->leader->priv each pointing
+ * to their own struct perf_evsel_script -- confirm where ->priv is set up.
+ */
+static void perf_sample__fprint_metric(struct perf_script *script,
+                                       struct thread *thread,
+                                       struct perf_evsel *evsel,
+                                       struct perf_sample *sample,
+                                       FILE *fp)
+{
+        struct perf_stat_output_ctx ctx = {
+                .print_metric = script_print_metric,
+                .new_line = script_new_line,
+                .ctx = &(struct metric_ctx) {
+                                .sample = sample,
+                                .thread = thread,
+                                .evsel  = evsel,
+                                .fp     = fp,
+                         },
+                .force_header = false,
+        };
+        struct perf_evsel *ev2;
+        static bool init;
+        u64 val;
+        /* One-time setup of the shadow stats, on the first call only. */
+        if (!init) {
+                perf_stat__init_shadow_stats();
+                init = true;
+        }
+        /* Do not go on with this sample if the stats allocation failed. */
+        if (!evsel->stats &&
+            perf_evlist__alloc_stats(script->session->evlist, false))
+                return;
+        /* gnum counts samples seen for this group; 0 starts a new round. */
+        if (evsel_script(evsel->leader)->gnum++ == 0)
+                perf_stat__reset_shadow_stats();
+        val = sample->period * evsel->scale;
+        perf_stat__update_shadow_stats(evsel,
+                                       val,
+                                       sample->cpu);
+        /* Kept per event so it can be passed to the print call below. */
+        evsel_script(evsel)->val = val;
+        /* Every member has contributed: print and restart the count. */
+        if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) {
+                for_each_group_member (ev2, evsel->leader) {
+                        perf_stat__print_shadow_stats(ev2,
+                                                      evsel_script(ev2)->val,
+                                                      sample->cpu,
+                                                      &ctx,
+                                                      NULL);
+                }
+                evsel_script(evsel->leader)->gnum = 0;
+        }
+}
 static void process_event(struct perf_script *script,
                          struct perf_sample *sample, struct perf_evsel *evsel,
                          struct addr_location *al,
@@ -1559,6 +1649,9 @@ static void process_event(struct perf_script *script,
        if (PRINT_FIELD(PHYS_ADDR))
                fprintf(fp, "%16" PRIx64, sample->phys_addr);
        fprintf(fp, "\n");
+        if (PRINT_FIELD(METRIC))
+                perf_sample__fprint_metric(script, thread, evsel, sample, fp);
 }
 static struct scripting_ops     *scripting_ops;
author	Andi Kleen <ak@linux.intel.com>	2017-11-17 16:43:00 -0500
committer	Arnaldo Carvalho de Melo <acme@redhat.com>	2017-11-29 16:18:01 -0500
commit	4bd1bef8bba2f99ff472ae3617864dda301f81bd (patch)
tree	ab5ca9f0d5c4da82f9ef15915aa202f8419a4e43 /tools/perf/builtin-script.c
parent	373565d285e8d2113f1b6c0a2e461b9c8d0da1c9 (diff)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ee7c7aaaae72..39d8b55f0db3 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c
@@ -22,6 +22,7 @@
22	#include "util/cpumap.h"	22	#include "util/cpumap.h"
23	#include "util/thread_map.h"	23	#include "util/thread_map.h"
24	#include "util/stat.h"	24	#include "util/stat.h"
		25	#include "util/color.h"
25	#include "util/string2.h"	26	#include "util/string2.h"
26	#include "util/thread-stack.h"	27	#include "util/thread-stack.h"
27	#include "util/time-utils.h"	28	#include "util/time-utils.h"
@@ -90,6 +91,7 @@ enum perf_output_field {
90	PERF_OUTPUT_SYNTH = 1U << 25,	91	PERF_OUTPUT_SYNTH = 1U << 25,
91	PERF_OUTPUT_PHYS_ADDR = 1U << 26,	92	PERF_OUTPUT_PHYS_ADDR = 1U << 26,
92	PERF_OUTPUT_UREGS = 1U << 27,	93	PERF_OUTPUT_UREGS = 1U << 27,
		94	PERF_OUTPUT_METRIC = 1U << 28,
93	};	95	};
94		96
95	struct output_option {	97	struct output_option {
@@ -124,6 +126,7 @@ struct output_option {
124	{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},	126	{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
125	{.str = "synth", .field = PERF_OUTPUT_SYNTH},	127	{.str = "synth", .field = PERF_OUTPUT_SYNTH},
126	{.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},	128	{.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
		129	{.str = "metric", .field = PERF_OUTPUT_METRIC},
127	};	130	};
128		131
129	enum {	132	enum {
@@ -215,12 +218,20 @@ struct perf_evsel_script {
215	char *filename;	218	char *filename;
216	FILE *fp;	219	FILE *fp;
217	u64 samples;	220	u64 samples;
		221	/* For metric output */
		222	u64 val;
		223	int gnum;
218	};	224	};
219		225
		226	static inline struct perf_evsel_script *evsel_script(struct perf_evsel *evsel)
		227	{
		228	return (struct perf_evsel_script *)evsel->priv;
		229	}
		230
220	static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel,	231	static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel,
221	struct perf_data *data)	232	struct perf_data *data)
222	{	233	{
223	struct perf_evsel_script *es = malloc(sizeof(*es));	234	struct perf_evsel_script *es = zalloc(sizeof(*es));
224		235
225	if (es != NULL) {	236	if (es != NULL) {
226	if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0)	237	if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0)
@@ -228,7 +239,6 @@ static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel
228	es->fp = fopen(es->filename, "w");	239	es->fp = fopen(es->filename, "w");
229	if (es->fp == NULL)	240	if (es->fp == NULL)
230	goto out_free_filename;	241	goto out_free_filename;
231	es->samples = 0;
232	}	242	}
233		243
234	return es;	244	return es;
@@ -1472,6 +1482,86 @@ static int data_src__fprintf(u64 data_src, FILE *fp)
1472	return fprintf(fp, "%-*s", maxlen, out);	1482	return fprintf(fp, "%-*s", maxlen, out);
1473	}	1483	}
1474		1484
		1485	struct metric_ctx {
		1486	struct perf_sample *sample;
		1487	struct thread *thread;
		1488	struct perf_evsel *evsel;
		1489	FILE *fp;
		1490	};
		1491
		1492	static void script_print_metric(void *ctx, const char *color,
		1493	const char *fmt,
		1494	const char *unit, double val)
		1495	{
		1496	struct metric_ctx *mctx = ctx;
		1497
		1498	if (!fmt)
		1499	return;
		1500	perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
		1501	mctx->fp);
		1502	fputs("\tmetric: ", mctx->fp);
		1503	if (color)
		1504	color_fprintf(mctx->fp, color, fmt, val);
		1505	else
		1506	printf(fmt, val);
		1507	fprintf(mctx->fp, " %s\n", unit);
		1508	}
		1509
		1510	static void script_new_line(void *ctx)
		1511	{
		1512	struct metric_ctx *mctx = ctx;
		1513
		1514	perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
		1515	mctx->fp);
		1516	fputs("\tmetric: ", mctx->fp);
		1517	}
		1518
		1519	static void perf_sample__fprint_metric(struct perf_script *script,
		1520	struct thread *thread,
		1521	struct perf_evsel *evsel,
		1522	struct perf_sample *sample,
		1523	FILE *fp)
		1524	{
		1525	struct perf_stat_output_ctx ctx = {
		1526	.print_metric = script_print_metric,
		1527	.new_line = script_new_line,
		1528	.ctx = &(struct metric_ctx) {
		1529	.sample = sample,
		1530	.thread = thread,
		1531	.evsel = evsel,
		1532	.fp = fp,
		1533	},
		1534	.force_header = false,
		1535	};
		1536	struct perf_evsel *ev2;
		1537	static bool init;
		1538	u64 val;
		1539
		1540	if (!init) {
		1541	perf_stat__init_shadow_stats();
		1542	init = true;
		1543	}
		1544	if (!evsel->stats)
		1545	perf_evlist__alloc_stats(script->session->evlist, false);
		1546	if (evsel_script(evsel->leader)->gnum++ == 0)
		1547	perf_stat__reset_shadow_stats();
		1548	val = sample->period * evsel->scale;
		1549	perf_stat__update_shadow_stats(evsel,
		1550	val,
		1551	sample->cpu);
		1552	evsel_script(evsel)->val = val;
		1553	if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) {
		1554	for_each_group_member (ev2, evsel->leader) {
		1555	perf_stat__print_shadow_stats(ev2,
		1556	evsel_script(ev2)->val,
		1557	sample->cpu,
		1558	&ctx,
		1559	NULL);
		1560	}
		1561	evsel_script(evsel->leader)->gnum = 0;
		1562	}
		1563	}
		1564
1475	static void process_event(struct perf_script *script,	1565	static void process_event(struct perf_script *script,
1476	struct perf_sample *sample, struct perf_evsel *evsel,	1566	struct perf_sample *sample, struct perf_evsel *evsel,
1477	struct addr_location *al,	1567	struct addr_location *al,
@@ -1559,6 +1649,9 @@ static void process_event(struct perf_script *script,
1559	if (PRINT_FIELD(PHYS_ADDR))	1649	if (PRINT_FIELD(PHYS_ADDR))
1560	fprintf(fp, "%16" PRIx64, sample->phys_addr);	1650	fprintf(fp, "%16" PRIx64, sample->phys_addr);
1561	fprintf(fp, "\n");	1651	fprintf(fp, "\n");
		1652
		1653	if (PRINT_FIELD(METRIC))
		1654	perf_sample__fprint_metric(script, thread, evsel, sample, fp);
1562	}	1655	}
1563		1656
1564	static struct scripting_ops *scripting_ops;	1657	static struct scripting_ops *scripting_ops;