aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-c2c.c
diff options
context:
space:
mode:
author: Jiri Olsa <jolsa@kernel.org> 2016-06-03 09:40:28 -0400
committer: Arnaldo Carvalho de Melo <acme@redhat.com> 2016-10-19 12:18:31 -0400
commit: 1e181b92a2da30ba1f80c61a41cfb9ef02f43b79 (patch)
tree: c76824c0bd6b2ebd03554b1accae118f5846b141 /tools/perf/builtin-c2c.c
parent: 51dedaa446532da821fb1160fc4865ca37a54df3 (diff)
perf c2c report: Add 'node' sort key
It is to be displayed in the single cacheline output:

  node

It displays node hits related to cacheline accesses.

The node field comes in 3 flavors:
  - node IDs separated by ','
  - node IDs with stats for each ID, in the following format:
      Node{cpus %hitms %stores}
  - node IDs with a list of affected CPUs, in the following format:
      Node{cpu list}

The user can switch the flavor with the -N option (-NN, -NNN).
It will also be possible to switch it in the TUI with the 'n' key.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-6742e6g0r7n63y5wc4rrgxx5@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/builtin-c2c.c')
-rw-r--r--tools/perf/builtin-c2c.c219
1 files changed, 219 insertions, 0 deletions
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index ffd41744886e..ca2f37479e6d 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1,6 +1,7 @@
1#include <linux/compiler.h> 1#include <linux/compiler.h>
2#include <linux/kernel.h> 2#include <linux/kernel.h>
3#include <linux/stringify.h> 3#include <linux/stringify.h>
4#include <asm/bug.h>
4#include "util.h" 5#include "util.h"
5#include "debug.h" 6#include "debug.h"
6#include "builtin.h" 7#include "builtin.h"
@@ -22,6 +23,8 @@ struct c2c_hists {
22struct c2c_hist_entry { 23struct c2c_hist_entry {
23 struct c2c_hists *hists; 24 struct c2c_hists *hists;
24 struct c2c_stats stats; 25 struct c2c_stats stats;
26 unsigned long *cpuset;
27 struct c2c_stats *node_stats;
25 /* 28 /*
26 * must be at the end, 29 * must be at the end,
27 * because of its callchain dynamic entry 30 * because of its callchain dynamic entry
@@ -32,6 +35,12 @@ struct c2c_hist_entry {
32struct perf_c2c { 35struct perf_c2c {
33 struct perf_tool tool; 36 struct perf_tool tool;
34 struct c2c_hists hists; 37 struct c2c_hists hists;
38
39 unsigned long **nodes;
40 int nodes_cnt;
41 int cpus_cnt;
42 int *cpu2node;
43 int node_info;
35}; 44};
36 45
37static struct perf_c2c c2c; 46static struct perf_c2c c2c;
@@ -44,6 +53,14 @@ static void *c2c_he_zalloc(size_t size)
44 if (!c2c_he) 53 if (!c2c_he)
45 return NULL; 54 return NULL;
46 55
56 c2c_he->cpuset = bitmap_alloc(c2c.cpus_cnt);
57 if (!c2c_he->cpuset)
58 return NULL;
59
60 c2c_he->node_stats = zalloc(c2c.nodes_cnt * sizeof(*c2c_he->node_stats));
61 if (!c2c_he->node_stats)
62 return NULL;
63
47 return &c2c_he->he; 64 return &c2c_he->he;
48} 65}
49 66
@@ -57,6 +74,8 @@ static void c2c_he_free(void *he)
57 free(c2c_he->hists); 74 free(c2c_he->hists);
58 } 75 }
59 76
77 free(c2c_he->cpuset);
78 free(c2c_he->node_stats);
60 free(c2c_he); 79 free(c2c_he);
61} 80}
62 81
@@ -93,6 +112,16 @@ he__get_c2c_hists(struct hist_entry *he,
93 return hists; 112 return hists;
94} 113}
95 114
115static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he,
116 struct perf_sample *sample)
117{
118 if (WARN_ONCE(sample->cpu == (unsigned int) -1,
119 "WARNING: no sample cpu value"))
120 return;
121
122 set_bit(sample->cpu, c2c_he->cpuset);
123}
124
96static int process_sample_event(struct perf_tool *tool __maybe_unused, 125static int process_sample_event(struct perf_tool *tool __maybe_unused,
97 union perf_event *event, 126 union perf_event *event,
98 struct perf_sample *sample, 127 struct perf_sample *sample,
@@ -133,10 +162,23 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
133 c2c_add_stats(&c2c_he->stats, &stats); 162 c2c_add_stats(&c2c_he->stats, &stats);
134 c2c_add_stats(&c2c_hists->stats, &stats); 163 c2c_add_stats(&c2c_hists->stats, &stats);
135 164
165 c2c_he__set_cpu(c2c_he, sample);
166
136 hists__inc_nr_samples(&c2c_hists->hists, he->filtered); 167 hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
137 ret = hist_entry__append_callchain(he, sample); 168 ret = hist_entry__append_callchain(he, sample);
138 169
139 if (!ret) { 170 if (!ret) {
171 /*
172 * There's already been warning about missing
173 * sample's cpu value. Let's account all to
174 * node 0 in this case, without any further
175 * warning.
176 *
177 * Doing node stats only for single callchain data.
178 */
179 int cpu = sample->cpu == (unsigned int) -1 ? 0 : sample->cpu;
180 int node = c2c.cpu2node[cpu];
181
140 mi = mi_dup; 182 mi = mi_dup;
141 183
142 mi_dup = memdup(mi, sizeof(*mi)); 184 mi_dup = memdup(mi, sizeof(*mi));
@@ -156,6 +198,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
156 c2c_he = container_of(he, struct c2c_hist_entry, he); 198 c2c_he = container_of(he, struct c2c_hist_entry, he);
157 c2c_add_stats(&c2c_he->stats, &stats); 199 c2c_add_stats(&c2c_he->stats, &stats);
158 c2c_add_stats(&c2c_hists->stats, &stats); 200 c2c_add_stats(&c2c_hists->stats, &stats);
201 c2c_add_stats(&c2c_he->node_stats[node], &stats);
202
203 c2c_he__set_cpu(c2c_he, sample);
159 204
160 hists__inc_nr_samples(&c2c_hists->hists, he->filtered); 205 hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
161 ret = hist_entry__append_callchain(he, sample); 206 ret = hist_entry__append_callchain(he, sample);
@@ -826,6 +871,97 @@ pid_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
826 return left->thread->pid_ - right->thread->pid_; 871 return left->thread->pid_ - right->thread->pid_;
827} 872}
828 873
874static int64_t
875empty_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
876 struct hist_entry *left __maybe_unused,
877 struct hist_entry *right __maybe_unused)
878{
879 return 0;
880}
881
882static int
883node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
884 struct hist_entry *he)
885{
886 struct c2c_hist_entry *c2c_he;
887 bool first = true;
888 int node;
889 int ret = 0;
890
891 c2c_he = container_of(he, struct c2c_hist_entry, he);
892
893 for (node = 0; node < c2c.nodes_cnt; node++) {
894 DECLARE_BITMAP(set, c2c.cpus_cnt);
895
896 bitmap_zero(set, c2c.cpus_cnt);
897 bitmap_and(set, c2c_he->cpuset, c2c.nodes[node], c2c.cpus_cnt);
898
899 if (!bitmap_weight(set, c2c.cpus_cnt)) {
900 if (c2c.node_info == 1) {
901 ret = scnprintf(hpp->buf, hpp->size, "%21s", " ");
902 advance_hpp(hpp, ret);
903 }
904 continue;
905 }
906
907 if (!first) {
908 ret = scnprintf(hpp->buf, hpp->size, " ");
909 advance_hpp(hpp, ret);
910 }
911
912 switch (c2c.node_info) {
913 case 0:
914 ret = scnprintf(hpp->buf, hpp->size, "%2d", node);
915 advance_hpp(hpp, ret);
916 break;
917 case 1:
918 {
919 int num = bitmap_weight(c2c_he->cpuset, c2c.cpus_cnt);
920 struct c2c_stats *stats = &c2c_he->node_stats[node];
921
922 ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num);
923 advance_hpp(hpp, ret);
924
925
926 if (c2c_he->stats.rmt_hitm > 0) {
927 ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ",
928 percent(stats->rmt_hitm, c2c_he->stats.rmt_hitm));
929 } else {
930 ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a");
931 }
932
933 advance_hpp(hpp, ret);
934
935 if (c2c_he->stats.store > 0) {
936 ret = scnprintf(hpp->buf, hpp->size, "%5.1f%%}",
937 percent(stats->store, c2c_he->stats.store));
938 } else {
939 ret = scnprintf(hpp->buf, hpp->size, "%6s}", "n/a");
940 }
941
942 advance_hpp(hpp, ret);
943 break;
944 }
945 case 2:
946 ret = scnprintf(hpp->buf, hpp->size, "%2d{", node);
947 advance_hpp(hpp, ret);
948
949 ret = bitmap_scnprintf(set, c2c.cpus_cnt, hpp->buf, hpp->size);
950 advance_hpp(hpp, ret);
951
952 ret = scnprintf(hpp->buf, hpp->size, "}");
953 advance_hpp(hpp, ret);
954 break;
955 default:
956 break;
957 }
958
959 first = false;
960 }
961
962 return 0;
963}
964
829#define HEADER_LOW(__h) \ 965#define HEADER_LOW(__h) \
830 { \ 966 { \
831 .line[1] = { \ 967 .line[1] = { \
@@ -1115,6 +1251,19 @@ static struct c2c_dimension dim_dso = {
1115 .se = &sort_dso, 1251 .se = &sort_dso,
1116}; 1252};
1117 1253
1254static struct c2c_header header_node[3] = {
1255 HEADER_LOW("Node"),
1256 HEADER_LOW("Node{cpus %hitms %stores}"),
1257 HEADER_LOW("Node{cpu list}"),
1258};
1259
1260static struct c2c_dimension dim_node = {
1261 .name = "node",
1262 .cmp = empty_cmp,
1263 .entry = node_entry,
1264 .width = 4,
1265};
1266
1118static struct c2c_dimension *dimensions[] = { 1267static struct c2c_dimension *dimensions[] = {
1119 &dim_dcacheline, 1268 &dim_dcacheline,
1120 &dim_offset, 1269 &dim_offset,
@@ -1148,6 +1297,7 @@ static struct c2c_dimension *dimensions[] = {
1148 &dim_tid, 1297 &dim_tid,
1149 &dim_symbol, 1298 &dim_symbol,
1150 &dim_dso, 1299 &dim_dso,
1300 &dim_node,
1151 NULL, 1301 NULL,
1152}; 1302};
1153 1303
@@ -1374,6 +1524,68 @@ static int resort_cl_cb(struct hist_entry *he)
1374 return 0; 1524 return 0;
1375} 1525}
1376 1526
1527static void setup_nodes_header(void)
1528{
1529 dim_node.header = header_node[c2c.node_info];
1530}
1531
1532static int setup_nodes(struct perf_session *session)
1533{
1534 struct numa_node *n;
1535 unsigned long **nodes;
1536 int node, cpu;
1537 int *cpu2node;
1538
1539 if (c2c.node_info > 2)
1540 c2c.node_info = 2;
1541
1542 c2c.nodes_cnt = session->header.env.nr_numa_nodes;
1543 c2c.cpus_cnt = session->header.env.nr_cpus_online;
1544
1545 n = session->header.env.numa_nodes;
1546 if (!n)
1547 return -EINVAL;
1548
1549 nodes = zalloc(sizeof(unsigned long *) * c2c.nodes_cnt);
1550 if (!nodes)
1551 return -ENOMEM;
1552
1553 c2c.nodes = nodes;
1554
1555 cpu2node = zalloc(sizeof(int) * c2c.cpus_cnt);
1556 if (!cpu2node)
1557 return -ENOMEM;
1558
1559 for (cpu = 0; cpu < c2c.cpus_cnt; cpu++)
1560 cpu2node[cpu] = -1;
1561
1562 c2c.cpu2node = cpu2node;
1563
1564 for (node = 0; node < c2c.nodes_cnt; node++) {
1565 struct cpu_map *map = n[node].map;
1566 unsigned long *set;
1567
1568 set = bitmap_alloc(c2c.cpus_cnt);
1569 if (!set)
1570 return -ENOMEM;
1571
1572 for (cpu = 0; cpu < map->nr; cpu++) {
1573 set_bit(map->map[cpu], set);
1574
1575 if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug"))
1576 return -EINVAL;
1577
1578 cpu2node[map->map[cpu]] = node;
1579 }
1580
1581 nodes[node] = set;
1582 }
1583
1584 setup_nodes_header();
1585 return 0;
1586}
1587
1588
1377static int perf_c2c__report(int argc, const char **argv) 1589static int perf_c2c__report(int argc, const char **argv)
1378{ 1590{
1379 struct perf_session *session; 1591 struct perf_session *session;
@@ -1388,6 +1600,8 @@ static int perf_c2c__report(int argc, const char **argv)
1388 "be more verbose (show counter open errors, etc)"), 1600 "be more verbose (show counter open errors, etc)"),
1389 OPT_STRING('i', "input", &input_name, "file", 1601 OPT_STRING('i', "input", &input_name, "file",
1390 "the input file to process"), 1602 "the input file to process"),
1603 OPT_INCR('N', "node-info", &c2c.node_info,
1604 "show extra node info in report (repeat for more info)"),
1391 OPT_END() 1605 OPT_END()
1392 }; 1606 };
1393 int err = 0; 1607 int err = 0;
@@ -1413,6 +1627,11 @@ static int perf_c2c__report(int argc, const char **argv)
1413 pr_debug("No memory for session\n"); 1627 pr_debug("No memory for session\n");
1414 goto out; 1628 goto out;
1415 } 1629 }
1630 err = setup_nodes(session);
1631 if (err) {
1632 pr_err("Failed setup nodes\n");
1633 goto out;
1634 }
1416 1635
1417 if (symbol__init(&session->header.env) < 0) 1636 if (symbol__init(&session->header.env) < 0)
1418 goto out_session; 1637 goto out_session;