diff options
author | Jiri Olsa <jolsa@kernel.org> | 2016-06-03 09:40:28 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2016-10-19 12:18:31 -0400 |
commit | 1e181b92a2da30ba1f80c61a41cfb9ef02f43b79 (patch) | |
tree | c76824c0bd6b2ebd03554b1accae118f5846b141 /tools/perf/builtin-c2c.c | |
parent | 51dedaa446532da821fb1160fc4865ca37a54df3 (diff) |
perf c2c report: Add 'node' sort key
It is to be displayed in the single cacheline output:
node
It displays node hits related to cacheline accesses.
The node field comes in 3 flavors:
- node IDs separated by ','
- node IDs with stats for each ID, in following format:
Node{cpus %hitms %stores}
- node IDs with list of affected CPUs in following format:
Node{cpu list}
User can switch the flavor with -N option (-NN,-NNN).
It will be available in TUI to switch this with 'n' key.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-6742e6g0r7n63y5wc4rrgxx5@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/builtin-c2c.c')
-rw-r--r-- | tools/perf/builtin-c2c.c | 219 |
1 files changed, 219 insertions, 0 deletions
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index ffd41744886e..ca2f37479e6d 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c | |||
@@ -1,6 +1,7 @@ | |||
1 | #include <linux/compiler.h> | 1 | #include <linux/compiler.h> |
2 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
3 | #include <linux/stringify.h> | 3 | #include <linux/stringify.h> |
4 | #include <asm/bug.h> | ||
4 | #include "util.h" | 5 | #include "util.h" |
5 | #include "debug.h" | 6 | #include "debug.h" |
6 | #include "builtin.h" | 7 | #include "builtin.h" |
@@ -22,6 +23,8 @@ struct c2c_hists { | |||
22 | struct c2c_hist_entry { | 23 | struct c2c_hist_entry { |
23 | struct c2c_hists *hists; | 24 | struct c2c_hists *hists; |
24 | struct c2c_stats stats; | 25 | struct c2c_stats stats; |
26 | unsigned long *cpuset; | ||
27 | struct c2c_stats *node_stats; | ||
25 | /* | 28 | /* |
26 | * must be at the end, | 29 | * must be at the end, |
27 | * because of its callchain dynamic entry | 30 | * because of its callchain dynamic entry |
@@ -32,6 +35,12 @@ struct c2c_hist_entry { | |||
32 | struct perf_c2c { | 35 | struct perf_c2c { |
33 | struct perf_tool tool; | 36 | struct perf_tool tool; |
34 | struct c2c_hists hists; | 37 | struct c2c_hists hists; |
38 | |||
39 | unsigned long **nodes; | ||
40 | int nodes_cnt; | ||
41 | int cpus_cnt; | ||
42 | int *cpu2node; | ||
43 | int node_info; | ||
35 | }; | 44 | }; |
36 | 45 | ||
37 | static struct perf_c2c c2c; | 46 | static struct perf_c2c c2c; |
@@ -44,6 +53,14 @@ static void *c2c_he_zalloc(size_t size) | |||
44 | if (!c2c_he) | 53 | if (!c2c_he) |
45 | return NULL; | 54 | return NULL; |
46 | 55 | ||
56 | c2c_he->cpuset = bitmap_alloc(c2c.cpus_cnt); | ||
57 | if (!c2c_he->cpuset) | ||
58 | return NULL; | ||
59 | |||
60 | c2c_he->node_stats = zalloc(c2c.nodes_cnt * sizeof(*c2c_he->node_stats)); | ||
61 | if (!c2c_he->node_stats) | ||
62 | return NULL; | ||
63 | |||
47 | return &c2c_he->he; | 64 | return &c2c_he->he; |
48 | } | 65 | } |
49 | 66 | ||
@@ -57,6 +74,8 @@ static void c2c_he_free(void *he) | |||
57 | free(c2c_he->hists); | 74 | free(c2c_he->hists); |
58 | } | 75 | } |
59 | 76 | ||
77 | free(c2c_he->cpuset); | ||
78 | free(c2c_he->node_stats); | ||
60 | free(c2c_he); | 79 | free(c2c_he); |
61 | } | 80 | } |
62 | 81 | ||
@@ -93,6 +112,16 @@ he__get_c2c_hists(struct hist_entry *he, | |||
93 | return hists; | 112 | return hists; |
94 | } | 113 | } |
95 | 114 | ||
115 | static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he, | ||
116 | struct perf_sample *sample) | ||
117 | { | ||
118 | if (WARN_ONCE(sample->cpu == (unsigned int) -1, | ||
119 | "WARNING: no sample cpu value")) | ||
120 | return; | ||
121 | |||
122 | set_bit(sample->cpu, c2c_he->cpuset); | ||
123 | } | ||
124 | |||
96 | static int process_sample_event(struct perf_tool *tool __maybe_unused, | 125 | static int process_sample_event(struct perf_tool *tool __maybe_unused, |
97 | union perf_event *event, | 126 | union perf_event *event, |
98 | struct perf_sample *sample, | 127 | struct perf_sample *sample, |
@@ -133,10 +162,23 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, | |||
133 | c2c_add_stats(&c2c_he->stats, &stats); | 162 | c2c_add_stats(&c2c_he->stats, &stats); |
134 | c2c_add_stats(&c2c_hists->stats, &stats); | 163 | c2c_add_stats(&c2c_hists->stats, &stats); |
135 | 164 | ||
165 | c2c_he__set_cpu(c2c_he, sample); | ||
166 | |||
136 | hists__inc_nr_samples(&c2c_hists->hists, he->filtered); | 167 | hists__inc_nr_samples(&c2c_hists->hists, he->filtered); |
137 | ret = hist_entry__append_callchain(he, sample); | 168 | ret = hist_entry__append_callchain(he, sample); |
138 | 169 | ||
139 | if (!ret) { | 170 | if (!ret) { |
171 | /* | ||
172 | * There's already been warning about missing | ||
173 | * sample's cpu value. Let's account all to | ||
174 | * node 0 in this case, without any further | ||
175 | * warning. | ||
176 | * | ||
177 | * Doing node stats only for single callchain data. | ||
178 | */ | ||
179 | int cpu = sample->cpu == (unsigned int) -1 ? 0 : sample->cpu; | ||
180 | int node = c2c.cpu2node[cpu]; | ||
181 | |||
140 | mi = mi_dup; | 182 | mi = mi_dup; |
141 | 183 | ||
142 | mi_dup = memdup(mi, sizeof(*mi)); | 184 | mi_dup = memdup(mi, sizeof(*mi)); |
@@ -156,6 +198,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, | |||
156 | c2c_he = container_of(he, struct c2c_hist_entry, he); | 198 | c2c_he = container_of(he, struct c2c_hist_entry, he); |
157 | c2c_add_stats(&c2c_he->stats, &stats); | 199 | c2c_add_stats(&c2c_he->stats, &stats); |
158 | c2c_add_stats(&c2c_hists->stats, &stats); | 200 | c2c_add_stats(&c2c_hists->stats, &stats); |
201 | c2c_add_stats(&c2c_he->node_stats[node], &stats); | ||
202 | |||
203 | c2c_he__set_cpu(c2c_he, sample); | ||
159 | 204 | ||
160 | hists__inc_nr_samples(&c2c_hists->hists, he->filtered); | 205 | hists__inc_nr_samples(&c2c_hists->hists, he->filtered); |
161 | ret = hist_entry__append_callchain(he, sample); | 206 | ret = hist_entry__append_callchain(he, sample); |
@@ -826,6 +871,97 @@ pid_cmp(struct perf_hpp_fmt *fmt __maybe_unused, | |||
826 | return left->thread->pid_ - right->thread->pid_; | 871 | return left->thread->pid_ - right->thread->pid_; |
827 | } | 872 | } |
828 | 873 | ||
874 | static int64_t | ||
875 | empty_cmp(struct perf_hpp_fmt *fmt __maybe_unused, | ||
876 | struct hist_entry *left __maybe_unused, | ||
877 | struct hist_entry *right __maybe_unused) | ||
878 | { | ||
879 | return 0; | ||
880 | } | ||
881 | |||
882 | static int | ||
883 | node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, | ||
884 | struct hist_entry *he) | ||
885 | { | ||
886 | struct c2c_hist_entry *c2c_he; | ||
887 | bool first = true; | ||
888 | int node; | ||
889 | int ret = 0; | ||
890 | |||
891 | c2c_he = container_of(he, struct c2c_hist_entry, he); | ||
892 | |||
893 | for (node = 0; node < c2c.nodes_cnt; node++) { | ||
894 | DECLARE_BITMAP(set, c2c.cpus_cnt); | ||
895 | |||
896 | bitmap_zero(set, c2c.cpus_cnt); | ||
897 | bitmap_and(set, c2c_he->cpuset, c2c.nodes[node], c2c.cpus_cnt); | ||
898 | |||
899 | if (!bitmap_weight(set, c2c.cpus_cnt)) { | ||
900 | if (c2c.node_info == 1) { | ||
901 | ret = scnprintf(hpp->buf, hpp->size, "%21s", " "); | ||
902 | advance_hpp(hpp, ret); | ||
903 | } | ||
904 | continue; | ||
905 | } | ||
906 | |||
907 | if (!first) { | ||
908 | ret = scnprintf(hpp->buf, hpp->size, " "); | ||
909 | advance_hpp(hpp, ret); | ||
910 | } | ||
911 | |||
912 | switch (c2c.node_info) { | ||
913 | case 0: | ||
914 | ret = scnprintf(hpp->buf, hpp->size, "%2d", node); | ||
915 | advance_hpp(hpp, ret); | ||
916 | break; | ||
917 | case 1: | ||
918 | { | ||
919 | int num = bitmap_weight(c2c_he->cpuset, c2c.cpus_cnt); | ||
920 | struct c2c_stats *stats = &c2c_he->node_stats[node]; | ||
921 | |||
922 | ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num); | ||
923 | advance_hpp(hpp, ret); | ||
924 | |||
925 | |||
926 | if (c2c_he->stats.rmt_hitm > 0) { | ||
927 | ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ", | ||
928 | percent(stats->rmt_hitm, c2c_he->stats.rmt_hitm)); | ||
929 | } else { | ||
930 | ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a"); | ||
931 | } | ||
932 | |||
933 | advance_hpp(hpp, ret); | ||
934 | |||
935 | if (c2c_he->stats.store > 0) { | ||
936 | ret = scnprintf(hpp->buf, hpp->size, "%5.1f%%}", | ||
937 | percent(stats->store, c2c_he->stats.store)); | ||
938 | } else { | ||
939 | ret = scnprintf(hpp->buf, hpp->size, "%6s}", "n/a"); | ||
940 | } | ||
941 | |||
942 | advance_hpp(hpp, ret); | ||
943 | break; | ||
944 | } | ||
945 | case 2: | ||
946 | ret = scnprintf(hpp->buf, hpp->size, "%2d{", node); | ||
947 | advance_hpp(hpp, ret); | ||
948 | |||
949 | ret = bitmap_scnprintf(set, c2c.cpus_cnt, hpp->buf, hpp->size); | ||
950 | advance_hpp(hpp, ret); | ||
951 | |||
952 | ret = scnprintf(hpp->buf, hpp->size, "}"); | ||
953 | advance_hpp(hpp, ret); | ||
954 | break; | ||
955 | default: | ||
956 | break; | ||
957 | } | ||
958 | |||
959 | first = false; | ||
960 | } | ||
961 | |||
962 | return 0; | ||
963 | } | ||
964 | |||
829 | #define HEADER_LOW(__h) \ | 965 | #define HEADER_LOW(__h) \ |
830 | { \ | 966 | { \ |
831 | .line[1] = { \ | 967 | .line[1] = { \ |
@@ -1115,6 +1251,19 @@ static struct c2c_dimension dim_dso = { | |||
1115 | .se = &sort_dso, | 1251 | .se = &sort_dso, |
1116 | }; | 1252 | }; |
1117 | 1253 | ||
1254 | static struct c2c_header header_node[3] = { | ||
1255 | HEADER_LOW("Node"), | ||
1256 | HEADER_LOW("Node{cpus %hitms %stores}"), | ||
1257 | HEADER_LOW("Node{cpu list}"), | ||
1258 | }; | ||
1259 | |||
1260 | static struct c2c_dimension dim_node = { | ||
1261 | .name = "node", | ||
1262 | .cmp = empty_cmp, | ||
1263 | .entry = node_entry, | ||
1264 | .width = 4, | ||
1265 | }; | ||
1266 | |||
1118 | static struct c2c_dimension *dimensions[] = { | 1267 | static struct c2c_dimension *dimensions[] = { |
1119 | &dim_dcacheline, | 1268 | &dim_dcacheline, |
1120 | &dim_offset, | 1269 | &dim_offset, |
@@ -1148,6 +1297,7 @@ static struct c2c_dimension *dimensions[] = { | |||
1148 | &dim_tid, | 1297 | &dim_tid, |
1149 | &dim_symbol, | 1298 | &dim_symbol, |
1150 | &dim_dso, | 1299 | &dim_dso, |
1300 | &dim_node, | ||
1151 | NULL, | 1301 | NULL, |
1152 | }; | 1302 | }; |
1153 | 1303 | ||
@@ -1374,6 +1524,68 @@ static int resort_cl_cb(struct hist_entry *he) | |||
1374 | return 0; | 1524 | return 0; |
1375 | } | 1525 | } |
1376 | 1526 | ||
1527 | static void setup_nodes_header(void) | ||
1528 | { | ||
1529 | dim_node.header = header_node[c2c.node_info]; | ||
1530 | } | ||
1531 | |||
1532 | static int setup_nodes(struct perf_session *session) | ||
1533 | { | ||
1534 | struct numa_node *n; | ||
1535 | unsigned long **nodes; | ||
1536 | int node, cpu; | ||
1537 | int *cpu2node; | ||
1538 | |||
1539 | if (c2c.node_info > 2) | ||
1540 | c2c.node_info = 2; | ||
1541 | |||
1542 | c2c.nodes_cnt = session->header.env.nr_numa_nodes; | ||
1543 | c2c.cpus_cnt = session->header.env.nr_cpus_online; | ||
1544 | |||
1545 | n = session->header.env.numa_nodes; | ||
1546 | if (!n) | ||
1547 | return -EINVAL; | ||
1548 | |||
1549 | nodes = zalloc(sizeof(unsigned long *) * c2c.nodes_cnt); | ||
1550 | if (!nodes) | ||
1551 | return -ENOMEM; | ||
1552 | |||
1553 | c2c.nodes = nodes; | ||
1554 | |||
1555 | cpu2node = zalloc(sizeof(int) * c2c.cpus_cnt); | ||
1556 | if (!cpu2node) | ||
1557 | return -ENOMEM; | ||
1558 | |||
1559 | for (cpu = 0; cpu < c2c.cpus_cnt; cpu++) | ||
1560 | cpu2node[cpu] = -1; | ||
1561 | |||
1562 | c2c.cpu2node = cpu2node; | ||
1563 | |||
1564 | for (node = 0; node < c2c.nodes_cnt; node++) { | ||
1565 | struct cpu_map *map = n[node].map; | ||
1566 | unsigned long *set; | ||
1567 | |||
1568 | set = bitmap_alloc(c2c.cpus_cnt); | ||
1569 | if (!set) | ||
1570 | return -ENOMEM; | ||
1571 | |||
1572 | for (cpu = 0; cpu < map->nr; cpu++) { | ||
1573 | set_bit(map->map[cpu], set); | ||
1574 | |||
1575 | if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug")) | ||
1576 | return -EINVAL; | ||
1577 | |||
1578 | cpu2node[map->map[cpu]] = node; | ||
1579 | } | ||
1580 | |||
1581 | nodes[node] = set; | ||
1582 | } | ||
1583 | |||
1584 | setup_nodes_header(); | ||
1585 | return 0; | ||
1586 | } | ||
1587 | |||
1588 | |||
1377 | static int perf_c2c__report(int argc, const char **argv) | 1589 | static int perf_c2c__report(int argc, const char **argv) |
1378 | { | 1590 | { |
1379 | struct perf_session *session; | 1591 | struct perf_session *session; |
@@ -1388,6 +1600,8 @@ static int perf_c2c__report(int argc, const char **argv) | |||
1388 | "be more verbose (show counter open errors, etc)"), | 1600 | "be more verbose (show counter open errors, etc)"), |
1389 | OPT_STRING('i', "input", &input_name, "file", | 1601 | OPT_STRING('i', "input", &input_name, "file", |
1390 | "the input file to process"), | 1602 | "the input file to process"), |
1603 | OPT_INCR('N', "node-info", &c2c.node_info, | ||
1604 | "show extra node info in report (repeat for more info)"), | ||
1391 | OPT_END() | 1605 | OPT_END() |
1392 | }; | 1606 | }; |
1393 | int err = 0; | 1607 | int err = 0; |
@@ -1413,6 +1627,11 @@ static int perf_c2c__report(int argc, const char **argv) | |||
1413 | pr_debug("No memory for session\n"); | 1627 | pr_debug("No memory for session\n"); |
1414 | goto out; | 1628 | goto out; |
1415 | } | 1629 | } |
1630 | err = setup_nodes(session); | ||
1631 | if (err) { | ||
1632 | pr_err("Failed setup nodes\n"); | ||
1633 | goto out; | ||
1634 | } | ||
1416 | 1635 | ||
1417 | if (symbol__init(&session->header.env) < 0) | 1636 | if (symbol__init(&session->header.env) < 0) |
1418 | goto out_session; | 1637 | goto out_session; |