aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2016-10-22 04:22:05 -0400
committerIngo Molnar <mingo@kernel.org>2016-10-22 04:22:05 -0400
commite9c848928abf4cb60601e9ae7d336f0333c98bca (patch)
tree9fa3b9926f9c0b4f93495706c357221b533b213f /tools
parent10b37cb59fa1e61fec1386f324615e0e8202cd87 (diff)
parentaf09b2d35e18f1a377aaa2bc4e5ba4abb98a1088 (diff)
Merge tag 'perf-c2c-for-mingo-20161021' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull new 'perf c2c' tool from Arnaldo Carvalho de Melo: - The 'perf c2c' tool provides means for Shared Data C2C/HITM analysis. It allows you to track down cacheline contention. The tool is based on x86's load latency and precise store facility events provided by Intel CPUs. It was tested by Joe Mario and has proven to be useful, finding some cacheline contentions. Joe also wrote a blog about c2c tool with examples: https://joemario.github.io/blog/2016/09/01/c2c-blog/ Excerpt of the content on this site: --- At a high level, “perf c2c” will show you: * The cachelines where false sharing was detected. * The readers and writers to those cachelines, and the offsets where those accesses occurred. * The pid, tid, instruction addr, function name, binary object name for those readers and writers. * The source file and line number for each reader and writer. * The average load latency for the loads to those cachelines. * Which numa nodes the samples a cacheline came from and which CPUs were involved. Using perf c2c is similar to using the Linux perf tool today. First collect data with “perf c2c record” Then generate a report output with “perf c2c report” --- There one finds extensive details on using the tool, with tips on reducing the volume of samples while still capturing enough to do its job. (Dick Fowles, Joe Mario, Don Zickus, Jiri Olsa) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/Build1
-rw-r--r--tools/perf/Documentation/perf-c2c.txt282
-rw-r--r--tools/perf/builtin-c2c.c2754
-rw-r--r--tools/perf/builtin.h1
-rw-r--r--tools/perf/perf.c1
-rw-r--r--tools/perf/ui/browsers/hists.c2
-rw-r--r--tools/perf/ui/browsers/hists.h1
-rw-r--r--tools/perf/util/hist.c1
-rw-r--r--tools/perf/util/hist.h1
-rw-r--r--tools/perf/util/mem-events.c128
-rw-r--r--tools/perf/util/mem-events.h37
-rw-r--r--tools/perf/util/sort.c2
-rw-r--r--tools/perf/util/sort.h1
13 files changed, 3210 insertions, 2 deletions
diff --git a/tools/perf/Build b/tools/perf/Build
index a43fae7f439a..b12d5d1666e3 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -21,6 +21,7 @@ perf-y += builtin-inject.o
21perf-y += builtin-mem.o 21perf-y += builtin-mem.o
22perf-y += builtin-data.o 22perf-y += builtin-data.o
23perf-y += builtin-version.o 23perf-y += builtin-version.o
24perf-y += builtin-c2c.o
24 25
25perf-$(CONFIG_AUDIT) += builtin-trace.o 26perf-$(CONFIG_AUDIT) += builtin-trace.o
26perf-$(CONFIG_LIBELF) += builtin-probe.o 27perf-$(CONFIG_LIBELF) += builtin-probe.o
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
new file mode 100644
index 000000000000..21810d711f5f
--- /dev/null
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -0,0 +1,282 @@
1perf-c2c(1)
2===========
3
4NAME
5----
6perf-c2c - Shared Data C2C/HITM Analyzer.
7
8SYNOPSIS
9--------
10[verse]
11'perf c2c record' [<options>] <command>
12'perf c2c record' [<options>] -- [<record command options>] <command>
13'perf c2c report' [<options>]
14
15DESCRIPTION
16-----------
17C2C stands for Cache To Cache.
18
19The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows
20you to track down the cacheline contentions.
21
22The tool is based on x86's load latency and precise store facility events
23provided by Intel CPUs. These events provide:
24 - memory address of the access
25 - type of the access (load and store details)
26 - latency (in cycles) of the load access
27
28The c2c tool provides means to record this data and report back access details
29for cachelines with highest contention - highest number of HITM accesses.
30
31The basic workflow with this tool follows the standard record/report phase.
32User uses the record command to record events data and report command to
33display it.
34
35
36RECORD OPTIONS
37--------------
38-e::
39--event=::
40 Select the PMU event. Use 'perf mem record -e list'
41 to list available events.
42
43-v::
44--verbose::
45 Be more verbose (show counter open errors, etc).
46
47-l::
48--ldlat::
49 Configure mem-loads latency.
50
51-k::
52--all-kernel::
53 Configure all used events to run in kernel space.
54
55-u::
56--all-user::
57 Configure all used events to run in user space.
58
59REPORT OPTIONS
60--------------
61-k::
62--vmlinux=<file>::
63 vmlinux pathname
64
65-v::
66--verbose::
67 Be more verbose (show counter open errors, etc).
68
69-i::
70--input::
71 Specify the input file to process.
72
73-N::
74--node-info::
75 Show extra node info in report (see NODE INFO section)
76
77-c::
78--coalesce::
79 Specify sorting fields for single cacheline display.
80 Following fields are available: tid,pid,iaddr,dso
81 (see COALESCE)
82
83-g::
84--call-graph::
85 Setup callchains parameters.
86 Please refer to perf-report man page for details.
87
88--stdio::
89 Force the stdio output (see STDIO OUTPUT)
90
91--stats::
92 Display only statistic tables and force stdio mode.
93
94--full-symbols::
95 Display full length of symbols.
96
97--no-source::
98 Do not display Source:Line column.
99
100--show-all::
101 Show all captured HITM lines, with no regard to HITM % 0.0005 limit.
102
103C2C RECORD
104----------
105The perf c2c record command sets up options related to HITM cacheline analysis
106and calls standard perf record command.
107
108Following perf record options are configured by default:
109(check perf record man page for details)
110
111 -W,-d,--sample-cpu
112
113Unless specified otherwise with '-e' option, following events are monitored by
114default:
115
116 cpu/mem-loads,ldlat=30/P
117 cpu/mem-stores/P
118
119User can pass any 'perf record' option behind '--' mark, like (to enable
120callchains and system wide monitoring):
121
122 $ perf c2c record -- -g -a
123
124Please check RECORD OPTIONS section for specific c2c record options.
125
126C2C REPORT
127----------
128The perf c2c report command displays shared data analysis. It comes in two
129display modes: stdio and tui (default).
130
131The report command workflow is following:
132 - sort all the data based on the cacheline address
133 - store access details for each cacheline
134 - sort all cachelines based on user settings
135 - display data
136
137In general perf c2c report output consists of 2 basic views:
138 1) most expensive cachelines list
139 2) offsets details for each cacheline
140
141For each cacheline in the 1) list we display following data:
142(Both stdio and TUI modes follow the same fields output)
143
144 Index
145 - zero based index to identify the cacheline
146
147 Cacheline
148 - cacheline address (hex number)
149
150 Total records
151 - sum of all cachelines accesses
152
153 Rmt/Lcl Hitm
154 - cacheline percentage of all Remote/Local HITM accesses
155
156 LLC Load Hitm - Total, Lcl, Rmt
157 - count of Total/Local/Remote load HITMs
158
159 Store Reference - Total, L1Hit, L1Miss
160 Total - all store accesses
161 L1Hit - store accesses that hit L1
162 L1Miss - store accesses that missed L1
163
164 Load Dram
165 - count of local and remote DRAM accesses
166
167 LLC Ld Miss
168 - count of all accesses that missed LLC
169
170 Total Loads
171 - sum of all load accesses
172
173 Core Load Hit - FB, L1, L2
174 - count of load hits in FB (Fill Buffer), L1 and L2 cache
175
176 LLC Load Hit - Llc, Rmt
177 - count of LLC and Remote load hits
178
179For each offset in the 2) list we display following data:
180
181 HITM - Rmt, Lcl
182 - % of Remote/Local HITM accesses for given offset within cacheline
183
184 Store Refs - L1 Hit, L1 Miss
185 - % of store accesses that hit/missed L1 for given offset within cacheline
186
187 Data address - Offset
188 - offset address
189
190 Pid
191 - pid of the process responsible for the accesses
192
193 Tid
194 - tid of the process responsible for the accesses
195
196 Code address
197 - code address responsible for the accesses
198
199 cycles - rmt hitm, lcl hitm, load
200 - sum of cycles for given accesses - Remote/Local HITM and generic load
201
202 cpu cnt
203 - number of cpus that participated on the access
204
205 Symbol
206 - code symbol related to the 'Code address' value
207
208 Shared Object
209 - shared object name related to the 'Code address' value
210
211 Source:Line
212 - source information related to the 'Code address' value
213
214 Node
215 - nodes participating on the access (see NODE INFO section)
216
217NODE INFO
218---------
219The 'Node' field displays nodes that access given cacheline
220offset. Its output comes in 3 flavors:
221 - node IDs separated by ','
222 - node IDs with stats for each ID, in following format:
223 Node{cpus %hitms %stores}
224 - node IDs with list of affected CPUs in following format:
225 Node{cpu list}
226
227User can switch between above flavors with -N option or
228use 'n' key to interactively switch in TUI mode.
229
230COALESCE
231--------
232User can specify how to sort offsets for cacheline.
233
234Following fields are available and govern the final
235output fields set for cacheline offsets output:
236
237 tid - coalesced by process TIDs
238 pid - coalesced by process PIDs
239 iaddr - coalesced by code address, following fields are displayed:
240 Code address, Code symbol, Shared Object, Source line
241 dso - coalesced by shared object
242
243By default the coalescing is setup with 'pid,tid,iaddr'.
244
245STDIO OUTPUT
246------------
247The stdio output displays data on standard output.
248
249Following tables are displayed:
250 Trace Event Information
251 - overall statistics of memory accesses
252
253 Global Shared Cache Line Event Information
254 - overall statistics on shared cachelines
255
256 Shared Data Cache Line Table
257 - list of most expensive cachelines
258
259 Shared Cache Line Distribution Pareto
260 - list of all accessed offsets for each cacheline
261
262TUI OUTPUT
263----------
264The TUI output provides interactive interface to navigate
265through cachelines list and to display offset details.
266
267For details please refer to the help window by pressing '?' key.
268
269CREDITS
270-------
271Although Don Zickus, Dick Fowles and Joe Mario worked together
272to get this implemented, we got lots of early help from Arnaldo
273Carvalho de Melo, Stephane Eranian, Jiri Olsa and Andi Kleen.
274
275C2C BLOG
276--------
277Check Joe's blog on c2c tool for detailed use case explanation:
278 https://joemario.github.io/blog/2016/09/01/c2c-blog/
279
280SEE ALSO
281--------
282linkperf:perf-record[1], linkperf:perf-mem[1]
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
new file mode 100644
index 000000000000..c6d0dda594d9
--- /dev/null
+++ b/tools/perf/builtin-c2c.c
@@ -0,0 +1,2754 @@
1/*
2 * This is a rewrite of the original c2c tool introduced here:
3 * http://lwn.net/Articles/588866/
4 *
5 * The original tool was changed to fit in current perf state.
6 *
7 * Original authors:
8 * Don Zickus <dzickus@redhat.com>
9 * Dick Fowles <fowles@inreach.com>
10 * Joe Mario <jmario@redhat.com>
11 */
12#include <linux/compiler.h>
13#include <linux/kernel.h>
14#include <linux/stringify.h>
15#include <asm/bug.h>
16#include "util.h"
17#include "debug.h"
18#include "builtin.h"
19#include <subcmd/parse-options.h>
20#include "mem-events.h"
21#include "session.h"
22#include "hist.h"
23#include "sort.h"
24#include "tool.h"
25#include "data.h"
26#include "sort.h"
27#include "evlist.h"
28#include "evsel.h"
29#include <asm/bug.h>
30#include "ui/browsers/hists.h"
31#include "evlist.h"
32
33struct c2c_hists {
34 struct hists hists;
35 struct perf_hpp_list list;
36 struct c2c_stats stats;
37};
38
39struct compute_stats {
40 struct stats lcl_hitm;
41 struct stats rmt_hitm;
42 struct stats load;
43};
44
45struct c2c_hist_entry {
46 struct c2c_hists *hists;
47 struct c2c_stats stats;
48 unsigned long *cpuset;
49 struct c2c_stats *node_stats;
50 unsigned int cacheline_idx;
51
52 struct compute_stats cstats;
53
54 /*
55 * must be at the end,
56 * because of its callchain dynamic entry
57 */
58 struct hist_entry he;
59};
60
61static char const *coalesce_default = "pid,tid,iaddr";
62
63struct perf_c2c {
64 struct perf_tool tool;
65 struct c2c_hists hists;
66
67 unsigned long **nodes;
68 int nodes_cnt;
69 int cpus_cnt;
70 int *cpu2node;
71 int node_info;
72
73 bool show_src;
74 bool show_all;
75 bool use_stdio;
76 bool stats_only;
77 bool symbol_full;
78
79 /* HITM shared clines stats */
80 struct c2c_stats hitm_stats;
81 int shared_clines;
82
83 int display;
84
85 const char *coalesce;
86 char *cl_sort;
87 char *cl_resort;
88 char *cl_output;
89};
90
/* Values of perf_c2c::display: which HITM kind percent_hitm() reports. */
enum {
	DISPLAY_LCL,	/* local HITMs */
	DISPLAY_RMT,	/* remote HITMs */
};
95
96static struct perf_c2c c2c;
97
98static void *c2c_he_zalloc(size_t size)
99{
100 struct c2c_hist_entry *c2c_he;
101
102 c2c_he = zalloc(size + sizeof(*c2c_he));
103 if (!c2c_he)
104 return NULL;
105
106 c2c_he->cpuset = bitmap_alloc(c2c.cpus_cnt);
107 if (!c2c_he->cpuset)
108 return NULL;
109
110 c2c_he->node_stats = zalloc(c2c.nodes_cnt * sizeof(*c2c_he->node_stats));
111 if (!c2c_he->node_stats)
112 return NULL;
113
114 init_stats(&c2c_he->cstats.lcl_hitm);
115 init_stats(&c2c_he->cstats.rmt_hitm);
116 init_stats(&c2c_he->cstats.load);
117
118 return &c2c_he->he;
119}
120
121static void c2c_he_free(void *he)
122{
123 struct c2c_hist_entry *c2c_he;
124
125 c2c_he = container_of(he, struct c2c_hist_entry, he);
126 if (c2c_he->hists) {
127 hists__delete_entries(&c2c_he->hists->hists);
128 free(c2c_he->hists);
129 }
130
131 free(c2c_he->cpuset);
132 free(c2c_he->node_stats);
133 free(c2c_he);
134}
135
136static struct hist_entry_ops c2c_entry_ops = {
137 .new = c2c_he_zalloc,
138 .free = c2c_he_free,
139};
140
141static int c2c_hists__init(struct c2c_hists *hists,
142 const char *sort,
143 int nr_header_lines);
144
145static struct c2c_hists*
146he__get_c2c_hists(struct hist_entry *he,
147 const char *sort,
148 int nr_header_lines)
149{
150 struct c2c_hist_entry *c2c_he;
151 struct c2c_hists *hists;
152 int ret;
153
154 c2c_he = container_of(he, struct c2c_hist_entry, he);
155 if (c2c_he->hists)
156 return c2c_he->hists;
157
158 hists = c2c_he->hists = zalloc(sizeof(*hists));
159 if (!hists)
160 return NULL;
161
162 ret = c2c_hists__init(hists, sort, nr_header_lines);
163 if (ret) {
164 free(hists);
165 return NULL;
166 }
167
168 return hists;
169}
170
171static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he,
172 struct perf_sample *sample)
173{
174 if (WARN_ONCE(sample->cpu == (unsigned int) -1,
175 "WARNING: no sample cpu value"))
176 return;
177
178 set_bit(sample->cpu, c2c_he->cpuset);
179}
180
181static void compute_stats(struct c2c_hist_entry *c2c_he,
182 struct c2c_stats *stats,
183 u64 weight)
184{
185 struct compute_stats *cstats = &c2c_he->cstats;
186
187 if (stats->rmt_hitm)
188 update_stats(&cstats->rmt_hitm, weight);
189 else if (stats->lcl_hitm)
190 update_stats(&cstats->lcl_hitm, weight);
191 else if (stats->load)
192 update_stats(&cstats->load, weight);
193}
194
195static int process_sample_event(struct perf_tool *tool __maybe_unused,
196 union perf_event *event,
197 struct perf_sample *sample,
198 struct perf_evsel *evsel __maybe_unused,
199 struct machine *machine)
200{
201 struct c2c_hists *c2c_hists = &c2c.hists;
202 struct c2c_hist_entry *c2c_he;
203 struct c2c_stats stats = { .nr_entries = 0, };
204 struct hist_entry *he;
205 struct addr_location al;
206 struct mem_info *mi, *mi_dup;
207 int ret;
208
209 if (machine__resolve(machine, &al, sample) < 0) {
210 pr_debug("problem processing %d event, skipping it.\n",
211 event->header.type);
212 return -1;
213 }
214
215 ret = sample__resolve_callchain(sample, &callchain_cursor, NULL,
216 evsel, &al, sysctl_perf_event_max_stack);
217 if (ret)
218 goto out;
219
220 mi = sample__resolve_mem(sample, &al);
221 if (mi == NULL)
222 return -ENOMEM;
223
224 mi_dup = memdup(mi, sizeof(*mi));
225 if (!mi_dup)
226 goto free_mi;
227
228 c2c_decode_stats(&stats, mi);
229
230 he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops,
231 &al, NULL, NULL, mi,
232 sample, true);
233 if (he == NULL)
234 goto free_mi_dup;
235
236 c2c_he = container_of(he, struct c2c_hist_entry, he);
237 c2c_add_stats(&c2c_he->stats, &stats);
238 c2c_add_stats(&c2c_hists->stats, &stats);
239
240 c2c_he__set_cpu(c2c_he, sample);
241
242 hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
243 ret = hist_entry__append_callchain(he, sample);
244
245 if (!ret) {
246 /*
247 * There's already been warning about missing
248 * sample's cpu value. Let's account all to
249 * node 0 in this case, without any further
250 * warning.
251 *
252 * Doing node stats only for single callchain data.
253 */
254 int cpu = sample->cpu == (unsigned int) -1 ? 0 : sample->cpu;
255 int node = c2c.cpu2node[cpu];
256
257 mi = mi_dup;
258
259 mi_dup = memdup(mi, sizeof(*mi));
260 if (!mi_dup)
261 goto free_mi;
262
263 c2c_hists = he__get_c2c_hists(he, c2c.cl_sort, 2);
264 if (!c2c_hists)
265 goto free_mi_dup;
266
267 he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops,
268 &al, NULL, NULL, mi,
269 sample, true);
270 if (he == NULL)
271 goto free_mi_dup;
272
273 c2c_he = container_of(he, struct c2c_hist_entry, he);
274 c2c_add_stats(&c2c_he->stats, &stats);
275 c2c_add_stats(&c2c_hists->stats, &stats);
276 c2c_add_stats(&c2c_he->node_stats[node], &stats);
277
278 compute_stats(c2c_he, &stats, sample->weight);
279
280 c2c_he__set_cpu(c2c_he, sample);
281
282 hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
283 ret = hist_entry__append_callchain(he, sample);
284 }
285
286out:
287 addr_location__put(&al);
288 return ret;
289
290free_mi_dup:
291 free(mi_dup);
292free_mi:
293 free(mi);
294 ret = -ENOMEM;
295 goto out;
296}
297
298static struct perf_c2c c2c = {
299 .tool = {
300 .sample = process_sample_event,
301 .mmap = perf_event__process_mmap,
302 .mmap2 = perf_event__process_mmap2,
303 .comm = perf_event__process_comm,
304 .exit = perf_event__process_exit,
305 .fork = perf_event__process_fork,
306 .lost = perf_event__process_lost,
307 .ordered_events = true,
308 .ordering_requires_timestamps = true,
309 },
310};
311
312static const char * const c2c_usage[] = {
313 "perf c2c {record|report}",
314 NULL
315};
316
317static const char * const __usage_report[] = {
318 "perf c2c report",
319 NULL
320};
321
322static const char * const *report_c2c_usage = __usage_report;
323
/* Maximum number of header lines a dimension can define. */
#define C2C_HEADER_MAX 2

/* Per-line header definition of a dimension (column). */
struct c2c_header {
	struct {
		const char	*text;	/* header text for the line, may be NULL */
		int		span;	/* loaded into the caller's span counter by c2c_header() */
	} line[C2C_HEADER_MAX];
};
332
333struct c2c_dimension {
334 struct c2c_header header;
335 const char *name;
336 int width;
337 struct sort_entry *se;
338
339 int64_t (*cmp)(struct perf_hpp_fmt *fmt,
340 struct hist_entry *, struct hist_entry *);
341 int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
342 struct hist_entry *he);
343 int (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
344 struct hist_entry *he);
345};
346
347struct c2c_fmt {
348 struct perf_hpp_fmt fmt;
349 struct c2c_dimension *dim;
350};
351
352#define SYMBOL_WIDTH 30
353
354static struct c2c_dimension dim_symbol;
355static struct c2c_dimension dim_srcline;
356
357static int symbol_width(struct hists *hists, struct sort_entry *se)
358{
359 int width = hists__col_len(hists, se->se_width_idx);
360
361 if (!c2c.symbol_full)
362 width = MIN(width, SYMBOL_WIDTH);
363
364 return width;
365}
366
367static int c2c_width(struct perf_hpp_fmt *fmt,
368 struct perf_hpp *hpp __maybe_unused,
369 struct hists *hists __maybe_unused)
370{
371 struct c2c_fmt *c2c_fmt;
372 struct c2c_dimension *dim;
373
374 c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
375 dim = c2c_fmt->dim;
376
377 if (dim == &dim_symbol || dim == &dim_srcline)
378 return symbol_width(hists, dim->se);
379
380 return dim->se ? hists__col_len(hists, dim->se->se_width_idx) :
381 c2c_fmt->dim->width;
382}
383
384static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
385 struct hists *hists, int line, int *span)
386{
387 struct perf_hpp_list *hpp_list = hists->hpp_list;
388 struct c2c_fmt *c2c_fmt;
389 struct c2c_dimension *dim;
390 const char *text = NULL;
391 int width = c2c_width(fmt, hpp, hists);
392
393 c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
394 dim = c2c_fmt->dim;
395
396 if (dim->se) {
397 text = dim->header.line[line].text;
398 /* Use the last line from sort_entry if not defined. */
399 if (!text && (line == hpp_list->nr_header_lines - 1))
400 text = dim->se->se_header;
401 } else {
402 text = dim->header.line[line].text;
403
404 if (*span) {
405 (*span)--;
406 return 0;
407 } else {
408 *span = dim->header.line[line].span;
409 }
410 }
411
412 if (text == NULL)
413 text = "";
414
415 return scnprintf(hpp->buf, hpp->size, "%*s", width, text);
416}
417
/*
 * Format the 64-bit value __v as "0x<hex>" into the char array __s and
 * evaluate to __s.  __s must be an array, since its size is taken with
 * sizeof().
 */
#define HEX_STR(__s, __v)					\
({								\
	scnprintf((__s), sizeof(__s), "0x%" PRIx64, (__v));	\
	(__s);							\
})
423
424static int64_t
425dcacheline_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
426 struct hist_entry *left, struct hist_entry *right)
427{
428 return sort__dcacheline_cmp(left, right);
429}
430
431static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
432 struct hist_entry *he)
433{
434 uint64_t addr = 0;
435 int width = c2c_width(fmt, hpp, he->hists);
436 char buf[20];
437
438 if (he->mem_info)
439 addr = cl_address(he->mem_info->daddr.addr);
440
441 return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr));
442}
443
444static int offset_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
445 struct hist_entry *he)
446{
447 uint64_t addr = 0;
448 int width = c2c_width(fmt, hpp, he->hists);
449 char buf[20];
450
451 if (he->mem_info)
452 addr = cl_offset(he->mem_info->daddr.al_addr);
453
454 return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr));
455}
456
457static int64_t
458offset_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
459 struct hist_entry *left, struct hist_entry *right)
460{
461 uint64_t l = 0, r = 0;
462
463 if (left->mem_info)
464 l = cl_offset(left->mem_info->daddr.addr);
465 if (right->mem_info)
466 r = cl_offset(right->mem_info->daddr.addr);
467
468 return (int64_t)(r - l);
469}
470
471static int
472iaddr_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
473 struct hist_entry *he)
474{
475 uint64_t addr = 0;
476 int width = c2c_width(fmt, hpp, he->hists);
477 char buf[20];
478
479 if (he->mem_info)
480 addr = he->mem_info->iaddr.addr;
481
482 return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr));
483}
484
485static int64_t
486iaddr_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
487 struct hist_entry *left, struct hist_entry *right)
488{
489 return sort__iaddr_cmp(left, right);
490}
491
492static int
493tot_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
494 struct hist_entry *he)
495{
496 struct c2c_hist_entry *c2c_he;
497 int width = c2c_width(fmt, hpp, he->hists);
498 unsigned int tot_hitm;
499
500 c2c_he = container_of(he, struct c2c_hist_entry, he);
501 tot_hitm = c2c_he->stats.lcl_hitm + c2c_he->stats.rmt_hitm;
502
503 return scnprintf(hpp->buf, hpp->size, "%*u", width, tot_hitm);
504}
505
506static int64_t
507tot_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
508 struct hist_entry *left, struct hist_entry *right)
509{
510 struct c2c_hist_entry *c2c_left;
511 struct c2c_hist_entry *c2c_right;
512 unsigned int tot_hitm_left;
513 unsigned int tot_hitm_right;
514
515 c2c_left = container_of(left, struct c2c_hist_entry, he);
516 c2c_right = container_of(right, struct c2c_hist_entry, he);
517
518 tot_hitm_left = c2c_left->stats.lcl_hitm + c2c_left->stats.rmt_hitm;
519 tot_hitm_right = c2c_right->stats.lcl_hitm + c2c_right->stats.rmt_hitm;
520
521 return tot_hitm_left - tot_hitm_right;
522}
523
/*
 * Generate <field>_entry(): print the c2c stats field __f of an entry as
 * an unsigned number, right aligned to the column width.
 */
#define STAT_FN_ENTRY(__f)						\
static int								\
__f ## _entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,		\
	      struct hist_entry *he)					\
{									\
	int width = c2c_width(fmt, hpp, he->hists);			\
	struct c2c_hist_entry *entry =					\
		container_of(he, struct c2c_hist_entry, he);		\
									\
	return scnprintf(hpp->buf, hpp->size, "%*u", width,		\
			 entry->stats.__f);				\
}
536
/*
 * Generate <field>_cmp(): compare two entries by the c2c stats field __f.
 *
 * Both values are widened to int64_t before the subtraction, so the
 * result is negative when left is smaller than right.  The fields are
 * printed with "%u" by STAT_FN_ENTRY, i.e. they are presumably unsigned
 * int, and subtracting those directly would wrap around instead.
 */
#define STAT_FN_CMP(__f)						\
static int64_t								\
__f ## _cmp(struct perf_hpp_fmt *fmt __maybe_unused,			\
	    struct hist_entry *left, struct hist_entry *right)		\
{									\
	struct c2c_hist_entry *c2c_left, *c2c_right;			\
									\
	c2c_left  = container_of(left, struct c2c_hist_entry, he);	\
	c2c_right = container_of(right, struct c2c_hist_entry, he);	\
	return (int64_t) c2c_left->stats.__f -				\
	       (int64_t) c2c_right->stats.__f;				\
}
548
549#define STAT_FN(__f) \
550 STAT_FN_ENTRY(__f) \
551 STAT_FN_CMP(__f)
552
553STAT_FN(rmt_hitm)
554STAT_FN(lcl_hitm)
555STAT_FN(store)
556STAT_FN(st_l1hit)
557STAT_FN(st_l1miss)
558STAT_FN(ld_fbhit)
559STAT_FN(ld_l1hit)
560STAT_FN(ld_l2hit)
561STAT_FN(ld_llchit)
562STAT_FN(rmt_hit)
563
564static uint64_t llc_miss(struct c2c_stats *stats)
565{
566 uint64_t llcmiss;
567
568 llcmiss = stats->lcl_dram +
569 stats->rmt_dram +
570 stats->rmt_hitm +
571 stats->rmt_hit;
572
573 return llcmiss;
574}
575
576static int
577ld_llcmiss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
578 struct hist_entry *he)
579{
580 struct c2c_hist_entry *c2c_he;
581 int width = c2c_width(fmt, hpp, he->hists);
582
583 c2c_he = container_of(he, struct c2c_hist_entry, he);
584
585 return scnprintf(hpp->buf, hpp->size, "%*lu", width,
586 llc_miss(&c2c_he->stats));
587}
588
589static int64_t
590ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
591 struct hist_entry *left, struct hist_entry *right)
592{
593 struct c2c_hist_entry *c2c_left;
594 struct c2c_hist_entry *c2c_right;
595
596 c2c_left = container_of(left, struct c2c_hist_entry, he);
597 c2c_right = container_of(right, struct c2c_hist_entry, he);
598
599 return llc_miss(&c2c_left->stats) - llc_miss(&c2c_right->stats);
600}
601
602static uint64_t total_records(struct c2c_stats *stats)
603{
604 uint64_t lclmiss, ldcnt, total;
605
606 lclmiss = stats->lcl_dram +
607 stats->rmt_dram +
608 stats->rmt_hitm +
609 stats->rmt_hit;
610
611 ldcnt = lclmiss +
612 stats->ld_fbhit +
613 stats->ld_l1hit +
614 stats->ld_l2hit +
615 stats->ld_llchit +
616 stats->lcl_hitm;
617
618 total = ldcnt +
619 stats->st_l1hit +
620 stats->st_l1miss;
621
622 return total;
623}
624
625static int
626tot_recs_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
627 struct hist_entry *he)
628{
629 struct c2c_hist_entry *c2c_he;
630 int width = c2c_width(fmt, hpp, he->hists);
631 uint64_t tot_recs;
632
633 c2c_he = container_of(he, struct c2c_hist_entry, he);
634 tot_recs = total_records(&c2c_he->stats);
635
636 return scnprintf(hpp->buf, hpp->size, "%*" PRIu64, width, tot_recs);
637}
638
639static int64_t
640tot_recs_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
641 struct hist_entry *left, struct hist_entry *right)
642{
643 struct c2c_hist_entry *c2c_left;
644 struct c2c_hist_entry *c2c_right;
645 uint64_t tot_recs_left;
646 uint64_t tot_recs_right;
647
648 c2c_left = container_of(left, struct c2c_hist_entry, he);
649 c2c_right = container_of(right, struct c2c_hist_entry, he);
650
651 tot_recs_left = total_records(&c2c_left->stats);
652 tot_recs_right = total_records(&c2c_right->stats);
653
654 return tot_recs_left - tot_recs_right;
655}
656
657static uint64_t total_loads(struct c2c_stats *stats)
658{
659 uint64_t lclmiss, ldcnt;
660
661 lclmiss = stats->lcl_dram +
662 stats->rmt_dram +
663 stats->rmt_hitm +
664 stats->rmt_hit;
665
666 ldcnt = lclmiss +
667 stats->ld_fbhit +
668 stats->ld_l1hit +
669 stats->ld_l2hit +
670 stats->ld_llchit +
671 stats->lcl_hitm;
672
673 return ldcnt;
674}
675
676static int
677tot_loads_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
678 struct hist_entry *he)
679{
680 struct c2c_hist_entry *c2c_he;
681 int width = c2c_width(fmt, hpp, he->hists);
682 uint64_t tot_recs;
683
684 c2c_he = container_of(he, struct c2c_hist_entry, he);
685 tot_recs = total_loads(&c2c_he->stats);
686
687 return scnprintf(hpp->buf, hpp->size, "%*" PRIu64, width, tot_recs);
688}
689
690static int64_t
691tot_loads_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
692 struct hist_entry *left, struct hist_entry *right)
693{
694 struct c2c_hist_entry *c2c_left;
695 struct c2c_hist_entry *c2c_right;
696 uint64_t tot_recs_left;
697 uint64_t tot_recs_right;
698
699 c2c_left = container_of(left, struct c2c_hist_entry, he);
700 c2c_right = container_of(right, struct c2c_hist_entry, he);
701
702 tot_recs_left = total_loads(&c2c_left->stats);
703 tot_recs_right = total_loads(&c2c_right->stats);
704
705 return tot_recs_left - tot_recs_right;
706}
707
708typedef double (get_percent_cb)(struct c2c_hist_entry *);
709
710static int
711percent_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
712 struct hist_entry *he, get_percent_cb get_percent)
713{
714 struct c2c_hist_entry *c2c_he;
715 int width = c2c_width(fmt, hpp, he->hists);
716 double per;
717
718 c2c_he = container_of(he, struct c2c_hist_entry, he);
719 per = get_percent(c2c_he);
720
721#ifdef HAVE_SLANG_SUPPORT
722 if (use_browser)
723 return __hpp__slsmg_color_printf(hpp, "%*.2f%%", width - 1, per);
724#endif
725 return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, per);
726}
727
728static double percent_hitm(struct c2c_hist_entry *c2c_he)
729{
730 struct c2c_hists *hists;
731 struct c2c_stats *stats;
732 struct c2c_stats *total;
733 int tot = 0, st = 0;
734 double p;
735
736 hists = container_of(c2c_he->he.hists, struct c2c_hists, hists);
737 stats = &c2c_he->stats;
738 total = &hists->stats;
739
740 switch (c2c.display) {
741 case DISPLAY_RMT:
742 st = stats->rmt_hitm;
743 tot = total->rmt_hitm;
744 break;
745 case DISPLAY_LCL:
746 st = stats->lcl_hitm;
747 tot = total->lcl_hitm;
748 default:
749 break;
750 }
751
752 p = tot ? (double) st / tot : 0;
753
754 return 100 * p;
755}
756
/*
 * Format the double __v with two decimals and a trailing '%' into the
 * char array __s and evaluate to __s.  __s must be an array, since its
 * size is taken with sizeof().
 */
#define PERC_STR(__s, __v)				\
({							\
	scnprintf((__s), sizeof(__s), "%.2F%%", (__v));	\
	(__s);						\
})
762
763static int
764percent_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
765 struct hist_entry *he)
766{
767 struct c2c_hist_entry *c2c_he;
768 int width = c2c_width(fmt, hpp, he->hists);
769 char buf[10];
770 double per;
771
772 c2c_he = container_of(he, struct c2c_hist_entry, he);
773 per = percent_hitm(c2c_he);
774 return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
775}
776
777static int
778percent_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
779 struct hist_entry *he)
780{
781 return percent_color(fmt, hpp, he, percent_hitm);
782}
783
784static int64_t
785percent_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
786 struct hist_entry *left, struct hist_entry *right)
787{
788 struct c2c_hist_entry *c2c_left;
789 struct c2c_hist_entry *c2c_right;
790 double per_left;
791 double per_right;
792
793 c2c_left = container_of(left, struct c2c_hist_entry, he);
794 c2c_right = container_of(right, struct c2c_hist_entry, he);
795
796 per_left = percent_hitm(c2c_left);
797 per_right = percent_hitm(c2c_right);
798
799 return per_left - per_right;
800}
801
802static struct c2c_stats *he_stats(struct hist_entry *he)
803{
804 struct c2c_hist_entry *c2c_he;
805
806 c2c_he = container_of(he, struct c2c_hist_entry, he);
807 return &c2c_he->stats;
808}
809
810static struct c2c_stats *total_stats(struct hist_entry *he)
811{
812 struct c2c_hists *hists;
813
814 hists = container_of(he->hists, struct c2c_hists, hists);
815 return &hists->stats;
816}
817
/* Return 'st' as a percentage of 'tot', or 0 when 'tot' is zero. */
static double percent(int st, int tot)
{
	if (tot == 0)
		return 0;

	return 100. * (double) st / (double) tot;
}
822
/* Percentage of stats field __f of entry __h out of the total of its hists. */
#define PERCENT(__h, __f) percent(he_stats(__h)->__f, total_stats(__h)->__f)

/*
 * Generate percent_<field>(): the same percentage as PERCENT(), but
 * taking a c2c_hist_entry, i.e. matching the get_percent_cb signature.
 */
#define PERCENT_FN(__f)								\
static double percent_ ## __f(struct c2c_hist_entry *c2c_he)			\
{										\
	struct c2c_hists *owner =						\
		container_of(c2c_he->he.hists, struct c2c_hists, hists);	\
										\
	return percent(c2c_he->stats.__f, owner->stats.__f);			\
}
833
834PERCENT_FN(rmt_hitm)
835PERCENT_FN(lcl_hitm)
836PERCENT_FN(st_l1hit)
837PERCENT_FN(st_l1miss)
838
839static int
840percent_rmt_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
841 struct hist_entry *he)
842{
843 int width = c2c_width(fmt, hpp, he->hists);
844 double per = PERCENT(he, rmt_hitm);
845 char buf[10];
846
847 return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
848}
849
850static int
851percent_rmt_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
852 struct hist_entry *he)
853{
854 return percent_color(fmt, hpp, he, percent_rmt_hitm);
855}
856
857static int64_t
858percent_rmt_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
859 struct hist_entry *left, struct hist_entry *right)
860{
861 double per_left;
862 double per_right;
863
864 per_left = PERCENT(left, lcl_hitm);
865 per_right = PERCENT(right, lcl_hitm);
866
867 return per_left - per_right;
868}
869
870static int
871percent_lcl_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
872 struct hist_entry *he)
873{
874 int width = c2c_width(fmt, hpp, he->hists);
875 double per = PERCENT(he, lcl_hitm);
876 char buf[10];
877
878 return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
879}
880
881static int
882percent_lcl_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
883 struct hist_entry *he)
884{
885 return percent_color(fmt, hpp, he, percent_lcl_hitm);
886}
887
888static int64_t
889percent_lcl_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
890 struct hist_entry *left, struct hist_entry *right)
891{
892 double per_left;
893 double per_right;
894
895 per_left = PERCENT(left, lcl_hitm);
896 per_right = PERCENT(right, lcl_hitm);
897
898 return per_left - per_right;
899}
900
901static int
902percent_stores_l1hit_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
903 struct hist_entry *he)
904{
905 int width = c2c_width(fmt, hpp, he->hists);
906 double per = PERCENT(he, st_l1hit);
907 char buf[10];
908
909 return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
910}
911
912static int
913percent_stores_l1hit_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
914 struct hist_entry *he)
915{
916 return percent_color(fmt, hpp, he, percent_st_l1hit);
917}
918
919static int64_t
920percent_stores_l1hit_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
921 struct hist_entry *left, struct hist_entry *right)
922{
923 double per_left;
924 double per_right;
925
926 per_left = PERCENT(left, st_l1hit);
927 per_right = PERCENT(right, st_l1hit);
928
929 return per_left - per_right;
930}
931
932static int
933percent_stores_l1miss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
934 struct hist_entry *he)
935{
936 int width = c2c_width(fmt, hpp, he->hists);
937 double per = PERCENT(he, st_l1miss);
938 char buf[10];
939
940 return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
941}
942
943static int
944percent_stores_l1miss_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
945 struct hist_entry *he)
946{
947 return percent_color(fmt, hpp, he, percent_st_l1miss);
948}
949
950static int64_t
951percent_stores_l1miss_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
952 struct hist_entry *left, struct hist_entry *right)
953{
954 double per_left;
955 double per_right;
956
957 per_left = PERCENT(left, st_l1miss);
958 per_right = PERCENT(right, st_l1miss);
959
960 return per_left - per_right;
961}
962
963STAT_FN(lcl_dram)
964STAT_FN(rmt_dram)
965
966static int
967pid_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
968 struct hist_entry *he)
969{
970 int width = c2c_width(fmt, hpp, he->hists);
971
972 return scnprintf(hpp->buf, hpp->size, "%*d", width, he->thread->pid_);
973}
974
975static int64_t
976pid_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
977 struct hist_entry *left, struct hist_entry *right)
978{
979 return left->thread->pid_ - right->thread->pid_;
980}
981
982static int64_t
983empty_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
984 struct hist_entry *left __maybe_unused,
985 struct hist_entry *right __maybe_unused)
986{
987 return 0;
988}
989
990static int
991node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
992 struct hist_entry *he)
993{
994 struct c2c_hist_entry *c2c_he;
995 bool first = true;
996 int node;
997 int ret = 0;
998
999 c2c_he = container_of(he, struct c2c_hist_entry, he);
1000
1001 for (node = 0; node < c2c.nodes_cnt; node++) {
1002 DECLARE_BITMAP(set, c2c.cpus_cnt);
1003
1004 bitmap_zero(set, c2c.cpus_cnt);
1005 bitmap_and(set, c2c_he->cpuset, c2c.nodes[node], c2c.cpus_cnt);
1006
1007 if (!bitmap_weight(set, c2c.cpus_cnt)) {
1008 if (c2c.node_info == 1) {
1009 ret = scnprintf(hpp->buf, hpp->size, "%21s", " ");
1010 advance_hpp(hpp, ret);
1011 }
1012 continue;
1013 }
1014
1015 if (!first) {
1016 ret = scnprintf(hpp->buf, hpp->size, " ");
1017 advance_hpp(hpp, ret);
1018 }
1019
1020 switch (c2c.node_info) {
1021 case 0:
1022 ret = scnprintf(hpp->buf, hpp->size, "%2d", node);
1023 advance_hpp(hpp, ret);
1024 break;
1025 case 1:
1026 {
1027 int num = bitmap_weight(c2c_he->cpuset, c2c.cpus_cnt);
1028 struct c2c_stats *stats = &c2c_he->node_stats[node];
1029
1030 ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num);
1031 advance_hpp(hpp, ret);
1032
1033 #define DISPLAY_HITM(__h) \
1034 if (c2c_he->stats.__h> 0) { \
1035 ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ", \
1036 percent(stats->__h, c2c_he->stats.__h));\
1037 } else { \
1038 ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a"); \
1039 }
1040
1041 switch (c2c.display) {
1042 case DISPLAY_RMT:
1043 DISPLAY_HITM(rmt_hitm);
1044 break;
1045 case DISPLAY_LCL:
1046 DISPLAY_HITM(lcl_hitm);
1047 default:
1048 break;
1049 }
1050
1051 #undef DISPLAY_HITM
1052
1053 advance_hpp(hpp, ret);
1054
1055 if (c2c_he->stats.store > 0) {
1056 ret = scnprintf(hpp->buf, hpp->size, "%5.1f%%}",
1057 percent(stats->store, c2c_he->stats.store));
1058 } else {
1059 ret = scnprintf(hpp->buf, hpp->size, "%6s}", "n/a");
1060 }
1061
1062 advance_hpp(hpp, ret);
1063 break;
1064 }
1065 case 2:
1066 ret = scnprintf(hpp->buf, hpp->size, "%2d{", node);
1067 advance_hpp(hpp, ret);
1068
1069 ret = bitmap_scnprintf(set, c2c.cpus_cnt, hpp->buf, hpp->size);
1070 advance_hpp(hpp, ret);
1071
1072 ret = scnprintf(hpp->buf, hpp->size, "}");
1073 advance_hpp(hpp, ret);
1074 break;
1075 default:
1076 break;
1077 }
1078
1079 first = false;
1080 }
1081
1082 return 0;
1083}
1084
1085static int
1086mean_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
1087 struct hist_entry *he, double mean)
1088{
1089 int width = c2c_width(fmt, hpp, he->hists);
1090 char buf[10];
1091
1092 scnprintf(buf, 10, "%6.0f", mean);
1093 return scnprintf(hpp->buf, hpp->size, "%*s", width, buf);
1094}
1095
1096#define MEAN_ENTRY(__func, __val) \
1097static int \
1098__func(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) \
1099{ \
1100 struct c2c_hist_entry *c2c_he; \
1101 c2c_he = container_of(he, struct c2c_hist_entry, he); \
1102 return mean_entry(fmt, hpp, he, avg_stats(&c2c_he->cstats.__val)); \
1103}
1104
1105MEAN_ENTRY(mean_rmt_entry, rmt_hitm);
1106MEAN_ENTRY(mean_lcl_entry, lcl_hitm);
1107MEAN_ENTRY(mean_load_entry, load);
1108
1109static int
1110cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
1111 struct hist_entry *he)
1112{
1113 struct c2c_hist_entry *c2c_he;
1114 int width = c2c_width(fmt, hpp, he->hists);
1115 char buf[10];
1116
1117 c2c_he = container_of(he, struct c2c_hist_entry, he);
1118
1119 scnprintf(buf, 10, "%d", bitmap_weight(c2c_he->cpuset, c2c.cpus_cnt));
1120 return scnprintf(hpp->buf, hpp->size, "%*s", width, buf);
1121}
1122
1123static int
1124cl_idx_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
1125 struct hist_entry *he)
1126{
1127 struct c2c_hist_entry *c2c_he;
1128 int width = c2c_width(fmt, hpp, he->hists);
1129 char buf[10];
1130
1131 c2c_he = container_of(he, struct c2c_hist_entry, he);
1132
1133 scnprintf(buf, 10, "%u", c2c_he->cacheline_idx);
1134 return scnprintf(hpp->buf, hpp->size, "%*s", width, buf);
1135}
1136
1137static int
1138cl_idx_empty_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
1139 struct hist_entry *he)
1140{
1141 int width = c2c_width(fmt, hpp, he->hists);
1142
1143 return scnprintf(hpp->buf, hpp->size, "%*s", width, "");
1144}
1145
/*
 * Initializers for struct c2c_header.  line[0] is the upper header line,
 * line[1] the lower one.
 */

/* Text on the lower line only. */
#define HEADER_LOW(__h)				\
	{					\
		.line[1] = { .text = __h, },	\
	}

/* Text on both lines. */
#define HEADER_BOTH(__h0, __h1)			\
	{					\
		.line[0] = { .text = __h0, },	\
		.line[1] = { .text = __h1, },	\
	}

/* Text on both lines, with .span = __s set on the upper line. */
#define HEADER_SPAN(__h0, __h1, __s)		\
	{					\
		.line[0] = {			\
			.text = __h0,		\
			.span = __s,		\
		},				\
		.line[1] = { .text = __h1, },	\
	}

/* Lower-line text; same initializer as HEADER_LOW(). */
#define HEADER_SPAN_LOW(__h)	HEADER_LOW(__h)
1180
1181static struct c2c_dimension dim_dcacheline = {
1182 .header = HEADER_LOW("Cacheline"),
1183 .name = "dcacheline",
1184 .cmp = dcacheline_cmp,
1185 .entry = dcacheline_entry,
1186 .width = 18,
1187};
1188
1189static struct c2c_header header_offset_tui = HEADER_LOW("Off");
1190
1191static struct c2c_dimension dim_offset = {
1192 .header = HEADER_BOTH("Data address", "Offset"),
1193 .name = "offset",
1194 .cmp = offset_cmp,
1195 .entry = offset_entry,
1196 .width = 18,
1197};
1198
1199static struct c2c_dimension dim_iaddr = {
1200 .header = HEADER_LOW("Code address"),
1201 .name = "iaddr",
1202 .cmp = iaddr_cmp,
1203 .entry = iaddr_entry,
1204 .width = 18,
1205};
1206
1207static struct c2c_dimension dim_tot_hitm = {
1208 .header = HEADER_SPAN("----- LLC Load Hitm -----", "Total", 2),
1209 .name = "tot_hitm",
1210 .cmp = tot_hitm_cmp,
1211 .entry = tot_hitm_entry,
1212 .width = 7,
1213};
1214
1215static struct c2c_dimension dim_lcl_hitm = {
1216 .header = HEADER_SPAN_LOW("Lcl"),
1217 .name = "lcl_hitm",
1218 .cmp = lcl_hitm_cmp,
1219 .entry = lcl_hitm_entry,
1220 .width = 7,
1221};
1222
1223static struct c2c_dimension dim_rmt_hitm = {
1224 .header = HEADER_SPAN_LOW("Rmt"),
1225 .name = "rmt_hitm",
1226 .cmp = rmt_hitm_cmp,
1227 .entry = rmt_hitm_entry,
1228 .width = 7,
1229};
1230
1231static struct c2c_dimension dim_cl_rmt_hitm = {
1232 .header = HEADER_SPAN("----- HITM -----", "Rmt", 1),
1233 .name = "cl_rmt_hitm",
1234 .cmp = rmt_hitm_cmp,
1235 .entry = rmt_hitm_entry,
1236 .width = 7,
1237};
1238
1239static struct c2c_dimension dim_cl_lcl_hitm = {
1240 .header = HEADER_SPAN_LOW("Lcl"),
1241 .name = "cl_lcl_hitm",
1242 .cmp = lcl_hitm_cmp,
1243 .entry = lcl_hitm_entry,
1244 .width = 7,
1245};
1246
1247static struct c2c_dimension dim_stores = {
1248 .header = HEADER_SPAN("---- Store Reference ----", "Total", 2),
1249 .name = "stores",
1250 .cmp = store_cmp,
1251 .entry = store_entry,
1252 .width = 7,
1253};
1254
1255static struct c2c_dimension dim_stores_l1hit = {
1256 .header = HEADER_SPAN_LOW("L1Hit"),
1257 .name = "stores_l1hit",
1258 .cmp = st_l1hit_cmp,
1259 .entry = st_l1hit_entry,
1260 .width = 7,
1261};
1262
1263static struct c2c_dimension dim_stores_l1miss = {
1264 .header = HEADER_SPAN_LOW("L1Miss"),
1265 .name = "stores_l1miss",
1266 .cmp = st_l1miss_cmp,
1267 .entry = st_l1miss_entry,
1268 .width = 7,
1269};
1270
1271static struct c2c_dimension dim_cl_stores_l1hit = {
1272 .header = HEADER_SPAN("-- Store Refs --", "L1 Hit", 1),
1273 .name = "cl_stores_l1hit",
1274 .cmp = st_l1hit_cmp,
1275 .entry = st_l1hit_entry,
1276 .width = 7,
1277};
1278
1279static struct c2c_dimension dim_cl_stores_l1miss = {
1280 .header = HEADER_SPAN_LOW("L1 Miss"),
1281 .name = "cl_stores_l1miss",
1282 .cmp = st_l1miss_cmp,
1283 .entry = st_l1miss_entry,
1284 .width = 7,
1285};
1286
1287static struct c2c_dimension dim_ld_fbhit = {
1288 .header = HEADER_SPAN("----- Core Load Hit -----", "FB", 2),
1289 .name = "ld_fbhit",
1290 .cmp = ld_fbhit_cmp,
1291 .entry = ld_fbhit_entry,
1292 .width = 7,
1293};
1294
1295static struct c2c_dimension dim_ld_l1hit = {
1296 .header = HEADER_SPAN_LOW("L1"),
1297 .name = "ld_l1hit",
1298 .cmp = ld_l1hit_cmp,
1299 .entry = ld_l1hit_entry,
1300 .width = 7,
1301};
1302
1303static struct c2c_dimension dim_ld_l2hit = {
1304 .header = HEADER_SPAN_LOW("L2"),
1305 .name = "ld_l2hit",
1306 .cmp = ld_l2hit_cmp,
1307 .entry = ld_l2hit_entry,
1308 .width = 7,
1309};
1310
1311static struct c2c_dimension dim_ld_llchit = {
1312 .header = HEADER_SPAN("-- LLC Load Hit --", "Llc", 1),
1313 .name = "ld_lclhit",
1314 .cmp = ld_llchit_cmp,
1315 .entry = ld_llchit_entry,
1316 .width = 8,
1317};
1318
1319static struct c2c_dimension dim_ld_rmthit = {
1320 .header = HEADER_SPAN_LOW("Rmt"),
1321 .name = "ld_rmthit",
1322 .cmp = rmt_hit_cmp,
1323 .entry = rmt_hit_entry,
1324 .width = 8,
1325};
1326
1327static struct c2c_dimension dim_ld_llcmiss = {
1328 .header = HEADER_BOTH("LLC", "Ld Miss"),
1329 .name = "ld_llcmiss",
1330 .cmp = ld_llcmiss_cmp,
1331 .entry = ld_llcmiss_entry,
1332 .width = 7,
1333};
1334
1335static struct c2c_dimension dim_tot_recs = {
1336 .header = HEADER_BOTH("Total", "records"),
1337 .name = "tot_recs",
1338 .cmp = tot_recs_cmp,
1339 .entry = tot_recs_entry,
1340 .width = 7,
1341};
1342
1343static struct c2c_dimension dim_tot_loads = {
1344 .header = HEADER_BOTH("Total", "Loads"),
1345 .name = "tot_loads",
1346 .cmp = tot_loads_cmp,
1347 .entry = tot_loads_entry,
1348 .width = 7,
1349};
1350
1351static struct c2c_header percent_hitm_header[] = {
1352 [DISPLAY_LCL] = HEADER_BOTH("Lcl", "Hitm"),
1353 [DISPLAY_RMT] = HEADER_BOTH("Rmt", "Hitm"),
1354};
1355
1356static struct c2c_dimension dim_percent_hitm = {
1357 .name = "percent_hitm",
1358 .cmp = percent_hitm_cmp,
1359 .entry = percent_hitm_entry,
1360 .color = percent_hitm_color,
1361 .width = 7,
1362};
1363
1364static struct c2c_dimension dim_percent_rmt_hitm = {
1365 .header = HEADER_SPAN("----- HITM -----", "Rmt", 1),
1366 .name = "percent_rmt_hitm",
1367 .cmp = percent_rmt_hitm_cmp,
1368 .entry = percent_rmt_hitm_entry,
1369 .color = percent_rmt_hitm_color,
1370 .width = 7,
1371};
1372
1373static struct c2c_dimension dim_percent_lcl_hitm = {
1374 .header = HEADER_SPAN_LOW("Lcl"),
1375 .name = "percent_lcl_hitm",
1376 .cmp = percent_lcl_hitm_cmp,
1377 .entry = percent_lcl_hitm_entry,
1378 .color = percent_lcl_hitm_color,
1379 .width = 7,
1380};
1381
1382static struct c2c_dimension dim_percent_stores_l1hit = {
1383 .header = HEADER_SPAN("-- Store Refs --", "L1 Hit", 1),
1384 .name = "percent_stores_l1hit",
1385 .cmp = percent_stores_l1hit_cmp,
1386 .entry = percent_stores_l1hit_entry,
1387 .color = percent_stores_l1hit_color,
1388 .width = 7,
1389};
1390
1391static struct c2c_dimension dim_percent_stores_l1miss = {
1392 .header = HEADER_SPAN_LOW("L1 Miss"),
1393 .name = "percent_stores_l1miss",
1394 .cmp = percent_stores_l1miss_cmp,
1395 .entry = percent_stores_l1miss_entry,
1396 .color = percent_stores_l1miss_color,
1397 .width = 7,
1398};
1399
1400static struct c2c_dimension dim_dram_lcl = {
1401 .header = HEADER_SPAN("--- Load Dram ----", "Lcl", 1),
1402 .name = "dram_lcl",
1403 .cmp = lcl_dram_cmp,
1404 .entry = lcl_dram_entry,
1405 .width = 8,
1406};
1407
1408static struct c2c_dimension dim_dram_rmt = {
1409 .header = HEADER_SPAN_LOW("Rmt"),
1410 .name = "dram_rmt",
1411 .cmp = rmt_dram_cmp,
1412 .entry = rmt_dram_entry,
1413 .width = 8,
1414};
1415
1416static struct c2c_dimension dim_pid = {
1417 .header = HEADER_LOW("Pid"),
1418 .name = "pid",
1419 .cmp = pid_cmp,
1420 .entry = pid_entry,
1421 .width = 7,
1422};
1423
1424static struct c2c_dimension dim_tid = {
1425 .header = HEADER_LOW("Tid"),
1426 .name = "tid",
1427 .se = &sort_thread,
1428};
1429
1430static struct c2c_dimension dim_symbol = {
1431 .name = "symbol",
1432 .se = &sort_sym,
1433};
1434
1435static struct c2c_dimension dim_dso = {
1436 .header = HEADER_BOTH("Shared", "Object"),
1437 .name = "dso",
1438 .se = &sort_dso,
1439};
1440
1441static struct c2c_header header_node[3] = {
1442 HEADER_LOW("Node"),
1443 HEADER_LOW("Node{cpus %hitms %stores}"),
1444 HEADER_LOW("Node{cpu list}"),
1445};
1446
1447static struct c2c_dimension dim_node = {
1448 .name = "node",
1449 .cmp = empty_cmp,
1450 .entry = node_entry,
1451 .width = 4,
1452};
1453
1454static struct c2c_dimension dim_mean_rmt = {
1455 .header = HEADER_SPAN("---------- cycles ----------", "rmt hitm", 2),
1456 .name = "mean_rmt",
1457 .cmp = empty_cmp,
1458 .entry = mean_rmt_entry,
1459 .width = 8,
1460};
1461
1462static struct c2c_dimension dim_mean_lcl = {
1463 .header = HEADER_SPAN_LOW("lcl hitm"),
1464 .name = "mean_lcl",
1465 .cmp = empty_cmp,
1466 .entry = mean_lcl_entry,
1467 .width = 8,
1468};
1469
1470static struct c2c_dimension dim_mean_load = {
1471 .header = HEADER_SPAN_LOW("load"),
1472 .name = "mean_load",
1473 .cmp = empty_cmp,
1474 .entry = mean_load_entry,
1475 .width = 8,
1476};
1477
1478static struct c2c_dimension dim_cpucnt = {
1479 .header = HEADER_BOTH("cpu", "cnt"),
1480 .name = "cpucnt",
1481 .cmp = empty_cmp,
1482 .entry = cpucnt_entry,
1483 .width = 8,
1484};
1485
1486static struct c2c_dimension dim_srcline = {
1487 .name = "cl_srcline",
1488 .se = &sort_srcline,
1489};
1490
1491static struct c2c_dimension dim_dcacheline_idx = {
1492 .header = HEADER_LOW("Index"),
1493 .name = "cl_idx",
1494 .cmp = empty_cmp,
1495 .entry = cl_idx_entry,
1496 .width = 5,
1497};
1498
1499static struct c2c_dimension dim_dcacheline_num = {
1500 .header = HEADER_LOW("Num"),
1501 .name = "cl_num",
1502 .cmp = empty_cmp,
1503 .entry = cl_idx_entry,
1504 .width = 5,
1505};
1506
1507static struct c2c_dimension dim_dcacheline_num_empty = {
1508 .header = HEADER_LOW("Num"),
1509 .name = "cl_num_empty",
1510 .cmp = empty_cmp,
1511 .entry = cl_idx_empty_entry,
1512 .width = 5,
1513};
1514
1515static struct c2c_dimension *dimensions[] = {
1516 &dim_dcacheline,
1517 &dim_offset,
1518 &dim_iaddr,
1519 &dim_tot_hitm,
1520 &dim_lcl_hitm,
1521 &dim_rmt_hitm,
1522 &dim_cl_lcl_hitm,
1523 &dim_cl_rmt_hitm,
1524 &dim_stores,
1525 &dim_stores_l1hit,
1526 &dim_stores_l1miss,
1527 &dim_cl_stores_l1hit,
1528 &dim_cl_stores_l1miss,
1529 &dim_ld_fbhit,
1530 &dim_ld_l1hit,
1531 &dim_ld_l2hit,
1532 &dim_ld_llchit,
1533 &dim_ld_rmthit,
1534 &dim_ld_llcmiss,
1535 &dim_tot_recs,
1536 &dim_tot_loads,
1537 &dim_percent_hitm,
1538 &dim_percent_rmt_hitm,
1539 &dim_percent_lcl_hitm,
1540 &dim_percent_stores_l1hit,
1541 &dim_percent_stores_l1miss,
1542 &dim_dram_lcl,
1543 &dim_dram_rmt,
1544 &dim_pid,
1545 &dim_tid,
1546 &dim_symbol,
1547 &dim_dso,
1548 &dim_node,
1549 &dim_mean_rmt,
1550 &dim_mean_lcl,
1551 &dim_mean_load,
1552 &dim_cpucnt,
1553 &dim_srcline,
1554 &dim_dcacheline_idx,
1555 &dim_dcacheline_num,
1556 &dim_dcacheline_num_empty,
1557 NULL,
1558};
1559
1560static void fmt_free(struct perf_hpp_fmt *fmt)
1561{
1562 struct c2c_fmt *c2c_fmt;
1563
1564 c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
1565 free(c2c_fmt);
1566}
1567
1568static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
1569{
1570 struct c2c_fmt *c2c_a = container_of(a, struct c2c_fmt, fmt);
1571 struct c2c_fmt *c2c_b = container_of(b, struct c2c_fmt, fmt);
1572
1573 return c2c_a->dim == c2c_b->dim;
1574}
1575
1576static struct c2c_dimension *get_dimension(const char *name)
1577{
1578 unsigned int i;
1579
1580 for (i = 0; dimensions[i]; i++) {
1581 struct c2c_dimension *dim = dimensions[i];
1582
1583 if (!strcmp(dim->name, name))
1584 return dim;
1585 };
1586
1587 return NULL;
1588}
1589
1590static int c2c_se_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
1591 struct hist_entry *he)
1592{
1593 struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
1594 struct c2c_dimension *dim = c2c_fmt->dim;
1595 size_t len = fmt->user_len;
1596
1597 if (!len) {
1598 len = hists__col_len(he->hists, dim->se->se_width_idx);
1599
1600 if (dim == &dim_symbol || dim == &dim_srcline)
1601 len = symbol_width(he->hists, dim->se);
1602 }
1603
1604 return dim->se->se_snprintf(he, hpp->buf, hpp->size, len);
1605}
1606
1607static int64_t c2c_se_cmp(struct perf_hpp_fmt *fmt,
1608 struct hist_entry *a, struct hist_entry *b)
1609{
1610 struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
1611 struct c2c_dimension *dim = c2c_fmt->dim;
1612
1613 return dim->se->se_cmp(a, b);
1614}
1615
1616static int64_t c2c_se_collapse(struct perf_hpp_fmt *fmt,
1617 struct hist_entry *a, struct hist_entry *b)
1618{
1619 struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
1620 struct c2c_dimension *dim = c2c_fmt->dim;
1621 int64_t (*collapse_fn)(struct hist_entry *, struct hist_entry *);
1622
1623 collapse_fn = dim->se->se_collapse ?: dim->se->se_cmp;
1624 return collapse_fn(a, b);
1625}
1626
1627static struct c2c_fmt *get_format(const char *name)
1628{
1629 struct c2c_dimension *dim = get_dimension(name);
1630 struct c2c_fmt *c2c_fmt;
1631 struct perf_hpp_fmt *fmt;
1632
1633 if (!dim)
1634 return NULL;
1635
1636 c2c_fmt = zalloc(sizeof(*c2c_fmt));
1637 if (!c2c_fmt)
1638 return NULL;
1639
1640 c2c_fmt->dim = dim;
1641
1642 fmt = &c2c_fmt->fmt;
1643 INIT_LIST_HEAD(&fmt->list);
1644 INIT_LIST_HEAD(&fmt->sort_list);
1645
1646 fmt->cmp = dim->se ? c2c_se_cmp : dim->cmp;
1647 fmt->sort = dim->se ? c2c_se_cmp : dim->cmp;
1648 fmt->color = dim->se ? NULL : dim->color;
1649 fmt->entry = dim->se ? c2c_se_entry : dim->entry;
1650 fmt->header = c2c_header;
1651 fmt->width = c2c_width;
1652 fmt->collapse = dim->se ? c2c_se_collapse : dim->cmp;
1653 fmt->equal = fmt_equal;
1654 fmt->free = fmt_free;
1655
1656 return c2c_fmt;
1657}
1658
1659static int c2c_hists__init_output(struct perf_hpp_list *hpp_list, char *name)
1660{
1661 struct c2c_fmt *c2c_fmt = get_format(name);
1662
1663 if (!c2c_fmt) {
1664 reset_dimensions();
1665 return output_field_add(hpp_list, name);
1666 }
1667
1668 perf_hpp_list__column_register(hpp_list, &c2c_fmt->fmt);
1669 return 0;
1670}
1671
1672static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name)
1673{
1674 struct c2c_fmt *c2c_fmt = get_format(name);
1675 struct c2c_dimension *dim;
1676
1677 if (!c2c_fmt) {
1678 reset_dimensions();
1679 return sort_dimension__add(hpp_list, name, NULL, 0);
1680 }
1681
1682 dim = c2c_fmt->dim;
1683 if (dim == &dim_dso)
1684 hpp_list->dso = 1;
1685
1686 perf_hpp_list__register_sort_field(hpp_list, &c2c_fmt->fmt);
1687 return 0;
1688}
1689
/*
 * Split _list (modified in place by strtok_r) on ',' and ' ' and call
 * _fn(hpp_list, token) for each token.
 *
 * Relies on 'ret' and 'hpp_list' being in scope at the expansion site.
 * 'ret' is reset to 0 first; parsing stops at the first token for which
 * _fn returns -EINVAL or -ESRCH, after reporting it with error().
 * A NULL _list is skipped.
 */
#define PARSE_LIST(_list, _fn)							\
	do {									\
		char *tmp, *tok;						\
		ret = 0;							\
										\
		if (!_list)							\
			break;							\
										\
		tok = strtok_r((char *)_list, ", ", &tmp);			\
		while (tok) {							\
			ret = _fn(hpp_list, tok);				\
			if (ret == -EINVAL) {					\
				error("Invalid --fields key: `%s'", tok);	\
				break;						\
			}							\
			if (ret == -ESRCH) {					\
				error("Unknown --fields key: `%s'", tok);	\
				break;						\
			}							\
			tok = strtok_r(NULL, ", ", &tmp);			\
		}								\
	} while (0)
1710
1711static int hpp_list__parse(struct perf_hpp_list *hpp_list,
1712 const char *output_,
1713 const char *sort_)
1714{
1715 char *output = output_ ? strdup(output_) : NULL;
1716 char *sort = sort_ ? strdup(sort_) : NULL;
1717 int ret;
1718
1719 PARSE_LIST(output, c2c_hists__init_output);
1720 PARSE_LIST(sort, c2c_hists__init_sort);
1721
1722 /* copy sort keys to output fields */
1723 perf_hpp__setup_output_field(hpp_list);
1724
1725 /*
1726 * We dont need other sorting keys other than those
1727 * we already specified. It also really slows down
1728 * the processing a lot with big number of output
1729 * fields, so switching this off for c2c.
1730 */
1731
1732#if 0
1733 /* and then copy output fields to sort keys */
1734 perf_hpp__append_sort_keys(&hists->list);
1735#endif
1736
1737 free(output);
1738 free(sort);
1739 return ret;
1740}
1741
1742static int c2c_hists__init(struct c2c_hists *hists,
1743 const char *sort,
1744 int nr_header_lines)
1745{
1746 __hists__init(&hists->hists, &hists->list);
1747
1748 /*
1749 * Initialize only with sort fields, we need to resort
1750 * later anyway, and that's where we add output fields
1751 * as well.
1752 */
1753 perf_hpp_list__init(&hists->list);
1754
1755 /* Overload number of header lines.*/
1756 hists->list.nr_header_lines = nr_header_lines;
1757
1758 return hpp_list__parse(&hists->list, NULL, sort);
1759}
1760
1761__maybe_unused
1762static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
1763 const char *output,
1764 const char *sort)
1765{
1766 perf_hpp__reset_output_field(&c2c_hists->list);
1767 return hpp_list__parse(&c2c_hists->list, output, sort);
1768}
1769
1770#define DISPLAY_LINE_LIMIT 0.0005
1771
1772static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
1773{
1774 struct c2c_hist_entry *c2c_he;
1775 double ld_dist;
1776
1777 if (c2c.show_all)
1778 return true;
1779
1780 c2c_he = container_of(he, struct c2c_hist_entry, he);
1781
1782#define FILTER_HITM(__h) \
1783 if (stats->__h) { \
1784 ld_dist = ((double)c2c_he->stats.__h / stats->__h); \
1785 if (ld_dist < DISPLAY_LINE_LIMIT) \
1786 he->filtered = HIST_FILTER__C2C; \
1787 } else { \
1788 he->filtered = HIST_FILTER__C2C; \
1789 }
1790
1791 switch (c2c.display) {
1792 case DISPLAY_LCL:
1793 FILTER_HITM(lcl_hitm);
1794 break;
1795 case DISPLAY_RMT:
1796 FILTER_HITM(rmt_hitm);
1797 default:
1798 break;
1799 };
1800
1801#undef FILTER_HITM
1802
1803 return he->filtered == 0;
1804}
1805
1806static inline int valid_hitm_or_store(struct hist_entry *he)
1807{
1808 struct c2c_hist_entry *c2c_he;
1809 bool has_hitm;
1810
1811 c2c_he = container_of(he, struct c2c_hist_entry, he);
1812 has_hitm = c2c.display == DISPLAY_LCL ?
1813 c2c_he->stats.lcl_hitm : c2c_he->stats.rmt_hitm;
1814 return has_hitm || c2c_he->stats.store;
1815}
1816
1817static void calc_width(struct hist_entry *he)
1818{
1819 struct c2c_hists *c2c_hists;
1820
1821 c2c_hists = container_of(he->hists, struct c2c_hists, hists);
1822 hists__calc_col_len(&c2c_hists->hists, he);
1823}
1824
1825static int filter_cb(struct hist_entry *he)
1826{
1827 if (c2c.show_src && !he->srcline)
1828 he->srcline = hist_entry__get_srcline(he);
1829
1830 calc_width(he);
1831
1832 if (!valid_hitm_or_store(he))
1833 he->filtered = HIST_FILTER__C2C;
1834
1835 return 0;
1836}
1837
1838static int resort_cl_cb(struct hist_entry *he)
1839{
1840 struct c2c_hist_entry *c2c_he;
1841 struct c2c_hists *c2c_hists;
1842 bool display = he__display(he, &c2c.hitm_stats);
1843
1844 c2c_he = container_of(he, struct c2c_hist_entry, he);
1845 c2c_hists = c2c_he->hists;
1846
1847 calc_width(he);
1848
1849 if (display && c2c_hists) {
1850 static unsigned int idx;
1851
1852 c2c_he->cacheline_idx = idx++;
1853
1854 c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort);
1855
1856 hists__collapse_resort(&c2c_hists->hists, NULL);
1857 hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb);
1858 }
1859
1860 return 0;
1861}
1862
1863static void setup_nodes_header(void)
1864{
1865 dim_node.header = header_node[c2c.node_info];
1866}
1867
1868static int setup_nodes(struct perf_session *session)
1869{
1870 struct numa_node *n;
1871 unsigned long **nodes;
1872 int node, cpu;
1873 int *cpu2node;
1874
1875 if (c2c.node_info > 2)
1876 c2c.node_info = 2;
1877
1878 c2c.nodes_cnt = session->header.env.nr_numa_nodes;
1879 c2c.cpus_cnt = session->header.env.nr_cpus_online;
1880
1881 n = session->header.env.numa_nodes;
1882 if (!n)
1883 return -EINVAL;
1884
1885 nodes = zalloc(sizeof(unsigned long *) * c2c.nodes_cnt);
1886 if (!nodes)
1887 return -ENOMEM;
1888
1889 c2c.nodes = nodes;
1890
1891 cpu2node = zalloc(sizeof(int) * c2c.cpus_cnt);
1892 if (!cpu2node)
1893 return -ENOMEM;
1894
1895 for (cpu = 0; cpu < c2c.cpus_cnt; cpu++)
1896 cpu2node[cpu] = -1;
1897
1898 c2c.cpu2node = cpu2node;
1899
1900 for (node = 0; node < c2c.nodes_cnt; node++) {
1901 struct cpu_map *map = n[node].map;
1902 unsigned long *set;
1903
1904 set = bitmap_alloc(c2c.cpus_cnt);
1905 if (!set)
1906 return -ENOMEM;
1907
1908 for (cpu = 0; cpu < map->nr; cpu++) {
1909 set_bit(map->map[cpu], set);
1910
1911 if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug"))
1912 return -EINVAL;
1913
1914 cpu2node[map->map[cpu]] = node;
1915 }
1916
1917 nodes[node] = set;
1918 }
1919
1920 setup_nodes_header();
1921 return 0;
1922}
1923
1924#define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm)
1925
1926static int resort_hitm_cb(struct hist_entry *he)
1927{
1928 struct c2c_hist_entry *c2c_he;
1929 c2c_he = container_of(he, struct c2c_hist_entry, he);
1930
1931 if (HAS_HITMS(c2c_he)) {
1932 c2c.shared_clines++;
1933 c2c_add_stats(&c2c.hitm_stats, &c2c_he->stats);
1934 }
1935
1936 return 0;
1937}
1938
1939static int hists__iterate_cb(struct hists *hists, hists__resort_cb_t cb)
1940{
1941 struct rb_node *next = rb_first(&hists->entries);
1942 int ret = 0;
1943
1944 while (next) {
1945 struct hist_entry *he;
1946
1947 he = rb_entry(next, struct hist_entry, rb_node);
1948 ret = cb(he);
1949 if (ret)
1950 break;
1951 next = rb_next(&he->rb_node);
1952 }
1953
1954 return ret;
1955}
1956
1957static void print_c2c__display_stats(FILE *out)
1958{
1959 int llc_misses;
1960 struct c2c_stats *stats = &c2c.hists.stats;
1961
1962 llc_misses = stats->lcl_dram +
1963 stats->rmt_dram +
1964 stats->rmt_hit +
1965 stats->rmt_hitm;
1966
1967 fprintf(out, "=================================================\n");
1968 fprintf(out, " Trace Event Information \n");
1969 fprintf(out, "=================================================\n");
1970 fprintf(out, " Total records : %10d\n", stats->nr_entries);
1971 fprintf(out, " Locked Load/Store Operations : %10d\n", stats->locks);
1972 fprintf(out, " Load Operations : %10d\n", stats->load);
1973 fprintf(out, " Loads - uncacheable : %10d\n", stats->ld_uncache);
1974 fprintf(out, " Loads - IO : %10d\n", stats->ld_io);
1975 fprintf(out, " Loads - Miss : %10d\n", stats->ld_miss);
1976 fprintf(out, " Loads - no mapping : %10d\n", stats->ld_noadrs);
1977 fprintf(out, " Load Fill Buffer Hit : %10d\n", stats->ld_fbhit);
1978 fprintf(out, " Load L1D hit : %10d\n", stats->ld_l1hit);
1979 fprintf(out, " Load L2D hit : %10d\n", stats->ld_l2hit);
1980 fprintf(out, " Load LLC hit : %10d\n", stats->ld_llchit + stats->lcl_hitm);
1981 fprintf(out, " Load Local HITM : %10d\n", stats->lcl_hitm);
1982 fprintf(out, " Load Remote HITM : %10d\n", stats->rmt_hitm);
1983 fprintf(out, " Load Remote HIT : %10d\n", stats->rmt_hit);
1984 fprintf(out, " Load Local DRAM : %10d\n", stats->lcl_dram);
1985 fprintf(out, " Load Remote DRAM : %10d\n", stats->rmt_dram);
1986 fprintf(out, " Load MESI State Exclusive : %10d\n", stats->ld_excl);
1987 fprintf(out, " Load MESI State Shared : %10d\n", stats->ld_shared);
1988 fprintf(out, " Load LLC Misses : %10d\n", llc_misses);
1989 fprintf(out, " LLC Misses to Local DRAM : %10.1f%%\n", ((double)stats->lcl_dram/(double)llc_misses) * 100.);
1990 fprintf(out, " LLC Misses to Remote DRAM : %10.1f%%\n", ((double)stats->rmt_dram/(double)llc_misses) * 100.);
1991 fprintf(out, " LLC Misses to Remote cache (HIT) : %10.1f%%\n", ((double)stats->rmt_hit /(double)llc_misses) * 100.);
1992 fprintf(out, " LLC Misses to Remote cache (HITM) : %10.1f%%\n", ((double)stats->rmt_hitm/(double)llc_misses) * 100.);
1993 fprintf(out, " Store Operations : %10d\n", stats->store);
1994 fprintf(out, " Store - uncacheable : %10d\n", stats->st_uncache);
1995 fprintf(out, " Store - no mapping : %10d\n", stats->st_noadrs);
1996 fprintf(out, " Store L1D Hit : %10d\n", stats->st_l1hit);
1997 fprintf(out, " Store L1D Miss : %10d\n", stats->st_l1miss);
1998 fprintf(out, " No Page Map Rejects : %10d\n", stats->nomap);
1999 fprintf(out, " Unable to parse data source : %10d\n", stats->noparse);
2000}
2001
2002static void print_shared_cacheline_info(FILE *out)
2003{
2004 struct c2c_stats *stats = &c2c.hitm_stats;
2005 int hitm_cnt = stats->lcl_hitm + stats->rmt_hitm;
2006
2007 fprintf(out, "=================================================\n");
2008 fprintf(out, " Global Shared Cache Line Event Information \n");
2009 fprintf(out, "=================================================\n");
2010 fprintf(out, " Total Shared Cache Lines : %10d\n", c2c.shared_clines);
2011 fprintf(out, " Load HITs on shared lines : %10d\n", stats->load);
2012 fprintf(out, " Fill Buffer Hits on shared lines : %10d\n", stats->ld_fbhit);
2013 fprintf(out, " L1D hits on shared lines : %10d\n", stats->ld_l1hit);
2014 fprintf(out, " L2D hits on shared lines : %10d\n", stats->ld_l2hit);
2015 fprintf(out, " LLC hits on shared lines : %10d\n", stats->ld_llchit + stats->lcl_hitm);
2016 fprintf(out, " Locked Access on shared lines : %10d\n", stats->locks);
2017 fprintf(out, " Store HITs on shared lines : %10d\n", stats->store);
2018 fprintf(out, " Store L1D hits on shared lines : %10d\n", stats->st_l1hit);
2019 fprintf(out, " Total Merged records : %10d\n", hitm_cnt + stats->store);
2020}
2021
2022static void print_cacheline(struct c2c_hists *c2c_hists,
2023 struct hist_entry *he_cl,
2024 struct perf_hpp_list *hpp_list,
2025 FILE *out)
2026{
2027 char bf[1000];
2028 struct perf_hpp hpp = {
2029 .buf = bf,
2030 .size = 1000,
2031 };
2032 static bool once;
2033
2034 if (!once) {
2035 hists__fprintf_headers(&c2c_hists->hists, out);
2036 once = true;
2037 } else {
2038 fprintf(out, "\n");
2039 }
2040
2041 fprintf(out, " -------------------------------------------------------------\n");
2042 __hist_entry__snprintf(he_cl, &hpp, hpp_list);
2043 fprintf(out, "%s\n", bf);
2044 fprintf(out, " -------------------------------------------------------------\n");
2045
2046 hists__fprintf(&c2c_hists->hists, false, 0, 0, 0, out, true);
2047}
2048
2049static void print_pareto(FILE *out)
2050{
2051 struct perf_hpp_list hpp_list;
2052 struct rb_node *nd;
2053 int ret;
2054
2055 perf_hpp_list__init(&hpp_list);
2056 ret = hpp_list__parse(&hpp_list,
2057 "cl_num,"
2058 "cl_rmt_hitm,"
2059 "cl_lcl_hitm,"
2060 "cl_stores_l1hit,"
2061 "cl_stores_l1miss,"
2062 "dcacheline",
2063 NULL);
2064
2065 if (WARN_ONCE(ret, "failed to setup sort entries\n"))
2066 return;
2067
2068 nd = rb_first(&c2c.hists.hists.entries);
2069
2070 for (; nd; nd = rb_next(nd)) {
2071 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
2072 struct c2c_hist_entry *c2c_he;
2073
2074 if (he->filtered)
2075 continue;
2076
2077 c2c_he = container_of(he, struct c2c_hist_entry, he);
2078 print_cacheline(c2c_he->hists, he, &hpp_list, out);
2079 }
2080}
2081
2082static void print_c2c_info(FILE *out, struct perf_session *session)
2083{
2084 struct perf_evlist *evlist = session->evlist;
2085 struct perf_evsel *evsel;
2086 bool first = true;
2087
2088 fprintf(out, "=================================================\n");
2089 fprintf(out, " c2c details \n");
2090 fprintf(out, "=================================================\n");
2091
2092 evlist__for_each_entry(evlist, evsel) {
2093 fprintf(out, "%-36s: %s\n", first ? " Events" : "",
2094 perf_evsel__name(evsel));
2095 first = false;
2096 }
2097 fprintf(out, " Cachelines sort on : %s HITMs\n",
2098 c2c.display == DISPLAY_LCL ? "Local" : "Remote");
2099 fprintf(out, " Cacheline data grouping : %s\n", c2c.cl_sort);
2100}
2101
2102static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session)
2103{
2104 setup_pager();
2105
2106 print_c2c__display_stats(out);
2107 fprintf(out, "\n");
2108 print_shared_cacheline_info(out);
2109 fprintf(out, "\n");
2110 print_c2c_info(out, session);
2111
2112 if (c2c.stats_only)
2113 return;
2114
2115 fprintf(out, "\n");
2116 fprintf(out, "=================================================\n");
2117 fprintf(out, " Shared Data Cache Line Table \n");
2118 fprintf(out, "=================================================\n");
2119 fprintf(out, "#\n");
2120
2121 hists__fprintf(&c2c.hists.hists, true, 0, 0, 0, stdout, false);
2122
2123 fprintf(out, "\n");
2124 fprintf(out, "=================================================\n");
2125 fprintf(out, " Shared Cache Line Distribution Pareto \n");
2126 fprintf(out, "=================================================\n");
2127 fprintf(out, "#\n");
2128
2129 print_pareto(out);
2130}
2131
2132#ifdef HAVE_SLANG_SUPPORT
2133static void c2c_browser__update_nr_entries(struct hist_browser *hb)
2134{
2135 u64 nr_entries = 0;
2136 struct rb_node *nd = rb_first(&hb->hists->entries);
2137
2138 while (nd) {
2139 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
2140
2141 if (!he->filtered)
2142 nr_entries++;
2143
2144 nd = rb_next(nd);
2145 }
2146
2147 hb->nr_non_filtered_entries = nr_entries;
2148}
2149
2150struct c2c_cacheline_browser {
2151 struct hist_browser hb;
2152 struct hist_entry *he;
2153};
2154
2155static int
2156perf_c2c_cacheline_browser__title(struct hist_browser *browser,
2157 char *bf, size_t size)
2158{
2159 struct c2c_cacheline_browser *cl_browser;
2160 struct hist_entry *he;
2161 uint64_t addr = 0;
2162
2163 cl_browser = container_of(browser, struct c2c_cacheline_browser, hb);
2164 he = cl_browser->he;
2165
2166 if (he->mem_info)
2167 addr = cl_address(he->mem_info->daddr.addr);
2168
2169 scnprintf(bf, size, "Cacheline 0x%lx", addr);
2170 return 0;
2171}
2172
2173static struct c2c_cacheline_browser*
2174c2c_cacheline_browser__new(struct hists *hists, struct hist_entry *he)
2175{
2176 struct c2c_cacheline_browser *browser;
2177
2178 browser = zalloc(sizeof(*browser));
2179 if (browser) {
2180 hist_browser__init(&browser->hb, hists);
2181 browser->hb.c2c_filter = true;
2182 browser->hb.title = perf_c2c_cacheline_browser__title;
2183 browser->he = he;
2184 }
2185
2186 return browser;
2187}
2188
2189static int perf_c2c__browse_cacheline(struct hist_entry *he)
2190{
2191 struct c2c_hist_entry *c2c_he;
2192 struct c2c_hists *c2c_hists;
2193 struct c2c_cacheline_browser *cl_browser;
2194 struct hist_browser *browser;
2195 int key = -1;
2196 const char help[] =
2197 " ENTER Togle callchains (if present) \n"
2198 " n Togle Node details info \n"
2199 " s Togle full lenght of symbol and source line columns \n"
2200 " q Return back to cacheline list \n";
2201
2202 /* Display compact version first. */
2203 c2c.symbol_full = false;
2204
2205 c2c_he = container_of(he, struct c2c_hist_entry, he);
2206 c2c_hists = c2c_he->hists;
2207
2208 cl_browser = c2c_cacheline_browser__new(&c2c_hists->hists, he);
2209 if (cl_browser == NULL)
2210 return -1;
2211
2212 browser = &cl_browser->hb;
2213
2214 /* reset abort key so that it can get Ctrl-C as a key */
2215 SLang_reset_tty();
2216 SLang_init_tty(0, 0, 0);
2217
2218 c2c_browser__update_nr_entries(browser);
2219
2220 while (1) {
2221 key = hist_browser__run(browser, "? - help");
2222
2223 switch (key) {
2224 case 's':
2225 c2c.symbol_full = !c2c.symbol_full;
2226 break;
2227 case 'n':
2228 c2c.node_info = (c2c.node_info + 1) % 3;
2229 setup_nodes_header();
2230 break;
2231 case 'q':
2232 goto out;
2233 case '?':
2234 ui_browser__help_window(&browser->b, help);
2235 break;
2236 default:
2237 break;
2238 }
2239 }
2240
2241out:
2242 free(cl_browser);
2243 return 0;
2244}
2245
2246static int perf_c2c_browser__title(struct hist_browser *browser,
2247 char *bf, size_t size)
2248{
2249 scnprintf(bf, size,
2250 "Shared Data Cache Line Table "
2251 "(%lu entries, sorted on %s HITMs)",
2252 browser->nr_non_filtered_entries,
2253 c2c.display == DISPLAY_LCL ? "local" : "remote");
2254 return 0;
2255}
2256
2257static struct hist_browser*
2258perf_c2c_browser__new(struct hists *hists)
2259{
2260 struct hist_browser *browser = hist_browser__new(hists);
2261
2262 if (browser) {
2263 browser->title = perf_c2c_browser__title;
2264 browser->c2c_filter = true;
2265 }
2266
2267 return browser;
2268}
2269
2270static int perf_c2c__hists_browse(struct hists *hists)
2271{
2272 struct hist_browser *browser;
2273 int key = -1;
2274 const char help[] =
2275 " d Display cacheline details \n"
2276 " ENTER Togle callchains (if present) \n"
2277 " q Quit \n";
2278
2279 browser = perf_c2c_browser__new(hists);
2280 if (browser == NULL)
2281 return -1;
2282
2283 /* reset abort key so that it can get Ctrl-C as a key */
2284 SLang_reset_tty();
2285 SLang_init_tty(0, 0, 0);
2286
2287 c2c_browser__update_nr_entries(browser);
2288
2289 while (1) {
2290 key = hist_browser__run(browser, "? - help");
2291
2292 switch (key) {
2293 case 'q':
2294 goto out;
2295 case 'd':
2296 perf_c2c__browse_cacheline(browser->he_selection);
2297 break;
2298 case '?':
2299 ui_browser__help_window(&browser->b, help);
2300 break;
2301 default:
2302 break;
2303 }
2304 }
2305
2306out:
2307 hist_browser__delete(browser);
2308 return 0;
2309}
2310
2311static void perf_c2c_display(struct perf_session *session)
2312{
2313 if (c2c.use_stdio)
2314 perf_c2c__hists_fprintf(stdout, session);
2315 else
2316 perf_c2c__hists_browse(&c2c.hists.hists);
2317}
2318#else
2319static void perf_c2c_display(struct perf_session *session)
2320{
2321 use_browser = 0;
2322 perf_c2c__hists_fprintf(stdout, session);
2323}
2324#endif /* HAVE_SLANG_SUPPORT */
2325
2326static void ui_quirks(void)
2327{
2328 if (!c2c.use_stdio) {
2329 dim_offset.width = 5;
2330 dim_offset.header = header_offset_tui;
2331 }
2332
2333 dim_percent_hitm.header = percent_hitm_header[c2c.display];
2334}
2335
2336#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
2337
2338const char callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
2339 CALLCHAIN_REPORT_HELP
2340 "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT;
2341
2342static int
2343parse_callchain_opt(const struct option *opt, const char *arg, int unset)
2344{
2345 struct callchain_param *callchain = opt->value;
2346
2347 callchain->enabled = !unset;
2348 /*
2349 * --no-call-graph
2350 */
2351 if (unset) {
2352 symbol_conf.use_callchain = false;
2353 callchain->mode = CHAIN_NONE;
2354 return 0;
2355 }
2356
2357 return parse_callchain_report_opt(arg);
2358}
2359
2360static int setup_callchain(struct perf_evlist *evlist)
2361{
2362 u64 sample_type = perf_evlist__combined_sample_type(evlist);
2363 enum perf_call_graph_mode mode = CALLCHAIN_NONE;
2364
2365 if ((sample_type & PERF_SAMPLE_REGS_USER) &&
2366 (sample_type & PERF_SAMPLE_STACK_USER))
2367 mode = CALLCHAIN_DWARF;
2368 else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
2369 mode = CALLCHAIN_LBR;
2370 else if (sample_type & PERF_SAMPLE_CALLCHAIN)
2371 mode = CALLCHAIN_FP;
2372
2373 if (!callchain_param.enabled &&
2374 callchain_param.mode != CHAIN_NONE &&
2375 mode != CALLCHAIN_NONE) {
2376 symbol_conf.use_callchain = true;
2377 if (callchain_register_param(&callchain_param) < 0) {
2378 ui__error("Can't register callchain params.\n");
2379 return -EINVAL;
2380 }
2381 }
2382
2383 callchain_param.record_mode = mode;
2384 callchain_param.min_percent = 0;
2385 return 0;
2386}
2387
2388static int setup_display(const char *str)
2389{
2390 const char *display = str ?: "rmt";
2391
2392 if (!strcmp(display, "rmt"))
2393 c2c.display = DISPLAY_RMT;
2394 else if (!strcmp(display, "lcl"))
2395 c2c.display = DISPLAY_LCL;
2396 else {
2397 pr_err("failed: unknown display type: %s\n", str);
2398 return -1;
2399 }
2400
2401 return 0;
2402}
2403
/*
 * Iterate over the tokens of __buf separated by any character of __sep.
 * __buf is modified by strtok_r(); __tmp is the char * that holds the
 * tokenizer state between iterations.
 */
#define for_each_token(__tok, __buf, __sep, __tmp)		\
	for (__tok = strtok_r(__buf, __sep, &__tmp);		\
	     __tok;						\
	     __tok = strtok_r(NULL, __sep, &__tmp))
2407
2408static int build_cl_output(char *cl_sort, bool no_source)
2409{
2410 char *tok, *tmp, *buf = strdup(cl_sort);
2411 bool add_pid = false;
2412 bool add_tid = false;
2413 bool add_iaddr = false;
2414 bool add_sym = false;
2415 bool add_dso = false;
2416 bool add_src = false;
2417
2418 if (!buf)
2419 return -ENOMEM;
2420
2421 for_each_token(tok, buf, ",", tmp) {
2422 if (!strcmp(tok, "tid")) {
2423 add_tid = true;
2424 } else if (!strcmp(tok, "pid")) {
2425 add_pid = true;
2426 } else if (!strcmp(tok, "iaddr")) {
2427 add_iaddr = true;
2428 add_sym = true;
2429 add_dso = true;
2430 add_src = no_source ? false : true;
2431 } else if (!strcmp(tok, "dso")) {
2432 add_dso = true;
2433 } else if (strcmp(tok, "offset")) {
2434 pr_err("unrecognized sort token: %s\n", tok);
2435 return -EINVAL;
2436 }
2437 }
2438
2439 if (asprintf(&c2c.cl_output,
2440 "%s%s%s%s%s%s%s%s%s%s",
2441 c2c.use_stdio ? "cl_num_empty," : "",
2442 "percent_rmt_hitm,"
2443 "percent_lcl_hitm,"
2444 "percent_stores_l1hit,"
2445 "percent_stores_l1miss,"
2446 "offset,",
2447 add_pid ? "pid," : "",
2448 add_tid ? "tid," : "",
2449 add_iaddr ? "iaddr," : "",
2450 "mean_rmt,"
2451 "mean_lcl,"
2452 "mean_load,"
2453 "cpucnt,",
2454 add_sym ? "symbol," : "",
2455 add_dso ? "dso," : "",
2456 add_src ? "cl_srcline," : "",
2457 "node") < 0)
2458 return -ENOMEM;
2459
2460 c2c.show_src = add_src;
2461
2462 free(buf);
2463 return 0;
2464}
2465
2466static int setup_coalesce(const char *coalesce, bool no_source)
2467{
2468 const char *c = coalesce ?: coalesce_default;
2469
2470 if (asprintf(&c2c.cl_sort, "offset,%s", c) < 0)
2471 return -ENOMEM;
2472
2473 if (build_cl_output(c2c.cl_sort, no_source))
2474 return -1;
2475
2476 if (asprintf(&c2c.cl_resort, "offset,%s",
2477 c2c.display == DISPLAY_RMT ?
2478 "rmt_hitm,lcl_hitm" :
2479 "lcl_hitm,rmt_hitm") < 0)
2480 return -ENOMEM;
2481
2482 pr_debug("coalesce sort fields: %s\n", c2c.cl_sort);
2483 pr_debug("coalesce resort fields: %s\n", c2c.cl_resort);
2484 pr_debug("coalesce output fields: %s\n", c2c.cl_output);
2485 return 0;
2486}
2487
2488static int perf_c2c__report(int argc, const char **argv)
2489{
2490 struct perf_session *session;
2491 struct ui_progress prog;
2492 struct perf_data_file file = {
2493 .mode = PERF_DATA_MODE_READ,
2494 };
2495 char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT;
2496 const char *display = NULL;
2497 const char *coalesce = NULL;
2498 bool no_source = false;
2499 const struct option c2c_options[] = {
2500 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
2501 "file", "vmlinux pathname"),
2502 OPT_INCR('v', "verbose", &verbose,
2503 "be more verbose (show counter open errors, etc)"),
2504 OPT_STRING('i', "input", &input_name, "file",
2505 "the input file to process"),
2506 OPT_INCR('N', "node-info", &c2c.node_info,
2507 "show extra node info in report (repeat for more info)"),
2508#ifdef HAVE_SLANG_SUPPORT
2509 OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"),
2510#endif
2511 OPT_BOOLEAN(0, "stats", &c2c.stats_only,
2512 "Use the stdio interface"),
2513 OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full,
2514 "Display full length of symbols"),
2515 OPT_BOOLEAN(0, "no-source", &no_source,
2516 "Do not display Source Line column"),
2517 OPT_BOOLEAN(0, "show-all", &c2c.show_all,
2518 "Show all captured HITM lines."),
2519 OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param,
2520 "print_type,threshold[,print_limit],order,sort_key[,branch],value",
2521 callchain_help, &parse_callchain_opt,
2522 callchain_default_opt),
2523 OPT_STRING('d', "display", &display, NULL, "lcl,rmt"),
2524 OPT_STRING('c', "coalesce", &coalesce, "coalesce fields",
2525 "coalesce fields: pid,tid,iaddr,dso"),
2526 OPT_END()
2527 };
2528 int err = 0;
2529
2530 argc = parse_options(argc, argv, c2c_options, report_c2c_usage,
2531 PARSE_OPT_STOP_AT_NON_OPTION);
2532 if (argc)
2533 usage_with_options(report_c2c_usage, c2c_options);
2534
2535 if (c2c.stats_only)
2536 c2c.use_stdio = true;
2537
2538 if (c2c.use_stdio)
2539 use_browser = 0;
2540 else
2541 use_browser = 1;
2542
2543 setup_browser(false);
2544
2545 if (!input_name || !strlen(input_name))
2546 input_name = "perf.data";
2547
2548 file.path = input_name;
2549
2550 err = setup_display(display);
2551 if (err)
2552 goto out;
2553
2554 err = setup_coalesce(coalesce, no_source);
2555 if (err) {
2556 pr_debug("Failed to initialize hists\n");
2557 goto out;
2558 }
2559
2560 err = c2c_hists__init(&c2c.hists, "dcacheline", 2);
2561 if (err) {
2562 pr_debug("Failed to initialize hists\n");
2563 goto out;
2564 }
2565
2566 session = perf_session__new(&file, 0, &c2c.tool);
2567 if (session == NULL) {
2568 pr_debug("No memory for session\n");
2569 goto out;
2570 }
2571 err = setup_nodes(session);
2572 if (err) {
2573 pr_err("Failed setup nodes\n");
2574 goto out;
2575 }
2576
2577 err = setup_callchain(session->evlist);
2578 if (err)
2579 goto out_session;
2580
2581 if (symbol__init(&session->header.env) < 0)
2582 goto out_session;
2583
2584 /* No pipe support at the moment. */
2585 if (perf_data_file__is_pipe(session->file)) {
2586 pr_debug("No pipe support at the moment.\n");
2587 goto out_session;
2588 }
2589
2590 err = perf_session__process_events(session);
2591 if (err) {
2592 pr_err("failed to process sample\n");
2593 goto out_session;
2594 }
2595
2596 c2c_hists__reinit(&c2c.hists,
2597 "cl_idx,"
2598 "dcacheline,"
2599 "tot_recs,"
2600 "percent_hitm,"
2601 "tot_hitm,lcl_hitm,rmt_hitm,"
2602 "stores,stores_l1hit,stores_l1miss,"
2603 "dram_lcl,dram_rmt,"
2604 "ld_llcmiss,"
2605 "tot_loads,"
2606 "ld_fbhit,ld_l1hit,ld_l2hit,"
2607 "ld_lclhit,ld_rmthit",
2608 c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm"
2609 );
2610
2611 ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting...");
2612
2613 hists__collapse_resort(&c2c.hists.hists, NULL);
2614 hists__output_resort_cb(&c2c.hists.hists, &prog, resort_hitm_cb);
2615 hists__iterate_cb(&c2c.hists.hists, resort_cl_cb);
2616
2617 ui_progress__finish();
2618
2619 ui_quirks();
2620
2621 perf_c2c_display(session);
2622
2623out_session:
2624 perf_session__delete(session);
2625out:
2626 return err;
2627}
2628
2629static int parse_record_events(const struct option *opt __maybe_unused,
2630 const char *str, int unset __maybe_unused)
2631{
2632 bool *event_set = (bool *) opt->value;
2633
2634 *event_set = true;
2635 return perf_mem_events__parse(str);
2636}
2637
2638
/* Usage lines shown for 'perf c2c record'. */
static const char * const __usage_record[] = {
	"perf c2c record [<options>] [<command>]",
	"perf c2c record [<options>] -- <command> [<options>]",
	NULL
};

/* Usage table handed to parse_options() by perf_c2c__record(). */
static const char * const *record_mem_usage = __usage_record;
2646
2647static int perf_c2c__record(int argc, const char **argv)
2648{
2649 int rec_argc, i = 0, j;
2650 const char **rec_argv;
2651 int ret;
2652 bool all_user = false, all_kernel = false;
2653 bool event_set = false;
2654 struct option options[] = {
2655 OPT_CALLBACK('e', "event", &event_set, "event",
2656 "event selector. Use 'perf mem record -e list' to list available events",
2657 parse_record_events),
2658 OPT_INCR('v', "verbose", &verbose,
2659 "be more verbose (show counter open errors, etc)"),
2660 OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"),
2661 OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"),
2662 OPT_UINTEGER('l', "ldlat", &perf_mem_events__loads_ldlat, "setup mem-loads latency"),
2663 OPT_END()
2664 };
2665
2666 if (perf_mem_events__init()) {
2667 pr_err("failed: memory events not supported\n");
2668 return -1;
2669 }
2670
2671 argc = parse_options(argc, argv, options, record_mem_usage,
2672 PARSE_OPT_KEEP_UNKNOWN);
2673
2674 rec_argc = argc + 10; /* max number of arguments */
2675 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2676 if (!rec_argv)
2677 return -1;
2678
2679 rec_argv[i++] = "record";
2680
2681 if (!event_set) {
2682 perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
2683 perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
2684 }
2685
2686 if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
2687 rec_argv[i++] = "-W";
2688
2689 rec_argv[i++] = "-d";
2690 rec_argv[i++] = "--sample-cpu";
2691
2692 for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
2693 if (!perf_mem_events[j].record)
2694 continue;
2695
2696 if (!perf_mem_events[j].supported) {
2697 pr_err("failed: event '%s' not supported\n",
2698 perf_mem_events[j].name);
2699 return -1;
2700 }
2701
2702 rec_argv[i++] = "-e";
2703 rec_argv[i++] = perf_mem_events__name(j);
2704 };
2705
2706 if (all_user)
2707 rec_argv[i++] = "--all-user";
2708
2709 if (all_kernel)
2710 rec_argv[i++] = "--all-kernel";
2711
2712 for (j = 0; j < argc; j++, i++)
2713 rec_argv[i] = argv[j];
2714
2715 if (verbose > 0) {
2716 pr_debug("calling: ");
2717
2718 j = 0;
2719
2720 while (rec_argv[j]) {
2721 pr_debug("%s ", rec_argv[j]);
2722 j++;
2723 }
2724 pr_debug("\n");
2725 }
2726
2727 ret = cmd_record(i, rec_argv, NULL);
2728 free(rec_argv);
2729 return ret;
2730}
2731
2732int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)
2733{
2734 const struct option c2c_options[] = {
2735 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2736 OPT_END()
2737 };
2738
2739 argc = parse_options(argc, argv, c2c_options, c2c_usage,
2740 PARSE_OPT_STOP_AT_NON_OPTION);
2741
2742 if (!argc)
2743 usage_with_options(c2c_usage, c2c_options);
2744
2745 if (!strncmp(argv[0], "rec", 3)) {
2746 return perf_c2c__record(argc, argv);
2747 } else if (!strncmp(argv[0], "rep", 3)) {
2748 return perf_c2c__report(argc, argv);
2749 } else {
2750 usage_with_options(c2c_usage, c2c_options);
2751 }
2752
2753 return 0;
2754}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 41c24010ab43..0bcf68e98ccc 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -18,6 +18,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix);
18int cmd_buildid_cache(int argc, const char **argv, const char *prefix); 18int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
19int cmd_buildid_list(int argc, const char **argv, const char *prefix); 19int cmd_buildid_list(int argc, const char **argv, const char *prefix);
20int cmd_config(int argc, const char **argv, const char *prefix); 20int cmd_config(int argc, const char **argv, const char *prefix);
21int cmd_c2c(int argc, const char **argv, const char *prefix);
21int cmd_diff(int argc, const char **argv, const char *prefix); 22int cmd_diff(int argc, const char **argv, const char *prefix);
22int cmd_evlist(int argc, const char **argv, const char *prefix); 23int cmd_evlist(int argc, const char **argv, const char *prefix);
23int cmd_help(int argc, const char **argv, const char *prefix); 24int cmd_help(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 64c06961bfe4..aa23b3347d6b 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -43,6 +43,7 @@ static struct cmd_struct commands[] = {
43 { "buildid-cache", cmd_buildid_cache, 0 }, 43 { "buildid-cache", cmd_buildid_cache, 0 },
44 { "buildid-list", cmd_buildid_list, 0 }, 44 { "buildid-list", cmd_buildid_list, 0 },
45 { "config", cmd_config, 0 }, 45 { "config", cmd_config, 0 },
46 { "c2c", cmd_c2c, 0 },
46 { "diff", cmd_diff, 0 }, 47 { "diff", cmd_diff, 0 },
47 { "evlist", cmd_evlist, 0 }, 48 { "evlist", cmd_evlist, 0 },
48 { "help", cmd_help, 0 }, 49 { "help", cmd_help, 0 },
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 4ffff7be9299..31d6d5a7c2dc 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -30,7 +30,7 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd,
30 30
31static bool hist_browser__has_filter(struct hist_browser *hb) 31static bool hist_browser__has_filter(struct hist_browser *hb)
32{ 32{
33 return hists__has_filter(hb->hists) || hb->min_pcnt || symbol_conf.has_filter; 33 return hists__has_filter(hb->hists) || hb->min_pcnt || symbol_conf.has_filter || hb->c2c_filter;
34} 34}
35 35
36static int hist_browser__get_folding(struct hist_browser *browser) 36static int hist_browser__get_folding(struct hist_browser *browser)
diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h
index 39bd0f28f211..23d6acb84800 100644
--- a/tools/perf/ui/browsers/hists.h
+++ b/tools/perf/ui/browsers/hists.h
@@ -18,6 +18,7 @@ struct hist_browser {
18 u64 nr_non_filtered_entries; 18 u64 nr_non_filtered_entries;
19 u64 nr_hierarchy_entries; 19 u64 nr_hierarchy_entries;
20 u64 nr_callchain_rows; 20 u64 nr_callchain_rows;
21 bool c2c_filter;
21 22
22 /* Get title string. */ 23 /* Get title string. */
23 int (*title)(struct hist_browser *browser, 24 int (*title)(struct hist_browser *browser,
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index b02992efb513..e1be4132054d 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1195,6 +1195,7 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he,
1195 case HIST_FILTER__GUEST: 1195 case HIST_FILTER__GUEST:
1196 case HIST_FILTER__HOST: 1196 case HIST_FILTER__HOST:
1197 case HIST_FILTER__SOCKET: 1197 case HIST_FILTER__SOCKET:
1198 case HIST_FILTER__C2C:
1198 default: 1199 default:
1199 return; 1200 return;
1200 } 1201 }
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 9928fed8bc59..d4b6514eeef5 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -22,6 +22,7 @@ enum hist_filter {
22 HIST_FILTER__GUEST, 22 HIST_FILTER__GUEST,
23 HIST_FILTER__HOST, 23 HIST_FILTER__HOST,
24 HIST_FILTER__SOCKET, 24 HIST_FILTER__SOCKET,
25 HIST_FILTER__C2C,
25}; 26};
26 27
27enum hist_column { 28enum hist_column {
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index bbc368e7d1e4..e50773286ef6 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -9,6 +9,7 @@
9#include "mem-events.h" 9#include "mem-events.h"
10#include "debug.h" 10#include "debug.h"
11#include "symbol.h" 11#include "symbol.h"
12#include "sort.h"
12 13
13unsigned int perf_mem_events__loads_ldlat = 30; 14unsigned int perf_mem_events__loads_ldlat = 30;
14 15
@@ -268,3 +269,130 @@ int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_in
268 269
269 return i; 270 return i;
270} 271}
272
273int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
274{
275 union perf_mem_data_src *data_src = &mi->data_src;
276 u64 daddr = mi->daddr.addr;
277 u64 op = data_src->mem_op;
278 u64 lvl = data_src->mem_lvl;
279 u64 snoop = data_src->mem_snoop;
280 u64 lock = data_src->mem_lock;
281 int err = 0;
282
283#define P(a, b) PERF_MEM_##a##_##b
284
285 stats->nr_entries++;
286
287 if (lock & P(LOCK, LOCKED)) stats->locks++;
288
289 if (op & P(OP, LOAD)) {
290 /* load */
291 stats->load++;
292
293 if (!daddr) {
294 stats->ld_noadrs++;
295 return -1;
296 }
297
298 if (lvl & P(LVL, HIT)) {
299 if (lvl & P(LVL, UNC)) stats->ld_uncache++;
300 if (lvl & P(LVL, IO)) stats->ld_io++;
301 if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
302 if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
303 if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
304 if (lvl & P(LVL, L3 )) {
305 if (snoop & P(SNOOP, HITM))
306 stats->lcl_hitm++;
307 else
308 stats->ld_llchit++;
309 }
310
311 if (lvl & P(LVL, LOC_RAM)) {
312 stats->lcl_dram++;
313 if (snoop & P(SNOOP, HIT))
314 stats->ld_shared++;
315 else
316 stats->ld_excl++;
317 }
318
319 if ((lvl & P(LVL, REM_RAM1)) ||
320 (lvl & P(LVL, REM_RAM2))) {
321 stats->rmt_dram++;
322 if (snoop & P(SNOOP, HIT))
323 stats->ld_shared++;
324 else
325 stats->ld_excl++;
326 }
327 }
328
329 if ((lvl & P(LVL, REM_CCE1)) ||
330 (lvl & P(LVL, REM_CCE2))) {
331 if (snoop & P(SNOOP, HIT))
332 stats->rmt_hit++;
333 else if (snoop & P(SNOOP, HITM))
334 stats->rmt_hitm++;
335 }
336
337 if ((lvl & P(LVL, MISS)))
338 stats->ld_miss++;
339
340 } else if (op & P(OP, STORE)) {
341 /* store */
342 stats->store++;
343
344 if (!daddr) {
345 stats->st_noadrs++;
346 return -1;
347 }
348
349 if (lvl & P(LVL, HIT)) {
350 if (lvl & P(LVL, UNC)) stats->st_uncache++;
351 if (lvl & P(LVL, L1 )) stats->st_l1hit++;
352 }
353 if (lvl & P(LVL, MISS))
354 if (lvl & P(LVL, L1)) stats->st_l1miss++;
355 } else {
356 /* unparsable data_src? */
357 stats->noparse++;
358 return -1;
359 }
360
361 if (!mi->daddr.map || !mi->iaddr.map) {
362 stats->nomap++;
363 return -1;
364 }
365
366#undef P
367 return err;
368}
369
370void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
371{
372 stats->nr_entries += add->nr_entries;
373
374 stats->locks += add->locks;
375 stats->store += add->store;
376 stats->st_uncache += add->st_uncache;
377 stats->st_noadrs += add->st_noadrs;
378 stats->st_l1hit += add->st_l1hit;
379 stats->st_l1miss += add->st_l1miss;
380 stats->load += add->load;
381 stats->ld_excl += add->ld_excl;
382 stats->ld_shared += add->ld_shared;
383 stats->ld_uncache += add->ld_uncache;
384 stats->ld_io += add->ld_io;
385 stats->ld_miss += add->ld_miss;
386 stats->ld_noadrs += add->ld_noadrs;
387 stats->ld_fbhit += add->ld_fbhit;
388 stats->ld_l1hit += add->ld_l1hit;
389 stats->ld_l2hit += add->ld_l2hit;
390 stats->ld_llchit += add->ld_llchit;
391 stats->lcl_hitm += add->lcl_hitm;
392 stats->rmt_hitm += add->rmt_hitm;
393 stats->rmt_hit += add->rmt_hit;
394 stats->lcl_dram += add->lcl_dram;
395 stats->rmt_dram += add->rmt_dram;
396 stats->nomap += add->nomap;
397 stats->noparse += add->noparse;
398}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index 7f69bf9d789d..faf80403b519 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -2,6 +2,10 @@
2#define __PERF_MEM_EVENTS_H 2#define __PERF_MEM_EVENTS_H
3 3
4#include <stdbool.h> 4#include <stdbool.h>
5#include <stdint.h>
6#include <stdio.h>
7#include <linux/types.h>
8#include "stat.h"
5 9
6struct perf_mem_event { 10struct perf_mem_event {
7 bool record; 11 bool record;
@@ -33,4 +37,37 @@ int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
33 37
34int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info); 38int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info);
35 39
40struct c2c_stats {
41 u32 nr_entries;
42
43 u32 locks; /* count of 'lock' transactions */
44 u32 store; /* count of all stores in trace */
45 u32 st_uncache; /* stores to uncacheable address */
46 u32 st_noadrs; /* cacheable store with no address */
47 u32 st_l1hit; /* count of stores that hit L1D */
48 u32 st_l1miss; /* count of stores that miss L1D */
49 u32 load; /* count of all loads in trace */
50 u32 ld_excl; /* exclusive loads, rmt/lcl DRAM - snp none/miss */
51 u32 ld_shared; /* shared loads, rmt/lcl DRAM - snp hit */
52 u32 ld_uncache; /* loads to uncacheable address */
53 u32 ld_io; /* loads to io address */
54 u32 ld_miss; /* loads miss */
55 u32 ld_noadrs; /* cacheable load with no address */
56 u32 ld_fbhit; /* count of loads hitting Fill Buffer */
57 u32 ld_l1hit; /* count of loads that hit L1D */
58 u32 ld_l2hit; /* count of loads that hit L2D */
59 u32 ld_llchit; /* count of loads that hit LLC */
60 u32 lcl_hitm; /* count of loads with local HITM */
61 u32 rmt_hitm; /* count of loads with remote HITM */
62 u32 rmt_hit; /* count of loads with remote hit clean; */
63 u32 lcl_dram; /* count of loads miss to local DRAM */
64 u32 rmt_dram; /* count of loads miss to remote DRAM */
65 u32 nomap; /* count of load/stores with no phys adrs */
66 u32 noparse; /* count of unparsable data sources */
67};
68
struct hist_entry;

/* Account the sample described by @mi in @stats; 0 on success, -1 when rejected. */
int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi);

/* Add every counter of @add to the matching counter of @stats. */
void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add);
72
36#endif /* __PERF_MEM_EVENTS_H */ 73#endif /* __PERF_MEM_EVENTS_H */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 452e15a10dd2..df622f4e301e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -315,7 +315,7 @@ struct sort_entry sort_sym = {
315 315
316/* --sort srcline */ 316/* --sort srcline */
317 317
318static char *hist_entry__get_srcline(struct hist_entry *he) 318char *hist_entry__get_srcline(struct hist_entry *he)
319{ 319{
320 struct map *map = he->ms.map; 320 struct map *map = he->ms.map;
321 321
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 099c97557d33..7aff317fc7c4 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -280,4 +280,5 @@ int64_t
280sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right); 280sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
281int64_t 281int64_t
282sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right); 282sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
283char *hist_entry__get_srcline(struct hist_entry *he);
283#endif /* __PERF_SORT_H */ 284#endif /* __PERF_SORT_H */