aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/util
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2016-10-22 04:22:05 -0400
committerIngo Molnar <mingo@kernel.org>2016-10-22 04:22:05 -0400
commite9c848928abf4cb60601e9ae7d336f0333c98bca (patch)
tree9fa3b9926f9c0b4f93495706c357221b533b213f /tools/perf/util
parent10b37cb59fa1e61fec1386f324615e0e8202cd87 (diff)
parentaf09b2d35e18f1a377aaa2bc4e5ba4abb98a1088 (diff)
Merge tag 'perf-c2c-for-mingo-20161021' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull new 'perf c2c' tool from Arnaldo Carvalho de Melo: - The 'perf c2c' tool provides a means for Shared Data C2C/HITM analysis. It allows you to track down cacheline contention. The tool is based on x86's load latency and precise store facility events provided by Intel CPUs. It was tested by Joe Mario and has proven useful in finding cacheline contention. Joe also wrote a blog post about the c2c tool, with examples: https://joemario.github.io/blog/2016/09/01/c2c-blog/ Excerpt of the content on this site: --- At a high level, “perf c2c” will show you: * The cachelines where false sharing was detected. * The readers and writers to those cachelines, and the offsets where those accesses occurred. * The pid, tid, instruction addr, function name, binary object name for those readers and writers. * The source file and line number for each reader and writer. * The average load latency for the loads to those cachelines. * Which NUMA nodes the samples for a cacheline came from and which CPUs were involved. Using perf c2c is similar to using the Linux perf tool today. First collect data with “perf c2c record”, then generate a report output with “perf c2c report”. --- There one finds extensive details on using the tool, with tips on reducing the volume of samples while still capturing enough to do its job. (Dick Fowles, Joe Mario, Don Zickus, Jiri Olsa) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/hist.c1
-rw-r--r--tools/perf/util/hist.h1
-rw-r--r--tools/perf/util/mem-events.c128
-rw-r--r--tools/perf/util/mem-events.h37
-rw-r--r--tools/perf/util/sort.c2
-rw-r--r--tools/perf/util/sort.h1
6 files changed, 169 insertions, 1 deletions
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index b02992efb513..e1be4132054d 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1195,6 +1195,7 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he,
1195 case HIST_FILTER__GUEST: 1195 case HIST_FILTER__GUEST:
1196 case HIST_FILTER__HOST: 1196 case HIST_FILTER__HOST:
1197 case HIST_FILTER__SOCKET: 1197 case HIST_FILTER__SOCKET:
1198 case HIST_FILTER__C2C:
1198 default: 1199 default:
1199 return; 1200 return;
1200 } 1201 }
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 9928fed8bc59..d4b6514eeef5 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -22,6 +22,7 @@ enum hist_filter {
22 HIST_FILTER__GUEST, 22 HIST_FILTER__GUEST,
23 HIST_FILTER__HOST, 23 HIST_FILTER__HOST,
24 HIST_FILTER__SOCKET, 24 HIST_FILTER__SOCKET,
25 HIST_FILTER__C2C,
25}; 26};
26 27
27enum hist_column { 28enum hist_column {
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index bbc368e7d1e4..e50773286ef6 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -9,6 +9,7 @@
9#include "mem-events.h" 9#include "mem-events.h"
10#include "debug.h" 10#include "debug.h"
11#include "symbol.h" 11#include "symbol.h"
12#include "sort.h"
12 13
13unsigned int perf_mem_events__loads_ldlat = 30; 14unsigned int perf_mem_events__loads_ldlat = 30;
14 15
@@ -268,3 +269,130 @@ int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_in
268 269
269 return i; 270 return i;
270} 271}
272
273int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
274{
275 union perf_mem_data_src *data_src = &mi->data_src;
276 u64 daddr = mi->daddr.addr;
277 u64 op = data_src->mem_op;
278 u64 lvl = data_src->mem_lvl;
279 u64 snoop = data_src->mem_snoop;
280 u64 lock = data_src->mem_lock;
281 int err = 0;
282
283#define P(a, b) PERF_MEM_##a##_##b
284
285 stats->nr_entries++;
286
287 if (lock & P(LOCK, LOCKED)) stats->locks++;
288
289 if (op & P(OP, LOAD)) {
290 /* load */
291 stats->load++;
292
293 if (!daddr) {
294 stats->ld_noadrs++;
295 return -1;
296 }
297
298 if (lvl & P(LVL, HIT)) {
299 if (lvl & P(LVL, UNC)) stats->ld_uncache++;
300 if (lvl & P(LVL, IO)) stats->ld_io++;
301 if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
302 if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
303 if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
304 if (lvl & P(LVL, L3 )) {
305 if (snoop & P(SNOOP, HITM))
306 stats->lcl_hitm++;
307 else
308 stats->ld_llchit++;
309 }
310
311 if (lvl & P(LVL, LOC_RAM)) {
312 stats->lcl_dram++;
313 if (snoop & P(SNOOP, HIT))
314 stats->ld_shared++;
315 else
316 stats->ld_excl++;
317 }
318
319 if ((lvl & P(LVL, REM_RAM1)) ||
320 (lvl & P(LVL, REM_RAM2))) {
321 stats->rmt_dram++;
322 if (snoop & P(SNOOP, HIT))
323 stats->ld_shared++;
324 else
325 stats->ld_excl++;
326 }
327 }
328
329 if ((lvl & P(LVL, REM_CCE1)) ||
330 (lvl & P(LVL, REM_CCE2))) {
331 if (snoop & P(SNOOP, HIT))
332 stats->rmt_hit++;
333 else if (snoop & P(SNOOP, HITM))
334 stats->rmt_hitm++;
335 }
336
337 if ((lvl & P(LVL, MISS)))
338 stats->ld_miss++;
339
340 } else if (op & P(OP, STORE)) {
341 /* store */
342 stats->store++;
343
344 if (!daddr) {
345 stats->st_noadrs++;
346 return -1;
347 }
348
349 if (lvl & P(LVL, HIT)) {
350 if (lvl & P(LVL, UNC)) stats->st_uncache++;
351 if (lvl & P(LVL, L1 )) stats->st_l1hit++;
352 }
353 if (lvl & P(LVL, MISS))
354 if (lvl & P(LVL, L1)) stats->st_l1miss++;
355 } else {
356 /* unparsable data_src? */
357 stats->noparse++;
358 return -1;
359 }
360
361 if (!mi->daddr.map || !mi->iaddr.map) {
362 stats->nomap++;
363 return -1;
364 }
365
366#undef P
367 return err;
368}
369
370void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
371{
372 stats->nr_entries += add->nr_entries;
373
374 stats->locks += add->locks;
375 stats->store += add->store;
376 stats->st_uncache += add->st_uncache;
377 stats->st_noadrs += add->st_noadrs;
378 stats->st_l1hit += add->st_l1hit;
379 stats->st_l1miss += add->st_l1miss;
380 stats->load += add->load;
381 stats->ld_excl += add->ld_excl;
382 stats->ld_shared += add->ld_shared;
383 stats->ld_uncache += add->ld_uncache;
384 stats->ld_io += add->ld_io;
385 stats->ld_miss += add->ld_miss;
386 stats->ld_noadrs += add->ld_noadrs;
387 stats->ld_fbhit += add->ld_fbhit;
388 stats->ld_l1hit += add->ld_l1hit;
389 stats->ld_l2hit += add->ld_l2hit;
390 stats->ld_llchit += add->ld_llchit;
391 stats->lcl_hitm += add->lcl_hitm;
392 stats->rmt_hitm += add->rmt_hitm;
393 stats->rmt_hit += add->rmt_hit;
394 stats->lcl_dram += add->lcl_dram;
395 stats->rmt_dram += add->rmt_dram;
396 stats->nomap += add->nomap;
397 stats->noparse += add->noparse;
398}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index 7f69bf9d789d..faf80403b519 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -2,6 +2,10 @@
2#define __PERF_MEM_EVENTS_H 2#define __PERF_MEM_EVENTS_H
3 3
4#include <stdbool.h> 4#include <stdbool.h>
5#include <stdint.h>
6#include <stdio.h>
7#include <linux/types.h>
8#include "stat.h"
5 9
6struct perf_mem_event { 10struct perf_mem_event {
7 bool record; 11 bool record;
@@ -33,4 +37,37 @@ int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
33 37
34int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info); 38int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info);
35 39
40struct c2c_stats {
41 u32 nr_entries;
42
43 u32 locks; /* count of 'lock' transactions */
44 u32 store; /* count of all stores in trace */
45 u32 st_uncache; /* stores to uncacheable address */
46 u32 st_noadrs; /* cacheable store with no address */
47 u32 st_l1hit; /* count of stores that hit L1D */
48 u32 st_l1miss; /* count of stores that miss L1D */
49 u32 load; /* count of all loads in trace */
50 u32 ld_excl; /* exclusive loads, rmt/lcl DRAM - snp none/miss */
51 u32 ld_shared; /* shared loads, rmt/lcl DRAM - snp hit */
52 u32 ld_uncache; /* loads to uncacheable address */
53 u32 ld_io; /* loads to io address */
54 u32 ld_miss; /* loads miss */
55 u32 ld_noadrs; /* cacheable load with no address */
56 u32 ld_fbhit; /* count of loads hitting Fill Buffer */
57 u32 ld_l1hit; /* count of loads that hit L1D */
58 u32 ld_l2hit; /* count of loads that hit L2D */
59 u32 ld_llchit; /* count of loads that hit LLC */
60 u32 lcl_hitm; /* count of loads with local HITM */
61 u32 rmt_hitm; /* count of loads with remote HITM */
62 u32 rmt_hit; /* count of loads with remote hit clean; */
63 u32 lcl_dram; /* count of loads miss to local DRAM */
64 u32 rmt_dram; /* count of loads miss to remote DRAM */
65 u32 nomap; /* count of load/stores with no phys adrs */
66 u32 noparse; /* count of unparsable data sources */
67};
68
69struct hist_entry;
70int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi);
71void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add);
72
36#endif /* __PERF_MEM_EVENTS_H */ 73#endif /* __PERF_MEM_EVENTS_H */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 452e15a10dd2..df622f4e301e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -315,7 +315,7 @@ struct sort_entry sort_sym = {
315 315
316/* --sort srcline */ 316/* --sort srcline */
317 317
318static char *hist_entry__get_srcline(struct hist_entry *he) 318char *hist_entry__get_srcline(struct hist_entry *he)
319{ 319{
320 struct map *map = he->ms.map; 320 struct map *map = he->ms.map;
321 321
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 099c97557d33..7aff317fc7c4 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -280,4 +280,5 @@ int64_t
280sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right); 280sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
281int64_t 281int64_t
282sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right); 282sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
283char *hist_entry__get_srcline(struct hist_entry *he);
283#endif /* __PERF_SORT_H */ 284#endif /* __PERF_SORT_H */