aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ingo Molnar <mingo@kernel.org> 2013-04-01 15:41:35 -0400
committer Ingo Molnar <mingo@kernel.org> 2013-04-01 15:41:35 -0400
commit b847d0501afec4c7b12eb276aec10a2834f953ea (patch)
tree 69ff786e4280456345a03de9f70f85baf0e34d66
parent 0a11953851213fd1d3eebcb68b4a537d458c70c2 (diff)
parent d06f7911792780c6e973a137b766530c8d031aeb (diff)
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

 * Revert "perf sched: Handle PERF_RECORD_EXIT events" to get 'perf sched lat' back working.

 * We don't use Newt anymore, just plain libslang.

 * Kill a bunch of die() calls, from Namhyung Kim.

 * Add --no-demangle to report/top, from Namhyung Kim.

 * Fix dependency of the python binding wrt libtraceevent, from Naohiro Aota.

 * Introduce per core aggregation in 'perf stat', from Stephane Eranian.

 * Add memory profiling via PEBS, from Stephane Eranian.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- arch/x86/include/uapi/asm/msr-index.h | 1
-rw-r--r-- arch/x86/kernel/cpu/perf_event.c | 61
-rw-r--r-- arch/x86/kernel/cpu/perf_event.h | 56
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel.c | 35
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_ds.c | 182
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2
-rw-r--r-- include/linux/perf_event.h | 6
-rw-r--r-- include/uapi/linux/perf_event.h | 71
-rw-r--r-- kernel/events/core.c | 15
-rw-r--r-- tools/perf/Documentation/perf-mem.txt | 48
-rw-r--r-- tools/perf/Documentation/perf-record.txt | 6
-rw-r--r-- tools/perf/Documentation/perf-report.txt | 6
-rw-r--r-- tools/perf/Documentation/perf-stat.txt | 10
-rw-r--r-- tools/perf/Documentation/perf-top.txt | 2
-rw-r--r-- tools/perf/Makefile | 23
-rw-r--r-- tools/perf/builtin-annotate.c | 2
-rw-r--r-- tools/perf/builtin-diff.c | 7
-rw-r--r-- tools/perf/builtin-mem.c | 242
-rw-r--r-- tools/perf/builtin-record.c | 4
-rw-r--r-- tools/perf/builtin-report.c | 147
-rw-r--r-- tools/perf/builtin-sched.c | 1
-rw-r--r-- tools/perf/builtin-stat.c | 236
-rw-r--r-- tools/perf/builtin-top.c | 5
-rw-r--r-- tools/perf/builtin.h | 1
-rw-r--r-- tools/perf/command-list.txt | 1
-rw-r--r-- tools/perf/config/feature-tests.mak | 12
-rw-r--r-- tools/perf/perf.c | 4
-rw-r--r-- tools/perf/perf.h | 1
-rw-r--r-- tools/perf/tests/hists_link.c | 4
-rw-r--r-- tools/perf/ui/browser.c | 9
-rw-r--r-- tools/perf/ui/browser.h | 1
-rw-r--r-- tools/perf/ui/browsers/annotate.c | 1
-rw-r--r-- tools/perf/ui/browsers/hists.c | 1
-rw-r--r-- tools/perf/ui/browsers/map.c | 60
-rw-r--r-- tools/perf/ui/browsers/scripts.c | 1
-rw-r--r-- tools/perf/ui/tui/setup.c | 21
-rw-r--r-- tools/perf/ui/ui.h | 2
-rw-r--r-- tools/perf/util/annotate.h | 2
-rw-r--r-- tools/perf/util/cpumap.c | 86
-rw-r--r-- tools/perf/util/cpumap.h | 12
-rw-r--r-- tools/perf/util/event.h | 9
-rw-r--r-- tools/perf/util/evsel.c | 19
-rw-r--r-- tools/perf/util/header.c | 11
-rw-r--r-- tools/perf/util/hist.c | 110
-rw-r--r-- tools/perf/util/hist.h | 23
-rw-r--r-- tools/perf/util/machine.c | 42
-rw-r--r-- tools/perf/util/machine.h | 3
-rw-r--r-- tools/perf/util/session.c | 8
-rw-r--r-- tools/perf/util/sort.c | 414
-rw-r--r-- tools/perf/util/sort.h | 12
-rw-r--r-- tools/perf/util/symbol-elf.c | 9
-rw-r--r-- tools/perf/util/symbol.c | 1
-rw-r--r-- tools/perf/util/symbol.h | 9
-rw-r--r-- tools/perf/util/trace-event-info.c | 347
-rw-r--r-- tools/perf/util/trace-event-read.c | 272
-rw-r--r-- tools/perf/util/trace-event.h | 2
-rw-r--r-- tools/perf/util/util.h | 2
57 files changed, 2217 insertions, 463 deletions
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 892ce40a7470..b31798d5e62e 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -71,6 +71,7 @@
71#define MSR_IA32_PEBS_ENABLE 0x000003f1 71#define MSR_IA32_PEBS_ENABLE 0x000003f1
72#define MSR_IA32_DS_AREA 0x00000600 72#define MSR_IA32_DS_AREA 0x00000600
73#define MSR_IA32_PERF_CAPABILITIES 0x00000345 73#define MSR_IA32_PERF_CAPABILITIES 0x00000345
74#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
74 75
75#define MSR_MTRRfix64K_00000 0x00000250 76#define MSR_MTRRfix64K_00000 0x00000250
76#define MSR_MTRRfix16K_80000 0x00000258 77#define MSR_MTRRfix16K_80000 0x00000258
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bf0f01aea994..5ed7a4c5baf7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1316,9 +1316,16 @@ static struct attribute_group x86_pmu_format_group = {
1316 */ 1316 */
1317static void __init filter_events(struct attribute **attrs) 1317static void __init filter_events(struct attribute **attrs)
1318{ 1318{
1319 struct device_attribute *d;
1320 struct perf_pmu_events_attr *pmu_attr;
1319 int i, j; 1321 int i, j;
1320 1322
1321 for (i = 0; attrs[i]; i++) { 1323 for (i = 0; attrs[i]; i++) {
1324 d = (struct device_attribute *)attrs[i];
1325 pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
1326 /* str trumps id */
1327 if (pmu_attr->event_str)
1328 continue;
1322 if (x86_pmu.event_map(i)) 1329 if (x86_pmu.event_map(i))
1323 continue; 1330 continue;
1324 1331
@@ -1330,22 +1337,45 @@ static void __init filter_events(struct attribute **attrs)
1330 } 1337 }
1331} 1338}
1332 1339
1333static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, 1340/* Merge two pointer arrays */
1341static __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
1342{
1343 struct attribute **new;
1344 int j, i;
1345
1346 for (j = 0; a[j]; j++)
1347 ;
1348 for (i = 0; b[i]; i++)
1349 j++;
1350 j++;
1351
1352 new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
1353 if (!new)
1354 return NULL;
1355
1356 j = 0;
1357 for (i = 0; a[i]; i++)
1358 new[j++] = a[i];
1359 for (i = 0; b[i]; i++)
1360 new[j++] = b[i];
1361 new[j] = NULL;
1362
1363 return new;
1364}
1365
1366ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
1334 char *page) 1367 char *page)
1335{ 1368{
1336 struct perf_pmu_events_attr *pmu_attr = \ 1369 struct perf_pmu_events_attr *pmu_attr = \
1337 container_of(attr, struct perf_pmu_events_attr, attr); 1370 container_of(attr, struct perf_pmu_events_attr, attr);
1338
1339 u64 config = x86_pmu.event_map(pmu_attr->id); 1371 u64 config = x86_pmu.event_map(pmu_attr->id);
1340 return x86_pmu.events_sysfs_show(page, config);
1341}
1342 1372
1343#define EVENT_VAR(_id) event_attr_##_id 1373 /* string trumps id */
1344#define EVENT_PTR(_id) &event_attr_##_id.attr.attr 1374 if (pmu_attr->event_str)
1375 return sprintf(page, "%s", pmu_attr->event_str);
1345 1376
1346#define EVENT_ATTR(_name, _id) \ 1377 return x86_pmu.events_sysfs_show(page, config);
1347 PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \ 1378}
1348 events_sysfs_show)
1349 1379
1350EVENT_ATTR(cpu-cycles, CPU_CYCLES ); 1380EVENT_ATTR(cpu-cycles, CPU_CYCLES );
1351EVENT_ATTR(instructions, INSTRUCTIONS ); 1381EVENT_ATTR(instructions, INSTRUCTIONS );
@@ -1459,16 +1489,27 @@ static int __init init_hw_perf_events(void)
1459 1489
1460 unconstrained = (struct event_constraint) 1490 unconstrained = (struct event_constraint)
1461 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, 1491 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
1462 0, x86_pmu.num_counters, 0); 1492 0, x86_pmu.num_counters, 0, 0);
1463 1493
1464 x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ 1494 x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
1465 x86_pmu_format_group.attrs = x86_pmu.format_attrs; 1495 x86_pmu_format_group.attrs = x86_pmu.format_attrs;
1466 1496
1497 if (x86_pmu.event_attrs)
1498 x86_pmu_events_group.attrs = x86_pmu.event_attrs;
1499
1467 if (!x86_pmu.events_sysfs_show) 1500 if (!x86_pmu.events_sysfs_show)
1468 x86_pmu_events_group.attrs = &empty_attrs; 1501 x86_pmu_events_group.attrs = &empty_attrs;
1469 else 1502 else
1470 filter_events(x86_pmu_events_group.attrs); 1503 filter_events(x86_pmu_events_group.attrs);
1471 1504
1505 if (x86_pmu.cpu_events) {
1506 struct attribute **tmp;
1507
1508 tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
1509 if (!WARN_ON(!tmp))
1510 x86_pmu_events_group.attrs = tmp;
1511 }
1512
1472 pr_info("... version: %d\n", x86_pmu.version); 1513 pr_info("... version: %d\n", x86_pmu.version);
1473 pr_info("... bit width: %d\n", x86_pmu.cntval_bits); 1514 pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
1474 pr_info("... generic registers: %d\n", x86_pmu.num_counters); 1515 pr_info("... generic registers: %d\n", x86_pmu.num_counters);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 7f5c75c2afdd..ba9aadfa683b 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -46,6 +46,7 @@ enum extra_reg_type {
46 EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ 46 EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
47 EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ 47 EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
48 EXTRA_REG_LBR = 2, /* lbr_select */ 48 EXTRA_REG_LBR = 2, /* lbr_select */
49 EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
49 50
50 EXTRA_REG_MAX /* number of entries needed */ 51 EXTRA_REG_MAX /* number of entries needed */
51}; 52};
@@ -59,7 +60,13 @@ struct event_constraint {
59 u64 cmask; 60 u64 cmask;
60 int weight; 61 int weight;
61 int overlap; 62 int overlap;
63 int flags;
62}; 64};
65/*
66 * struct event_constraint flags
67 */
68#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
69#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
63 70
64struct amd_nb { 71struct amd_nb {
65 int nb_id; /* NorthBridge id */ 72 int nb_id; /* NorthBridge id */
@@ -170,16 +177,17 @@ struct cpu_hw_events {
170 void *kfree_on_online; 177 void *kfree_on_online;
171}; 178};
172 179
173#define __EVENT_CONSTRAINT(c, n, m, w, o) {\ 180#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
174 { .idxmsk64 = (n) }, \ 181 { .idxmsk64 = (n) }, \
175 .code = (c), \ 182 .code = (c), \
176 .cmask = (m), \ 183 .cmask = (m), \
177 .weight = (w), \ 184 .weight = (w), \
178 .overlap = (o), \ 185 .overlap = (o), \
186 .flags = f, \
179} 187}
180 188
181#define EVENT_CONSTRAINT(c, n, m) \ 189#define EVENT_CONSTRAINT(c, n, m) \
182 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0) 190 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
183 191
184/* 192/*
185 * The overlap flag marks event constraints with overlapping counter 193 * The overlap flag marks event constraints with overlapping counter
@@ -203,7 +211,7 @@ struct cpu_hw_events {
203 * and its counter masks must be kept at a minimum. 211 * and its counter masks must be kept at a minimum.
204 */ 212 */
205#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \ 213#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
206 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1) 214 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
207 215
208/* 216/*
209 * Constraint on the Event code. 217 * Constraint on the Event code.
@@ -231,6 +239,14 @@ struct cpu_hw_events {
231#define INTEL_UEVENT_CONSTRAINT(c, n) \ 239#define INTEL_UEVENT_CONSTRAINT(c, n) \
232 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) 240 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
233 241
242#define INTEL_PLD_CONSTRAINT(c, n) \
243 __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
244 HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
245
246#define INTEL_PST_CONSTRAINT(c, n) \
247 __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
248 HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
249
234#define EVENT_CONSTRAINT_END \ 250#define EVENT_CONSTRAINT_END \
235 EVENT_CONSTRAINT(0, 0, 0) 251 EVENT_CONSTRAINT(0, 0, 0)
236 252
@@ -260,12 +276,22 @@ struct extra_reg {
260 .msr = (ms), \ 276 .msr = (ms), \
261 .config_mask = (m), \ 277 .config_mask = (m), \
262 .valid_mask = (vm), \ 278 .valid_mask = (vm), \
263 .idx = EXTRA_REG_##i \ 279 .idx = EXTRA_REG_##i, \
264 } 280 }
265 281
266#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ 282#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
267 EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) 283 EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
268 284
285#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
286 EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
287 ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
288
289#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
290 INTEL_UEVENT_EXTRA_REG(c, \
291 MSR_PEBS_LD_LAT_THRESHOLD, \
292 0xffff, \
293 LDLAT)
294
269#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) 295#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
270 296
271union perf_capabilities { 297union perf_capabilities {
@@ -355,8 +381,10 @@ struct x86_pmu {
355 */ 381 */
356 int attr_rdpmc; 382 int attr_rdpmc;
357 struct attribute **format_attrs; 383 struct attribute **format_attrs;
384 struct attribute **event_attrs;
358 385
359 ssize_t (*events_sysfs_show)(char *page, u64 config); 386 ssize_t (*events_sysfs_show)(char *page, u64 config);
387 struct attribute **cpu_events;
360 388
361 /* 389 /*
362 * CPU Hotplug hooks 390 * CPU Hotplug hooks
@@ -421,6 +449,23 @@ do { \
421#define ERF_NO_HT_SHARING 1 449#define ERF_NO_HT_SHARING 1
422#define ERF_HAS_RSP_1 2 450#define ERF_HAS_RSP_1 2
423 451
452#define EVENT_VAR(_id) event_attr_##_id
453#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
454
455#define EVENT_ATTR(_name, _id) \
456static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
457 .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
458 .id = PERF_COUNT_HW_##_id, \
459 .event_str = NULL, \
460};
461
462#define EVENT_ATTR_STR(_name, v, str) \
463static struct perf_pmu_events_attr event_attr_##v = { \
464 .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
465 .id = 0, \
466 .event_str = str, \
467};
468
424extern struct x86_pmu x86_pmu __read_mostly; 469extern struct x86_pmu x86_pmu __read_mostly;
425 470
426DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events); 471DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
@@ -628,6 +673,9 @@ int p6_pmu_init(void);
628 673
629int knc_pmu_init(void); 674int knc_pmu_init(void);
630 675
676ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
677 char *page);
678
631#else /* CONFIG_CPU_SUP_INTEL */ 679#else /* CONFIG_CPU_SUP_INTEL */
632 680
633static inline void reserve_ds_buffers(void) 681static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index dab7580c47ae..e84c4ba44b59 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -81,6 +81,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
81static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = 81static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
82{ 82{
83 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), 83 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
84 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
84 EVENT_EXTRA_END 85 EVENT_EXTRA_END
85}; 86};
86 87
@@ -136,6 +137,7 @@ static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
136{ 137{
137 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), 138 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
138 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), 139 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
140 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
139 EVENT_EXTRA_END 141 EVENT_EXTRA_END
140}; 142};
141 143
@@ -155,9 +157,25 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
155static struct extra_reg intel_snb_extra_regs[] __read_mostly = { 157static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
156 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), 158 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
157 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), 159 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
160 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
158 EVENT_EXTRA_END 161 EVENT_EXTRA_END
159}; 162};
160 163
164EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
165EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
166EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
167
168struct attribute *nhm_events_attrs[] = {
169 EVENT_PTR(mem_ld_nhm),
170 NULL,
171};
172
173struct attribute *snb_events_attrs[] = {
174 EVENT_PTR(mem_ld_snb),
175 EVENT_PTR(mem_st_snb),
176 NULL,
177};
178
161static u64 intel_pmu_event_map(int hw_event) 179static u64 intel_pmu_event_map(int hw_event)
162{ 180{
163 return intel_perfmon_event_map[hw_event]; 181 return intel_perfmon_event_map[hw_event];
@@ -1392,8 +1410,11 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1392 1410
1393 if (x86_pmu.event_constraints) { 1411 if (x86_pmu.event_constraints) {
1394 for_each_event_constraint(c, x86_pmu.event_constraints) { 1412 for_each_event_constraint(c, x86_pmu.event_constraints) {
1395 if ((event->hw.config & c->cmask) == c->code) 1413 if ((event->hw.config & c->cmask) == c->code) {
1414 /* hw.flags zeroed at initialization */
1415 event->hw.flags |= c->flags;
1396 return c; 1416 return c;
1417 }
1397 } 1418 }
1398 } 1419 }
1399 1420
@@ -1438,6 +1459,7 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
1438static void intel_put_event_constraints(struct cpu_hw_events *cpuc, 1459static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
1439 struct perf_event *event) 1460 struct perf_event *event)
1440{ 1461{
1462 event->hw.flags = 0;
1441 intel_put_shared_regs_event_constraints(cpuc, event); 1463 intel_put_shared_regs_event_constraints(cpuc, event);
1442} 1464}
1443 1465
@@ -1761,6 +1783,8 @@ static void intel_pmu_flush_branch_stack(void)
1761 1783
1762PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); 1784PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
1763 1785
1786PMU_FORMAT_ATTR(ldlat, "config1:0-15");
1787
1764static struct attribute *intel_arch3_formats_attr[] = { 1788static struct attribute *intel_arch3_formats_attr[] = {
1765 &format_attr_event.attr, 1789 &format_attr_event.attr,
1766 &format_attr_umask.attr, 1790 &format_attr_umask.attr,
@@ -1771,6 +1795,7 @@ static struct attribute *intel_arch3_formats_attr[] = {
1771 &format_attr_cmask.attr, 1795 &format_attr_cmask.attr,
1772 1796
1773 &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ 1797 &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
1798 &format_attr_ldlat.attr, /* PEBS load latency */
1774 NULL, 1799 NULL,
1775}; 1800};
1776 1801
@@ -2031,6 +2056,8 @@ __init int intel_pmu_init(void)
2031 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 2056 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
2032 x86_pmu.extra_regs = intel_nehalem_extra_regs; 2057 x86_pmu.extra_regs = intel_nehalem_extra_regs;
2033 2058
2059 x86_pmu.cpu_events = nhm_events_attrs;
2060
2034 /* UOPS_ISSUED.STALLED_CYCLES */ 2061 /* UOPS_ISSUED.STALLED_CYCLES */
2035 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 2062 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
2036 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); 2063 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2074,6 +2101,8 @@ __init int intel_pmu_init(void)
2074 x86_pmu.extra_regs = intel_westmere_extra_regs; 2101 x86_pmu.extra_regs = intel_westmere_extra_regs;
2075 x86_pmu.er_flags |= ERF_HAS_RSP_1; 2102 x86_pmu.er_flags |= ERF_HAS_RSP_1;
2076 2103
2104 x86_pmu.cpu_events = nhm_events_attrs;
2105
2077 /* UOPS_ISSUED.STALLED_CYCLES */ 2106 /* UOPS_ISSUED.STALLED_CYCLES */
2078 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 2107 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
2079 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); 2108 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2102,6 +2131,8 @@ __init int intel_pmu_init(void)
2102 x86_pmu.er_flags |= ERF_HAS_RSP_1; 2131 x86_pmu.er_flags |= ERF_HAS_RSP_1;
2103 x86_pmu.er_flags |= ERF_NO_HT_SHARING; 2132 x86_pmu.er_flags |= ERF_NO_HT_SHARING;
2104 2133
2134 x86_pmu.cpu_events = snb_events_attrs;
2135
2105 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ 2136 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
2106 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 2137 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
2107 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); 2138 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2128,6 +2159,8 @@ __init int intel_pmu_init(void)
2128 x86_pmu.er_flags |= ERF_HAS_RSP_1; 2159 x86_pmu.er_flags |= ERF_HAS_RSP_1;
2129 x86_pmu.er_flags |= ERF_NO_HT_SHARING; 2160 x86_pmu.er_flags |= ERF_NO_HT_SHARING;
2130 2161
2162 x86_pmu.cpu_events = snb_events_attrs;
2163
2131 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ 2164 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
2132 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 2165 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
2133 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); 2166 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 826054a4f2ee..36dc13d1ad02 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -24,6 +24,130 @@ struct pebs_record_32 {
24 24
25 */ 25 */
26 26
27union intel_x86_pebs_dse {
28 u64 val;
29 struct {
30 unsigned int ld_dse:4;
31 unsigned int ld_stlb_miss:1;
32 unsigned int ld_locked:1;
33 unsigned int ld_reserved:26;
34 };
35 struct {
36 unsigned int st_l1d_hit:1;
37 unsigned int st_reserved1:3;
38 unsigned int st_stlb_miss:1;
39 unsigned int st_locked:1;
40 unsigned int st_reserved2:26;
41 };
42};
43
44
45/*
46 * Map PEBS Load Latency Data Source encodings to generic
47 * memory data source information
48 */
49#define P(a, b) PERF_MEM_S(a, b)
50#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
51#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
52
53static const u64 pebs_data_source[] = {
54 P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
55 OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */
56 OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
57 OP_LH | P(LVL, L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
58 OP_LH | P(LVL, L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
59 OP_LH | P(LVL, L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
60 OP_LH | P(LVL, L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
61 OP_LH | P(LVL, L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
62 OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
63 OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
64 OP_LH | P(LVL, LOC_RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
65 OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
66 OP_LH | P(LVL, LOC_RAM) | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
67 OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
68 OP_LH | P(LVL, IO) | P(SNOOP, NONE), /* 0x0e: I/O */
69 OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
70};
71
72static u64 precise_store_data(u64 status)
73{
74 union intel_x86_pebs_dse dse;
75 u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
76
77 dse.val = status;
78
79 /*
80 * bit 4: TLB access
81 * 1 = stored missed 2nd level TLB
82 *
83 * so it either hit the walker or the OS
84 * otherwise hit 2nd level TLB
85 */
86 if (dse.st_stlb_miss)
87 val |= P(TLB, MISS);
88 else
89 val |= P(TLB, HIT);
90
91 /*
92 * bit 0: hit L1 data cache
93 * if not set, then all we know is that
94 * it missed L1D
95 */
96 if (dse.st_l1d_hit)
97 val |= P(LVL, HIT);
98 else
99 val |= P(LVL, MISS);
100
101 /*
102 * bit 5: Locked prefix
103 */
104 if (dse.st_locked)
105 val |= P(LOCK, LOCKED);
106
107 return val;
108}
109
110static u64 load_latency_data(u64 status)
111{
112 union intel_x86_pebs_dse dse;
113 u64 val;
114 int model = boot_cpu_data.x86_model;
115 int fam = boot_cpu_data.x86;
116
117 dse.val = status;
118
119 /*
120 * use the mapping table for bit 0-3
121 */
122 val = pebs_data_source[dse.ld_dse];
123
124 /*
125 * Nehalem models do not support TLB, Lock infos
126 */
127 if (fam == 0x6 && (model == 26 || model == 30
128 || model == 31 || model == 46)) {
129 val |= P(TLB, NA) | P(LOCK, NA);
130 return val;
131 }
132 /*
133 * bit 4: TLB access
134 * 0 = did not miss 2nd level TLB
135 * 1 = missed 2nd level TLB
136 */
137 if (dse.ld_stlb_miss)
138 val |= P(TLB, MISS) | P(TLB, L2);
139 else
140 val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
141
142 /*
143 * bit 5: locked prefix
144 */
145 if (dse.ld_locked)
146 val |= P(LOCK, LOCKED);
147
148 return val;
149}
150
27struct pebs_record_core { 151struct pebs_record_core {
28 u64 flags, ip; 152 u64 flags, ip;
29 u64 ax, bx, cx, dx; 153 u64 ax, bx, cx, dx;
@@ -364,7 +488,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
364}; 488};
365 489
366struct event_constraint intel_nehalem_pebs_event_constraints[] = { 490struct event_constraint intel_nehalem_pebs_event_constraints[] = {
367 INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ 491 INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
368 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ 492 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
369 INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ 493 INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
370 INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */ 494 INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
@@ -379,7 +503,7 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
379}; 503};
380 504
381struct event_constraint intel_westmere_pebs_event_constraints[] = { 505struct event_constraint intel_westmere_pebs_event_constraints[] = {
382 INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ 506 INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
383 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ 507 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
384 INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ 508 INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
385 INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */ 509 INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
@@ -399,7 +523,8 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
399 INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ 523 INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
400 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ 524 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
401 INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ 525 INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
402 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ 526 INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
527 INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
403 INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ 528 INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
404 INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ 529 INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
405 INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ 530 INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -413,7 +538,8 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
413 INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ 538 INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
414 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ 539 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
415 INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ 540 INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
416 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ 541 INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
542 INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
417 INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ 543 INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
418 INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ 544 INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
419 INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ 545 INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -430,8 +556,10 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
430 556
431 if (x86_pmu.pebs_constraints) { 557 if (x86_pmu.pebs_constraints) {
432 for_each_event_constraint(c, x86_pmu.pebs_constraints) { 558 for_each_event_constraint(c, x86_pmu.pebs_constraints) {
433 if ((event->hw.config & c->cmask) == c->code) 559 if ((event->hw.config & c->cmask) == c->code) {
560 event->hw.flags |= c->flags;
434 return c; 561 return c;
562 }
435 } 563 }
436 } 564 }
437 565
@@ -446,6 +574,11 @@ void intel_pmu_pebs_enable(struct perf_event *event)
446 hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; 574 hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
447 575
448 cpuc->pebs_enabled |= 1ULL << hwc->idx; 576 cpuc->pebs_enabled |= 1ULL << hwc->idx;
577
578 if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
579 cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
580 else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
581 cpuc->pebs_enabled |= 1ULL << 63;
449} 582}
450 583
451void intel_pmu_pebs_disable(struct perf_event *event) 584void intel_pmu_pebs_disable(struct perf_event *event)
@@ -558,20 +691,51 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
558 struct pt_regs *iregs, void *__pebs) 691 struct pt_regs *iregs, void *__pebs)
559{ 692{
560 /* 693 /*
561 * We cast to pebs_record_core since that is a subset of 694 * We cast to pebs_record_nhm to get the load latency data
562 * both formats and we don't use the other fields in this 695 * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
563 * routine.
564 */ 696 */
565 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 697 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
566 struct pebs_record_core *pebs = __pebs; 698 struct pebs_record_nhm *pebs = __pebs;
567 struct perf_sample_data data; 699 struct perf_sample_data data;
568 struct pt_regs regs; 700 struct pt_regs regs;
701 u64 sample_type;
702 int fll, fst;
569 703
570 if (!intel_pmu_save_and_restart(event)) 704 if (!intel_pmu_save_and_restart(event))
571 return; 705 return;
572 706
707 fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
708 fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
709
573 perf_sample_data_init(&data, 0, event->hw.last_period); 710 perf_sample_data_init(&data, 0, event->hw.last_period);
574 711
712 data.period = event->hw.last_period;
713 sample_type = event->attr.sample_type;
714
715 /*
716 * if PEBS-LL or PreciseStore
717 */
718 if (fll || fst) {
719 if (sample_type & PERF_SAMPLE_ADDR)
720 data.addr = pebs->dla;
721
722 /*
723 * Use latency for weight (only avail with PEBS-LL)
724 */
725 if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
726 data.weight = pebs->lat;
727
728 /*
729 * data.data_src encodes the data source
730 */
731 if (sample_type & PERF_SAMPLE_DATA_SRC) {
732 if (fll)
733 data.data_src.val = load_latency_data(pebs->dse);
734 else
735 data.data_src.val = precise_store_data(pebs->dse);
736 }
737 }
738
575 /* 739 /*
576 * We use the interrupt regs as a base because the PEBS record 740 * We use the interrupt regs as a base because the PEBS record
577 * does not contain a full regs set, specifically it seems to 741 * does not contain a full regs set, specifically it seems to
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index b43200dbfe7e..75da9e18b128 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2438,7 +2438,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
2438 2438
2439 type->unconstrainted = (struct event_constraint) 2439 type->unconstrainted = (struct event_constraint)
2440 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, 2440 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
2441 0, type->num_counters, 0); 2441 0, type->num_counters, 0, 0);
2442 2442
2443 for (i = 0; i < type->num_boxes; i++) { 2443 for (i = 0; i < type->num_boxes; i++) {
2444 pmus[i].func_id = -1; 2444 pmus[i].func_id = -1;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8737e1cee8b2..42a6daaf4e0a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -127,6 +127,7 @@ struct hw_perf_event {
127 int event_base_rdpmc; 127 int event_base_rdpmc;
128 int idx; 128 int idx;
129 int last_cpu; 129 int last_cpu;
130 int flags;
130 131
131 struct hw_perf_event_extra extra_reg; 132 struct hw_perf_event_extra extra_reg;
132 struct hw_perf_event_extra branch_reg; 133 struct hw_perf_event_extra branch_reg;
@@ -567,11 +568,13 @@ struct perf_sample_data {
567 u32 reserved; 568 u32 reserved;
568 } cpu_entry; 569 } cpu_entry;
569 u64 period; 570 u64 period;
571 union perf_mem_data_src data_src;
570 struct perf_callchain_entry *callchain; 572 struct perf_callchain_entry *callchain;
571 struct perf_raw_record *raw; 573 struct perf_raw_record *raw;
572 struct perf_branch_stack *br_stack; 574 struct perf_branch_stack *br_stack;
573 struct perf_regs_user regs_user; 575 struct perf_regs_user regs_user;
574 u64 stack_user_size; 576 u64 stack_user_size;
577 u64 weight;
575}; 578};
576 579
577static inline void perf_sample_data_init(struct perf_sample_data *data, 580static inline void perf_sample_data_init(struct perf_sample_data *data,
@@ -585,6 +588,8 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
585 data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE; 588 data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
586 data->regs_user.regs = NULL; 589 data->regs_user.regs = NULL;
587 data->stack_user_size = 0; 590 data->stack_user_size = 0;
591 data->weight = 0;
592 data->data_src.val = 0;
588} 593}
589 594
590extern void perf_output_sample(struct perf_output_handle *handle, 595extern void perf_output_sample(struct perf_output_handle *handle,
@@ -809,6 +814,7 @@ do { \
809struct perf_pmu_events_attr { 814struct perf_pmu_events_attr {
810 struct device_attribute attr; 815 struct device_attribute attr;
811 u64 id; 816 u64 id;
817 const char *event_str;
812}; 818};
813 819
814#define PMU_EVENT_ATTR(_name, _var, _id, _show) \ 820#define PMU_EVENT_ATTR(_name, _var, _id, _show) \
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 9fa9c622a7f4..964a450a6e2c 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -132,8 +132,10 @@ enum perf_event_sample_format {
132 PERF_SAMPLE_BRANCH_STACK = 1U << 11, 132 PERF_SAMPLE_BRANCH_STACK = 1U << 11,
133 PERF_SAMPLE_REGS_USER = 1U << 12, 133 PERF_SAMPLE_REGS_USER = 1U << 12,
134 PERF_SAMPLE_STACK_USER = 1U << 13, 134 PERF_SAMPLE_STACK_USER = 1U << 13,
135 PERF_SAMPLE_WEIGHT = 1U << 14,
136 PERF_SAMPLE_DATA_SRC = 1U << 15,
135 137
136 PERF_SAMPLE_MAX = 1U << 14, /* non-ABI */ 138 PERF_SAMPLE_MAX = 1U << 16, /* non-ABI */
137}; 139};
138 140
139/* 141/*
@@ -443,6 +445,7 @@ struct perf_event_mmap_page {
443#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0) 445#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0)
444#define PERF_RECORD_MISC_GUEST_USER (5 << 0) 446#define PERF_RECORD_MISC_GUEST_USER (5 << 0)
445 447
448#define PERF_RECORD_MISC_MMAP_DATA (1 << 13)
446/* 449/*
447 * Indicates that the content of PERF_SAMPLE_IP points to 450 * Indicates that the content of PERF_SAMPLE_IP points to
448 * the actual instruction that triggered the event. See also 451 * the actual instruction that triggered the event. See also
@@ -588,6 +591,9 @@ enum perf_event_type {
588 * { u64 size; 591 * { u64 size;
589 * char data[size]; 592 * char data[size];
590 * u64 dyn_size; } && PERF_SAMPLE_STACK_USER 593 * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
594 *
595 * { u64 weight; } && PERF_SAMPLE_WEIGHT
596 * { u64 data_src; } && PERF_SAMPLE_DATA_SRC
591 * }; 597 * };
592 */ 598 */
593 PERF_RECORD_SAMPLE = 9, 599 PERF_RECORD_SAMPLE = 9,
@@ -613,4 +619,67 @@ enum perf_callchain_context {
613#define PERF_FLAG_FD_OUTPUT (1U << 1) 619#define PERF_FLAG_FD_OUTPUT (1U << 1)
614#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ 620#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */
615 621
622union perf_mem_data_src {
623 __u64 val;
624 struct {
625 __u64 mem_op:5, /* type of opcode */
626 mem_lvl:14, /* memory hierarchy level */
627 mem_snoop:5, /* snoop mode */
628 mem_lock:2, /* lock instr */
629 mem_dtlb:7, /* tlb access */
630 mem_rsvd:31;
631 };
632};
633
634/* type of opcode (load/store/prefetch,code) */
635#define PERF_MEM_OP_NA 0x01 /* not available */
636#define PERF_MEM_OP_LOAD 0x02 /* load instruction */
637#define PERF_MEM_OP_STORE 0x04 /* store instruction */
638#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
639#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
640#define PERF_MEM_OP_SHIFT 0
641
642/* memory hierarchy (memory level, hit or miss) */
643#define PERF_MEM_LVL_NA 0x01 /* not available */
644#define PERF_MEM_LVL_HIT 0x02 /* hit level */
645#define PERF_MEM_LVL_MISS 0x04 /* miss level */
646#define PERF_MEM_LVL_L1 0x08 /* L1 */
647#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */
648#define PERF_MEM_LVL_L2 0x20 /* L2 hit */
649#define PERF_MEM_LVL_L3 0x40 /* L3 hit */
650#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */
651#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */
652#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */
653#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */
654#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
655#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
656#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
657#define PERF_MEM_LVL_SHIFT 5
658
659/* snoop mode */
660#define PERF_MEM_SNOOP_NA 0x01 /* not available */
661#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
662#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
663#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
664#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
665#define PERF_MEM_SNOOP_SHIFT 19
666
667/* locked instruction */
668#define PERF_MEM_LOCK_NA 0x01 /* not available */
669#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
670#define PERF_MEM_LOCK_SHIFT 24
671
672/* TLB access */
673#define PERF_MEM_TLB_NA 0x01 /* not available */
674#define PERF_MEM_TLB_HIT 0x02 /* hit level */
675#define PERF_MEM_TLB_MISS 0x04 /* miss level */
676#define PERF_MEM_TLB_L1 0x08 /* L1 */
677#define PERF_MEM_TLB_L2 0x10 /* L2 */
678#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/
679#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
680#define PERF_MEM_TLB_SHIFT 26
681
/*
 * Build the 'val' bits of one perf_mem_data_src field:
 * PERF_MEM_S(LVL, L1) expands to PERF_MEM_LVL_L1 << PERF_MEM_LVL_SHIFT.
 *
 * The cast widens the constant to 64 bits before shifting.  It uses __u64,
 * the same type union perf_mem_data_src is declared with, so that the macro
 * does not depend on the kernel-internal u64 typedef when this exported
 * header is used outside the kernel.
 */
#define PERF_MEM_S(a, s) \
	(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
684
616#endif /* _UAPI_LINUX_PERF_EVENT_H */ 685#endif /* _UAPI_LINUX_PERF_EVENT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7b4a55d41efc..98c0845fcd20 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -976,9 +976,15 @@ static void perf_event__header_size(struct perf_event *event)
976 if (sample_type & PERF_SAMPLE_PERIOD) 976 if (sample_type & PERF_SAMPLE_PERIOD)
977 size += sizeof(data->period); 977 size += sizeof(data->period);
978 978
979 if (sample_type & PERF_SAMPLE_WEIGHT)
980 size += sizeof(data->weight);
981
979 if (sample_type & PERF_SAMPLE_READ) 982 if (sample_type & PERF_SAMPLE_READ)
980 size += event->read_size; 983 size += event->read_size;
981 984
985 if (sample_type & PERF_SAMPLE_DATA_SRC)
986 size += sizeof(data->data_src.val);
987
982 event->header_size = size; 988 event->header_size = size;
983} 989}
984 990
@@ -4193,6 +4199,12 @@ void perf_output_sample(struct perf_output_handle *handle,
4193 perf_output_sample_ustack(handle, 4199 perf_output_sample_ustack(handle,
4194 data->stack_user_size, 4200 data->stack_user_size,
4195 data->regs_user.regs); 4201 data->regs_user.regs);
4202
4203 if (sample_type & PERF_SAMPLE_WEIGHT)
4204 perf_output_put(handle, data->weight);
4205
4206 if (sample_type & PERF_SAMPLE_DATA_SRC)
4207 perf_output_put(handle, data->data_src.val);
4196} 4208}
4197 4209
4198void perf_prepare_sample(struct perf_event_header *header, 4210void perf_prepare_sample(struct perf_event_header *header,
@@ -4779,6 +4791,9 @@ got_name:
4779 mmap_event->file_name = name; 4791 mmap_event->file_name = name;
4780 mmap_event->file_size = size; 4792 mmap_event->file_size = size;
4781 4793
4794 if (!(vma->vm_flags & VM_EXEC))
4795 mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
4796
4782 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; 4797 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
4783 4798
4784 rcu_read_lock(); 4799 rcu_read_lock();
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
new file mode 100644
index 000000000000..888d51137fbe
--- /dev/null
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -0,0 +1,48 @@
1perf-mem(1)
2===========
3
4NAME
5----
6perf-mem - Profile memory accesses
7
8SYNOPSIS
9--------
10[verse]
11'perf mem' [<options>] (record [<command>] | report)
12
13DESCRIPTION
14-----------
15"perf mem -t <TYPE> record" runs a command and gathers memory operation data
16from it, into perf.data. Perf record options are accepted and are passed through.
17
18"perf mem -t <TYPE> report" displays the result. It invokes perf report with the
19right set of options to display a memory access profile.
20
21OPTIONS
22-------
23<command>...::
24 Any command you can specify in a shell.
25
26-t::
27--type=::
28 Select the memory operation type: load or store (default: load)
29
30-D::
31--dump-raw-samples::
32 Dump the raw decoded samples on the screen in a format that is easy to parse with
33 one sample per line.
34
35-x::
36--field-separator::
37 Specify the field separator used when dumping raw samples (-D option). By default,
38 the separator is the space character.
39
40-C::
41--cpu::
42 Restrict dump of raw samples to those provided via this option. Note that the same
43 option can be passed in record mode. It will be interpreted the same way as perf
44 record.
45
46SEE ALSO
47--------
48linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 938e8904f64d..d4da111ef53d 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -182,6 +182,12 @@ is enabled for all the sampling events. The sampled branch type is the same for
182The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k 182The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
183Note that this feature may not be available on all processors. 183Note that this feature may not be available on all processors.
184 184
185-W::
186--weight::
187Enable weighted sampling. An additional weight is recorded per sample and can be
188displayed with the weight and local_weight sort keys. This currently works for TSX
189abort events and some memory events in precise mode on modern Intel CPUs.
190
185SEE ALSO 191SEE ALSO
186-------- 192--------
187linkperf:perf-stat[1], linkperf:perf-list[1] 193linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 02284a0067f0..7d5f4f38aa52 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -59,7 +59,7 @@ OPTIONS
59--sort=:: 59--sort=::
60 Sort histogram entries by given key(s) - multiple keys can be specified 60 Sort histogram entries by given key(s) - multiple keys can be specified
61 in CSV format. Following sort keys are available: 61 in CSV format. Following sort keys are available:
62 pid, comm, dso, symbol, parent, cpu, srcline. 62 pid, comm, dso, symbol, parent, cpu, srcline, weight, local_weight.
63 63
64 Each key has following meaning: 64 Each key has following meaning:
65 65
@@ -206,6 +206,10 @@ OPTIONS
206--group:: 206--group::
207 Show event group information together. 207 Show event group information together.
208 208
209--demangle::
210 Demangle symbol names to human readable form. It's enabled by default,
211 disable with --no-demangle.
212
209SEE ALSO 213SEE ALSO
210-------- 214--------
211linkperf:perf-stat[1], linkperf:perf-annotate[1] 215linkperf:perf-stat[1], linkperf:perf-annotate[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 23e587ad549e..2fe87fb558f0 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -119,13 +119,19 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m
119 Print count deltas every N milliseconds (minimum: 100ms) 119 Print count deltas every N milliseconds (minimum: 100ms)
120 example: perf stat -I 1000 -e cycles -a sleep 5 120 example: perf stat -I 1000 -e cycles -a sleep 5
121 121
122--aggr-socket:: 122--per-socket::
123Aggregate counts per processor socket for system-wide mode measurements. This 123Aggregate counts per processor socket for system-wide mode measurements. This
124is a useful mode to detect imbalance between sockets. To enable this mode, 124is a useful mode to detect imbalance between sockets. To enable this mode,
125use --aggr-socket in addition to -a. (system-wide). The output includes the 125use --per-socket in addition to -a. (system-wide). The output includes the
126socket number and the number of online processors on that socket. This is 126socket number and the number of online processors on that socket. This is
127useful to gauge the amount of aggregation. 127useful to gauge the amount of aggregation.
128 128
129--per-core::
130Aggregate counts per physical processor for system-wide mode measurements. This
131is a useful mode to detect imbalance between physical cores. To enable this mode,
132use --per-core in addition to -a (system-wide). The output includes the
133core number and the number of online logical processors on that physical processor.
134
129EXAMPLES 135EXAMPLES
130-------- 136--------
131 137
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index a414bc95fd52..9f1a2fe54757 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -112,7 +112,7 @@ Default is to monitor all CPUS.
112 112
113-s:: 113-s::
114--sort:: 114--sort::
115 Sort by key(s): pid, comm, dso, symbol, parent, srcline. 115 Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight, local_weight.
116 116
117-n:: 117-n::
118--show-nr-samples:: 118--show-nr-samples::
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 0230b75ed7f9..b0f164b133d9 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -35,7 +35,9 @@ include config/utilities.mak
35# 35#
36# Define WERROR=0 to disable treating any warnings as errors. 36# Define WERROR=0 to disable treating any warnings as errors.
37# 37#
38# Define NO_NEWT if you do not want TUI support. 38# Define NO_NEWT if you do not want TUI support. (deprecated)
39#
40# Define NO_SLANG if you do not want TUI support.
39# 41#
40# Define NO_GTK2 if you do not want GTK+ GUI support. 42# Define NO_GTK2 if you do not want GTK+ GUI support.
41# 43#
@@ -104,6 +106,10 @@ ifdef PARSER_DEBUG
104 PARSER_DEBUG_CFLAGS := -DPARSER_DEBUG 106 PARSER_DEBUG_CFLAGS := -DPARSER_DEBUG
105endif 107endif
106 108
109ifdef NO_NEWT
110 NO_SLANG=1
111endif
112
107CFLAGS = -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS) 113CFLAGS = -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS)
108EXTLIBS = -lpthread -lrt -lelf -lm 114EXTLIBS = -lpthread -lrt -lelf -lm
109ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE 115ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
@@ -272,7 +278,7 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
272python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so 278python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
273 279
274PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) 280PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
275PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py 281PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT)
276 282
277$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) 283$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
278 $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \ 284 $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
@@ -547,6 +553,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
547BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o 553BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
548BUILTIN_OBJS += $(OUTPUT)builtin-inject.o 554BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
549BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o 555BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o
556BUILTIN_OBJS += $(OUTPUT)builtin-mem.o
550 557
551PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT) 558PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT)
552 559
@@ -679,15 +686,15 @@ ifndef NO_LIBAUDIT
679 endif 686 endif
680endif 687endif
681 688
682ifndef NO_NEWT 689ifndef NO_SLANG
683 FLAGS_NEWT=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -lnewt 690 FLAGS_SLANG=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -I/usr/include/slang -lslang
684 ifneq ($(call try-cc,$(SOURCE_NEWT),$(FLAGS_NEWT),libnewt),y) 691 ifneq ($(call try-cc,$(SOURCE_SLANG),$(FLAGS_SLANG),libslang),y)
685 msg := $(warning newt not found, disables TUI support. Please install newt-devel or libnewt-dev); 692 msg := $(warning slang not found, disables TUI support. Please install slang-devel or libslang-dev);
686 else 693 else
687 # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h 694 # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
688 BASIC_CFLAGS += -I/usr/include/slang 695 BASIC_CFLAGS += -I/usr/include/slang
689 BASIC_CFLAGS += -DNEWT_SUPPORT 696 BASIC_CFLAGS += -DSLANG_SUPPORT
690 EXTLIBS += -lnewt -lslang 697 EXTLIBS += -lslang
691 LIB_OBJS += $(OUTPUT)ui/browser.o 698 LIB_OBJS += $(OUTPUT)ui/browser.o
692 LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o 699 LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o
693 LIB_OBJS += $(OUTPUT)ui/browsers/hists.o 700 LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index ae36f3cb5410..db491e9a812b 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -63,7 +63,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
63 return 0; 63 return 0;
64 } 64 }
65 65
66 he = __hists__add_entry(&evsel->hists, al, NULL, 1); 66 he = __hists__add_entry(&evsel->hists, al, NULL, 1, 1);
67 if (he == NULL) 67 if (he == NULL)
68 return -ENOMEM; 68 return -ENOMEM;
69 69
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index d207a97a2db1..2d0462d89a97 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -231,9 +231,10 @@ int perf_diff__formula(struct hist_entry *he, struct hist_entry *pair,
231} 231}
232 232
233static int hists__add_entry(struct hists *self, 233static int hists__add_entry(struct hists *self,
234 struct addr_location *al, u64 period) 234 struct addr_location *al, u64 period,
235 u64 weight)
235{ 236{
236 if (__hists__add_entry(self, al, NULL, period) != NULL) 237 if (__hists__add_entry(self, al, NULL, period, weight) != NULL)
237 return 0; 238 return 0;
238 return -ENOMEM; 239 return -ENOMEM;
239} 240}
@@ -255,7 +256,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
255 if (al.filtered) 256 if (al.filtered)
256 return 0; 257 return 0;
257 258
258 if (hists__add_entry(&evsel->hists, &al, sample->period)) { 259 if (hists__add_entry(&evsel->hists, &al, sample->period, sample->weight)) {
259 pr_warning("problem incrementing symbol period, skipping event\n"); 260 pr_warning("problem incrementing symbol period, skipping event\n");
260 return -1; 261 return -1;
261 } 262 }
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
new file mode 100644
index 000000000000..a8ff6d264e50
--- /dev/null
+++ b/tools/perf/builtin-mem.c
@@ -0,0 +1,242 @@
1#include "builtin.h"
2#include "perf.h"
3
4#include "util/parse-options.h"
5#include "util/trace-event.h"
6#include "util/tool.h"
7#include "util/session.h"
8
9#define MEM_OPERATION_LOAD "load"
10#define MEM_OPERATION_STORE "store"
11
12static const char *mem_operation = MEM_OPERATION_LOAD;
13
14struct perf_mem {
15 struct perf_tool tool;
16 char const *input_name;
17 symbol_filter_t annotate_init;
18 bool hide_unresolved;
19 bool dump_raw;
20 const char *cpu_list;
21 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
22};
23
24static const char * const mem_usage[] = {
25 "perf mem [<options>] {record <command> |report}",
26 NULL
27};
28
29static int __cmd_record(int argc, const char **argv)
30{
31 int rec_argc, i = 0, j;
32 const char **rec_argv;
33 char event[64];
34 int ret;
35
36 rec_argc = argc + 4;
37 rec_argv = calloc(rec_argc + 1, sizeof(char *));
38 if (!rec_argv)
39 return -1;
40
41 rec_argv[i++] = strdup("record");
42 if (!strcmp(mem_operation, MEM_OPERATION_LOAD))
43 rec_argv[i++] = strdup("-W");
44 rec_argv[i++] = strdup("-d");
45 rec_argv[i++] = strdup("-e");
46
47 if (strcmp(mem_operation, MEM_OPERATION_LOAD))
48 sprintf(event, "cpu/mem-stores/pp");
49 else
50 sprintf(event, "cpu/mem-loads/pp");
51
52 rec_argv[i++] = strdup(event);
53 for (j = 1; j < argc; j++, i++)
54 rec_argv[i] = argv[j];
55
56 ret = cmd_record(i, rec_argv, NULL);
57 free(rec_argv);
58 return ret;
59}
60
61static int
62dump_raw_samples(struct perf_tool *tool,
63 union perf_event *event,
64 struct perf_sample *sample,
65 struct perf_evsel *evsel __maybe_unused,
66 struct machine *machine)
67{
68 struct perf_mem *mem = container_of(tool, struct perf_mem, tool);
69 struct addr_location al;
70 const char *fmt;
71
72 if (perf_event__preprocess_sample(event, machine, &al, sample,
73 mem->annotate_init) < 0) {
74 fprintf(stderr, "problem processing %d event, skipping it.\n",
75 event->header.type);
76 return -1;
77 }
78
79 if (al.filtered || (mem->hide_unresolved && al.sym == NULL))
80 return 0;
81
82 if (al.map != NULL)
83 al.map->dso->hit = 1;
84
85 if (symbol_conf.field_sep) {
86 fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
87 "%s0x%"PRIx64"%s%s:%s\n";
88 } else {
89 fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
90 "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
91 symbol_conf.field_sep = " ";
92 }
93
94 printf(fmt,
95 sample->pid,
96 symbol_conf.field_sep,
97 sample->tid,
98 symbol_conf.field_sep,
99 event->ip.ip,
100 symbol_conf.field_sep,
101 sample->addr,
102 symbol_conf.field_sep,
103 sample->weight,
104 symbol_conf.field_sep,
105 sample->data_src,
106 symbol_conf.field_sep,
107 al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
108 al.sym ? al.sym->name : "???");
109
110 return 0;
111}
112
/*
 * Sample handler of 'perf mem': each sample is handed unchanged to
 * dump_raw_samples() and its result is passed back to the caller.
 */
static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	int ret = dump_raw_samples(tool, event, sample, evsel, machine);

	return ret;
}
121
122static int report_raw_events(struct perf_mem *mem)
123{
124 int err = -EINVAL;
125 int ret;
126 struct perf_session *session = perf_session__new(input_name, O_RDONLY,
127 0, false, &mem->tool);
128
129 if (session == NULL)
130 return -ENOMEM;
131
132 if (mem->cpu_list) {
133 ret = perf_session__cpu_bitmap(session, mem->cpu_list,
134 mem->cpu_bitmap);
135 if (ret)
136 goto out_delete;
137 }
138
139 if (symbol__init() < 0)
140 return -1;
141
142 printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
143
144 err = perf_session__process_events(session, &mem->tool);
145 if (err)
146 return err;
147
148 return 0;
149
150out_delete:
151 perf_session__delete(session);
152 return err;
153}
154
155static int report_events(int argc, const char **argv, struct perf_mem *mem)
156{
157 const char **rep_argv;
158 int ret, i = 0, j, rep_argc;
159
160 if (mem->dump_raw)
161 return report_raw_events(mem);
162
163 rep_argc = argc + 3;
164 rep_argv = calloc(rep_argc + 1, sizeof(char *));
165 if (!rep_argv)
166 return -1;
167
168 rep_argv[i++] = strdup("report");
169 rep_argv[i++] = strdup("--mem-mode");
170 rep_argv[i++] = strdup("-n"); /* display number of samples */
171
172 /*
173 * there is no weight (cost) associated with stores, so don't print
174 * the column
175 */
176 if (strcmp(mem_operation, MEM_OPERATION_LOAD))
177 rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr,"
178 "dso_daddr,tlb,locked");
179
180 for (j = 1; j < argc; j++, i++)
181 rep_argv[i] = argv[j];
182
183 ret = cmd_report(i, rep_argv, NULL);
184 free(rep_argv);
185 return ret;
186}
187
188int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
189{
190 struct stat st;
191 struct perf_mem mem = {
192 .tool = {
193 .sample = process_sample_event,
194 .mmap = perf_event__process_mmap,
195 .comm = perf_event__process_comm,
196 .lost = perf_event__process_lost,
197 .fork = perf_event__process_fork,
198 .build_id = perf_event__process_build_id,
199 .ordered_samples = true,
200 },
201 .input_name = "perf.data",
202 };
203 const struct option mem_options[] = {
204 OPT_STRING('t', "type", &mem_operation,
205 "type", "memory operations(load/store)"),
206 OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw,
207 "dump raw samples in ASCII"),
208 OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved,
209 "Only display entries resolved to a symbol"),
210 OPT_STRING('i', "input", &input_name, "file",
211 "input file name"),
212 OPT_STRING('C', "cpu", &mem.cpu_list, "cpu",
213 "list of cpus to profile"),
214 OPT_STRING('x', "field-separator", &symbol_conf.field_sep,
215 "separator",
216 "separator for columns, no spaces will be added"
217 " between columns '.' is reserved."),
218 OPT_END()
219 };
220
221 argc = parse_options(argc, argv, mem_options, mem_usage,
222 PARSE_OPT_STOP_AT_NON_OPTION);
223
224 if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation))
225 usage_with_options(mem_usage, mem_options);
226
227 if (!mem.input_name || !strlen(mem.input_name)) {
228 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
229 mem.input_name = "-";
230 else
231 mem.input_name = "perf.data";
232 }
233
234 if (!strncmp(argv[0], "rec", 3))
235 return __cmd_record(argc, argv);
236 else if (!strncmp(argv[0], "rep", 3))
237 return report_events(argc, argv, &mem);
238 else
239 usage_with_options(mem_usage, mem_options);
240
241 return 0;
242}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 9f2344a2c506..cdf58ecc04b1 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -5,8 +5,6 @@
5 * (or a CPU, or a PID) into the perf.data output file - for 5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report. 6 * later analysis via perf report.
7 */ 7 */
8#define _FILE_OFFSET_BITS 64
9
10#include "builtin.h" 8#include "builtin.h"
11 9
12#include "perf.h" 10#include "perf.h"
@@ -955,6 +953,8 @@ const struct option record_options[] = {
955 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 953 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
956 "branch filter mask", "branch stack filter modes", 954 "branch filter mask", "branch stack filter modes",
957 parse_branch_stack), 955 parse_branch_stack),
956 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
957 "sample by weight (on special events only)"),
958 OPT_END() 958 OPT_END()
959}; 959};
960 960
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 296bd219977a..bd0ca81eeaca 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -46,6 +46,7 @@ struct perf_report {
46 bool show_full_info; 46 bool show_full_info;
47 bool show_threads; 47 bool show_threads;
48 bool inverted_callchain; 48 bool inverted_callchain;
49 bool mem_mode;
49 struct perf_read_values show_threads_values; 50 struct perf_read_values show_threads_values;
50 const char *pretty_printing_style; 51 const char *pretty_printing_style;
51 symbol_filter_t annotate_init; 52 symbol_filter_t annotate_init;
@@ -64,6 +65,99 @@ static int perf_report_config(const char *var, const char *value, void *cb)
64 return perf_default_config(var, value, cb); 65 return perf_default_config(var, value, cb);
65} 66}
66 67
68static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
69 struct addr_location *al,
70 struct perf_sample *sample,
71 struct perf_evsel *evsel,
72 struct machine *machine,
73 union perf_event *event)
74{
75 struct perf_report *rep = container_of(tool, struct perf_report, tool);
76 struct symbol *parent = NULL;
77 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
78 int err = 0;
79 struct hist_entry *he;
80 struct mem_info *mi, *mx;
81 uint64_t cost;
82
83 if ((sort__has_parent || symbol_conf.use_callchain) &&
84 sample->callchain) {
85 err = machine__resolve_callchain(machine, evsel, al->thread,
86 sample, &parent);
87 if (err)
88 return err;
89 }
90
91 mi = machine__resolve_mem(machine, al->thread, sample, cpumode);
92 if (!mi)
93 return -ENOMEM;
94
95 if (rep->hide_unresolved && !al->sym)
96 return 0;
97
98 cost = sample->weight;
99 if (!cost)
100 cost = 1;
101
102 /*
103 * must pass period=weight in order to get the correct
104 * sorting from hists__collapse_resort() which is solely
105 * based on periods. We want sorting be done on nr_events * weight
106 * and this is indirectly achieved by passing period=weight here
107 * and the he_stat__add_period() function.
108 */
109 he = __hists__add_mem_entry(&evsel->hists, al, parent, mi, cost, cost);
110 if (!he)
111 return -ENOMEM;
112
113 /*
114 * In the TUI browser, we are doing integrated annotation,
115 * so we don't allocate the extra space needed because the stdio
116 * code will not use it.
117 */
118 if (sort__has_sym && he->ms.sym && use_browser > 0) {
119 struct annotation *notes = symbol__annotation(he->ms.sym);
120
121 assert(evsel != NULL);
122
123 if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0)
124 goto out;
125
126 err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
127 if (err)
128 goto out;
129 }
130
131 if (sort__has_sym && he->mem_info->daddr.sym && use_browser > 0) {
132 struct annotation *notes;
133
134 mx = he->mem_info;
135
136 notes = symbol__annotation(mx->daddr.sym);
137 if (notes->src == NULL && symbol__alloc_hist(mx->daddr.sym) < 0)
138 goto out;
139
140 err = symbol__inc_addr_samples(mx->daddr.sym,
141 mx->daddr.map,
142 evsel->idx,
143 mx->daddr.al_addr);
144 if (err)
145 goto out;
146 }
147
148 evsel->hists.stats.total_period += cost;
149 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
150 err = 0;
151
152 if (symbol_conf.use_callchain) {
153 err = callchain_append(he->callchain,
154 &callchain_cursor,
155 sample->period);
156 }
157out:
158 return err;
159}
160
67static int perf_report__add_branch_hist_entry(struct perf_tool *tool, 161static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
68 struct addr_location *al, 162 struct addr_location *al,
69 struct perf_sample *sample, 163 struct perf_sample *sample,
@@ -98,7 +192,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
98 * and not events sampled. Thus we use a pseudo period of 1. 192 * and not events sampled. Thus we use a pseudo period of 1.
99 */ 193 */
100 he = __hists__add_branch_entry(&evsel->hists, al, parent, 194 he = __hists__add_branch_entry(&evsel->hists, al, parent,
101 &bi[i], 1); 195 &bi[i], 1, 1);
102 if (he) { 196 if (he) {
103 struct annotation *notes; 197 struct annotation *notes;
104 err = -ENOMEM; 198 err = -ENOMEM;
@@ -156,7 +250,8 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
156 return err; 250 return err;
157 } 251 }
158 252
159 he = __hists__add_entry(&evsel->hists, al, parent, sample->period); 253 he = __hists__add_entry(&evsel->hists, al, parent, sample->period,
254 sample->weight);
160 if (he == NULL) 255 if (he == NULL)
161 return -ENOMEM; 256 return -ENOMEM;
162 257
@@ -168,7 +263,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
168 return err; 263 return err;
169 } 264 }
170 /* 265 /*
171 * Only in the newt browser we are doing integrated annotation, 266 * Only in the TUI browser we are doing integrated annotation,
172 * so we don't allocated the extra space needed because the stdio 267 * so we don't allocated the extra space needed because the stdio
173 * code will not use it. 268 * code will not use it.
174 */ 269 */
@@ -219,6 +314,12 @@ static int process_sample_event(struct perf_tool *tool,
219 pr_debug("problem adding lbr entry, skipping event\n"); 314 pr_debug("problem adding lbr entry, skipping event\n");
220 return -1; 315 return -1;
221 } 316 }
317 } else if (rep->mem_mode == 1) {
318 if (perf_report__add_mem_hist_entry(tool, &al, sample,
319 evsel, machine, event)) {
320 pr_debug("problem adding mem entry, skipping event\n");
321 return -1;
322 }
222 } else { 323 } else {
223 if (al.map != NULL) 324 if (al.map != NULL)
224 al.map->dso->hit = 1; 325 al.map->dso->hit = 1;
@@ -302,7 +403,8 @@ static void sig_handler(int sig __maybe_unused)
302 session_done = 1; 403 session_done = 1;
303} 404}
304 405
305static size_t hists__fprintf_nr_sample_events(struct hists *self, 406static size_t hists__fprintf_nr_sample_events(struct perf_report *rep,
407 struct hists *self,
306 const char *evname, FILE *fp) 408 const char *evname, FILE *fp)
307{ 409{
308 size_t ret; 410 size_t ret;
@@ -330,7 +432,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self,
330 if (evname != NULL) 432 if (evname != NULL)
331 ret += fprintf(fp, " of event '%s'", evname); 433 ret += fprintf(fp, " of event '%s'", evname);
332 434
333 ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events); 435 if (rep->mem_mode) {
436 ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events);
437 ret += fprintf(fp, "\n# Sort order : %s", sort_order);
438 } else
439 ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events);
334 return ret + fprintf(fp, "\n#\n"); 440 return ret + fprintf(fp, "\n#\n");
335} 441}
336 442
@@ -348,7 +454,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
348 !perf_evsel__is_group_leader(pos)) 454 !perf_evsel__is_group_leader(pos))
349 continue; 455 continue;
350 456
351 hists__fprintf_nr_sample_events(hists, evname, stdout); 457 hists__fprintf_nr_sample_events(rep, hists, evname, stdout);
352 hists__fprintf(hists, true, 0, 0, stdout); 458 hists__fprintf(hists, true, 0, 0, stdout);
353 fprintf(stdout, "\n\n"); 459 fprintf(stdout, "\n\n");
354 } 460 }
@@ -644,7 +750,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
644 "Use the stdio interface"), 750 "Use the stdio interface"),
645 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 751 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
646 "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline," 752 "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline,"
647 " dso_to, dso_from, symbol_to, symbol_from, mispredict"), 753 " dso_to, dso_from, symbol_to, symbol_from, mispredict,"
754 " weight, local_weight, mem, symbol_daddr, dso_daddr, tlb, "
755 "snoop, locked"),
648 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, 756 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
649 "Show sample percentage for different cpu modes"), 757 "Show sample percentage for different cpu modes"),
650 OPT_STRING('p', "parent", &parent_pattern, "regex", 758 OPT_STRING('p', "parent", &parent_pattern, "regex",
@@ -692,6 +800,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
692 "use branch records for histogram filling", parse_branch_mode), 800 "use branch records for histogram filling", parse_branch_mode),
693 OPT_STRING(0, "objdump", &objdump_path, "path", 801 OPT_STRING(0, "objdump", &objdump_path, "path",
694 "objdump binary to use for disassembly and annotations"), 802 "objdump binary to use for disassembly and annotations"),
803 OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
804 "Disable symbol demangling"),
805 OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
695 OPT_END() 806 OPT_END()
696 }; 807 };
697 808
@@ -749,12 +860,24 @@ repeat:
749 "dso_to,symbol_to"; 860 "dso_to,symbol_to";
750 861
751 } 862 }
863 if (report.mem_mode) {
864 if (sort__branch_mode == 1) {
865 fprintf(stderr, "branch and mem mode incompatible\n");
866 goto error;
867 }
868 /*
869 * if no sort_order is provided, then specify
870 * branch-mode specific order
871 */
872 if (sort_order == default_sort_order)
873 sort_order = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
874 }
752 875
753 if (setup_sorting() < 0) 876 if (setup_sorting() < 0)
754 usage_with_options(report_usage, options); 877 usage_with_options(report_usage, options);
755 878
756 /* 879 /*
757 * Only in the newt browser we are doing integrated annotation, 880 * Only in the TUI browser we are doing integrated annotation,
758 * so don't allocate extra space that won't be used in the stdio 881 * so don't allocate extra space that won't be used in the stdio
759 * implementation. 882 * implementation.
760 */ 883 */
@@ -814,6 +937,14 @@ repeat:
814 sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout); 937 sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout);
815 sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout); 938 sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout);
816 } else { 939 } else {
940 if (report.mem_mode) {
941 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "symbol_daddr", stdout);
942 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso_daddr", stdout);
943 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "mem", stdout);
944 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "local_weight", stdout);
945 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "tlb", stdout);
946 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "snoop", stdout);
947 }
817 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); 948 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
818 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); 949 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
819 } 950 }
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 138229439a93..2da2a6ca22bf 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1671,7 +1671,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
1671 .sample = perf_sched__process_tracepoint_sample, 1671 .sample = perf_sched__process_tracepoint_sample,
1672 .comm = perf_event__process_comm, 1672 .comm = perf_event__process_comm,
1673 .lost = perf_event__process_lost, 1673 .lost = perf_event__process_lost,
1674 .exit = perf_event__process_exit,
1675 .fork = perf_event__process_fork, 1674 .fork = perf_event__process_fork,
1676 .ordered_samples = true, 1675 .ordered_samples = true,
1677 }, 1676 },
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ba0bdd87c279..7e910bab1097 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -68,7 +68,7 @@
68static void print_stat(int argc, const char **argv); 68static void print_stat(int argc, const char **argv);
69static void print_counter_aggr(struct perf_evsel *counter, char *prefix); 69static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
70static void print_counter(struct perf_evsel *counter, char *prefix); 70static void print_counter(struct perf_evsel *counter, char *prefix);
71static void print_aggr_socket(char *prefix); 71static void print_aggr(char *prefix);
72 72
73static struct perf_evlist *evsel_list; 73static struct perf_evlist *evsel_list;
74 74
@@ -76,11 +76,17 @@ static struct perf_target target = {
76 .uid = UINT_MAX, 76 .uid = UINT_MAX,
77}; 77};
78 78
79enum aggr_mode {
80 AGGR_NONE,
81 AGGR_GLOBAL,
82 AGGR_SOCKET,
83 AGGR_CORE,
84};
85
79static int run_count = 1; 86static int run_count = 1;
80static bool no_inherit = false; 87static bool no_inherit = false;
81static bool scale = true; 88static bool scale = true;
82static bool no_aggr = false; 89static enum aggr_mode aggr_mode = AGGR_GLOBAL;
83static bool aggr_socket = false;
84static pid_t child_pid = -1; 90static pid_t child_pid = -1;
85static bool null_run = false; 91static bool null_run = false;
86static int detailed_run = 0; 92static int detailed_run = 0;
@@ -96,7 +102,8 @@ static bool sync_run = false;
96static unsigned int interval = 0; 102static unsigned int interval = 0;
97static bool forever = false; 103static bool forever = false;
98static struct timespec ref_time; 104static struct timespec ref_time;
99static struct cpu_map *sock_map; 105static struct cpu_map *aggr_map;
106static int (*aggr_get_id)(struct cpu_map *m, int cpu);
100 107
101static volatile int done = 0; 108static volatile int done = 0;
102 109
@@ -355,41 +362,55 @@ static void print_interval(void)
355 struct timespec ts, rs; 362 struct timespec ts, rs;
356 char prefix[64]; 363 char prefix[64];
357 364
358 if (no_aggr) { 365 if (aggr_mode == AGGR_GLOBAL) {
359 list_for_each_entry(counter, &evsel_list->entries, node) { 366 list_for_each_entry(counter, &evsel_list->entries, node) {
360 ps = counter->priv; 367 ps = counter->priv;
361 memset(ps->res_stats, 0, sizeof(ps->res_stats)); 368 memset(ps->res_stats, 0, sizeof(ps->res_stats));
362 read_counter(counter); 369 read_counter_aggr(counter);
363 } 370 }
364 } else { 371 } else {
365 list_for_each_entry(counter, &evsel_list->entries, node) { 372 list_for_each_entry(counter, &evsel_list->entries, node) {
366 ps = counter->priv; 373 ps = counter->priv;
367 memset(ps->res_stats, 0, sizeof(ps->res_stats)); 374 memset(ps->res_stats, 0, sizeof(ps->res_stats));
368 read_counter_aggr(counter); 375 read_counter(counter);
369 } 376 }
370 } 377 }
378
371 clock_gettime(CLOCK_MONOTONIC, &ts); 379 clock_gettime(CLOCK_MONOTONIC, &ts);
372 diff_timespec(&rs, &ts, &ref_time); 380 diff_timespec(&rs, &ts, &ref_time);
373 sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); 381 sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
374 382
375 if (num_print_interval == 0 && !csv_output) { 383 if (num_print_interval == 0 && !csv_output) {
376 if (aggr_socket) 384 switch (aggr_mode) {
385 case AGGR_SOCKET:
377 fprintf(output, "# time socket cpus counts events\n"); 386 fprintf(output, "# time socket cpus counts events\n");
378 else if (no_aggr) 387 break;
388 case AGGR_CORE:
389 fprintf(output, "# time core cpus counts events\n");
390 break;
391 case AGGR_NONE:
379 fprintf(output, "# time CPU counts events\n"); 392 fprintf(output, "# time CPU counts events\n");
380 else 393 break;
394 case AGGR_GLOBAL:
395 default:
381 fprintf(output, "# time counts events\n"); 396 fprintf(output, "# time counts events\n");
397 }
382 } 398 }
383 399
384 if (++num_print_interval == 25) 400 if (++num_print_interval == 25)
385 num_print_interval = 0; 401 num_print_interval = 0;
386 402
387 if (aggr_socket) 403 switch (aggr_mode) {
388 print_aggr_socket(prefix); 404 case AGGR_CORE:
389 else if (no_aggr) { 405 case AGGR_SOCKET:
406 print_aggr(prefix);
407 break;
408 case AGGR_NONE:
390 list_for_each_entry(counter, &evsel_list->entries, node) 409 list_for_each_entry(counter, &evsel_list->entries, node)
391 print_counter(counter, prefix); 410 print_counter(counter, prefix);
392 } else { 411 break;
412 case AGGR_GLOBAL:
413 default:
393 list_for_each_entry(counter, &evsel_list->entries, node) 414 list_for_each_entry(counter, &evsel_list->entries, node)
394 print_counter_aggr(counter, prefix); 415 print_counter_aggr(counter, prefix);
395 } 416 }
@@ -412,12 +433,6 @@ static int __run_perf_stat(int argc, const char **argv)
412 ts.tv_nsec = 0; 433 ts.tv_nsec = 0;
413 } 434 }
414 435
415 if (aggr_socket
416 && cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
417 perror("cannot build socket map");
418 return -1;
419 }
420
421 if (forks) { 436 if (forks) {
422 if (perf_evlist__prepare_workload(evsel_list, &target, argv, 437 if (perf_evlist__prepare_workload(evsel_list, &target, argv,
423 false, false) < 0) { 438 false, false) < 0) {
@@ -493,17 +508,17 @@ static int __run_perf_stat(int argc, const char **argv)
493 508
494 update_stats(&walltime_nsecs_stats, t1 - t0); 509 update_stats(&walltime_nsecs_stats, t1 - t0);
495 510
496 if (no_aggr) { 511 if (aggr_mode == AGGR_GLOBAL) {
497 list_for_each_entry(counter, &evsel_list->entries, node) {
498 read_counter(counter);
499 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
500 }
501 } else {
502 list_for_each_entry(counter, &evsel_list->entries, node) { 512 list_for_each_entry(counter, &evsel_list->entries, node) {
503 read_counter_aggr(counter); 513 read_counter_aggr(counter);
504 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 514 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
505 thread_map__nr(evsel_list->threads)); 515 thread_map__nr(evsel_list->threads));
506 } 516 }
517 } else {
518 list_for_each_entry(counter, &evsel_list->entries, node) {
519 read_counter(counter);
520 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
521 }
507 } 522 }
508 523
509 return WEXITSTATUS(status); 524 return WEXITSTATUS(status);
@@ -556,26 +571,47 @@ static void print_noise(struct perf_evsel *evsel, double avg)
556 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 571 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
557} 572}
558 573
559static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) 574static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
560{ 575{
561 double msecs = avg / 1e6; 576 switch (aggr_mode) {
562 char cpustr[16] = { '\0', }; 577 case AGGR_CORE:
563 const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s"; 578 fprintf(output, "S%d-C%*d%s%*d%s",
564 579 cpu_map__id_to_socket(id),
565 if (aggr_socket) 580 csv_output ? 0 : -8,
566 sprintf(cpustr, "S%*d%s%*d%s", 581 cpu_map__id_to_cpu(id),
582 csv_sep,
583 csv_output ? 0 : 4,
584 nr,
585 csv_sep);
586 break;
587 case AGGR_SOCKET:
588 fprintf(output, "S%*d%s%*d%s",
567 csv_output ? 0 : -5, 589 csv_output ? 0 : -5,
568 cpu, 590 id,
569 csv_sep, 591 csv_sep,
570 csv_output ? 0 : 4, 592 csv_output ? 0 : 4,
571 nr, 593 nr,
572 csv_sep); 594 csv_sep);
573 else if (no_aggr) 595 break;
574 sprintf(cpustr, "CPU%*d%s", 596 case AGGR_NONE:
597 fprintf(output, "CPU%*d%s",
575 csv_output ? 0 : -4, 598 csv_output ? 0 : -4,
576 perf_evsel__cpus(evsel)->map[cpu], csv_sep); 599 perf_evsel__cpus(evsel)->map[id], csv_sep);
600 break;
601 case AGGR_GLOBAL:
602 default:
603 break;
604 }
605}
606
607static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
608{
609 double msecs = avg / 1e6;
610 const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s";
611
612 aggr_printout(evsel, cpu, nr);
577 613
578 fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel)); 614 fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel));
579 615
580 if (evsel->cgrp) 616 if (evsel->cgrp)
581 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 617 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
@@ -772,32 +808,21 @@ static void print_ll_cache_misses(int cpu,
772static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) 808static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
773{ 809{
774 double total, ratio = 0.0; 810 double total, ratio = 0.0;
775 char cpustr[16] = { '\0', };
776 const char *fmt; 811 const char *fmt;
777 812
778 if (csv_output) 813 if (csv_output)
779 fmt = "%s%.0f%s%s"; 814 fmt = "%.0f%s%s";
780 else if (big_num) 815 else if (big_num)
781 fmt = "%s%'18.0f%s%-25s"; 816 fmt = "%'18.0f%s%-25s";
782 else 817 else
783 fmt = "%s%18.0f%s%-25s"; 818 fmt = "%18.0f%s%-25s";
784 819
785 if (aggr_socket) 820 aggr_printout(evsel, cpu, nr);
786 sprintf(cpustr, "S%*d%s%*d%s", 821
787 csv_output ? 0 : -5, 822 if (aggr_mode == AGGR_GLOBAL)
788 cpu,
789 csv_sep,
790 csv_output ? 0 : 4,
791 nr,
792 csv_sep);
793 else if (no_aggr)
794 sprintf(cpustr, "CPU%*d%s",
795 csv_output ? 0 : -4,
796 perf_evsel__cpus(evsel)->map[cpu], csv_sep);
797 else
798 cpu = 0; 823 cpu = 0;
799 824
800 fprintf(output, fmt, cpustr, avg, csv_sep, perf_evsel__name(evsel)); 825 fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel));
801 826
802 if (evsel->cgrp) 827 if (evsel->cgrp)
803 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 828 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
@@ -896,23 +921,23 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
896 } 921 }
897} 922}
898 923
899static void print_aggr_socket(char *prefix) 924static void print_aggr(char *prefix)
900{ 925{
901 struct perf_evsel *counter; 926 struct perf_evsel *counter;
927 int cpu, s, s2, id, nr;
902 u64 ena, run, val; 928 u64 ena, run, val;
903 int cpu, s, s2, sock, nr;
904 929
905 if (!sock_map) 930 if (!(aggr_map || aggr_get_id))
906 return; 931 return;
907 932
908 for (s = 0; s < sock_map->nr; s++) { 933 for (s = 0; s < aggr_map->nr; s++) {
909 sock = cpu_map__socket(sock_map, s); 934 id = aggr_map->map[s];
910 list_for_each_entry(counter, &evsel_list->entries, node) { 935 list_for_each_entry(counter, &evsel_list->entries, node) {
911 val = ena = run = 0; 936 val = ena = run = 0;
912 nr = 0; 937 nr = 0;
913 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 938 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
914 s2 = cpu_map__get_socket(evsel_list->cpus, cpu); 939 s2 = aggr_get_id(evsel_list->cpus, cpu);
915 if (s2 != sock) 940 if (s2 != id)
916 continue; 941 continue;
917 val += counter->counts->cpu[cpu].val; 942 val += counter->counts->cpu[cpu].val;
918 ena += counter->counts->cpu[cpu].ena; 943 ena += counter->counts->cpu[cpu].ena;
@@ -923,18 +948,15 @@ static void print_aggr_socket(char *prefix)
923 fprintf(output, "%s", prefix); 948 fprintf(output, "%s", prefix);
924 949
925 if (run == 0 || ena == 0) { 950 if (run == 0 || ena == 0) {
926 fprintf(output, "S%*d%s%*d%s%*s%s%*s", 951 aggr_printout(counter, cpu, nr);
927 csv_output ? 0 : -5, 952
928 s, 953 fprintf(output, "%*s%s%*s",
929 csv_sep,
930 csv_output ? 0 : 4,
931 nr,
932 csv_sep,
933 csv_output ? 0 : 18, 954 csv_output ? 0 : 18,
934 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 955 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
935 csv_sep, 956 csv_sep,
936 csv_output ? 0 : -24, 957 csv_output ? 0 : -24,
937 perf_evsel__name(counter)); 958 perf_evsel__name(counter));
959
938 if (counter->cgrp) 960 if (counter->cgrp)
939 fprintf(output, "%s%s", 961 fprintf(output, "%s%s",
940 csv_sep, counter->cgrp->name); 962 csv_sep, counter->cgrp->name);
@@ -944,9 +966,9 @@ static void print_aggr_socket(char *prefix)
944 } 966 }
945 967
946 if (nsec_counter(counter)) 968 if (nsec_counter(counter))
947 nsec_printout(sock, nr, counter, val); 969 nsec_printout(id, nr, counter, val);
948 else 970 else
949 abs_printout(sock, nr, counter, val); 971 abs_printout(id, nr, counter, val);
950 972
951 if (!csv_output) { 973 if (!csv_output) {
952 print_noise(counter, 1.0); 974 print_noise(counter, 1.0);
@@ -1087,14 +1109,21 @@ static void print_stat(int argc, const char **argv)
1087 fprintf(output, ":\n\n"); 1109 fprintf(output, ":\n\n");
1088 } 1110 }
1089 1111
1090 if (aggr_socket) 1112 switch (aggr_mode) {
1091 print_aggr_socket(NULL); 1113 case AGGR_CORE:
1092 else if (no_aggr) { 1114 case AGGR_SOCKET:
1093 list_for_each_entry(counter, &evsel_list->entries, node) 1115 print_aggr(NULL);
1094 print_counter(counter, NULL); 1116 break;
1095 } else { 1117 case AGGR_GLOBAL:
1096 list_for_each_entry(counter, &evsel_list->entries, node) 1118 list_for_each_entry(counter, &evsel_list->entries, node)
1097 print_counter_aggr(counter, NULL); 1119 print_counter_aggr(counter, NULL);
1120 break;
1121 case AGGR_NONE:
1122 list_for_each_entry(counter, &evsel_list->entries, node)
1123 print_counter(counter, NULL);
1124 break;
1125 default:
1126 break;
1098 } 1127 }
1099 1128
1100 if (!csv_output) { 1129 if (!csv_output) {
@@ -1140,6 +1169,32 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
1140 return 0; 1169 return 0;
1141} 1170}
1142 1171
1172static int perf_stat_init_aggr_mode(void)
1173{
1174 switch (aggr_mode) {
1175 case AGGR_SOCKET:
1176 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
1177 perror("cannot build socket map");
1178 return -1;
1179 }
1180 aggr_get_id = cpu_map__get_socket;
1181 break;
1182 case AGGR_CORE:
1183 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
1184 perror("cannot build core map");
1185 return -1;
1186 }
1187 aggr_get_id = cpu_map__get_core;
1188 break;
1189 case AGGR_NONE:
1190 case AGGR_GLOBAL:
1191 default:
1192 break;
1193 }
1194 return 0;
1195}
1196
1197
1143/* 1198/*
1144 * Add default attributes, if there were no attributes specified or 1199 * Add default attributes, if there were no attributes specified or
1145 * if -d/--detailed, -d -d or -d -d -d is used: 1200 * if -d/--detailed, -d -d or -d -d -d is used:
@@ -1322,7 +1377,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1322 stat__set_big_num), 1377 stat__set_big_num),
1323 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1378 OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
1324 "list of cpus to monitor in system-wide"), 1379 "list of cpus to monitor in system-wide"),
1325 OPT_BOOLEAN('A', "no-aggr", &no_aggr, "disable CPU count aggregation"), 1380 OPT_SET_UINT('A', "no-aggr", &aggr_mode,
1381 "disable CPU count aggregation", AGGR_NONE),
1326 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1382 OPT_STRING('x', "field-separator", &csv_sep, "separator",
1327 "print counts with custom separator"), 1383 "print counts with custom separator"),
1328 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 1384 OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
@@ -1337,7 +1393,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1337 "command to run after to the measured command"), 1393 "command to run after to the measured command"),
1338 OPT_UINTEGER('I', "interval-print", &interval, 1394 OPT_UINTEGER('I', "interval-print", &interval,
1339 "print counts at regular interval in ms (>= 100)"), 1395 "print counts at regular interval in ms (>= 100)"),
1340 OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"), 1396 OPT_SET_UINT(0, "per-socket", &aggr_mode,
1397 "aggregate counts per processor socket", AGGR_SOCKET),
1398 OPT_SET_UINT(0, "per-core", &aggr_mode,
1399 "aggregate counts per physical processor core", AGGR_CORE),
1341 OPT_END() 1400 OPT_END()
1342 }; 1401 };
1343 const char * const stat_usage[] = { 1402 const char * const stat_usage[] = {
@@ -1420,19 +1479,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1420 } 1479 }
1421 1480
1422 /* no_aggr, cgroup are for system-wide only */ 1481 /* no_aggr, cgroup are for system-wide only */
1423 if ((no_aggr || nr_cgroups) && !perf_target__has_cpu(&target)) { 1482 if ((aggr_mode != AGGR_GLOBAL || nr_cgroups)
1483 && !perf_target__has_cpu(&target)) {
1424 fprintf(stderr, "both cgroup and no-aggregation " 1484 fprintf(stderr, "both cgroup and no-aggregation "
1425 "modes only available in system-wide mode\n"); 1485 "modes only available in system-wide mode\n");
1426 1486
1427 usage_with_options(stat_usage, options); 1487 usage_with_options(stat_usage, options);
1428 } 1488 return -1;
1429
1430 if (aggr_socket) {
1431 if (!perf_target__has_cpu(&target)) {
1432 fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n");
1433 usage_with_options(stat_usage, options);
1434 }
1435 no_aggr = true;
1436 } 1489 }
1437 1490
1438 if (add_default_attributes()) 1491 if (add_default_attributes())
@@ -1458,6 +1511,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1458 if (perf_evlist__alloc_stats(evsel_list, interval)) 1511 if (perf_evlist__alloc_stats(evsel_list, interval))
1459 goto out_free_maps; 1512 goto out_free_maps;
1460 1513
1514 if (perf_stat_init_aggr_mode())
1515 goto out;
1516
1461 /* 1517 /*
1462 * We dont want to block the signals - that would cause 1518 * We dont want to block the signals - that would cause
1463 * child tasks to inherit that and Ctrl-C would not work. 1519 * child tasks to inherit that and Ctrl-C would not work.
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index b5520ad0dbb8..67bdb9f14ad6 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -251,7 +251,8 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
251{ 251{
252 struct hist_entry *he; 252 struct hist_entry *he;
253 253
254 he = __hists__add_entry(&evsel->hists, al, NULL, sample->period); 254 he = __hists__add_entry(&evsel->hists, al, NULL, sample->period,
255 sample->weight);
255 if (he == NULL) 256 if (he == NULL)
256 return NULL; 257 return NULL;
257 258
@@ -1088,7 +1089,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1088 OPT_INCR('v', "verbose", &verbose, 1089 OPT_INCR('v', "verbose", &verbose,
1089 "be more verbose (show counter open errors, etc)"), 1090 "be more verbose (show counter open errors, etc)"),
1090 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 1091 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
1091 "sort by key(s): pid, comm, dso, symbol, parent"), 1092 "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight"),
1092 OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, 1093 OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
1093 "Show a column with the number of samples"), 1094 "Show a column with the number of samples"),
1094 OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts, 1095 OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 08143bd854c7..b210d62907e4 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -36,6 +36,7 @@ extern int cmd_kvm(int argc, const char **argv, const char *prefix);
36extern int cmd_test(int argc, const char **argv, const char *prefix); 36extern int cmd_test(int argc, const char **argv, const char *prefix);
37extern int cmd_trace(int argc, const char **argv, const char *prefix); 37extern int cmd_trace(int argc, const char **argv, const char *prefix);
38extern int cmd_inject(int argc, const char **argv, const char *prefix); 38extern int cmd_inject(int argc, const char **argv, const char *prefix);
39extern int cmd_mem(int argc, const char **argv, const char *prefix);
39 40
40extern int find_scripts(char **scripts_array, char **scripts_path_array); 41extern int find_scripts(char **scripts_array, char **scripts_path_array);
41#endif 42#endif
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index a28e31be6cb4..0906fc401c52 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -14,6 +14,7 @@ perf-kmem mainporcelain common
14perf-kvm mainporcelain common 14perf-kvm mainporcelain common
15perf-list mainporcelain common 15perf-list mainporcelain common
16perf-lock mainporcelain common 16perf-lock mainporcelain common
17perf-mem mainporcelain common
17perf-probe mainporcelain full 18perf-probe mainporcelain full
18perf-record mainporcelain common 19perf-record mainporcelain common
19perf-report mainporcelain common 20perf-report mainporcelain common
diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak
index b4eabb44e381..708fb8e9822a 100644
--- a/tools/perf/config/feature-tests.mak
+++ b/tools/perf/config/feature-tests.mak
@@ -61,15 +61,13 @@ int main(void)
61} 61}
62endef 62endef
63 63
64ifndef NO_NEWT 64ifndef NO_SLANG
65define SOURCE_NEWT 65define SOURCE_SLANG
66#include <newt.h> 66#include <slang.h>
67 67
68int main(void) 68int main(void)
69{ 69{
70 newtInit(); 70 return SLsmg_init_smg();
71 newtCls();
72 return newtFinished();
73} 71}
74endef 72endef
75endif 73endif
@@ -235,4 +233,4 @@ int main(void)
235 numa_available(); 233 numa_available();
236 return 0; 234 return 0;
237} 235}
238endef \ No newline at end of file 236endef
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index f6ba7b73f40e..85e1aed95204 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -60,6 +60,7 @@ static struct cmd_struct commands[] = {
60 { "trace", cmd_trace, 0 }, 60 { "trace", cmd_trace, 0 },
61#endif 61#endif
62 { "inject", cmd_inject, 0 }, 62 { "inject", cmd_inject, 0 },
63 { "mem", cmd_mem, 0 },
63}; 64};
64 65
65struct pager_config { 66struct pager_config {
@@ -517,9 +518,8 @@ int main(int argc, const char **argv)
517 518
518 while (1) { 519 while (1) {
519 static int done_help; 520 static int done_help;
520 static int was_alias; 521 int was_alias = run_argv(&argc, &argv);
521 522
522 was_alias = run_argv(&argc, &argv);
523 if (errno != ENOENT) 523 if (errno != ENOENT)
524 break; 524 break;
525 525
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 74659ecf93e0..32bd102c32b6 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -218,6 +218,7 @@ struct perf_record_opts {
218 bool pipe_output; 218 bool pipe_output;
219 bool raw_samples; 219 bool raw_samples;
220 bool sample_address; 220 bool sample_address;
221 bool sample_weight;
221 bool sample_time; 222 bool sample_time;
222 bool period; 223 bool period;
223 unsigned int freq; 224 unsigned int freq;
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index e0c0267858a1..89085a9615e2 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -223,7 +223,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
223 &sample, 0) < 0) 223 &sample, 0) < 0)
224 goto out; 224 goto out;
225 225
226 he = __hists__add_entry(&evsel->hists, &al, NULL, 1); 226 he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1);
227 if (he == NULL) 227 if (he == NULL)
228 goto out; 228 goto out;
229 229
@@ -247,7 +247,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
247 &sample, 0) < 0) 247 &sample, 0) < 0)
248 goto out; 248 goto out;
249 249
250 he = __hists__add_entry(&evsel->hists, &al, NULL, 1); 250 he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1);
251 if (he == NULL) 251 if (he == NULL)
252 goto out; 252 goto out;
253 253
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index 809ea4632a34..bbc782e364b0 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -2,7 +2,6 @@
2#include "../cache.h" 2#include "../cache.h"
3#include "../../perf.h" 3#include "../../perf.h"
4#include "libslang.h" 4#include "libslang.h"
5#include <newt.h>
6#include "ui.h" 5#include "ui.h"
7#include "util.h" 6#include "util.h"
8#include <linux/compiler.h> 7#include <linux/compiler.h>
@@ -234,7 +233,7 @@ void ui_browser__reset_index(struct ui_browser *browser)
234void __ui_browser__show_title(struct ui_browser *browser, const char *title) 233void __ui_browser__show_title(struct ui_browser *browser, const char *title)
235{ 234{
236 SLsmg_gotorc(0, 0); 235 SLsmg_gotorc(0, 0);
237 ui_browser__set_color(browser, NEWT_COLORSET_ROOT); 236 ui_browser__set_color(browser, HE_COLORSET_ROOT);
238 slsmg_write_nstring(title, browser->width + 1); 237 slsmg_write_nstring(title, browser->width + 1);
239} 238}
240 239
@@ -514,6 +513,12 @@ static struct ui_browser_colorset {
514 .bg = "default", 513 .bg = "default",
515 }, 514 },
516 { 515 {
516 .colorset = HE_COLORSET_ROOT,
517 .name = "root",
518 .fg = "white",
519 .bg = "blue",
520 },
521 {
517 .name = NULL, 522 .name = NULL,
518 } 523 }
519}; 524};
diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h
index af70314605e5..404ff66a3e36 100644
--- a/tools/perf/ui/browser.h
+++ b/tools/perf/ui/browser.h
@@ -11,6 +11,7 @@
11#define HE_COLORSET_SELECTED 53 11#define HE_COLORSET_SELECTED 53
12#define HE_COLORSET_CODE 54 12#define HE_COLORSET_CODE 54
13#define HE_COLORSET_ADDR 55 13#define HE_COLORSET_ADDR 55
14#define HE_COLORSET_ROOT 56
14 15
15struct ui_browser { 16struct ui_browser {
16 u64 index, top_idx; 17 u64 index, top_idx;
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index f56247a03a22..cc64d3f7fc36 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -10,7 +10,6 @@
10#include "../../util/symbol.h" 10#include "../../util/symbol.h"
11#include "../../util/evsel.h" 11#include "../../util/evsel.h"
12#include <pthread.h> 12#include <pthread.h>
13#include <newt.h>
14 13
15struct browser_disasm_line { 14struct browser_disasm_line {
16 struct rb_node rb_node; 15 struct rb_node rb_node;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index a5843fd6ab51..d88a2d0acb6d 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2,7 +2,6 @@
2#include "../libslang.h" 2#include "../libslang.h"
3#include <stdlib.h> 3#include <stdlib.h>
4#include <string.h> 4#include <string.h>
5#include <newt.h>
6#include <linux/rbtree.h> 5#include <linux/rbtree.h>
7 6
8#include "../../util/evsel.h" 7#include "../../util/evsel.h"
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
index 98851d55a53e..95c7cfb8f2c6 100644
--- a/tools/perf/ui/browsers/map.c
+++ b/tools/perf/ui/browsers/map.c
@@ -1,6 +1,5 @@
1#include "../libslang.h" 1#include "../libslang.h"
2#include <elf.h> 2#include <elf.h>
3#include <newt.h>
4#include <inttypes.h> 3#include <inttypes.h>
5#include <sys/ttydefaults.h> 4#include <sys/ttydefaults.h>
6#include <string.h> 5#include <string.h>
@@ -10,41 +9,9 @@
10#include "../../util/symbol.h" 9#include "../../util/symbol.h"
11#include "../browser.h" 10#include "../browser.h"
12#include "../helpline.h" 11#include "../helpline.h"
12#include "../keysyms.h"
13#include "map.h" 13#include "map.h"
14 14
15static int ui_entry__read(const char *title, char *bf, size_t size, int width)
16{
17 struct newtExitStruct es;
18 newtComponent form, entry;
19 const char *result;
20 int err = -1;
21
22 newtCenteredWindow(width, 1, title);
23 form = newtForm(NULL, NULL, 0);
24 if (form == NULL)
25 return -1;
26
27 entry = newtEntry(0, 0, "0x", width, &result, NEWT_FLAG_SCROLL);
28 if (entry == NULL)
29 goto out_free_form;
30
31 newtFormAddComponent(form, entry);
32 newtFormAddHotKey(form, NEWT_KEY_ENTER);
33 newtFormAddHotKey(form, NEWT_KEY_ESCAPE);
34 newtFormAddHotKey(form, NEWT_KEY_LEFT);
35 newtFormAddHotKey(form, CTRL('c'));
36 newtFormRun(form, &es);
37
38 if (result != NULL) {
39 strncpy(bf, result, size);
40 err = 0;
41 }
42out_free_form:
43 newtPopWindow();
44 newtFormDestroy(form);
45 return err;
46}
47
48struct map_browser { 15struct map_browser {
49 struct ui_browser b; 16 struct ui_browser b;
50 struct map *map; 17 struct map *map;
@@ -78,10 +45,11 @@ static int map_browser__search(struct map_browser *self)
78{ 45{
79 char target[512]; 46 char target[512];
80 struct symbol *sym; 47 struct symbol *sym;
81 int err = ui_entry__read("Search by name/addr", target, sizeof(target), 40); 48 int err = ui_browser__input_window("Search by name/addr",
82 49 "Prefix with 0x to search by address",
83 if (err) 50 target, "ENTER: OK, ESC: Cancel", 0);
84 return err; 51 if (err != K_ENTER)
52 return -1;
85 53
86 if (target[0] == '0' && tolower(target[1]) == 'x') { 54 if (target[0] == '0' && tolower(target[1]) == 'x') {
87 u64 addr = strtoull(target, NULL, 16); 55 u64 addr = strtoull(target, NULL, 16);
@@ -112,12 +80,20 @@ static int map_browser__run(struct map_browser *self)
112 while (1) { 80 while (1) {
113 key = ui_browser__run(&self->b, 0); 81 key = ui_browser__run(&self->b, 0);
114 82
115 if (verbose && key == '/') 83 switch (key) {
116 map_browser__search(self); 84 case '/':
117 else 85 if (verbose)
86 map_browser__search(self);
87 default:
118 break; 88 break;
89 case K_LEFT:
90 case K_ESC:
91 case 'q':
92 case CTRL('c'):
93 goto out;
94 }
119 } 95 }
120 96out:
121 ui_browser__hide(&self->b); 97 ui_browser__hide(&self->b);
122 return key; 98 return key;
123} 99}
diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c
index cbbd44b0d93e..12f009e61e94 100644
--- a/tools/perf/ui/browsers/scripts.c
+++ b/tools/perf/ui/browsers/scripts.c
@@ -1,5 +1,4 @@
1#include <elf.h> 1#include <elf.h>
2#include <newt.h>
3#include <inttypes.h> 2#include <inttypes.h>
4#include <sys/ttydefaults.h> 3#include <sys/ttydefaults.h>
5#include <string.h> 4#include <string.h>
diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c
index 81efa192e86c..b9401482d110 100644
--- a/tools/perf/ui/tui/setup.c
+++ b/tools/perf/ui/tui/setup.c
@@ -1,4 +1,3 @@
1#include <newt.h>
2#include <signal.h> 1#include <signal.h>
3#include <stdbool.h> 2#include <stdbool.h>
4 3
@@ -88,13 +87,6 @@ int ui__getch(int delay_secs)
88 return SLkp_getkey(); 87 return SLkp_getkey();
89} 88}
90 89
91static void newt_suspend(void *d __maybe_unused)
92{
93 newtSuspend();
94 raise(SIGTSTP);
95 newtResume();
96}
97
98static void ui__signal(int sig) 90static void ui__signal(int sig)
99{ 91{
100 ui__exit(false); 92 ui__exit(false);
@@ -106,7 +98,17 @@ int ui__init(void)
106{ 98{
107 int err; 99 int err;
108 100
109 newtInit(); 101 SLutf8_enable(-1);
102 SLtt_get_terminfo();
103 SLtt_get_screen_size();
104
105 err = SLsmg_init_smg();
106 if (err < 0)
107 goto out;
108 err = SLang_init_tty(0, 0, 0);
109 if (err < 0)
110 goto out;
111
110 err = SLkp_init(); 112 err = SLkp_init();
111 if (err < 0) { 113 if (err < 0) {
112 pr_err("TUI initialization failed.\n"); 114 pr_err("TUI initialization failed.\n");
@@ -115,7 +117,6 @@ int ui__init(void)
115 117
116 SLkp_define_keysym((char *)"^(kB)", SL_KEY_UNTAB); 118 SLkp_define_keysym((char *)"^(kB)", SL_KEY_UNTAB);
117 119
118 newtSetSuspendCallback(newt_suspend, NULL);
119 ui_helpline__init(); 120 ui_helpline__init();
120 ui_browser__init(); 121 ui_browser__init();
121 ui_progress__init(); 122 ui_progress__init();
diff --git a/tools/perf/ui/ui.h b/tools/perf/ui/ui.h
index d86359c99907..70cb0d4eb8aa 100644
--- a/tools/perf/ui/ui.h
+++ b/tools/perf/ui/ui.h
@@ -12,7 +12,7 @@ extern int use_browser;
12void setup_browser(bool fallback_to_pager); 12void setup_browser(bool fallback_to_pager);
13void exit_browser(bool wait_for_ok); 13void exit_browser(bool wait_for_ok);
14 14
15#ifdef NEWT_SUPPORT 15#ifdef SLANG_SUPPORT
16int ui__init(void); 16int ui__init(void);
17void ui__exit(bool wait_for_ok); 17void ui__exit(bool wait_for_ok);
18#else 18#else
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 6f3c16f01ab4..af755156d278 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -150,7 +150,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
150 struct perf_evsel *evsel, bool print_lines, 150 struct perf_evsel *evsel, bool print_lines,
151 bool full_paths, int min_pcnt, int max_lines); 151 bool full_paths, int min_pcnt, int max_lines);
152 152
153#ifdef NEWT_SUPPORT 153#ifdef SLANG_SUPPORT
154int symbol__tui_annotate(struct symbol *sym, struct map *map, 154int symbol__tui_annotate(struct symbol *sym, struct map *map,
155 struct perf_evsel *evsel, 155 struct perf_evsel *evsel,
156 struct hist_browser_timer *hbt); 156 struct hist_browser_timer *hbt);
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index f817046e22b1..beb8cf9f9976 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -4,6 +4,7 @@
4#include "cpumap.h" 4#include "cpumap.h"
5#include <assert.h> 5#include <assert.h>
6#include <stdio.h> 6#include <stdio.h>
7#include <stdlib.h>
7 8
8static struct cpu_map *cpu_map__default_new(void) 9static struct cpu_map *cpu_map__default_new(void)
9{ 10{
@@ -219,7 +220,7 @@ int cpu_map__get_socket(struct cpu_map *map, int idx)
219 if (!mnt) 220 if (!mnt)
220 return -1; 221 return -1;
221 222
222 sprintf(path, 223 snprintf(path, PATH_MAX,
223 "%s/devices/system/cpu/cpu%d/topology/physical_package_id", 224 "%s/devices/system/cpu/cpu%d/topology/physical_package_id",
224 mnt, cpu); 225 mnt, cpu);
225 226
@@ -231,27 +232,88 @@ int cpu_map__get_socket(struct cpu_map *map, int idx)
231 return ret == 1 ? cpu : -1; 232 return ret == 1 ? cpu : -1;
232} 233}
233 234
234int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) 235static int cmp_ids(const void *a, const void *b)
235{ 236{
236 struct cpu_map *sock; 237 return *(int *)a - *(int *)b;
238}
239
240static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
241 int (*f)(struct cpu_map *map, int cpu))
242{
243 struct cpu_map *c;
237 int nr = cpus->nr; 244 int nr = cpus->nr;
238 int cpu, s1, s2; 245 int cpu, s1, s2;
239 246
240 sock = calloc(1, sizeof(*sock) + nr * sizeof(int)); 247 /* allocate as much as possible */
241 if (!sock) 248 c = calloc(1, sizeof(*c) + nr * sizeof(int));
249 if (!c)
242 return -1; 250 return -1;
243 251
244 for (cpu = 0; cpu < nr; cpu++) { 252 for (cpu = 0; cpu < nr; cpu++) {
245 s1 = cpu_map__get_socket(cpus, cpu); 253 s1 = f(cpus, cpu);
246 for (s2 = 0; s2 < sock->nr; s2++) { 254 for (s2 = 0; s2 < c->nr; s2++) {
247 if (s1 == sock->map[s2]) 255 if (s1 == c->map[s2])
248 break; 256 break;
249 } 257 }
250 if (s2 == sock->nr) { 258 if (s2 == c->nr) {
251 sock->map[sock->nr] = s1; 259 c->map[c->nr] = s1;
252 sock->nr++; 260 c->nr++;
253 } 261 }
254 } 262 }
255 *sockp = sock; 263 /* ensure we process id in increasing order */
264 qsort(c->map, c->nr, sizeof(int), cmp_ids);
265
266 *res = c;
256 return 0; 267 return 0;
257} 268}
269
270int cpu_map__get_core(struct cpu_map *map, int idx)
271{
272 FILE *fp;
273 const char *mnt;
274 char path[PATH_MAX];
275 int cpu, ret, s;
276
277 if (idx > map->nr)
278 return -1;
279
280 cpu = map->map[idx];
281
282 mnt = sysfs_find_mountpoint();
283 if (!mnt)
284 return -1;
285
286 snprintf(path, PATH_MAX,
287 "%s/devices/system/cpu/cpu%d/topology/core_id",
288 mnt, cpu);
289
290 fp = fopen(path, "r");
291 if (!fp)
292 return -1;
293 ret = fscanf(fp, "%d", &cpu);
294 fclose(fp);
295 if (ret != 1)
296 return -1;
297
298 s = cpu_map__get_socket(map, idx);
299 if (s == -1)
300 return -1;
301
302 /*
303 * encode socket in upper 16 bits
304 * core_id is relative to socket, and
305 * we need a global id. So we combine
306 * socket+ core id
307 */
308 return (s << 16) | (cpu & 0xffff);
309}
310
311int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
312{
313 return cpu_map__build_map(cpus, sockp, cpu_map__get_socket);
314}
315
316int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
317{
318 return cpu_map__build_map(cpus, corep, cpu_map__get_core);
319}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 161b00756a12..9bed02e5fb3d 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -15,7 +15,9 @@ void cpu_map__delete(struct cpu_map *map);
15struct cpu_map *cpu_map__read(FILE *file); 15struct cpu_map *cpu_map__read(FILE *file);
16size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); 16size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
17int cpu_map__get_socket(struct cpu_map *map, int idx); 17int cpu_map__get_socket(struct cpu_map *map, int idx);
18int cpu_map__get_core(struct cpu_map *map, int idx);
18int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); 19int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
20int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
19 21
20static inline int cpu_map__socket(struct cpu_map *sock, int s) 22static inline int cpu_map__socket(struct cpu_map *sock, int s)
21{ 23{
@@ -24,6 +26,16 @@ static inline int cpu_map__socket(struct cpu_map *sock, int s)
24 return sock->map[s]; 26 return sock->map[s];
25} 27}
26 28
29static inline int cpu_map__id_to_socket(int id)
30{
31 return id >> 16;
32}
33
34static inline int cpu_map__id_to_cpu(int id)
35{
36 return id & 0xffff;
37}
38
27static inline int cpu_map__nr(const struct cpu_map *map) 39static inline int cpu_map__nr(const struct cpu_map *map)
28{ 40{
29 return map ? map->nr : 1; 41 return map ? map->nr : 1;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 0d573ff4771a..181389535c0c 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -88,8 +88,10 @@ struct perf_sample {
88 u64 id; 88 u64 id;
89 u64 stream_id; 89 u64 stream_id;
90 u64 period; 90 u64 period;
91 u64 weight;
91 u32 cpu; 92 u32 cpu;
92 u32 raw_size; 93 u32 raw_size;
94 u64 data_src;
93 void *raw_data; 95 void *raw_data;
94 struct ip_callchain *callchain; 96 struct ip_callchain *callchain;
95 struct branch_stack *branch_stack; 97 struct branch_stack *branch_stack;
@@ -97,6 +99,13 @@ struct perf_sample {
97 struct stack_dump user_stack; 99 struct stack_dump user_stack;
98}; 100};
99 101
102#define PERF_MEM_DATA_SRC_NONE \
103 (PERF_MEM_S(OP, NA) |\
104 PERF_MEM_S(LVL, NA) |\
105 PERF_MEM_S(SNOOP, NA) |\
106 PERF_MEM_S(LOCK, NA) |\
107 PERF_MEM_S(TLB, NA))
108
100struct build_id_event { 109struct build_id_event {
101 struct perf_event_header header; 110 struct perf_event_header header;
102 pid_t pid; 111 pid_t pid;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1adb824610f0..07b1a3ad3e24 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -554,6 +554,9 @@ void perf_evsel__config(struct perf_evsel *evsel,
554 perf_evsel__set_sample_bit(evsel, CPU); 554 perf_evsel__set_sample_bit(evsel, CPU);
555 } 555 }
556 556
557 if (opts->sample_address)
558 attr->sample_type |= PERF_SAMPLE_DATA_SRC;
559
557 if (opts->no_delay) { 560 if (opts->no_delay) {
558 attr->watermark = 0; 561 attr->watermark = 0;
559 attr->wakeup_events = 1; 562 attr->wakeup_events = 1;
@@ -563,6 +566,9 @@ void perf_evsel__config(struct perf_evsel *evsel,
563 attr->branch_sample_type = opts->branch_stack; 566 attr->branch_sample_type = opts->branch_stack;
564 } 567 }
565 568
569 if (opts->sample_weight)
570 attr->sample_type |= PERF_SAMPLE_WEIGHT;
571
566 attr->mmap = track; 572 attr->mmap = track;
567 attr->comm = track; 573 attr->comm = track;
568 574
@@ -1017,6 +1023,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
1017 data->cpu = data->pid = data->tid = -1; 1023 data->cpu = data->pid = data->tid = -1;
1018 data->stream_id = data->id = data->time = -1ULL; 1024 data->stream_id = data->id = data->time = -1ULL;
1019 data->period = 1; 1025 data->period = 1;
1026 data->weight = 0;
1020 1027
1021 if (event->header.type != PERF_RECORD_SAMPLE) { 1028 if (event->header.type != PERF_RECORD_SAMPLE) {
1022 if (!evsel->attr.sample_id_all) 1029 if (!evsel->attr.sample_id_all)
@@ -1167,6 +1174,18 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
1167 } 1174 }
1168 } 1175 }
1169 1176
1177 data->weight = 0;
1178 if (type & PERF_SAMPLE_WEIGHT) {
1179 data->weight = *array;
1180 array++;
1181 }
1182
1183 data->data_src = PERF_MEM_DATA_SRC_NONE;
1184 if (type & PERF_SAMPLE_DATA_SRC) {
1185 data->data_src = *array;
1186 array++;
1187 }
1188
1170 return 0; 1189 return 0;
1171} 1190}
1172 1191
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index a9b7349f7c5f..326068a593a5 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1,5 +1,3 @@
1#define _FILE_OFFSET_BITS 64
2
3#include "util.h" 1#include "util.h"
4#include <sys/types.h> 2#include <sys/types.h>
5#include <byteswap.h> 3#include <byteswap.h>
@@ -1672,8 +1670,8 @@ static int process_tracing_data(struct perf_file_section *section __maybe_unused
1672 struct perf_header *ph __maybe_unused, 1670 struct perf_header *ph __maybe_unused,
1673 int fd, void *data) 1671 int fd, void *data)
1674{ 1672{
1675 trace_report(fd, data, false); 1673 ssize_t ret = trace_report(fd, data, false);
1676 return 0; 1674 return ret < 0 ? -1 : 0;
1677} 1675}
1678 1676
1679static int process_build_id(struct perf_file_section *section, 1677static int process_build_id(struct perf_file_section *section,
@@ -2752,6 +2750,11 @@ static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel,
2752 if (evsel->tp_format) 2750 if (evsel->tp_format)
2753 return 0; 2751 return 0;
2754 2752
2753 if (pevent == NULL) {
2754 pr_debug("broken or missing trace data\n");
2755 return -1;
2756 }
2757
2755 event = pevent_find_event(pevent, evsel->attr.config); 2758 event = pevent_find_event(pevent, evsel->attr.config);
2756 if (event == NULL) 2759 if (event == NULL)
2757 return -1; 2760 return -1;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index f855941bebea..6b32721f829a 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -67,12 +67,16 @@ static void hists__set_unres_dso_col_len(struct hists *hists, int dso)
67void hists__calc_col_len(struct hists *hists, struct hist_entry *h) 67void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
68{ 68{
69 const unsigned int unresolved_col_width = BITS_PER_LONG / 4; 69 const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
70 int symlen;
70 u16 len; 71 u16 len;
71 72
72 if (h->ms.sym) 73 if (h->ms.sym)
73 hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4); 74 hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4);
74 else 75 else {
76 symlen = unresolved_col_width + 4 + 2;
77 hists__new_col_len(hists, HISTC_SYMBOL, symlen);
75 hists__set_unres_dso_col_len(hists, HISTC_DSO); 78 hists__set_unres_dso_col_len(hists, HISTC_DSO);
79 }
76 80
77 len = thread__comm_len(h->thread); 81 len = thread__comm_len(h->thread);
78 if (hists__new_col_len(hists, HISTC_COMM, len)) 82 if (hists__new_col_len(hists, HISTC_COMM, len))
@@ -87,7 +91,6 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
87 hists__new_col_len(hists, HISTC_PARENT, h->parent->namelen); 91 hists__new_col_len(hists, HISTC_PARENT, h->parent->namelen);
88 92
89 if (h->branch_info) { 93 if (h->branch_info) {
90 int symlen;
91 /* 94 /*
92 * +4 accounts for '[x] ' priv level info 95 * +4 accounts for '[x] ' priv level info
93 * +2 account of 0x prefix on raw addresses 96 * +2 account of 0x prefix on raw addresses
@@ -116,6 +119,42 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
116 hists__set_unres_dso_col_len(hists, HISTC_DSO_TO); 119 hists__set_unres_dso_col_len(hists, HISTC_DSO_TO);
117 } 120 }
118 } 121 }
122
123 if (h->mem_info) {
124 /*
125 * +4 accounts for '[x] ' priv level info
126 * +2 account of 0x prefix on raw addresses
127 */
128 if (h->mem_info->daddr.sym) {
129 symlen = (int)h->mem_info->daddr.sym->namelen + 4
130 + unresolved_col_width + 2;
131 hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
132 symlen);
133 } else {
134 symlen = unresolved_col_width + 4 + 2;
135 hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
136 symlen);
137 }
138 if (h->mem_info->daddr.map) {
139 symlen = dso__name_len(h->mem_info->daddr.map->dso);
140 hists__new_col_len(hists, HISTC_MEM_DADDR_DSO,
141 symlen);
142 } else {
143 symlen = unresolved_col_width + 4 + 2;
144 hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
145 }
146 } else {
147 symlen = unresolved_col_width + 4 + 2;
148 hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);
149 hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
150 }
151
152 hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
153 hists__new_col_len(hists, HISTC_MEM_TLB, 22);
154 hists__new_col_len(hists, HISTC_MEM_SNOOP, 12);
155 hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3);
156 hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
157 hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
119} 158}
120 159
121void hists__output_recalc_col_len(struct hists *hists, int max_rows) 160void hists__output_recalc_col_len(struct hists *hists, int max_rows)
@@ -155,9 +194,12 @@ static void hist_entry__add_cpumode_period(struct hist_entry *he,
155 } 194 }
156} 195}
157 196
158static void he_stat__add_period(struct he_stat *he_stat, u64 period) 197static void he_stat__add_period(struct he_stat *he_stat, u64 period,
198 u64 weight)
159{ 199{
200
160 he_stat->period += period; 201 he_stat->period += period;
202 he_stat->weight += weight;
161 he_stat->nr_events += 1; 203 he_stat->nr_events += 1;
162} 204}
163 205
@@ -169,12 +211,14 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
169 dest->period_guest_sys += src->period_guest_sys; 211 dest->period_guest_sys += src->period_guest_sys;
170 dest->period_guest_us += src->period_guest_us; 212 dest->period_guest_us += src->period_guest_us;
171 dest->nr_events += src->nr_events; 213 dest->nr_events += src->nr_events;
214 dest->weight += src->weight;
172} 215}
173 216
174static void hist_entry__decay(struct hist_entry *he) 217static void hist_entry__decay(struct hist_entry *he)
175{ 218{
176 he->stat.period = (he->stat.period * 7) / 8; 219 he->stat.period = (he->stat.period * 7) / 8;
177 he->stat.nr_events = (he->stat.nr_events * 7) / 8; 220 he->stat.nr_events = (he->stat.nr_events * 7) / 8;
221 /* XXX need decay for weight too? */
178} 222}
179 223
180static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) 224static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
@@ -239,7 +283,7 @@ void hists__decay_entries_threaded(struct hists *hists,
239static struct hist_entry *hist_entry__new(struct hist_entry *template) 283static struct hist_entry *hist_entry__new(struct hist_entry *template)
240{ 284{
241 size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0; 285 size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
242 struct hist_entry *he = malloc(sizeof(*he) + callchain_size); 286 struct hist_entry *he = zalloc(sizeof(*he) + callchain_size);
243 287
244 if (he != NULL) { 288 if (he != NULL) {
245 *he = *template; 289 *he = *template;
@@ -254,6 +298,13 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
254 he->branch_info->to.map->referenced = true; 298 he->branch_info->to.map->referenced = true;
255 } 299 }
256 300
301 if (he->mem_info) {
302 if (he->mem_info->iaddr.map)
303 he->mem_info->iaddr.map->referenced = true;
304 if (he->mem_info->daddr.map)
305 he->mem_info->daddr.map->referenced = true;
306 }
307
257 if (symbol_conf.use_callchain) 308 if (symbol_conf.use_callchain)
258 callchain_init(he->callchain); 309 callchain_init(he->callchain);
259 310
@@ -282,7 +333,8 @@ static u8 symbol__parent_filter(const struct symbol *parent)
282static struct hist_entry *add_hist_entry(struct hists *hists, 333static struct hist_entry *add_hist_entry(struct hists *hists,
283 struct hist_entry *entry, 334 struct hist_entry *entry,
284 struct addr_location *al, 335 struct addr_location *al,
285 u64 period) 336 u64 period,
337 u64 weight)
286{ 338{
287 struct rb_node **p; 339 struct rb_node **p;
288 struct rb_node *parent = NULL; 340 struct rb_node *parent = NULL;
@@ -306,7 +358,7 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
306 cmp = hist_entry__cmp(he, entry); 358 cmp = hist_entry__cmp(he, entry);
307 359
308 if (!cmp) { 360 if (!cmp) {
309 he_stat__add_period(&he->stat, period); 361 he_stat__add_period(&he->stat, period, weight);
310 362
311 /* If the map of an existing hist_entry has 363 /* If the map of an existing hist_entry has
312 * become out-of-date due to an exec() or 364 * become out-of-date due to an exec() or
@@ -341,11 +393,42 @@ out_unlock:
341 return he; 393 return he;
342} 394}
343 395
396struct hist_entry *__hists__add_mem_entry(struct hists *self,
397 struct addr_location *al,
398 struct symbol *sym_parent,
399 struct mem_info *mi,
400 u64 period,
401 u64 weight)
402{
403 struct hist_entry entry = {
404 .thread = al->thread,
405 .ms = {
406 .map = al->map,
407 .sym = al->sym,
408 },
409 .stat = {
410 .period = period,
411 .weight = weight,
412 .nr_events = 1,
413 },
414 .cpu = al->cpu,
415 .ip = al->addr,
416 .level = al->level,
417 .parent = sym_parent,
418 .filtered = symbol__parent_filter(sym_parent),
419 .hists = self,
420 .mem_info = mi,
421 .branch_info = NULL,
422 };
423 return add_hist_entry(self, &entry, al, period, weight);
424}
425
344struct hist_entry *__hists__add_branch_entry(struct hists *self, 426struct hist_entry *__hists__add_branch_entry(struct hists *self,
345 struct addr_location *al, 427 struct addr_location *al,
346 struct symbol *sym_parent, 428 struct symbol *sym_parent,
347 struct branch_info *bi, 429 struct branch_info *bi,
348 u64 period) 430 u64 period,
431 u64 weight)
349{ 432{
350 struct hist_entry entry = { 433 struct hist_entry entry = {
351 .thread = al->thread, 434 .thread = al->thread,
@@ -359,19 +442,22 @@ struct hist_entry *__hists__add_branch_entry(struct hists *self,
359 .stat = { 442 .stat = {
360 .period = period, 443 .period = period,
361 .nr_events = 1, 444 .nr_events = 1,
445 .weight = weight,
362 }, 446 },
363 .parent = sym_parent, 447 .parent = sym_parent,
364 .filtered = symbol__parent_filter(sym_parent), 448 .filtered = symbol__parent_filter(sym_parent),
365 .branch_info = bi, 449 .branch_info = bi,
366 .hists = self, 450 .hists = self,
451 .mem_info = NULL,
367 }; 452 };
368 453
369 return add_hist_entry(self, &entry, al, period); 454 return add_hist_entry(self, &entry, al, period, weight);
370} 455}
371 456
372struct hist_entry *__hists__add_entry(struct hists *self, 457struct hist_entry *__hists__add_entry(struct hists *self,
373 struct addr_location *al, 458 struct addr_location *al,
374 struct symbol *sym_parent, u64 period) 459 struct symbol *sym_parent, u64 period,
460 u64 weight)
375{ 461{
376 struct hist_entry entry = { 462 struct hist_entry entry = {
377 .thread = al->thread, 463 .thread = al->thread,
@@ -385,13 +471,16 @@ struct hist_entry *__hists__add_entry(struct hists *self,
385 .stat = { 471 .stat = {
386 .period = period, 472 .period = period,
387 .nr_events = 1, 473 .nr_events = 1,
474 .weight = weight,
388 }, 475 },
389 .parent = sym_parent, 476 .parent = sym_parent,
390 .filtered = symbol__parent_filter(sym_parent), 477 .filtered = symbol__parent_filter(sym_parent),
391 .hists = self, 478 .hists = self,
479 .branch_info = NULL,
480 .mem_info = NULL,
392 }; 481 };
393 482
394 return add_hist_entry(self, &entry, al, period); 483 return add_hist_entry(self, &entry, al, period, weight);
395} 484}
396 485
397int64_t 486int64_t
@@ -431,6 +520,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
431void hist_entry__free(struct hist_entry *he) 520void hist_entry__free(struct hist_entry *he)
432{ 521{
433 free(he->branch_info); 522 free(he->branch_info);
523 free(he->mem_info);
434 free(he); 524 free(he);
435} 525}
436 526
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 848331377bdb..14c2fe20aa62 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -49,6 +49,14 @@ enum hist_column {
49 HISTC_DSO_FROM, 49 HISTC_DSO_FROM,
50 HISTC_DSO_TO, 50 HISTC_DSO_TO,
51 HISTC_SRCLINE, 51 HISTC_SRCLINE,
52 HISTC_LOCAL_WEIGHT,
53 HISTC_GLOBAL_WEIGHT,
54 HISTC_MEM_DADDR_SYMBOL,
55 HISTC_MEM_DADDR_DSO,
56 HISTC_MEM_LOCKED,
57 HISTC_MEM_TLB,
58 HISTC_MEM_LVL,
59 HISTC_MEM_SNOOP,
52 HISTC_NR_COLS, /* Last entry */ 60 HISTC_NR_COLS, /* Last entry */
53}; 61};
54 62
@@ -73,7 +81,8 @@ struct hists {
73 81
74struct hist_entry *__hists__add_entry(struct hists *self, 82struct hist_entry *__hists__add_entry(struct hists *self,
75 struct addr_location *al, 83 struct addr_location *al,
76 struct symbol *parent, u64 period); 84 struct symbol *parent, u64 period,
85 u64 weight);
77int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); 86int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
78int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); 87int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
79int hist_entry__sort_snprintf(struct hist_entry *self, char *bf, size_t size, 88int hist_entry__sort_snprintf(struct hist_entry *self, char *bf, size_t size,
@@ -84,7 +93,15 @@ struct hist_entry *__hists__add_branch_entry(struct hists *self,
84 struct addr_location *al, 93 struct addr_location *al,
85 struct symbol *sym_parent, 94 struct symbol *sym_parent,
86 struct branch_info *bi, 95 struct branch_info *bi,
87 u64 period); 96 u64 period,
97 u64 weight);
98
99struct hist_entry *__hists__add_mem_entry(struct hists *self,
100 struct addr_location *al,
101 struct symbol *sym_parent,
102 struct mem_info *mi,
103 u64 period,
104 u64 weight);
88 105
89void hists__output_resort(struct hists *self); 106void hists__output_resort(struct hists *self);
90void hists__output_resort_threaded(struct hists *hists); 107void hists__output_resort_threaded(struct hists *hists);
@@ -175,7 +192,7 @@ struct hist_browser_timer {
175 int refresh; 192 int refresh;
176}; 193};
177 194
178#ifdef NEWT_SUPPORT 195#ifdef SLANG_SUPPORT
179#include "../ui/keysyms.h" 196#include "../ui/keysyms.h"
180int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, 197int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
181 struct hist_browser_timer *hbt); 198 struct hist_browser_timer *hbt);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index c5e3b123782b..b2ecad6ec46b 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -955,6 +955,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
955 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 955 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
956 struct thread *thread; 956 struct thread *thread;
957 struct map *map; 957 struct map *map;
958 enum map_type type;
958 int ret = 0; 959 int ret = 0;
959 960
960 if (dump_trace) 961 if (dump_trace)
@@ -971,10 +972,17 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
971 thread = machine__findnew_thread(machine, event->mmap.pid); 972 thread = machine__findnew_thread(machine, event->mmap.pid);
972 if (thread == NULL) 973 if (thread == NULL)
973 goto out_problem; 974 goto out_problem;
975
976 if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA)
977 type = MAP__VARIABLE;
978 else
979 type = MAP__FUNCTION;
980
974 map = map__new(&machine->user_dsos, event->mmap.start, 981 map = map__new(&machine->user_dsos, event->mmap.start,
975 event->mmap.len, event->mmap.pgoff, 982 event->mmap.len, event->mmap.pgoff,
976 event->mmap.pid, event->mmap.filename, 983 event->mmap.pid, event->mmap.filename,
977 MAP__FUNCTION); 984 type);
985
978 if (map == NULL) 986 if (map == NULL)
979 goto out_problem; 987 goto out_problem;
980 988
@@ -1097,6 +1105,38 @@ found:
1097 ams->map = al.map; 1105 ams->map = al.map;
1098} 1106}
1099 1107
1108static void ip__resolve_data(struct machine *machine, struct thread *thread,
1109 u8 m, struct addr_map_symbol *ams, u64 addr)
1110{
1111 struct addr_location al;
1112
1113 memset(&al, 0, sizeof(al));
1114
1115 thread__find_addr_location(thread, machine, m, MAP__VARIABLE, addr, &al,
1116 NULL);
1117 ams->addr = addr;
1118 ams->al_addr = al.addr;
1119 ams->sym = al.sym;
1120 ams->map = al.map;
1121}
1122
1123struct mem_info *machine__resolve_mem(struct machine *machine,
1124 struct thread *thr,
1125 struct perf_sample *sample,
1126 u8 cpumode)
1127{
1128 struct mem_info *mi = zalloc(sizeof(*mi));
1129
1130 if (!mi)
1131 return NULL;
1132
1133 ip__resolve_ams(machine, thr, &mi->iaddr, sample->ip);
1134 ip__resolve_data(machine, thr, cpumode, &mi->daddr, sample->addr);
1135 mi->data_src.val = sample->data_src;
1136
1137 return mi;
1138}
1139
1100struct branch_info *machine__resolve_bstack(struct machine *machine, 1140struct branch_info *machine__resolve_bstack(struct machine *machine,
1101 struct thread *thr, 1141 struct thread *thr,
1102 struct branch_stack *bs) 1142 struct branch_stack *bs)
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index e0b2c00b2e75..77940680f1fc 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -76,6 +76,9 @@ void machine__delete(struct machine *machine);
76struct branch_info *machine__resolve_bstack(struct machine *machine, 76struct branch_info *machine__resolve_bstack(struct machine *machine,
77 struct thread *thread, 77 struct thread *thread,
78 struct branch_stack *bs); 78 struct branch_stack *bs);
79struct mem_info *machine__resolve_mem(struct machine *machine,
80 struct thread *thread,
81 struct perf_sample *sample, u8 cpumode);
79int machine__resolve_callchain(struct machine *machine, 82int machine__resolve_callchain(struct machine *machine,
80 struct perf_evsel *evsel, 83 struct perf_evsel *evsel,
81 struct thread *thread, 84 struct thread *thread,
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index ab265c2cfab3..cf1fe01b7e89 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1,5 +1,3 @@
1#define _FILE_OFFSET_BITS 64
2
3#include <linux/kernel.h> 1#include <linux/kernel.h>
4 2
5#include <byteswap.h> 3#include <byteswap.h>
@@ -800,6 +798,12 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
800 798
801 if (sample_type & PERF_SAMPLE_STACK_USER) 799 if (sample_type & PERF_SAMPLE_STACK_USER)
802 stack_user__printf(&sample->user_stack); 800 stack_user__printf(&sample->user_stack);
801
802 if (sample_type & PERF_SAMPLE_WEIGHT)
803 printf("... weight: %" PRIu64 "\n", sample->weight);
804
805 if (sample_type & PERF_SAMPLE_DATA_SRC)
806 printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
803} 807}
804 808
805static struct machine * 809static struct machine *
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index d41926cb9e3f..5f52d492590c 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -198,11 +198,19 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
198 } 198 }
199 199
200 ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level); 200 ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
201 if (sym) 201 if (sym && map) {
202 ret += repsep_snprintf(bf + ret, size - ret, "%-*s", 202 if (map->type == MAP__VARIABLE) {
203 width - ret, 203 ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
204 sym->name); 204 ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
205 else { 205 ip - map->unmap_ip(map, sym->start));
206 ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
207 width - ret, "");
208 } else {
209 ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
210 width - ret,
211 sym->name);
212 }
213 } else {
206 size_t len = BITS_PER_LONG / 4; 214 size_t len = BITS_PER_LONG / 4;
207 ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", 215 ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
208 len, ip); 216 len, ip);
@@ -457,6 +465,304 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *self, char *bf,
457 return repsep_snprintf(bf, size, "%-*s", width, out); 465 return repsep_snprintf(bf, size, "%-*s", width, out);
458} 466}
459 467
468/* --sort daddr_sym */
469static int64_t
470sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
471{
472 uint64_t l = 0, r = 0;
473
474 if (left->mem_info)
475 l = left->mem_info->daddr.addr;
476 if (right->mem_info)
477 r = right->mem_info->daddr.addr;
478
479 return (int64_t)(r - l);
480}
481
482static int hist_entry__daddr_snprintf(struct hist_entry *self, char *bf,
483 size_t size, unsigned int width)
484{
485 uint64_t addr = 0;
486 struct map *map = NULL;
487 struct symbol *sym = NULL;
488
489 if (self->mem_info) {
490 addr = self->mem_info->daddr.addr;
491 map = self->mem_info->daddr.map;
492 sym = self->mem_info->daddr.sym;
493 }
494 return _hist_entry__sym_snprintf(map, sym, addr, self->level, bf, size,
495 width);
496}
497
498static int64_t
499sort__dso_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
500{
501 struct map *map_l = NULL;
502 struct map *map_r = NULL;
503
504 if (left->mem_info)
505 map_l = left->mem_info->daddr.map;
506 if (right->mem_info)
507 map_r = right->mem_info->daddr.map;
508
509 return _sort__dso_cmp(map_l, map_r);
510}
511
512static int hist_entry__dso_daddr_snprintf(struct hist_entry *self, char *bf,
513 size_t size, unsigned int width)
514{
515 struct map *map = NULL;
516
517 if (self->mem_info)
518 map = self->mem_info->daddr.map;
519
520 return _hist_entry__dso_snprintf(map, bf, size, width);
521}
522
523static int64_t
524sort__locked_cmp(struct hist_entry *left, struct hist_entry *right)
525{
526 union perf_mem_data_src data_src_l;
527 union perf_mem_data_src data_src_r;
528
529 if (left->mem_info)
530 data_src_l = left->mem_info->data_src;
531 else
532 data_src_l.mem_lock = PERF_MEM_LOCK_NA;
533
534 if (right->mem_info)
535 data_src_r = right->mem_info->data_src;
536 else
537 data_src_r.mem_lock = PERF_MEM_LOCK_NA;
538
539 return (int64_t)(data_src_r.mem_lock - data_src_l.mem_lock);
540}
541
542static int hist_entry__locked_snprintf(struct hist_entry *self, char *bf,
543 size_t size, unsigned int width)
544{
545 const char *out;
546 u64 mask = PERF_MEM_LOCK_NA;
547
548 if (self->mem_info)
549 mask = self->mem_info->data_src.mem_lock;
550
551 if (mask & PERF_MEM_LOCK_NA)
552 out = "N/A";
553 else if (mask & PERF_MEM_LOCK_LOCKED)
554 out = "Yes";
555 else
556 out = "No";
557
558 return repsep_snprintf(bf, size, "%-*s", width, out);
559}
560
561static int64_t
562sort__tlb_cmp(struct hist_entry *left, struct hist_entry *right)
563{
564 union perf_mem_data_src data_src_l;
565 union perf_mem_data_src data_src_r;
566
567 if (left->mem_info)
568 data_src_l = left->mem_info->data_src;
569 else
570 data_src_l.mem_dtlb = PERF_MEM_TLB_NA;
571
572 if (right->mem_info)
573 data_src_r = right->mem_info->data_src;
574 else
575 data_src_r.mem_dtlb = PERF_MEM_TLB_NA;
576
577 return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb);
578}
579
580static const char * const tlb_access[] = {
581 "N/A",
582 "HIT",
583 "MISS",
584 "L1",
585 "L2",
586 "Walker",
587 "Fault",
588};
589#define NUM_TLB_ACCESS (sizeof(tlb_access)/sizeof(const char *))
590
591static int hist_entry__tlb_snprintf(struct hist_entry *self, char *bf,
592 size_t size, unsigned int width)
593{
594 char out[64];
595 size_t sz = sizeof(out) - 1; /* -1 for null termination */
596 size_t l = 0, i;
597 u64 m = PERF_MEM_TLB_NA;
598 u64 hit, miss;
599
600 out[0] = '\0';
601
602 if (self->mem_info)
603 m = self->mem_info->data_src.mem_dtlb;
604
605 hit = m & PERF_MEM_TLB_HIT;
606 miss = m & PERF_MEM_TLB_MISS;
607
608 /* already taken care of */
609 m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
610
611 for (i = 0; m && i < NUM_TLB_ACCESS; i++, m >>= 1) {
612 if (!(m & 0x1))
613 continue;
614 if (l) {
615 strcat(out, " or ");
616 l += 4;
617 }
618 strncat(out, tlb_access[i], sz - l);
619 l += strlen(tlb_access[i]);
620 }
621 if (*out == '\0')
622 strcpy(out, "N/A");
623 if (hit)
624 strncat(out, " hit", sz - l);
625 if (miss)
626 strncat(out, " miss", sz - l);
627
628 return repsep_snprintf(bf, size, "%-*s", width, out);
629}
630
631static int64_t
632sort__lvl_cmp(struct hist_entry *left, struct hist_entry *right)
633{
634 union perf_mem_data_src data_src_l;
635 union perf_mem_data_src data_src_r;
636
637 if (left->mem_info)
638 data_src_l = left->mem_info->data_src;
639 else
640 data_src_l.mem_lvl = PERF_MEM_LVL_NA;
641
642 if (right->mem_info)
643 data_src_r = right->mem_info->data_src;
644 else
645 data_src_r.mem_lvl = PERF_MEM_LVL_NA;
646
647 return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl);
648}
649
650static const char * const mem_lvl[] = {
651 "N/A",
652 "HIT",
653 "MISS",
654 "L1",
655 "LFB",
656 "L2",
657 "L3",
658 "Local RAM",
659 "Remote RAM (1 hop)",
660 "Remote RAM (2 hops)",
661 "Remote Cache (1 hop)",
662 "Remote Cache (2 hops)",
663 "I/O",
664 "Uncached",
665};
666#define NUM_MEM_LVL (sizeof(mem_lvl)/sizeof(const char *))
667
668static int hist_entry__lvl_snprintf(struct hist_entry *self, char *bf,
669 size_t size, unsigned int width)
670{
671 char out[64];
672 size_t sz = sizeof(out) - 1; /* -1 for null termination */
673 size_t i, l = 0;
674 u64 m = PERF_MEM_LVL_NA;
675 u64 hit, miss;
676
677 if (self->mem_info)
678 m = self->mem_info->data_src.mem_lvl;
679
680 out[0] = '\0';
681
682 hit = m & PERF_MEM_LVL_HIT;
683 miss = m & PERF_MEM_LVL_MISS;
684
685 /* already taken care of */
686 m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
687
688 for (i = 0; m && i < NUM_MEM_LVL; i++, m >>= 1) {
689 if (!(m & 0x1))
690 continue;
691 if (l) {
692 strcat(out, " or ");
693 l += 4;
694 }
695 strncat(out, mem_lvl[i], sz - l);
696 l += strlen(mem_lvl[i]);
697 }
698 if (*out == '\0')
699 strcpy(out, "N/A");
700 if (hit)
701 strncat(out, " hit", sz - l);
702 if (miss)
703 strncat(out, " miss", sz - l);
704
705 return repsep_snprintf(bf, size, "%-*s", width, out);
706}
707
708static int64_t
709sort__snoop_cmp(struct hist_entry *left, struct hist_entry *right)
710{
711 union perf_mem_data_src data_src_l;
712 union perf_mem_data_src data_src_r;
713
714 if (left->mem_info)
715 data_src_l = left->mem_info->data_src;
716 else
717 data_src_l.mem_snoop = PERF_MEM_SNOOP_NA;
718
719 if (right->mem_info)
720 data_src_r = right->mem_info->data_src;
721 else
722 data_src_r.mem_snoop = PERF_MEM_SNOOP_NA;
723
724 return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop);
725}
726
727static const char * const snoop_access[] = {
728 "N/A",
729 "None",
730 "Miss",
731 "Hit",
732 "HitM",
733};
734#define NUM_SNOOP_ACCESS (sizeof(snoop_access)/sizeof(const char *))
735
736static int hist_entry__snoop_snprintf(struct hist_entry *self, char *bf,
737 size_t size, unsigned int width)
738{
739 char out[64];
740 size_t sz = sizeof(out) - 1; /* -1 for null termination */
741 size_t i, l = 0;
742 u64 m = PERF_MEM_SNOOP_NA;
743
744 out[0] = '\0';
745
746 if (self->mem_info)
747 m = self->mem_info->data_src.mem_snoop;
748
749 for (i = 0; m && i < NUM_SNOOP_ACCESS; i++, m >>= 1) {
750 if (!(m & 0x1))
751 continue;
752 if (l) {
753 strcat(out, " or ");
754 l += 4;
755 }
756 strncat(out, snoop_access[i], sz - l);
757 l += strlen(snoop_access[i]);
758 }
759
760 if (*out == '\0')
761 strcpy(out, "N/A");
762
763 return repsep_snprintf(bf, size, "%-*s", width, out);
764}
765
460struct sort_entry sort_mispredict = { 766struct sort_entry sort_mispredict = {
461 .se_header = "Branch Mispredicted", 767 .se_header = "Branch Mispredicted",
462 .se_cmp = sort__mispredict_cmp, 768 .se_cmp = sort__mispredict_cmp,
@@ -464,6 +770,91 @@ struct sort_entry sort_mispredict = {
464 .se_width_idx = HISTC_MISPREDICT, 770 .se_width_idx = HISTC_MISPREDICT,
465}; 771};
466 772
773static u64 he_weight(struct hist_entry *he)
774{
775 return he->stat.nr_events ? he->stat.weight / he->stat.nr_events : 0;
776}
777
778static int64_t
779sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right)
780{
781 return he_weight(left) - he_weight(right);
782}
783
784static int hist_entry__local_weight_snprintf(struct hist_entry *self, char *bf,
785 size_t size, unsigned int width)
786{
787 return repsep_snprintf(bf, size, "%-*llu", width, he_weight(self));
788}
789
790struct sort_entry sort_local_weight = {
791 .se_header = "Local Weight",
792 .se_cmp = sort__local_weight_cmp,
793 .se_snprintf = hist_entry__local_weight_snprintf,
794 .se_width_idx = HISTC_LOCAL_WEIGHT,
795};
796
797static int64_t
798sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right)
799{
800 return left->stat.weight - right->stat.weight;
801}
802
803static int hist_entry__global_weight_snprintf(struct hist_entry *self, char *bf,
804 size_t size, unsigned int width)
805{
806 return repsep_snprintf(bf, size, "%-*llu", width, self->stat.weight);
807}
808
809struct sort_entry sort_global_weight = {
810 .se_header = "Weight",
811 .se_cmp = sort__global_weight_cmp,
812 .se_snprintf = hist_entry__global_weight_snprintf,
813 .se_width_idx = HISTC_GLOBAL_WEIGHT,
814};
815
816struct sort_entry sort_mem_daddr_sym = {
817 .se_header = "Data Symbol",
818 .se_cmp = sort__daddr_cmp,
819 .se_snprintf = hist_entry__daddr_snprintf,
820 .se_width_idx = HISTC_MEM_DADDR_SYMBOL,
821};
822
823struct sort_entry sort_mem_daddr_dso = {
824 .se_header = "Data Object",
825 .se_cmp = sort__dso_daddr_cmp,
826 .se_snprintf = hist_entry__dso_daddr_snprintf,
827 .se_width_idx = HISTC_MEM_DADDR_SYMBOL,
828};
829
830struct sort_entry sort_mem_locked = {
831 .se_header = "Locked",
832 .se_cmp = sort__locked_cmp,
833 .se_snprintf = hist_entry__locked_snprintf,
834 .se_width_idx = HISTC_MEM_LOCKED,
835};
836
837struct sort_entry sort_mem_tlb = {
838 .se_header = "TLB access",
839 .se_cmp = sort__tlb_cmp,
840 .se_snprintf = hist_entry__tlb_snprintf,
841 .se_width_idx = HISTC_MEM_TLB,
842};
843
844struct sort_entry sort_mem_lvl = {
845 .se_header = "Memory access",
846 .se_cmp = sort__lvl_cmp,
847 .se_snprintf = hist_entry__lvl_snprintf,
848 .se_width_idx = HISTC_MEM_LVL,
849};
850
851struct sort_entry sort_mem_snoop = {
852 .se_header = "Snoop",
853 .se_cmp = sort__snoop_cmp,
854 .se_snprintf = hist_entry__snoop_snprintf,
855 .se_width_idx = HISTC_MEM_SNOOP,
856};
857
467struct sort_dimension { 858struct sort_dimension {
468 const char *name; 859 const char *name;
469 struct sort_entry *entry; 860 struct sort_entry *entry;
@@ -480,6 +871,14 @@ static struct sort_dimension common_sort_dimensions[] = {
480 DIM(SORT_PARENT, "parent", sort_parent), 871 DIM(SORT_PARENT, "parent", sort_parent),
481 DIM(SORT_CPU, "cpu", sort_cpu), 872 DIM(SORT_CPU, "cpu", sort_cpu),
482 DIM(SORT_SRCLINE, "srcline", sort_srcline), 873 DIM(SORT_SRCLINE, "srcline", sort_srcline),
874 DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
875 DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
876 DIM(SORT_MEM_DADDR_SYMBOL, "symbol_daddr", sort_mem_daddr_sym),
877 DIM(SORT_MEM_DADDR_DSO, "dso_daddr", sort_mem_daddr_dso),
878 DIM(SORT_MEM_LOCKED, "locked", sort_mem_locked),
879 DIM(SORT_MEM_TLB, "tlb", sort_mem_tlb),
880 DIM(SORT_MEM_LVL, "mem", sort_mem_lvl),
881 DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
483}; 882};
484 883
485#undef DIM 884#undef DIM
@@ -516,7 +915,10 @@ int sort_dimension__add(const char *tok)
516 return -EINVAL; 915 return -EINVAL;
517 } 916 }
518 sort__has_parent = 1; 917 sort__has_parent = 1;
519 } else if (sd->entry == &sort_sym) { 918 } else if (sd->entry == &sort_sym ||
919 sd->entry == &sort_sym_from ||
920 sd->entry == &sort_sym_to ||
921 sd->entry == &sort_mem_daddr_sym) {
520 sort__has_sym = 1; 922 sort__has_sym = 1;
521 } 923 }
522 924
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index b13e56f6ccbe..f24bdf64238c 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -49,6 +49,7 @@ struct he_stat {
49 u64 period_us; 49 u64 period_us;
50 u64 period_guest_sys; 50 u64 period_guest_sys;
51 u64 period_guest_us; 51 u64 period_guest_us;
52 u64 weight;
52 u32 nr_events; 53 u32 nr_events;
53}; 54};
54 55
@@ -100,7 +101,8 @@ struct hist_entry {
100 struct rb_root sorted_chain; 101 struct rb_root sorted_chain;
101 struct branch_info *branch_info; 102 struct branch_info *branch_info;
102 struct hists *hists; 103 struct hists *hists;
103 struct callchain_root callchain[0]; 104 struct mem_info *mem_info;
105 struct callchain_root callchain[0]; /* must be last member */
104}; 106};
105 107
106static inline bool hist_entry__has_pairs(struct hist_entry *he) 108static inline bool hist_entry__has_pairs(struct hist_entry *he)
@@ -130,6 +132,14 @@ enum sort_type {
130 SORT_PARENT, 132 SORT_PARENT,
131 SORT_CPU, 133 SORT_CPU,
132 SORT_SRCLINE, 134 SORT_SRCLINE,
135 SORT_LOCAL_WEIGHT,
136 SORT_GLOBAL_WEIGHT,
137 SORT_MEM_DADDR_SYMBOL,
138 SORT_MEM_DADDR_DSO,
139 SORT_MEM_LOCKED,
140 SORT_MEM_TLB,
141 SORT_MEM_LVL,
142 SORT_MEM_SNOOP,
133 143
134 /* branch stack specific sort keys */ 144 /* branch stack specific sort keys */
135 __SORT_BRANCH_STACK, 145 __SORT_BRANCH_STACK,
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 54efcb5659ac..4b12bf850325 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -806,9 +806,12 @@ int dso__load_sym(struct dso *dso, struct map *map,
806 * DWARF DW_compile_unit has this, but we don't always have access 806 * DWARF DW_compile_unit has this, but we don't always have access
807 * to it... 807 * to it...
808 */ 808 */
809 demangled = bfd_demangle(NULL, elf_name, DMGL_PARAMS | DMGL_ANSI); 809 if (symbol_conf.demangle) {
810 if (demangled != NULL) 810 demangled = bfd_demangle(NULL, elf_name,
811 elf_name = demangled; 811 DMGL_PARAMS | DMGL_ANSI);
812 if (demangled != NULL)
813 elf_name = demangled;
814 }
812new_symbol: 815new_symbol:
813 f = symbol__new(sym.st_value, sym.st_size, 816 f = symbol__new(sym.st_value, sym.st_size,
814 GELF_ST_BIND(sym.st_info), elf_name); 817 GELF_ST_BIND(sym.st_info), elf_name);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index e6432d85b43d..8cf3b5426a9a 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -36,6 +36,7 @@ struct symbol_conf symbol_conf = {
36 .use_modules = true, 36 .use_modules = true,
37 .try_vmlinux_path = true, 37 .try_vmlinux_path = true,
38 .annotate_src = true, 38 .annotate_src = true,
39 .demangle = true,
39 .symfs = "", 40 .symfs = "",
40}; 41};
41 42
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index b62ca37c4b77..5f720dc076da 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -97,7 +97,8 @@ struct symbol_conf {
97 kptr_restrict, 97 kptr_restrict,
98 annotate_asm_raw, 98 annotate_asm_raw,
99 annotate_src, 99 annotate_src,
100 event_group; 100 event_group,
101 demangle;
101 const char *vmlinux_name, 102 const char *vmlinux_name,
102 *kallsyms_name, 103 *kallsyms_name,
103 *source_prefix, 104 *source_prefix,
@@ -155,6 +156,12 @@ struct branch_info {
155 struct branch_flags flags; 156 struct branch_flags flags;
156}; 157};
157 158
159struct mem_info {
160 struct addr_map_symbol iaddr;
161 struct addr_map_symbol daddr;
162 union perf_mem_data_src data_src;
163};
164
158struct addr_location { 165struct addr_location {
159 struct thread *thread; 166 struct thread *thread;
160 struct map *map; 167 struct map *map;
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index 5729f434c5b1..3917eb9a8479 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -43,26 +43,15 @@
43 43
44#define VERSION "0.5" 44#define VERSION "0.5"
45 45
46static const char *output_file = "trace.info";
47static int output_fd; 46static int output_fd;
48 47
49 48
50static void *malloc_or_die(unsigned int size)
51{
52 void *data;
53
54 data = malloc(size);
55 if (!data)
56 die("malloc");
57 return data;
58}
59
60static const char *find_debugfs(void) 49static const char *find_debugfs(void)
61{ 50{
62 const char *path = perf_debugfs_mount(NULL); 51 const char *path = perf_debugfs_mount(NULL);
63 52
64 if (!path) 53 if (!path)
65 die("Your kernel not support debugfs filesystem"); 54 pr_debug("Your kernel does not support the debugfs filesystem");
66 55
67 return path; 56 return path;
68} 57}
@@ -81,8 +70,12 @@ static const char *find_tracing_dir(void)
81 return tracing; 70 return tracing;
82 71
83 debugfs = find_debugfs(); 72 debugfs = find_debugfs();
73 if (!debugfs)
74 return NULL;
84 75
85 tracing = malloc_or_die(strlen(debugfs) + 9); 76 tracing = malloc(strlen(debugfs) + 9);
77 if (!tracing)
78 return NULL;
86 79
87 sprintf(tracing, "%s/tracing", debugfs); 80 sprintf(tracing, "%s/tracing", debugfs);
88 81
@@ -99,7 +92,9 @@ static char *get_tracing_file(const char *name)
99 if (!tracing) 92 if (!tracing)
100 return NULL; 93 return NULL;
101 94
102 file = malloc_or_die(strlen(tracing) + strlen(name) + 2); 95 file = malloc(strlen(tracing) + strlen(name) + 2);
96 if (!file)
97 return NULL;
103 98
104 sprintf(file, "%s/%s", tracing, name); 99 sprintf(file, "%s/%s", tracing, name);
105 return file; 100 return file;
@@ -110,17 +105,6 @@ static void put_tracing_file(char *file)
110 free(file); 105 free(file);
111} 106}
112 107
113static ssize_t write_or_die(const void *buf, size_t len)
114{
115 int ret;
116
117 ret = write(output_fd, buf, len);
118 if (ret < 0)
119 die("writing to '%s'", output_file);
120
121 return ret;
122}
123
124int bigendian(void) 108int bigendian(void)
125{ 109{
126 unsigned char str[] = { 0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0}; 110 unsigned char str[] = { 0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0};
@@ -131,59 +115,106 @@ int bigendian(void)
131} 115}
132 116
133/* unfortunately, you can not stat debugfs or proc files for size */ 117/* unfortunately, you can not stat debugfs or proc files for size */
134static void record_file(const char *file, size_t hdr_sz) 118static int record_file(const char *file, ssize_t hdr_sz)
135{ 119{
136 unsigned long long size = 0; 120 unsigned long long size = 0;
137 char buf[BUFSIZ], *sizep; 121 char buf[BUFSIZ], *sizep;
138 off_t hdr_pos = lseek(output_fd, 0, SEEK_CUR); 122 off_t hdr_pos = lseek(output_fd, 0, SEEK_CUR);
139 int r, fd; 123 int r, fd;
124 int err = -EIO;
140 125
141 fd = open(file, O_RDONLY); 126 fd = open(file, O_RDONLY);
142 if (fd < 0) 127 if (fd < 0) {
143 die("Can't read '%s'", file); 128 pr_debug("Can't read '%s'", file);
129 return -errno;
130 }
144 131
145 /* put in zeros for file size, then fill true size later */ 132 /* put in zeros for file size, then fill true size later */
146 if (hdr_sz) 133 if (hdr_sz) {
147 write_or_die(&size, hdr_sz); 134 if (write(output_fd, &size, hdr_sz) != hdr_sz)
135 goto out;
136 }
148 137
149 do { 138 do {
150 r = read(fd, buf, BUFSIZ); 139 r = read(fd, buf, BUFSIZ);
151 if (r > 0) { 140 if (r > 0) {
152 size += r; 141 size += r;
153 write_or_die(buf, r); 142 if (write(output_fd, buf, r) != r)
143 goto out;
154 } 144 }
155 } while (r > 0); 145 } while (r > 0);
156 close(fd);
157 146
158 /* ugh, handle big-endian hdr_size == 4 */ 147 /* ugh, handle big-endian hdr_size == 4 */
159 sizep = (char*)&size; 148 sizep = (char*)&size;
160 if (bigendian()) 149 if (bigendian())
161 sizep += sizeof(u64) - hdr_sz; 150 sizep += sizeof(u64) - hdr_sz;
162 151
163 if (hdr_sz && pwrite(output_fd, sizep, hdr_sz, hdr_pos) < 0) 152 if (hdr_sz && pwrite(output_fd, sizep, hdr_sz, hdr_pos) < 0) {
164 die("writing to %s", output_file); 153 pr_debug("writing file size failed\n");
154 goto out;
155 }
156
157 err = 0;
158out:
159 close(fd);
160 return err;
165} 161}
166 162
167static void read_header_files(void) 163static int read_header_files(void)
168{ 164{
169 char *path; 165 char *path;
170 struct stat st; 166 struct stat st;
167 int err = -EIO;
171 168
172 path = get_tracing_file("events/header_page"); 169 path = get_tracing_file("events/header_page");
173 if (stat(path, &st) < 0) 170 if (!path) {
174 die("can't read '%s'", path); 171 pr_debug("can't get tracing/events/header_page");
172 return -ENOMEM;
173 }
174
175 if (stat(path, &st) < 0) {
176 pr_debug("can't read '%s'", path);
177 goto out;
178 }
179
180 if (write(output_fd, "header_page", 12) != 12) {
181 pr_debug("can't write header_page\n");
182 goto out;
183 }
184
185 if (record_file(path, 8) < 0) {
186 pr_debug("can't record header_page file\n");
187 goto out;
188 }
175 189
176 write_or_die("header_page", 12);
177 record_file(path, 8);
178 put_tracing_file(path); 190 put_tracing_file(path);
179 191
180 path = get_tracing_file("events/header_event"); 192 path = get_tracing_file("events/header_event");
181 if (stat(path, &st) < 0) 193 if (!path) {
182 die("can't read '%s'", path); 194 pr_debug("can't get tracing/events/header_event");
195 err = -ENOMEM;
196 goto out;
197 }
198
199 if (stat(path, &st) < 0) {
200 pr_debug("can't read '%s'", path);
201 goto out;
202 }
203
204 if (write(output_fd, "header_event", 13) != 13) {
205 pr_debug("can't write header_event\n");
206 goto out;
207 }
208
209 if (record_file(path, 8) < 0) {
210 pr_debug("can't record header_event file\n");
211 goto out;
212 }
183 213
184 write_or_die("header_event", 13); 214 err = 0;
185 record_file(path, 8); 215out:
186 put_tracing_file(path); 216 put_tracing_file(path);
217 return err;
187} 218}
188 219
189static bool name_in_tp_list(char *sys, struct tracepoint_path *tps) 220static bool name_in_tp_list(char *sys, struct tracepoint_path *tps)
@@ -197,7 +228,7 @@ static bool name_in_tp_list(char *sys, struct tracepoint_path *tps)
197 return false; 228 return false;
198} 229}
199 230
200static void copy_event_system(const char *sys, struct tracepoint_path *tps) 231static int copy_event_system(const char *sys, struct tracepoint_path *tps)
201{ 232{
202 struct dirent *dent; 233 struct dirent *dent;
203 struct stat st; 234 struct stat st;
@@ -205,10 +236,13 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps)
205 DIR *dir; 236 DIR *dir;
206 int count = 0; 237 int count = 0;
207 int ret; 238 int ret;
239 int err;
208 240
209 dir = opendir(sys); 241 dir = opendir(sys);
210 if (!dir) 242 if (!dir) {
211 die("can't read directory '%s'", sys); 243 pr_debug("can't read directory '%s'", sys);
244 return -errno;
245 }
212 246
213 while ((dent = readdir(dir))) { 247 while ((dent = readdir(dir))) {
214 if (dent->d_type != DT_DIR || 248 if (dent->d_type != DT_DIR ||
@@ -216,7 +250,11 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps)
216 strcmp(dent->d_name, "..") == 0 || 250 strcmp(dent->d_name, "..") == 0 ||
217 !name_in_tp_list(dent->d_name, tps)) 251 !name_in_tp_list(dent->d_name, tps))
218 continue; 252 continue;
219 format = malloc_or_die(strlen(sys) + strlen(dent->d_name) + 10); 253 format = malloc(strlen(sys) + strlen(dent->d_name) + 10);
254 if (!format) {
255 err = -ENOMEM;
256 goto out;
257 }
220 sprintf(format, "%s/%s/format", sys, dent->d_name); 258 sprintf(format, "%s/%s/format", sys, dent->d_name);
221 ret = stat(format, &st); 259 ret = stat(format, &st);
222 free(format); 260 free(format);
@@ -225,7 +263,11 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps)
225 count++; 263 count++;
226 } 264 }
227 265
228 write_or_die(&count, 4); 266 if (write(output_fd, &count, 4) != 4) {
267 err = -EIO;
268 pr_debug("can't write count\n");
269 goto out;
270 }
229 271
230 rewinddir(dir); 272 rewinddir(dir);
231 while ((dent = readdir(dir))) { 273 while ((dent = readdir(dir))) {
@@ -234,27 +276,45 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps)
234 strcmp(dent->d_name, "..") == 0 || 276 strcmp(dent->d_name, "..") == 0 ||
235 !name_in_tp_list(dent->d_name, tps)) 277 !name_in_tp_list(dent->d_name, tps))
236 continue; 278 continue;
237 format = malloc_or_die(strlen(sys) + strlen(dent->d_name) + 10); 279 format = malloc(strlen(sys) + strlen(dent->d_name) + 10);
280 if (!format) {
281 err = -ENOMEM;
282 goto out;
283 }
238 sprintf(format, "%s/%s/format", sys, dent->d_name); 284 sprintf(format, "%s/%s/format", sys, dent->d_name);
239 ret = stat(format, &st); 285 ret = stat(format, &st);
240 286
241 if (ret >= 0) 287 if (ret >= 0) {
242 record_file(format, 8); 288 err = record_file(format, 8);
243 289 if (err) {
290 free(format);
291 goto out;
292 }
293 }
244 free(format); 294 free(format);
245 } 295 }
296 err = 0;
297out:
246 closedir(dir); 298 closedir(dir);
299 return err;
247} 300}
248 301
249static void read_ftrace_files(struct tracepoint_path *tps) 302static int read_ftrace_files(struct tracepoint_path *tps)
250{ 303{
251 char *path; 304 char *path;
305 int ret;
252 306
253 path = get_tracing_file("events/ftrace"); 307 path = get_tracing_file("events/ftrace");
308 if (!path) {
309 pr_debug("can't get tracing/events/ftrace");
310 return -ENOMEM;
311 }
254 312
255 copy_event_system(path, tps); 313 ret = copy_event_system(path, tps);
256 314
257 put_tracing_file(path); 315 put_tracing_file(path);
316
317 return ret;
258} 318}
259 319
260static bool system_in_tp_list(char *sys, struct tracepoint_path *tps) 320static bool system_in_tp_list(char *sys, struct tracepoint_path *tps)
@@ -268,7 +328,7 @@ static bool system_in_tp_list(char *sys, struct tracepoint_path *tps)
268 return false; 328 return false;
269} 329}
270 330
271static void read_event_files(struct tracepoint_path *tps) 331static int read_event_files(struct tracepoint_path *tps)
272{ 332{
273 struct dirent *dent; 333 struct dirent *dent;
274 struct stat st; 334 struct stat st;
@@ -277,12 +337,20 @@ static void read_event_files(struct tracepoint_path *tps)
277 DIR *dir; 337 DIR *dir;
278 int count = 0; 338 int count = 0;
279 int ret; 339 int ret;
340 int err;
280 341
281 path = get_tracing_file("events"); 342 path = get_tracing_file("events");
343 if (!path) {
344 pr_debug("can't get tracing/events");
345 return -ENOMEM;
346 }
282 347
283 dir = opendir(path); 348 dir = opendir(path);
284 if (!dir) 349 if (!dir) {
285 die("can't read directory '%s'", path); 350 err = -errno;
351 pr_debug("can't read directory '%s'", path);
352 goto out;
353 }
286 354
287 while ((dent = readdir(dir))) { 355 while ((dent = readdir(dir))) {
288 if (dent->d_type != DT_DIR || 356 if (dent->d_type != DT_DIR ||
@@ -294,7 +362,11 @@ static void read_event_files(struct tracepoint_path *tps)
294 count++; 362 count++;
295 } 363 }
296 364
297 write_or_die(&count, 4); 365 if (write(output_fd, &count, 4) != 4) {
366 err = -EIO;
367 pr_debug("can't write count\n");
368 goto out;
369 }
298 370
299 rewinddir(dir); 371 rewinddir(dir);
300 while ((dent = readdir(dir))) { 372 while ((dent = readdir(dir))) {
@@ -304,56 +376,90 @@ static void read_event_files(struct tracepoint_path *tps)
304 strcmp(dent->d_name, "ftrace") == 0 || 376 strcmp(dent->d_name, "ftrace") == 0 ||
305 !system_in_tp_list(dent->d_name, tps)) 377 !system_in_tp_list(dent->d_name, tps))
306 continue; 378 continue;
307 sys = malloc_or_die(strlen(path) + strlen(dent->d_name) + 2); 379 sys = malloc(strlen(path) + strlen(dent->d_name) + 2);
380 if (!sys) {
381 err = -ENOMEM;
382 goto out;
383 }
308 sprintf(sys, "%s/%s", path, dent->d_name); 384 sprintf(sys, "%s/%s", path, dent->d_name);
309 ret = stat(sys, &st); 385 ret = stat(sys, &st);
310 if (ret >= 0) { 386 if (ret >= 0) {
311 write_or_die(dent->d_name, strlen(dent->d_name) + 1); 387 ssize_t size = strlen(dent->d_name) + 1;
312 copy_event_system(sys, tps); 388
389 if (write(output_fd, dent->d_name, size) != size ||
390 copy_event_system(sys, tps) < 0) {
391 err = -EIO;
392 free(sys);
393 goto out;
394 }
313 } 395 }
314 free(sys); 396 free(sys);
315 } 397 }
316 398 err = 0;
399out:
317 closedir(dir); 400 closedir(dir);
318 put_tracing_file(path); 401 put_tracing_file(path);
402
403 return err;
319} 404}
320 405
321static void read_proc_kallsyms(void) 406static int read_proc_kallsyms(void)
322{ 407{
323 unsigned int size; 408 unsigned int size;
324 const char *path = "/proc/kallsyms"; 409 const char *path = "/proc/kallsyms";
325 struct stat st; 410 struct stat st;
326 int ret; 411 int ret, err = 0;
327 412
328 ret = stat(path, &st); 413 ret = stat(path, &st);
329 if (ret < 0) { 414 if (ret < 0) {
330 /* not found */ 415 /* not found */
331 size = 0; 416 size = 0;
332 write_or_die(&size, 4); 417 if (write(output_fd, &size, 4) != 4)
333 return; 418 err = -EIO;
419 return err;
334 } 420 }
335 record_file(path, 4); 421 return record_file(path, 4);
336} 422}
337 423
338static void read_ftrace_printk(void) 424static int read_ftrace_printk(void)
339{ 425{
340 unsigned int size; 426 unsigned int size;
341 char *path; 427 char *path;
342 struct stat st; 428 struct stat st;
343 int ret; 429 int ret, err = 0;
344 430
345 path = get_tracing_file("printk_formats"); 431 path = get_tracing_file("printk_formats");
432 if (!path) {
433 pr_debug("can't get tracing/printk_formats");
434 return -ENOMEM;
435 }
436
346 ret = stat(path, &st); 437 ret = stat(path, &st);
347 if (ret < 0) { 438 if (ret < 0) {
348 /* not found */ 439 /* not found */
349 size = 0; 440 size = 0;
350 write_or_die(&size, 4); 441 if (write(output_fd, &size, 4) != 4)
442 err = -EIO;
351 goto out; 443 goto out;
352 } 444 }
353 record_file(path, 4); 445 err = record_file(path, 4);
354 446
355out: 447out:
356 put_tracing_file(path); 448 put_tracing_file(path);
449 return err;
450}
451
452static void
453put_tracepoints_path(struct tracepoint_path *tps)
454{
455 while (tps) {
456 struct tracepoint_path *t = tps;
457
458 tps = tps->next;
459 free(t->name);
460 free(t->system);
461 free(t);
462 }
357} 463}
358 464
359static struct tracepoint_path * 465static struct tracepoint_path *
@@ -368,27 +474,17 @@ get_tracepoints_path(struct list_head *pattrs)
368 continue; 474 continue;
369 ++nr_tracepoints; 475 ++nr_tracepoints;
370 ppath->next = tracepoint_id_to_path(pos->attr.config); 476 ppath->next = tracepoint_id_to_path(pos->attr.config);
371 if (!ppath->next) 477 if (!ppath->next) {
372 die("%s\n", "No memory to alloc tracepoints list"); 478 pr_debug("No memory to alloc tracepoints list\n");
479 put_tracepoints_path(&path);
480 return NULL;
481 }
373 ppath = ppath->next; 482 ppath = ppath->next;
374 } 483 }
375 484
376 return nr_tracepoints > 0 ? path.next : NULL; 485 return nr_tracepoints > 0 ? path.next : NULL;
377} 486}
378 487
379static void
380put_tracepoints_path(struct tracepoint_path *tps)
381{
382 while (tps) {
383 struct tracepoint_path *t = tps;
384
385 tps = tps->next;
386 free(t->name);
387 free(t->system);
388 free(t);
389 }
390}
391
392bool have_tracepoints(struct list_head *pattrs) 488bool have_tracepoints(struct list_head *pattrs)
393{ 489{
394 struct perf_evsel *pos; 490 struct perf_evsel *pos;
@@ -400,9 +496,10 @@ bool have_tracepoints(struct list_head *pattrs)
400 return false; 496 return false;
401} 497}
402 498
403static void tracing_data_header(void) 499static int tracing_data_header(void)
404{ 500{
405 char buf[20]; 501 char buf[20];
502 ssize_t size;
406 503
407 /* just guessing this is someone's birthday.. ;) */ 504 /* just guessing this is someone's birthday.. ;) */
408 buf[0] = 23; 505 buf[0] = 23;
@@ -410,9 +507,12 @@ static void tracing_data_header(void)
410 buf[2] = 68; 507 buf[2] = 68;
411 memcpy(buf + 3, "tracing", 7); 508 memcpy(buf + 3, "tracing", 7);
412 509
413 write_or_die(buf, 10); 510 if (write(output_fd, buf, 10) != 10)
511 return -1;
414 512
415 write_or_die(VERSION, strlen(VERSION) + 1); 513 size = strlen(VERSION) + 1;
514 if (write(output_fd, VERSION, size) != size)
515 return -1;
416 516
417 /* save endian */ 517 /* save endian */
418 if (bigendian()) 518 if (bigendian())
@@ -422,14 +522,19 @@ static void tracing_data_header(void)
422 522
423 read_trace_init(buf[0], buf[0]); 523 read_trace_init(buf[0], buf[0]);
424 524
425 write_or_die(buf, 1); 525 if (write(output_fd, buf, 1) != 1)
526 return -1;
426 527
427 /* save size of long */ 528 /* save size of long */
428 buf[0] = sizeof(long); 529 buf[0] = sizeof(long);
429 write_or_die(buf, 1); 530 if (write(output_fd, buf, 1) != 1)
531 return -1;
430 532
431 /* save page_size */ 533 /* save page_size */
432 write_or_die(&page_size, 4); 534 if (write(output_fd, &page_size, 4) != 4)
535 return -1;
536
537 return 0;
433} 538}
434 539
435struct tracing_data *tracing_data_get(struct list_head *pattrs, 540struct tracing_data *tracing_data_get(struct list_head *pattrs,
@@ -437,6 +542,7 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs,
437{ 542{
438 struct tracepoint_path *tps; 543 struct tracepoint_path *tps;
439 struct tracing_data *tdata; 544 struct tracing_data *tdata;
545 int err;
440 546
441 output_fd = fd; 547 output_fd = fd;
442 548
@@ -444,7 +550,10 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs,
444 if (!tps) 550 if (!tps)
445 return NULL; 551 return NULL;
446 552
447 tdata = malloc_or_die(sizeof(*tdata)); 553 tdata = malloc(sizeof(*tdata));
554 if (!tdata)
555 return NULL;
556
448 tdata->temp = temp; 557 tdata->temp = temp;
449 tdata->size = 0; 558 tdata->size = 0;
450 559
@@ -453,12 +562,16 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs,
453 562
454 snprintf(tdata->temp_file, sizeof(tdata->temp_file), 563 snprintf(tdata->temp_file, sizeof(tdata->temp_file),
455 "/tmp/perf-XXXXXX"); 564 "/tmp/perf-XXXXXX");
456 if (!mkstemp(tdata->temp_file)) 565 if (!mkstemp(tdata->temp_file)) {
457 die("Can't make temp file"); 566 pr_debug("Can't make temp file");
567 return NULL;
568 }
458 569
459 temp_fd = open(tdata->temp_file, O_RDWR); 570 temp_fd = open(tdata->temp_file, O_RDWR);
460 if (temp_fd < 0) 571 if (temp_fd < 0) {
461 die("Can't read '%s'", tdata->temp_file); 572 pr_debug("Can't read '%s'", tdata->temp_file);
573 return NULL;
574 }
462 575
463 /* 576 /*
464 * Set the temp file the default output, so all the 577 * Set the temp file the default output, so all the
@@ -467,13 +580,24 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs,
467 output_fd = temp_fd; 580 output_fd = temp_fd;
468 } 581 }
469 582
470 tracing_data_header(); 583 err = tracing_data_header();
471 read_header_files(); 584 if (err)
472 read_ftrace_files(tps); 585 goto out;
473 read_event_files(tps); 586 err = read_header_files();
474 read_proc_kallsyms(); 587 if (err)
475 read_ftrace_printk(); 588 goto out;
589 err = read_ftrace_files(tps);
590 if (err)
591 goto out;
592 err = read_event_files(tps);
593 if (err)
594 goto out;
595 err = read_proc_kallsyms();
596 if (err)
597 goto out;
598 err = read_ftrace_printk();
476 599
600out:
477 /* 601 /*
478 * All tracing data are stored by now, we can restore 602 * All tracing data are stored by now, we can restore
479 * the default output file in case we used temp file. 603 * the default output file in case we used temp file.
@@ -484,22 +608,31 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs,
484 output_fd = fd; 608 output_fd = fd;
485 } 609 }
486 610
611 if (err) {
612 free(tdata);
613 tdata = NULL;
614 }
615
487 put_tracepoints_path(tps); 616 put_tracepoints_path(tps);
488 return tdata; 617 return tdata;
489} 618}
490 619
491void tracing_data_put(struct tracing_data *tdata) 620int tracing_data_put(struct tracing_data *tdata)
492{ 621{
622 int err = 0;
623
493 if (tdata->temp) { 624 if (tdata->temp) {
494 record_file(tdata->temp_file, 0); 625 err = record_file(tdata->temp_file, 0);
495 unlink(tdata->temp_file); 626 unlink(tdata->temp_file);
496 } 627 }
497 628
498 free(tdata); 629 free(tdata);
630 return err;
499} 631}
500 632
501int read_tracing_data(int fd, struct list_head *pattrs) 633int read_tracing_data(int fd, struct list_head *pattrs)
502{ 634{
635 int err;
503 struct tracing_data *tdata; 636 struct tracing_data *tdata;
504 637
505 /* 638 /*
@@ -510,6 +643,6 @@ int read_tracing_data(int fd, struct list_head *pattrs)
510 if (!tdata) 643 if (!tdata)
511 return -ENOMEM; 644 return -ENOMEM;
512 645
513 tracing_data_put(tdata); 646 err = tracing_data_put(tdata);
514 return 0; 647 return err;
515} 648}
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 7cb24635adf2..af215c0d2379 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -18,8 +18,6 @@
18 * 18 *
19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
20 */ 20 */
21#define _FILE_OFFSET_BITS 64
22
23#include <dirent.h> 21#include <dirent.h>
24#include <stdio.h> 22#include <stdio.h>
25#include <stdlib.h> 23#include <stdlib.h>
@@ -45,20 +43,10 @@ int file_bigendian;
45int host_bigendian; 43int host_bigendian;
46static int long_size; 44static int long_size;
47 45
48static ssize_t calc_data_size; 46static ssize_t trace_data_size;
49static bool repipe; 47static bool repipe;
50 48
51static void *malloc_or_die(int size) 49static int __do_read(int fd, void *buf, int size)
52{
53 void *ret;
54
55 ret = malloc(size);
56 if (!ret)
57 die("malloc");
58 return ret;
59}
60
61static int do_read(int fd, void *buf, int size)
62{ 50{
63 int rsize = size; 51 int rsize = size;
64 52
@@ -71,8 +59,10 @@ static int do_read(int fd, void *buf, int size)
71 if (repipe) { 59 if (repipe) {
72 int retw = write(STDOUT_FILENO, buf, ret); 60 int retw = write(STDOUT_FILENO, buf, ret);
73 61
74 if (retw <= 0 || retw != ret) 62 if (retw <= 0 || retw != ret) {
75 die("repiping input file"); 63 pr_debug("repiping input file");
64 return -1;
65 }
76 } 66 }
77 67
78 size -= ret; 68 size -= ret;
@@ -82,17 +72,18 @@ static int do_read(int fd, void *buf, int size)
82 return rsize; 72 return rsize;
83} 73}
84 74
85static int read_or_die(void *data, int size) 75static int do_read(void *data, int size)
86{ 76{
87 int r; 77 int r;
88 78
89 r = do_read(input_fd, data, size); 79 r = __do_read(input_fd, data, size);
90 if (r <= 0) 80 if (r <= 0) {
91 die("reading input file (size expected=%d received=%d)", 81 pr_debug("reading input file (size expected=%d received=%d)",
92 size, r); 82 size, r);
83 return -1;
84 }
93 85
94 if (calc_data_size) 86 trace_data_size += r;
95 calc_data_size += r;
96 87
97 return r; 88 return r;
98} 89}
@@ -105,7 +96,7 @@ static void skip(int size)
105 96
106 while (size) { 97 while (size) {
107 r = size > BUFSIZ ? BUFSIZ : size; 98 r = size > BUFSIZ ? BUFSIZ : size;
108 read_or_die(buf, r); 99 do_read(buf, r);
109 size -= r; 100 size -= r;
110 }; 101 };
111} 102}
@@ -114,7 +105,8 @@ static unsigned int read4(struct pevent *pevent)
114{ 105{
115 unsigned int data; 106 unsigned int data;
116 107
117 read_or_die(&data, 4); 108 if (do_read(&data, 4) < 0)
109 return 0;
118 return __data2host4(pevent, data); 110 return __data2host4(pevent, data);
119} 111}
120 112
@@ -122,7 +114,8 @@ static unsigned long long read8(struct pevent *pevent)
122{ 114{
123 unsigned long long data; 115 unsigned long long data;
124 116
125 read_or_die(&data, 8); 117 if (do_read(&data, 8) < 0)
118 return 0;
126 return __data2host8(pevent, data); 119 return __data2host8(pevent, data);
127} 120}
128 121
@@ -136,17 +129,23 @@ static char *read_string(void)
136 129
137 for (;;) { 130 for (;;) {
138 r = read(input_fd, &c, 1); 131 r = read(input_fd, &c, 1);
139 if (r < 0) 132 if (r < 0) {
140 die("reading input file"); 133 pr_debug("reading input file");
134 goto out;
135 }
141 136
142 if (!r) 137 if (!r) {
143 die("no data"); 138 pr_debug("no data");
139 goto out;
140 }
144 141
145 if (repipe) { 142 if (repipe) {
146 int retw = write(STDOUT_FILENO, &c, 1); 143 int retw = write(STDOUT_FILENO, &c, 1);
147 144
148 if (retw <= 0 || retw != r) 145 if (retw <= 0 || retw != r) {
149 die("repiping input file string"); 146 pr_debug("repiping input file string");
147 goto out;
148 }
150 } 149 }
151 150
152 buf[size++] = c; 151 buf[size++] = c;
@@ -155,60 +154,79 @@ static char *read_string(void)
155 break; 154 break;
156 } 155 }
157 156
158 if (calc_data_size) 157 trace_data_size += size;
159 calc_data_size += size;
160
161 str = malloc_or_die(size);
162 memcpy(str, buf, size);
163 158
159 str = malloc(size);
160 if (str)
161 memcpy(str, buf, size);
162out:
164 return str; 163 return str;
165} 164}
166 165
167static void read_proc_kallsyms(struct pevent *pevent) 166static int read_proc_kallsyms(struct pevent *pevent)
168{ 167{
169 unsigned int size; 168 unsigned int size;
170 char *buf; 169 char *buf;
171 170
172 size = read4(pevent); 171 size = read4(pevent);
173 if (!size) 172 if (!size)
174 return; 173 return 0;
175 174
176 buf = malloc_or_die(size + 1); 175 buf = malloc(size + 1);
177 read_or_die(buf, size); 176 if (buf == NULL)
177 return -1;
178
179 if (do_read(buf, size) < 0) {
180 free(buf);
181 return -1;
182 }
178 buf[size] = '\0'; 183 buf[size] = '\0';
179 184
180 parse_proc_kallsyms(pevent, buf, size); 185 parse_proc_kallsyms(pevent, buf, size);
181 186
182 free(buf); 187 free(buf);
188 return 0;
183} 189}
184 190
185static void read_ftrace_printk(struct pevent *pevent) 191static int read_ftrace_printk(struct pevent *pevent)
186{ 192{
187 unsigned int size; 193 unsigned int size;
188 char *buf; 194 char *buf;
189 195
196 /* it can have 0 size */
190 size = read4(pevent); 197 size = read4(pevent);
191 if (!size) 198 if (!size)
192 return; 199 return 0;
193 200
194 buf = malloc_or_die(size); 201 buf = malloc(size);
195 read_or_die(buf, size); 202 if (buf == NULL)
203 return -1;
204
205 if (do_read(buf, size) < 0) {
206 free(buf);
207 return -1;
208 }
196 209
197 parse_ftrace_printk(pevent, buf, size); 210 parse_ftrace_printk(pevent, buf, size);
198 211
199 free(buf); 212 free(buf);
213 return 0;
200} 214}
201 215
202static void read_header_files(struct pevent *pevent) 216static int read_header_files(struct pevent *pevent)
203{ 217{
204 unsigned long long size; 218 unsigned long long size;
205 char *header_event; 219 char *header_event;
206 char buf[BUFSIZ]; 220 char buf[BUFSIZ];
221 int ret = 0;
207 222
208 read_or_die(buf, 12); 223 if (do_read(buf, 12) < 0)
224 return -1;
209 225
210 if (memcmp(buf, "header_page", 12) != 0) 226 if (memcmp(buf, "header_page", 12) != 0) {
211 die("did not read header page"); 227 pr_debug("did not read header page");
228 return -1;
229 }
212 230
213 size = read8(pevent); 231 size = read8(pevent);
214 skip(size); 232 skip(size);
@@ -219,70 +237,107 @@ static void read_header_files(struct pevent *pevent)
219 */ 237 */
220 long_size = header_page_size_size; 238 long_size = header_page_size_size;
221 239
222 read_or_die(buf, 13); 240 if (do_read(buf, 13) < 0)
223 if (memcmp(buf, "header_event", 13) != 0) 241 return -1;
224 die("did not read header event"); 242
243 if (memcmp(buf, "header_event", 13) != 0) {
244 pr_debug("did not read header event");
245 return -1;
246 }
225 247
226 size = read8(pevent); 248 size = read8(pevent);
227 header_event = malloc_or_die(size); 249 header_event = malloc(size);
228 read_or_die(header_event, size); 250 if (header_event == NULL)
251 return -1;
252
253 if (do_read(header_event, size) < 0)
254 ret = -1;
255
229 free(header_event); 256 free(header_event);
257 return ret;
230} 258}
231 259
232static void read_ftrace_file(struct pevent *pevent, unsigned long long size) 260static int read_ftrace_file(struct pevent *pevent, unsigned long long size)
233{ 261{
234 char *buf; 262 char *buf;
235 263
236 buf = malloc_or_die(size); 264 buf = malloc(size);
237 read_or_die(buf, size); 265 if (buf == NULL)
266 return -1;
267
268 if (do_read(buf, size) < 0) {
269 free(buf);
270 return -1;
271 }
272
238 parse_ftrace_file(pevent, buf, size); 273 parse_ftrace_file(pevent, buf, size);
239 free(buf); 274 free(buf);
275 return 0;
240} 276}
241 277
242static void read_event_file(struct pevent *pevent, char *sys, 278static int read_event_file(struct pevent *pevent, char *sys,
243 unsigned long long size) 279 unsigned long long size)
244{ 280{
245 char *buf; 281 char *buf;
246 282
247 buf = malloc_or_die(size); 283 buf = malloc(size);
248 read_or_die(buf, size); 284 if (buf == NULL)
285 return -1;
286
287 if (do_read(buf, size) < 0) {
288 free(buf);
289 return -1;
290 }
291
249 parse_event_file(pevent, buf, size, sys); 292 parse_event_file(pevent, buf, size, sys);
250 free(buf); 293 free(buf);
294 return 0;
251} 295}
252 296
253static void read_ftrace_files(struct pevent *pevent) 297static int read_ftrace_files(struct pevent *pevent)
254{ 298{
255 unsigned long long size; 299 unsigned long long size;
256 int count; 300 int count;
257 int i; 301 int i;
302 int ret;
258 303
259 count = read4(pevent); 304 count = read4(pevent);
260 305
261 for (i = 0; i < count; i++) { 306 for (i = 0; i < count; i++) {
262 size = read8(pevent); 307 size = read8(pevent);
263 read_ftrace_file(pevent, size); 308 ret = read_ftrace_file(pevent, size);
309 if (ret)
310 return ret;
264 } 311 }
312 return 0;
265} 313}
266 314
267static void read_event_files(struct pevent *pevent) 315static int read_event_files(struct pevent *pevent)
268{ 316{
269 unsigned long long size; 317 unsigned long long size;
270 char *sys; 318 char *sys;
271 int systems; 319 int systems;
272 int count; 320 int count;
273 int i,x; 321 int i,x;
322 int ret;
274 323
275 systems = read4(pevent); 324 systems = read4(pevent);
276 325
277 for (i = 0; i < systems; i++) { 326 for (i = 0; i < systems; i++) {
278 sys = read_string(); 327 sys = read_string();
328 if (sys == NULL)
329 return -1;
279 330
280 count = read4(pevent); 331 count = read4(pevent);
332
281 for (x=0; x < count; x++) { 333 for (x=0; x < count; x++) {
282 size = read8(pevent); 334 size = read8(pevent);
283 read_event_file(pevent, sys, size); 335 ret = read_event_file(pevent, sys, size);
336 if (ret)
337 return ret;
284 } 338 }
285 } 339 }
340 return 0;
286} 341}
287 342
288ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe) 343ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe)
@@ -293,58 +348,85 @@ ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe)
293 int show_version = 0; 348 int show_version = 0;
294 int show_funcs = 0; 349 int show_funcs = 0;
295 int show_printk = 0; 350 int show_printk = 0;
296 ssize_t size; 351 ssize_t size = -1;
352 struct pevent *pevent;
353 int err;
297 354
298 calc_data_size = 1; 355 *ppevent = NULL;
299 repipe = __repipe;
300 356
357 repipe = __repipe;
301 input_fd = fd; 358 input_fd = fd;
302 359
303 read_or_die(buf, 3); 360 if (do_read(buf, 3) < 0)
304 if (memcmp(buf, test, 3) != 0) 361 return -1;
305 die("no trace data in the file"); 362 if (memcmp(buf, test, 3) != 0) {
363 pr_debug("no trace data in the file");
364 return -1;
365 }
306 366
307 read_or_die(buf, 7); 367 if (do_read(buf, 7) < 0)
308 if (memcmp(buf, "tracing", 7) != 0) 368 return -1;
309 die("not a trace file (missing 'tracing' tag)"); 369 if (memcmp(buf, "tracing", 7) != 0) {
370 pr_debug("not a trace file (missing 'tracing' tag)");
371 return -1;
372 }
310 373
311 version = read_string(); 374 version = read_string();
375 if (version == NULL)
376 return -1;
312 if (show_version) 377 if (show_version)
313 printf("version = %s\n", version); 378 printf("version = %s\n", version);
314 free(version); 379 free(version);
315 380
316 read_or_die(buf, 1); 381 if (do_read(buf, 1) < 0)
382 return -1;
317 file_bigendian = buf[0]; 383 file_bigendian = buf[0];
318 host_bigendian = bigendian(); 384 host_bigendian = bigendian();
319 385
320 *ppevent = read_trace_init(file_bigendian, host_bigendian); 386 pevent = read_trace_init(file_bigendian, host_bigendian);
321 if (*ppevent == NULL) 387 if (pevent == NULL) {
322 die("read_trace_init failed"); 388 pr_debug("read_trace_init failed");
389 goto out;
390 }
323 391
324 read_or_die(buf, 1); 392 if (do_read(buf, 1) < 0)
393 goto out;
325 long_size = buf[0]; 394 long_size = buf[0];
326 395
327 page_size = read4(*ppevent); 396 page_size = read4(pevent);
328 397 if (!page_size)
329 read_header_files(*ppevent); 398 goto out;
330 399
331 read_ftrace_files(*ppevent); 400 err = read_header_files(pevent);
332 read_event_files(*ppevent); 401 if (err)
333 read_proc_kallsyms(*ppevent); 402 goto out;
334 read_ftrace_printk(*ppevent); 403 err = read_ftrace_files(pevent);
335 404 if (err)
336 size = calc_data_size - 1; 405 goto out;
337 calc_data_size = 0; 406 err = read_event_files(pevent);
407 if (err)
408 goto out;
409 err = read_proc_kallsyms(pevent);
410 if (err)
411 goto out;
412 err = read_ftrace_printk(pevent);
413 if (err)
414 goto out;
415
416 size = trace_data_size;
338 repipe = false; 417 repipe = false;
339 418
340 if (show_funcs) { 419 if (show_funcs) {
341 pevent_print_funcs(*ppevent); 420 pevent_print_funcs(pevent);
342 return size; 421 } else if (show_printk) {
343 } 422 pevent_print_printk(pevent);
344 if (show_printk) {
345 pevent_print_printk(*ppevent);
346 return size;
347 } 423 }
348 424
425 *ppevent = pevent;
426 pevent = NULL;
427
428out:
429 if (pevent)
430 pevent_free(pevent);
349 return size; 431 return size;
350} 432}
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 28ccde8ba20f..1978c398ad87 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -68,7 +68,7 @@ struct tracing_data {
68 68
69struct tracing_data *tracing_data_get(struct list_head *pattrs, 69struct tracing_data *tracing_data_get(struct list_head *pattrs,
70 int fd, bool temp); 70 int fd, bool temp);
71void tracing_data_put(struct tracing_data *tdata); 71int tracing_data_put(struct tracing_data *tdata);
72 72
73 73
74struct addr_location; 74struct addr_location;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 6a0781c3a573..a45710b70a55 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -1,8 +1,6 @@
1#ifndef GIT_COMPAT_UTIL_H 1#ifndef GIT_COMPAT_UTIL_H
2#define GIT_COMPAT_UTIL_H 2#define GIT_COMPAT_UTIL_H
3 3
4#define _FILE_OFFSET_BITS 64
5
6#ifndef FLEX_ARRAY 4#ifndef FLEX_ARRAY
7/* 5/*
8 * See if our compiler is known to support flexible array members. 6 * See if our compiler is known to support flexible array members.