Diffstat (limited to 'tools/perf/builtin-trace.c')
 tools/perf/builtin-trace.c | 637 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 498 insertions(+), 139 deletions(-)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 4f0bbffee05f..0f633f0d6be8 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1,4 +1,3 @@
-// SPDX-License-Identifier: GPL-2.0-only
 /*
  * builtin-trace.c
  *
@@ -15,6 +14,7 @@
  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
  */
 
+#include "util/record.h"
 #include <traceevent/event-parse.h>
 #include <api/fs/tracing_path.h>
 #include <bpf/bpf.h>
@@ -25,9 +25,12 @@
 #include "util/color.h"
 #include "util/config.h"
 #include "util/debug.h"
+#include "util/dso.h"
 #include "util/env.h"
 #include "util/event.h"
 #include "util/evlist.h"
+#include "util/evswitch.h"
+#include <subcmd/pager.h>
 #include <subcmd/exec-cmd.h>
 #include "util/machine.h"
 #include "util/map.h"
@@ -40,6 +43,8 @@
 #include "util/intlist.h"
 #include "util/thread_map.h"
 #include "util/stat.h"
+#include "util/tool.h"
+#include "util/util.h"
 #include "trace/beauty/beauty.h"
 #include "trace-event.h"
 #include "util/parse-events.h"
@@ -49,6 +54,7 @@
 #include "string2.h"
 #include "syscalltbl.h"
 #include "rb_resort.h"
+#include "../perf.h"
 
 #include <errno.h>
 #include <inttypes.h>
@@ -80,28 +86,34 @@ struct trace {
 	struct perf_tool tool;
 	struct syscalltbl *sctbl;
 	struct {
-		int max;
 		struct syscall *table;
 		struct bpf_map *map;
+		struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY
+			struct bpf_map *sys_enter,
+				       *sys_exit;
+		} prog_array;
 		struct {
-			struct perf_evsel *sys_enter,
+			struct evsel *sys_enter,
 				     *sys_exit,
 				     *augmented;
 		} events;
+		struct bpf_program *unaugmented_prog;
 	} syscalls;
 	struct {
 		struct bpf_map *map;
 	} dump;
 	struct record_opts opts;
-	struct perf_evlist *evlist;
+	struct evlist *evlist;
 	struct machine *host;
 	struct thread *current;
+	struct bpf_object *bpf_obj;
 	struct cgroup *cgroup;
 	u64 base_time;
 	FILE *output;
 	unsigned long nr_events;
 	unsigned long nr_events_printed;
 	unsigned long max_events;
+	struct evswitch evswitch;
 	struct strlist *ev_qualifier;
 	struct {
 		size_t nr;
@@ -122,6 +134,7 @@ struct trace {
 	unsigned int min_stack;
 	int raw_augmented_syscalls_args_size;
 	bool raw_augmented_syscalls;
+	bool fd_path_disabled;
 	bool sort_events;
 	bool not_ev_qualifier;
 	bool live;
@@ -237,7 +250,7 @@ struct syscall_tp {
 	};
 };
 
-static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
+static int perf_evsel__init_tp_uint_field(struct evsel *evsel,
 					  struct tp_field *field,
 					  const char *name)
 {
@@ -253,7 +266,7 @@ static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
 	({ struct syscall_tp *sc = evsel->priv;\
 	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
 
-static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
+static int perf_evsel__init_tp_ptr_field(struct evsel *evsel,
 					 struct tp_field *field,
 					 const char *name)
 {
@@ -269,13 +282,13 @@ static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
 	({ struct syscall_tp *sc = evsel->priv;\
 	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
 
-static void perf_evsel__delete_priv(struct perf_evsel *evsel)
+static void evsel__delete_priv(struct evsel *evsel)
 {
 	zfree(&evsel->priv);
-	perf_evsel__delete(evsel);
+	evsel__delete(evsel);
 }
 
-static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel)
+static int perf_evsel__init_syscall_tp(struct evsel *evsel)
 {
 	struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
 
@@ -292,7 +305,7 @@ out_delete:
 	return -ENOENT;
 }
 
-static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel, struct perf_evsel *tp)
+static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
 {
 	struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
 
@@ -314,21 +327,21 @@ out_delete:
 	return -EINVAL;
 }
 
-static int perf_evsel__init_augmented_syscall_tp_args(struct perf_evsel *evsel)
+static int perf_evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
 {
 	struct syscall_tp *sc = evsel->priv;
 
 	return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
 }
 
-static int perf_evsel__init_augmented_syscall_tp_ret(struct perf_evsel *evsel)
+static int perf_evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
 {
 	struct syscall_tp *sc = evsel->priv;
 
 	return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
 }
 
-static int perf_evsel__init_raw_syscall_tp(struct perf_evsel *evsel, void *handler)
+static int perf_evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
 {
 	evsel->priv = malloc(sizeof(struct syscall_tp));
 	if (evsel->priv != NULL) {
@@ -346,9 +359,9 @@ out_delete:
 	return -ENOENT;
 }
 
-static struct perf_evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
+static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
 {
-	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
+	struct evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
 
 	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
 	if (IS_ERR(evsel))
@@ -363,7 +376,7 @@ static struct perf_evsel *perf_evsel__raw_syscall_newtp(const char *direction, v
 	return evsel;
 
 out_delete:
-	perf_evsel__delete_priv(evsel);
+	evsel__delete_priv(evsel);
 	return NULL;
 }
 
@@ -688,6 +701,10 @@ struct syscall_arg_fmt {
 static struct syscall_fmt {
 	const char *name;
 	const char *alias;
+	struct {
+		const char *sys_enter,
+			   *sys_exit;
+	} bpf_prog_name;
 	struct syscall_arg_fmt arg[6];
 	u8 nr_args;
 	bool errpid;
@@ -700,7 +717,9 @@ static struct syscall_fmt {
 	  .arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, /* code */ },
 		   [1] = { .scnprintf = SCA_PTR, /* arg2 */ }, }, },
 	{ .name = "bind",
-	  .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ }, }, },
+	  .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
+		   [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ },
+		   [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
 	{ .name = "bpf",
 	  .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
 	{ .name = "brk", .hexret = true,
@@ -716,7 +735,9 @@ static struct syscall_fmt {
 	{ .name = "close",
 	  .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
 	{ .name = "connect",
-	  .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ }, }, },
+	  .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
+		   [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ },
+		   [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
 	{ .name = "epoll_ctl",
 	  .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
 	{ .name = "eventfd2",
@@ -882,6 +903,7 @@ static struct syscall_fmt {
 	  .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ },
 		   [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
 	{ .name = "select", .timeout = true, },
+	{ .name = "sendfile", .alias = "sendfile64", },
 	{ .name = "sendmmsg",
 	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
 	{ .name = "sendmsg",
@@ -960,13 +982,19 @@ static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
  * is_exit: is this "exit" or "exit_group"?
  * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
  * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
+ * nonexistent: Just a hole in the syscall table, syscall id not allocated
  */
 struct syscall {
 	struct tep_event *tp_format;
 	int nr_args;
 	int args_size;
+	struct {
+		struct bpf_program *sys_enter,
+				   *sys_exit;
+	} bpf_prog;
 	bool is_exit;
 	bool is_open;
+	bool nonexistent;
 	struct tep_format_field *args;
 	const char *name;
 	struct syscall_fmt *fmt;
@@ -1163,7 +1191,7 @@ static const char *thread__fd_path(struct thread *thread, int fd,
 {
 	struct thread_trace *ttrace = thread__priv(thread);
 
-	if (ttrace == NULL)
+	if (ttrace == NULL || trace->fd_path_disabled)
 		return NULL;
 
 	if (fd < 0)
@@ -1359,7 +1387,7 @@ static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long l
 
 	if (symbol_conf.kptr_restrict) {
 		pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
-			   "Check /proc/sys/kernel/kptr_restrict.\n\n"
+			   "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
 			   "Kernel samples will not be resolved.\n");
 		machine->kptr_restrict_warned = true;
 		return NULL;
@@ -1368,7 +1396,7 @@ static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long l
 	return machine__resolve_kernel_addr(vmachine, addrp, modp);
 }
 
-static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
+static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
 {
 	int err = symbol__init(NULL);
 
@@ -1384,7 +1412,7 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 		goto out;
 
 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
-					    evlist->threads, trace__tool_process, false,
+					    evlist->core.threads, trace__tool_process, false,
 					    1);
 out:
 	if (err)
@@ -1471,29 +1499,22 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 	struct syscall *sc;
 	const char *name = syscalltbl__name(trace->sctbl, id);
 
-	if (name == NULL)
-		return -1;
-
-	if (id > trace->syscalls.max) {
-		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
-
-		if (nsyscalls == NULL)
-			return -1;
+	if (trace->syscalls.table == NULL) {
+		trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
+		if (trace->syscalls.table == NULL)
+			return -ENOMEM;
+	}
 
-		if (trace->syscalls.max != -1) {
-			memset(nsyscalls + trace->syscalls.max + 1, 0,
-			       (id - trace->syscalls.max) * sizeof(*sc));
-		} else {
-			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
-		}
+	sc = trace->syscalls.table + id;
+	if (sc->nonexistent)
+		return 0;
 
-		trace->syscalls.table = nsyscalls;
-		trace->syscalls.max = id;
+	if (name == NULL) {
+		sc->nonexistent = true;
+		return 0;
 	}
 
-	sc = trace->syscalls.table + id;
 	sc->name = name;
-
 	sc->fmt = syscall_fmt__find(sc->name);
 
 	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
@@ -1505,10 +1526,10 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 	}
 
 	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
-		return -1;
+		return -ENOMEM;
 
 	if (IS_ERR(sc->tp_format))
-		return -1;
+		return PTR_ERR(sc->tp_format);
 
 	sc->args = sc->tp_format->format.fields;
 	/*
@@ -1527,6 +1548,13 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 	return syscall__set_arg_fmts(sc);
 }
 
+static int intcmp(const void *a, const void *b)
+{
+	const int *one = a, *another = b;
+
+	return *one - *another;
+}
+
 static int trace__validate_ev_qualifier(struct trace *trace)
 {
 	int err = 0;
@@ -1590,6 +1618,7 @@ matches:
 	}
 
 	trace->ev_qualifier_ids.nr = nr_used;
+	qsort(trace->ev_qualifier_ids.entries, nr_used, sizeof(int), intcmp);
 out:
 	if (printed_invalid_prefix)
 		pr_debug("\n");
@@ -1600,6 +1629,22 @@ out_free:
 	goto out;
 }
 
+static __maybe_unused bool trace__syscall_enabled(struct trace *trace, int id)
+{
+	bool in_ev_qualifier;
+
+	if (trace->ev_qualifier_ids.nr == 0)
+		return true;
+
+	in_ev_qualifier = bsearch(&id, trace->ev_qualifier_ids.entries,
+				  trace->ev_qualifier_ids.nr, sizeof(int), intcmp) != NULL;
+
+	if (in_ev_qualifier)
+		return !trace->not_ev_qualifier;
+
+	return trace->not_ev_qualifier;
+}
+
 /*
  * args is to be interpreted as a series of longs but we need to handle
  * 8-byte unaligned accesses. args points to raw_data within the event
@@ -1738,13 +1783,14 @@ next_arg:
 	return printed;
 }
 
-typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
+typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel,
 				  union perf_event *event,
 				  struct perf_sample *sample);
 
 static struct syscall *trace__syscall_info(struct trace *trace,
-					   struct perf_evsel *evsel, int id)
+					   struct evsel *evsel, int id)
 {
+	int err = 0;
 
 	if (id < 0) {
 
@@ -1766,19 +1812,28 @@ static struct syscall *trace__syscall_info(struct trace *trace,
 		return NULL;
 	}
 
-	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
-	    trace__read_syscall_info(trace, id))
+	err = -EINVAL;
+
+	if (id > trace->sctbl->syscalls.max_id)
+		goto out_cant_read;
+
+	if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
+	    (err = trace__read_syscall_info(trace, id)) != 0)
 		goto out_cant_read;
 
-	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
+	if (trace->syscalls.table[id].name == NULL) {
+		if (trace->syscalls.table[id].nonexistent)
+			return NULL;
 		goto out_cant_read;
+	}
 
 	return &trace->syscalls.table[id];
 
 out_cant_read:
 	if (verbose > 0) {
-		fprintf(trace->output, "Problems reading syscall %d", id);
-		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
+		char sbuf[STRERR_BUFSIZE];
+		fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf)));
+		if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL)
 			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
 		fputs(" information\n", trace->output);
 	}
@@ -1839,7 +1894,7 @@ static int trace__printf_interrupted_entry(struct trace *trace)
 	return printed;
 }
 
-static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
+static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
 				 struct perf_sample *sample, struct thread *thread)
 {
 	int printed = 0;
@@ -1882,7 +1937,7 @@ static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sam
 	return augmented_args;
 }
 
-static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
+static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
 			    union perf_event *event __maybe_unused,
 			    struct perf_sample *sample)
 {
@@ -1961,7 +2016,7 @@ out_put:
 	return err;
 }
 
-static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evsel,
+static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
 				    struct perf_sample *sample)
 {
 	struct thread_trace *ttrace;
@@ -1994,13 +2049,13 @@ out_put:
 	return err;
 }
 
-static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
+static int trace__resolve_callchain(struct trace *trace, struct evsel *evsel,
 				    struct perf_sample *sample,
 				    struct callchain_cursor *cursor)
 {
 	struct addr_location al;
-	int max_stack = evsel->attr.sample_max_stack ?
-			evsel->attr.sample_max_stack :
+	int max_stack = evsel->core.attr.sample_max_stack ?
+			evsel->core.attr.sample_max_stack :
 			trace->max_stack;
 	int err;
 
@@ -2022,7 +2077,7 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam
 	return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
 }
 
-static const char *errno_to_name(struct perf_evsel *evsel, int err)
+static const char *errno_to_name(struct evsel *evsel, int err)
 {
 	struct perf_env *env = perf_evsel__env(evsel);
 	const char *arch_name = perf_env__arch(env);
@@ -2030,7 +2085,7 @@ static const char *errno_to_name(struct perf_evsel *evsel, int err)
 	return arch_syscalls__strerrno(arch_name, err);
 }
 
-static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
+static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
 			   union perf_event *event __maybe_unused,
 			   struct perf_sample *sample)
 {
@@ -2058,7 +2113,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 
 	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
 
-	if (sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
+	if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
 		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
 		ttrace->filename.pending_open = false;
 		++trace->stats.vfs_getname;
@@ -2164,7 +2219,7 @@ out_put:
 	return err;
 }
 
-static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
+static int trace__vfs_getname(struct trace *trace, struct evsel *evsel,
 			      union perf_event *event __maybe_unused,
 			      struct perf_sample *sample)
 {
@@ -2225,7 +2280,7 @@ out:
 	return 0;
 }
 
-static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
+static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
 				     union perf_event *event __maybe_unused,
 				     struct perf_sample *sample)
 {
@@ -2287,7 +2342,7 @@ static void bpf_output__fprintf(struct trace *trace,
 	++trace->nr_events_printed;
 }
 
-static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
+static int trace__event_handler(struct trace *trace, struct evsel *evsel,
 				union perf_event *event __maybe_unused,
 				struct perf_sample *sample)
 {
@@ -2353,8 +2408,8 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 			++trace->nr_events_printed;
 
 			if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
-				perf_evsel__disable(evsel);
-				perf_evsel__close(evsel);
+				evsel__disable(evsel);
+				evsel__close(evsel);
 			}
 		}
 	}
@@ -2389,7 +2444,7 @@ static void print_location(FILE *f, struct perf_sample *sample,
 }
 
 static int trace__pgfault(struct trace *trace,
-			  struct perf_evsel *evsel,
+			  struct evsel *evsel,
 			  union perf_event *event __maybe_unused,
 			  struct perf_sample *sample)
 {
@@ -2415,7 +2470,7 @@ static int trace__pgfault(struct trace *trace,
 	if (ttrace == NULL)
 		goto out_put;
 
-	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
+	if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
 		ttrace->pfmaj++;
 	else
 		ttrace->pfmin++;
@@ -2428,7 +2483,7 @@ static int trace__pgfault(struct trace *trace,
 	trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
 
 	fprintf(trace->output, "%sfault [",
-		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
+		evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
 		"maj" : "min");
 
 	print_location(trace->output, sample, &al, false, true);
@@ -2464,7 +2519,7 @@ out_put:
 }
 
 static void trace__set_base_time(struct trace *trace,
-				 struct perf_evsel *evsel,
+				 struct evsel *evsel,
 				 struct perf_sample *sample)
 {
 	/*
@@ -2476,14 +2531,14 @@ static void trace__set_base_time(struct trace *trace,
 	 * appears in our event stream (vfs_getname comes to mind).
 	 */
 	if (trace->base_time == 0 && !trace->full_time &&
-	    (evsel->attr.sample_type & PERF_SAMPLE_TIME))
+	    (evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
 		trace->base_time = sample->time;
 }
 
 static int trace__process_sample(struct perf_tool *tool,
 				 union perf_event *event,
 				 struct perf_sample *sample,
-				 struct perf_evsel *evsel,
+				 struct evsel *evsel,
 				 struct machine *machine __maybe_unused)
 {
 	struct trace *trace = container_of(tool, struct trace, tool);
@@ -2569,10 +2624,10 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
 
 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
 
-static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
+static bool evlist__add_vfs_getname(struct evlist *evlist)
 {
 	bool found = false;
-	struct perf_evsel *evsel, *tmp;
+	struct evsel *evsel, *tmp;
 	struct parse_events_error err = { .idx = 0, };
 	int ret = parse_events(evlist, "probe:vfs_getname*", &err);
 
@@ -2589,17 +2644,17 @@ static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
 			continue;
 		}
 
-		list_del_init(&evsel->node);
+		list_del_init(&evsel->core.node);
 		evsel->evlist = NULL;
-		perf_evsel__delete(evsel);
+		evsel__delete(evsel);
 	}
 
 	return found;
 }
 
-static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
+static struct evsel *perf_evsel__new_pgfault(u64 config)
 {
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 	struct perf_event_attr attr = {
 		.type = PERF_TYPE_SOFTWARE,
 		.mmap_data = 1,
@@ -2610,7 +2665,7 @@ static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
 
 	event_attr_init(&attr);
 
-	evsel = perf_evsel__new(&attr);
+	evsel = evsel__new(&attr);
 	if (evsel)
 		evsel->handler = trace__pgfault;
 
@@ -2620,8 +2675,8 @@ static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
 {
 	const u32 type = event->header.type;
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 
 	if (type != PERF_RECORD_SAMPLE) {
 		trace__process_event(trace, trace->host, event, sample);
 		return;
@@ -2633,9 +2688,12 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
 		return;
 	}
 
+	if (evswitch__discard(&trace->evswitch, evsel))
+		return;
+
 	trace__set_base_time(trace, evsel, sample);
 
-	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
+	if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
 	    sample->raw_data == NULL) {
 		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
 			perf_evsel__name(evsel), sample->tid,
@@ -2652,8 +2710,8 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
 static int trace__add_syscall_newtp(struct trace *trace)
 {
 	int ret = -1;
-	struct perf_evlist *evlist = trace->evlist;
-	struct perf_evsel *sys_enter, *sys_exit;
+	struct evlist *evlist = trace->evlist;
+	struct evsel *sys_enter, *sys_exit;
 
 	sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
 	if (sys_enter == NULL)
@@ -2672,8 +2730,8 @@ static int trace__add_syscall_newtp(struct trace *trace)
 	perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
 	perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
 
-	perf_evlist__add(evlist, sys_enter);
-	perf_evlist__add(evlist, sys_exit);
+	evlist__add(evlist, sys_enter);
+	evlist__add(evlist, sys_exit);
 
 	if (callchain_param.enabled && !trace->kernel_syscallchains) {
 		/*
@@ -2681,7 +2739,7 @@ static int trace__add_syscall_newtp(struct trace *trace)
 		 * leading to the syscall, allow overriding that for
 		 * debugging reasons using --kernel_syscall_callchains
 		 */
-		sys_exit->attr.exclude_callchain_kernel = 1;
+		sys_exit->core.attr.exclude_callchain_kernel = 1;
 	}
 
 	trace->syscalls.events.sys_enter = sys_enter;
@@ -2692,16 +2750,16 @@ out:
 	return ret;
 
 out_delete_sys_exit:
-	perf_evsel__delete_priv(sys_exit);
+	evsel__delete_priv(sys_exit);
 out_delete_sys_enter:
-	perf_evsel__delete_priv(sys_enter);
+	evsel__delete_priv(sys_enter);
 	goto out;
 }
 
 static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
 {
 	int err = -1;
-	struct perf_evsel *sys_exit;
+	struct evsel *sys_exit;
 	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
 						trace->ev_qualifier_ids.nr,
 						trace->ev_qualifier_ids.entries);
@@ -2724,6 +2782,70 @@ out_enomem:
 }
 
 #ifdef HAVE_LIBBPF_SUPPORT
+static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
+{
+	if (trace->bpf_obj == NULL)
+		return NULL;
+
+	return bpf_object__find_program_by_title(trace->bpf_obj, name);
+}
+
+static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc,
+							const char *prog_name, const char *type)
+{
+	struct bpf_program *prog;
+
+	if (prog_name == NULL) {
+		char default_prog_name[256];
+		scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name);
+		prog = trace__find_bpf_program_by_title(trace, default_prog_name);
+		if (prog != NULL)
+			goto out_found;
+		if (sc->fmt && sc->fmt->alias) {
+			scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias);
+			prog = trace__find_bpf_program_by_title(trace, default_prog_name);
+			if (prog != NULL)
+				goto out_found;
+		}
+		goto out_unaugmented;
+	}
+
+	prog = trace__find_bpf_program_by_title(trace, prog_name);
+
+	if (prog != NULL) {
+out_found:
+		return prog;
+	}
+
+	pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
+		 prog_name, type, sc->name);
+out_unaugmented:
+	return trace->syscalls.unaugmented_prog;
+}
+
+static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
+{
+	struct syscall *sc = trace__syscall_info(trace, NULL, id);
+
+	if (sc == NULL)
+		return;
+
+	sc->bpf_prog.sys_enter = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_enter : NULL, "enter");
+	sc->bpf_prog.sys_exit = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_exit : NULL, "exit");
+}
+
+static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
+{
+	struct syscall *sc = trace__syscall_info(trace, NULL, id);
+	return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog);
+}
+
+static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
+{
+	struct syscall *sc = trace__syscall_info(trace, NULL, id);
+	return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
+}
+
 static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
 {
 	struct syscall *sc = trace__syscall_info(trace, NULL, id);
@@ -2755,8 +2877,10 @@ static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
 	for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
 		int key = trace->ev_qualifier_ids.entries[i];
 
-		if (value.enabled)
+		if (value.enabled) {
 			trace__init_bpf_map_syscall_args(trace, key, &value);
+			trace__init_syscall_bpf_progs(trace, key);
+		}
 
 		err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
 		if (err)
@@ -2795,6 +2919,186 @@ static int trace__init_syscalls_bpf_map(struct trace *trace)
 
 	return __trace__init_syscalls_bpf_map(trace, enabled);
 }
+
+static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
+{
+	struct tep_format_field *field, *candidate_field;
+	int id;
+
+	/*
+	 * We're only interested in syscalls that have a pointer:
+	 */
+	for (field = sc->args; field; field = field->next) {
+		if (field->flags & TEP_FIELD_IS_POINTER)
+			goto try_to_find_pair;
+	}
+
+	return NULL;
+
+try_to_find_pair:
+	for (id = 0; id < trace->sctbl->syscalls.nr_entries; ++id) {
+		struct syscall *pair = trace__syscall_info(trace, NULL, id);
+		struct bpf_program *pair_prog;
+		bool is_candidate = false;
+
+		if (pair == NULL || pair == sc ||
+		    pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
+			continue;
+
+		for (field = sc->args, candidate_field = pair->args;
+		     field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
+			bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
+			     candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;
+
+			if (is_pointer) {
+				if (!candidate_is_pointer) {
+					// The candidate just doesn't copies our pointer arg, might copy other pointers we want.
+					continue;
+				}
+			} else {
+				if (candidate_is_pointer) {
+					// The candidate might copy a pointer we don't have, skip it.
+					goto next_candidate;
+				}
+				continue;
+			}
+
+			if (strcmp(field->type, candidate_field->type))
+				goto next_candidate;
+
+			is_candidate = true;
+		}
+
+		if (!is_candidate)
+			goto next_candidate;
+
+		/*
+		 * Check if the tentative pair syscall augmenter has more pointers, if it has,
+		 * then it may be collecting that and we then can't use it, as it would collect
+		 * more than what is common to the two syscalls.
+		 */
+		if (candidate_field) {
+			for (candidate_field = candidate_field->next; candidate_field; candidate_field = candidate_field->next)
+				if (candidate_field->flags & TEP_FIELD_IS_POINTER)
+					goto next_candidate;
+		}
+
+		pair_prog = pair->bpf_prog.sys_enter;
+		/*
+		 * If the pair isn't enabled, then its bpf_prog.sys_enter will not
+		 * have been searched for, so search it here and if it returns the
+		 * unaugmented one, then ignore it, otherwise we'll reuse that BPF
+		 * program for a filtered syscall on a non-filtered one.
+		 *
+		 * For instance, we have "!syscalls:sys_enter_renameat" and that is
+		 * useful for "renameat2".
+		 */
+		if (pair_prog == NULL) {
+			pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
+			if (pair_prog == trace->syscalls.unaugmented_prog)
+				goto next_candidate;
+		}
+
+		pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
+		return pair_prog;
+	next_candidate:
+		continue;
+	}
+
+	return NULL;
+}
+
+static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
+{
+	int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
+	    map_exit_fd = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
+	int err = 0, key;
+
+	for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
+		int prog_fd;
+
+		if (!trace__syscall_enabled(trace, key))
+			continue;
+
+		trace__init_syscall_bpf_progs(trace, key);
+
+		// It'll get at least the "!raw_syscalls:unaugmented"
+		prog_fd = trace__bpf_prog_sys_enter_fd(trace, key);
+		err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
+		if (err)
+			break;
+		prog_fd = trace__bpf_prog_sys_exit_fd(trace, key);
+		err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY);
+		if (err)
+			break;
+	}
+
+	/*
+	 * Now lets do a second pass looking for enabled syscalls without
+	 * an augmenter that have a signature that is a superset of another
+	 * syscall with an augmenter so that we can auto-reuse it.
+	 *
+	 * I.e. if we have an augmenter for the "open" syscall that has
+	 * this signature:
+	 *
+	 *   int open(const char *pathname, int flags, mode_t mode);
+	 *
+	 * I.e. that will collect just the first string argument, then we
+	 * can reuse it for the 'creat' syscall, that has this signature:
+	 *
+	 *   int creat(const char *pathname, mode_t mode);
+	 *
+	 * and for:
+	 *
+	 *   int stat(const char *pathname, struct stat *statbuf);
+	 *   int lstat(const char *pathname, struct stat *statbuf);
+	 *
+	 * Because the 'open' augmenter will collect the first arg as a string,
+	 * and leave alone all the other args, which already helps with
+	 * beautifying 'stat' and 'lstat''s pathname arg.
+	 *
+	 * Then, in time, when 'stat' gets an augmenter that collects both
+	 * first and second arg (this one on the raw_syscalls:sys_exit prog
+	 * array tail call, then that one will be used.
+	 */
+	for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
+		struct syscall *sc = trace__syscall_info(trace, NULL, key);
+		struct bpf_program *pair_prog;
+		int prog_fd;
+
+		if (sc == NULL || sc->bpf_prog.sys_enter == NULL)
+			continue;
+
+		/*
+		 * For now we're just reusing the sys_enter prog, and if it
+		 * already has an augmenter, we don't need to find one.
+		 */
+		if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
+			continue;
+
+		/*
+		 * Look at all the other syscalls for one that has a signature
+		 * that is close enough that we can share:
+		 */
+		pair_prog = trace__find_usable_bpf_prog_entry(trace, sc);
+		if (pair_prog == NULL)
+			continue;
+
+		sc->bpf_prog.sys_enter = pair_prog;
+
+		/*
+		 * Update the BPF_MAP_TYPE_PROG_SHARED for raw_syscalls:sys_enter
+		 * with the fd for the program we're reusing:
+		 */
+		prog_fd = bpf_program__fd(sc->bpf_prog.sys_enter);
+		err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
+		if (err)
+			break;
+	}
+
+
+	return err;
+}
 #else
 static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
 {
@@ -2805,6 +3109,17 @@ static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused)
 {
 	return 0;
 }
+
+static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused,
+							    const char *name __maybe_unused)
+{
+	return NULL;
+}
+
+static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
+{
+	return 0;
+}
 #endif // HAVE_LIBBPF_SUPPORT
 
 static int trace__set_ev_qualifier_filter(struct trace *trace)
@@ -2879,7 +3194,7 @@ static int trace__set_filter_pids(struct trace *trace)
 			err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr,
 						       trace->filter_pids.entries);
 		}
-	} else if (thread_map__pid(trace->evlist->threads, 0) == -1) {
+	} else if (perf_thread_map__pid(trace->evlist->core.threads, 0) == -1) {
 		err = trace__set_filter_loop_pids(trace);
 	}
 
@@ -2888,7 +3203,7 @@ static int trace__set_filter_pids(struct trace *trace)
 
 static int __trace__deliver_event(struct trace *trace, union perf_event *event)
 {
-	struct perf_evlist *evlist = trace->evlist;
+	struct evlist *evlist = trace->evlist;
 	struct perf_sample sample;
 	int err;
 
@@ -2946,8 +3261,8 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
 
 static int trace__run(struct trace *trace, int argc, const char **argv)
 {
-	struct perf_evlist *evlist = trace->evlist;
-	struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
+	struct evlist *evlist = trace->evlist;
+	struct evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
 	int err = -1, i;
 	unsigned long before;
 	const bool forks = argc > 0;
@@ -2960,7 +3275,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 			goto out_error_raw_syscalls;
 
 		if (trace->trace_syscalls)
-			trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
+			trace->vfs_getname = evlist__add_vfs_getname(evlist);
 	}
 
 	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
@@ -2968,7 +3283,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		if (pgfault_maj == NULL)
 			goto out_error_mem;
 		perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
-		perf_evlist__add(evlist, pgfault_maj);
+		evlist__add(evlist, pgfault_maj);
 	}
 
 	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
@@ -2976,7 +3291,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		if (pgfault_min == NULL)
 			goto out_error_mem;
 		perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
-		perf_evlist__add(evlist, pgfault_min);
+		evlist__add(evlist, pgfault_min);
 	}
 
 	if (trace->sched &&
@@ -3038,7 +3353,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		}
 	}
 
-	err = perf_evlist__open(evlist);
+	err = evlist__open(evlist);
 	if (err < 0)
 		goto out_error_open;
 
@@ -3059,6 +3374,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	if (trace->syscalls.map)
 		trace__init_syscalls_bpf_map(trace);
 
+	if (trace->syscalls.prog_array.sys_enter)
+		trace__init_syscalls_bpf_prog_array_maps(trace);
+
 	if (trace->ev_qualifier_ids.nr > 0) {
 		err = trace__set_ev_qualifier_filter(trace);
 		if (err < 0)
@@ -3070,6 +3388,19 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		}
 	}
 
+	/*
+	 * If the "close" syscall is not traced, then we will not have the
+	 * opportunity to, in syscall_arg__scnprintf_close_fd() invalidate the
+	 * fd->pathname table and were ending up showing the last value set by
+	 * syscalls opening a pathname and associating it with a descriptor or
+	 * reading it from /proc/pid/fd/ in cases where that doesn't make
+	 * sense.
+	 *
+	 * So just disable this beautifier (SCA_FD, SCA_FDAT) when 'close' is
+	 * not in use.
+	 */
+	trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close"));
+
 	err = perf_evlist__apply_filters(evlist, &evsel);
 	if (err < 0)
 		goto out_error_apply_filters;
@@ -3082,30 +3413,30 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		goto out_error_mmap;
 
 	if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
-		perf_evlist__enable(evlist);
+		evlist__enable(evlist);
 
 	if (forks)
 		perf_evlist__start_workload(evlist);
 
 	if (trace->opts.initial_delay) {
 		usleep(trace->opts.initial_delay * 1000);
-		perf_evlist__enable(evlist);
+		evlist__enable(evlist);
 	}
 
-	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
-				  evlist->threads->nr > 1 ||
-				  perf_evlist__first(evlist)->attr.inherit;
+	trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
+				  evlist->core.threads->nr > 1 ||
+				  perf_evlist__first(evlist)->core.attr.inherit;
 
 	/*
-	 * Now that we already used evsel->attr to ask the kernel to setup the
-	 * events, lets reuse evsel->attr.sample_max_stack as the limit in
+	 * Now that we already used evsel->core.attr to ask the kernel to setup the
+	 * events, lets reuse evsel->core.attr.sample_max_stack as the limit in
 	 * trace__resolve_callchain(), allowing per-event max-stack settings
 	 * to override an explicitly set --max-stack global setting.
 	 */
 	evlist__for_each_entry(evlist, evsel) {
 		if (evsel__has_callchain(evsel) &&
-		    evsel->attr.sample_max_stack == 0)
-			evsel->attr.sample_max_stack = trace->max_stack;
+		    evsel->core.attr.sample_max_stack == 0)
+			evsel->core.attr.sample_max_stack = trace->max_stack;
 	}
 again:
 	before = trace->nr_events;
@@ -3131,7 +3462,7 @@ again:
 			goto out_disable;
 
 		if (done && !draining) {
-			perf_evlist__disable(evlist);
+			evlist__disable(evlist);
 			draining = true;
 		}
 	}
@@ -3157,7 +3488,7 @@ again:
 out_disable:
 	thread__zput(trace->current);
 
-	perf_evlist__disable(evlist);
+	evlist__disable(evlist);
 
 	if (trace->sort_events)
 		ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);
@@ -3178,7 +3509,7 @@ out_disable:
 out_delete_evlist:
 	trace__symbols__exit(trace);
 
-	perf_evlist__delete(evlist);
+	evlist__delete(evlist);
 	cgroup__put(trace->cgroup);
 	trace->evlist = NULL;
 	trace->live = false;
@@ -3223,7 +3554,7 @@ out_errno:
 
 static int trace__replay(struct trace *trace)
 {
-	const struct perf_evsel_str_handler handlers[] = {
+	const struct evsel_str_handler handlers[] = {
 		{ "probe:vfs_getname", trace__vfs_getname, },
 	};
 	struct perf_data data = {
@@ -3232,7 +3563,7 @@ static int trace__replay(struct trace *trace)
 		.force = trace->force,
 	};
 	struct perf_session *session;
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 	int err = -1;
 
 	trace->tool.sample = trace__process_sample;
@@ -3298,10 +3629,10 @@ static int trace__replay(struct trace *trace)
 	}
 
 	evlist__for_each_entry(session->evlist, evsel) {
-		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
-		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
-		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
-		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
+		if (evsel->core.attr.type == PERF_TYPE_SOFTWARE &&
+		    (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
+		     evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
+		     evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS))
 			evsel->handler = trace__pgfault;
 	}
 
@@ -3523,17 +3854,17 @@ static int parse_pagefaults(const struct option *opt, const char *str,
 	return 0;
 }
 
-static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
+static void evlist__set_evsel_handler(struct evlist *evlist, void *handler)
 {
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel)
 		evsel->handler = handler;
 }
 
-static int evlist__set_syscall_tp_fields(struct perf_evlist *evlist)
+static int evlist__set_syscall_tp_fields(struct evlist *evlist)
 {
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel) {
 		if (evsel->priv || !evsel->tp_format)
@@ -3660,7 +3991,7 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u
 {
 	struct trace *trace = opt->value;
 
-	if (!list_empty(&trace->evlist->entries))
+	if (!list_empty(&trace->evlist->core.entries))
 		return parse_cgroups(opt, str, unset);
 
 	trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
@@ -3668,28 +3999,24 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u
 	return 0;
 }
 
-static struct bpf_map *bpf__find_map_by_name(const char *name)
+static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
 {
-	struct bpf_object *obj, *tmp;
-
-	bpf_object__for_each_safe(obj, tmp) {
-		struct bpf_map *map = bpf_object__find_map_by_name(obj, name);
-		if (map)
-			return map;
-
-	}
+	if (trace->bpf_obj == NULL)
+		return NULL;
 
-	return NULL;
+	return bpf_object__find_map_by_name(trace->bpf_obj, name);
 }
 
 static void trace__set_bpf_map_filtered_pids(struct trace *trace)
 {
-	trace->filter_pids.map = bpf__find_map_by_name("pids_filtered");
+	trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
 }
 
 static void trace__set_bpf_map_syscalls(struct trace *trace)
 {
-	trace->syscalls.map = bpf__find_map_by_name("syscalls");
+	trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
+	trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
+	trace->syscalls.prog_array.sys_exit  = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
 }
 
 static int trace__config(const char *var, const char *value, void *arg)
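Note: syscalls_sys_enter/syscalls_sys_exit are the per-syscall BPF_MAP_TYPE_PROG_ARRAY maps declared in struct trace above. A prog array holds BPF program file descriptors that tail calls jump through, so user space can pick a program per syscall id. A sketch of populating one entry, assuming libbpf and hypothetical `prog`/`id` variables (not code from this patch):

	int map_fd  = bpf_map__fd(trace->syscalls.prog_array.sys_enter);
	int prog_fd = bpf_program__fd(prog);	/* prog: hypothetical struct bpf_program * */
	int key     = id;			/* hypothetical syscall number */
	int err     = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY);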
@@ -3745,9 +4072,6 @@ int cmd_trace(int argc, const char **argv)
 	NULL
 	};
 	struct trace trace = {
-		.syscalls = {
-			.max = -1,
-		},
 		.opts = {
 			.target = {
 				.uid = UINT_MAX,
@@ -3844,11 +4168,12 @@ int cmd_trace(int argc, const char **argv)
 	OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
 		     "ms to wait before starting measurement after program "
 		     "start"),
+	OPTS_EVSWITCH(&trace.evswitch),
 	OPT_END()
 	};
 	bool __maybe_unused max_stack_user_set = true;
 	bool mmap_pages_user_set = true;
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 	const char * const trace_subcommands[] = { "record", NULL };
 	int err = -1;
 	char bf[BUFSIZ];
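Note: OPTS_EVSWITCH registers the --switch-on/--switch-off options backing the evswitch in struct trace. The idea is a gate that discards events until the "on" event is seen and closes again on the "off" event; a hypothetical sketch of that gating (field and helper names assumed, not the actual util/evswitch.c):

	static bool evswitch_discard(struct evswitch *es, struct evsel *evsel)
	{
		if (es->on && es->discarding) {
			if (evsel != es->on)
				return true;	/* still gated off, drop the event */
			es->discarding = false;	/* "on" event seen: open the gate */
		} else if (es->off && !es->discarding && evsel == es->off) {
			es->discarding = true;	/* "off" event seen: close it again */
		}
		return false;
	}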
@@ -3856,7 +4181,7 @@ int cmd_trace(int argc, const char **argv)
 	signal(SIGSEGV, sighandler_dump_stack);
 	signal(SIGFPE, sighandler_dump_stack);
 
-	trace.evlist = perf_evlist__new();
+	trace.evlist = evlist__new();
 	trace.sctbl = syscalltbl__new();
 
 	if (trace.evlist == NULL || trace.sctbl == NULL) {
@@ -3895,8 +4220,23 @@ int cmd_trace(int argc, const char **argv)
 
 	if (evsel) {
 		trace.syscalls.events.augmented = evsel;
+
+		evsel = perf_evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter");
+		if (evsel == NULL) {
+			pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n");
+			goto out;
+		}
+
+		if (evsel->bpf_obj == NULL) {
+			pr_err("ERROR: raw_syscalls:sys_enter not associated to a BPF object\n");
+			goto out;
+		}
+
+		trace.bpf_obj = evsel->bpf_obj;
+
 		trace__set_bpf_map_filtered_pids(&trace);
 		trace__set_bpf_map_syscalls(&trace);
+		trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented");
 	}
 
 	err = bpf__setup_stdout(trace.evlist);
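Note: trace__find_bpf_program_by_title() is added elsewhere in this patch; by analogy with trace__find_bpf_map_by_name() above it presumably reduces to a lookup in trace->bpf_obj, roughly (a sketch, assuming the libbpf bpf_object__find_program_by_title() of this era):

	static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
	{
		if (trace->bpf_obj == NULL)
			return NULL;

		return bpf_object__find_program_by_title(trace->bpf_obj, name);
	}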
@@ -3909,7 +4249,7 @@ int cmd_trace(int argc, const char **argv)
 	err = -1;
 
 	if (map_dump_str) {
-		trace.dump.map = bpf__find_map_by_name(map_dump_str);
+		trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str);
 		if (trace.dump.map == NULL) {
 			pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str);
 			goto out;
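Note: with the --map-dump lookup now scoped to trace->bpf_obj, dumping stays the usual libbpf key walk. A generic sketch, with hypothetical u32 keys and u64 values (real key/value sizes depend on the map):

	int fd = bpf_map__fd(trace.dump.map);
	__u32 key, next_key;
	__u64 value;
	void *prev_key = NULL;

	while (bpf_map_get_next_key(fd, prev_key, &next_key) == 0) {
		if (bpf_map_lookup_elem(fd, &next_key, &value) == 0)
			printf("%u: %llu\n", next_key, (unsigned long long)value);
		key = next_key;
		prev_key = &key;
	}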
@@ -3942,7 +4282,7 @@ int cmd_trace(int argc, const char **argv)
 		symbol_conf.use_callchain = true;
 	}
 
-	if (trace.evlist->nr_entries > 0) {
+	if (trace.evlist->core.nr_entries > 0) {
 		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
 		if (evlist__set_syscall_tp_fields(trace.evlist)) {
 			perror("failed to set syscalls:* tracepoint fields");
@@ -3977,11 +4317,26 @@ int cmd_trace(int argc, const char **argv)
 
 	if (trace.syscalls.events.augmented->priv == NULL &&
 	    strstr(perf_evsel__name(evsel), "syscalls:sys_enter")) {
-		struct perf_evsel *augmented = trace.syscalls.events.augmented;
+		struct evsel *augmented = trace.syscalls.events.augmented;
 		if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) ||
 		    perf_evsel__init_augmented_syscall_tp_args(augmented))
 			goto out;
+		/*
+		 * Augmented is __augmented_syscalls__ BPF_OUTPUT event
+		 * Above we made sure we can get from the payload the tp fields
+		 * that we get from syscalls:sys_enter tracefs format file.
+		 */
 		augmented->handler = trace__sys_enter;
+		/*
+		 * Now we do the same for the *syscalls:sys_enter event so that
+		 * if we handle it directly, i.e. if the BPF prog returns 0 so
+		 * as not to filter it, then we'll handle it just like we would
+		 * for the BPF_OUTPUT one:
+		 */
+		if (perf_evsel__init_augmented_syscall_tp(evsel, evsel) ||
+		    perf_evsel__init_augmented_syscall_tp_args(evsel))
+			goto out;
+		evsel->handler = trace__sys_enter;
 	}
 
 	if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
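Note: the comments in the hunk above rely on the augmented BPF_OUTPUT payload starting with the same fields that the syscalls:sys_enter tracefs format file describes, so one handler can parse both event flavours. The layout the augmented BPF program emits is roughly (per tools/perf/examples/bpf/augmented_raw_syscalls.c of this era; treat the exact layout as an assumption here):

	struct syscall_enter_args {
		unsigned long long common_tp_fields;
		long		   syscall_nr;
		unsigned long	   args[6];
	};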
@@ -4025,7 +4380,7 @@ init_augmented_syscall_tp:
 	trace.summary = trace.summary_only;
 
 	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
-	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
+	    trace.evlist->core.nr_entries == 0 /* Was --events used? */) {
 		trace.trace_syscalls = true;
 	}
 
@@ -4037,6 +4392,10 @@ init_augmented_syscall_tp:
 		}
 	}
 
+	err = evswitch__init(&trace.evswitch, trace.evlist, stderr);
+	if (err)
+		goto out_close;
+
 	err = target__validate(&trace.opts.target);
 	if (err) {
 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));