Diffstat (limited to 'tools/perf/builtin-trace.c')
 tools/perf/builtin-trace.c | 637
 1 file changed, 498 insertions(+), 139 deletions(-)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 4f0bbffee05f..0f633f0d6be8 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1,4 +1,3 @@
-// SPDX-License-Identifier: GPL-2.0-only
 /*
  * builtin-trace.c
  *
@@ -15,6 +14,7 @@
  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
  */
 
+#include "util/record.h"
 #include <traceevent/event-parse.h>
 #include <api/fs/tracing_path.h>
 #include <bpf/bpf.h>
@@ -25,9 +25,12 @@
 #include "util/color.h"
 #include "util/config.h"
 #include "util/debug.h"
+#include "util/dso.h"
 #include "util/env.h"
 #include "util/event.h"
 #include "util/evlist.h"
+#include "util/evswitch.h"
+#include <subcmd/pager.h>
 #include <subcmd/exec-cmd.h>
 #include "util/machine.h"
 #include "util/map.h"
@@ -40,6 +43,8 @@
 #include "util/intlist.h"
 #include "util/thread_map.h"
 #include "util/stat.h"
+#include "util/tool.h"
+#include "util/util.h"
 #include "trace/beauty/beauty.h"
 #include "trace-event.h"
 #include "util/parse-events.h"
@@ -49,6 +54,7 @@
 #include "string2.h"
 #include "syscalltbl.h"
 #include "rb_resort.h"
+#include "../perf.h"
 
 #include <errno.h>
 #include <inttypes.h>
@@ -80,28 +86,34 @@ struct trace {
 	struct perf_tool	tool;
 	struct syscalltbl	*sctbl;
 	struct {
-		int		max;
 		struct syscall  *table;
 		struct bpf_map  *map;
+		struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY
+			struct bpf_map  *sys_enter,
+					*sys_exit;
+		} prog_array;
 		struct {
-			struct perf_evsel *sys_enter,
+			struct evsel *sys_enter,
 				     *sys_exit,
 				     *augmented;
 		} events;
+		struct bpf_program *unaugmented_prog;
 	} syscalls;
 	struct {
 		struct bpf_map *map;
 	} dump;
 	struct record_opts	opts;
-	struct perf_evlist	*evlist;
+	struct evlist		*evlist;
 	struct machine		*host;
 	struct thread		*current;
+	struct bpf_object	*bpf_obj;
 	struct cgroup		*cgroup;
 	u64			base_time;
 	FILE			*output;
 	unsigned long		nr_events;
 	unsigned long		nr_events_printed;
 	unsigned long		max_events;
+	struct evswitch		evswitch;
 	struct strlist		*ev_qualifier;
 	struct {
 		size_t		nr;
@@ -122,6 +134,7 @@ struct trace {
 	unsigned int		min_stack;
 	int			raw_augmented_syscalls_args_size;
 	bool			raw_augmented_syscalls;
+	bool			fd_path_disabled;
 	bool			sort_events;
 	bool			not_ev_qualifier;
 	bool			live;
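[Editor's note] The new syscalls.prog_array maps are BPF_MAP_TYPE_PROG_ARRAY maps keyed by syscall id: trace__init_syscalls_bpf_prog_array_maps() (further down in this patch) stores one program fd per slot, and the generic raw_syscalls:sys_{enter,exit} BPF programs tail-call through the array into a per-syscall augmenter, with the "unaugmented" program as fallback. A minimal sketch of the BPF side of such a dispatch, with illustrative map/section names rather than the ones perf actually ships:

```c
// Sketch only: per-syscall dispatch through a BPF_MAP_TYPE_PROG_ARRAY.
// Map name, section name and max_entries are illustrative assumptions.
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 512);	/* one slot per syscall id */
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} syscalls_sys_enter SEC(".maps");

struct syscall_enter_args {
	unsigned long long common_tp_fields;
	long		   syscall_nr;
	unsigned long	   args[6];
};

SEC("tracepoint/raw_syscalls/sys_enter")
int sys_enter(struct syscall_enter_args *args)
{
	/*
	 * Jump to the augmenter installed for this syscall id; if the
	 * slot is empty the tail call is a no-op and we fall through,
	 * i.e. the event goes out unaugmented.
	 */
	bpf_tail_call(args, &syscalls_sys_enter, args->syscall_nr);
	return 0;
}

char _license[] SEC("license") = "GPL";
```

User space then only has to bpf_map_update_elem() a program fd into a slot to switch the augmenter for that syscall, which is exactly what the second half of this patch does.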
@@ -237,7 +250,7 @@ struct syscall_tp {
 	};
 };
 
-static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
+static int perf_evsel__init_tp_uint_field(struct evsel *evsel,
 					  struct tp_field *field,
 					  const char *name)
 {
@@ -253,7 +266,7 @@ static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
 	({ struct syscall_tp *sc = evsel->priv;\
 	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
 
-static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
+static int perf_evsel__init_tp_ptr_field(struct evsel *evsel,
 					 struct tp_field *field,
 					 const char *name)
 {
@@ -269,13 +282,13 @@ static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
 	({ struct syscall_tp *sc = evsel->priv;\
 	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
 
-static void perf_evsel__delete_priv(struct perf_evsel *evsel)
+static void evsel__delete_priv(struct evsel *evsel)
 {
 	zfree(&evsel->priv);
-	perf_evsel__delete(evsel);
+	evsel__delete(evsel);
 }
 
-static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel)
+static int perf_evsel__init_syscall_tp(struct evsel *evsel)
 {
 	struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
 
@@ -292,7 +305,7 @@ out_delete:
 	return -ENOENT;
 }
 
-static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel, struct perf_evsel *tp)
+static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
 {
 	struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
 
@@ -314,21 +327,21 @@ out_delete:
 	return -EINVAL;
 }
 
-static int perf_evsel__init_augmented_syscall_tp_args(struct perf_evsel *evsel)
+static int perf_evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
 {
 	struct syscall_tp *sc = evsel->priv;
 
 	return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
 }
 
-static int perf_evsel__init_augmented_syscall_tp_ret(struct perf_evsel *evsel)
+static int perf_evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
 {
 	struct syscall_tp *sc = evsel->priv;
 
 	return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
 }
 
-static int perf_evsel__init_raw_syscall_tp(struct perf_evsel *evsel, void *handler)
+static int perf_evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
 {
 	evsel->priv = malloc(sizeof(struct syscall_tp));
 	if (evsel->priv != NULL) {
@@ -346,9 +359,9 @@ out_delete:
 	return -ENOENT;
 }
 
-static struct perf_evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
+static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
 {
-	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
+	struct evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
 
 	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
 	if (IS_ERR(evsel))
@@ -363,7 +376,7 @@ static struct perf_evsel *perf_evsel__raw_syscall_newtp(const char *direction, v
 	return evsel;
 
 out_delete:
-	perf_evsel__delete_priv(evsel);
+	evsel__delete_priv(evsel);
 	return NULL;
 }
 
@@ -688,6 +701,10 @@ struct syscall_arg_fmt {
 static struct syscall_fmt {
 	const char *name;
 	const char *alias;
+	struct {
+		const char *sys_enter,
+			   *sys_exit;
+	} bpf_prog_name;
 	struct syscall_arg_fmt arg[6];
 	u8 nr_args;
 	bool errpid;
@@ -700,7 +717,9 @@
 	  .arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, /* code */ },
 		   [1] = { .scnprintf = SCA_PTR, /* arg2 */ }, }, },
 	{ .name	    = "bind",
-	  .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ }, }, },
+	  .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
+		   [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ },
+		   [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
 	{ .name	    = "bpf",
 	  .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
 	{ .name	    = "brk", .hexret = true,
@@ -716,7 +735,9 @@
 	{ .name	    = "close",
 	  .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
 	{ .name	    = "connect",
-	  .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ }, }, },
+	  .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
+		   [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ },
+		   [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
 	{ .name	    = "epoll_ctl",
 	  .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
 	{ .name	    = "eventfd2",
@@ -882,6 +903,7 @@
 	  .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ },
 		   [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
 	{ .name	    = "select", .timeout = true, },
+	{ .name	    = "sendfile", .alias = "sendfile64", },
 	{ .name	    = "sendmmsg",
 	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
 	{ .name	    = "sendmsg",
@@ -960,13 +982,19 @@ static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
  * is_exit: is this "exit" or "exit_group"?
  * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
  * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
+ * nonexistent: Just a hole in the syscall table, syscall id not allocated
  */
 struct syscall {
 	struct tep_event    *tp_format;
 	int		    nr_args;
 	int		    args_size;
+	struct {
+		struct bpf_program *sys_enter,
+				   *sys_exit;
+	} bpf_prog;
 	bool		    is_exit;
 	bool		    is_open;
+	bool		    nonexistent;
 	struct tep_format_field *args;
 	const char	    *name;
 	struct syscall_fmt  *fmt;
@@ -1163,7 +1191,7 @@ static const char *thread__fd_path(struct thread *thread, int fd,
 {
 	struct thread_trace *ttrace = thread__priv(thread);
 
-	if (ttrace == NULL)
+	if (ttrace == NULL || trace->fd_path_disabled)
 		return NULL;
 
 	if (fd < 0)
@@ -1359,7 +1387,7 @@
 
 	if (symbol_conf.kptr_restrict) {
 		pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
-			   "Check /proc/sys/kernel/kptr_restrict.\n\n"
+			   "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
			   "Kernel samples will not be resolved.\n");
 		machine->kptr_restrict_warned = true;
 		return NULL;
@@ -1368,7 +1396,7 @@ static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long l
 	return machine__resolve_kernel_addr(vmachine, addrp, modp);
 }
 
-static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
+static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
 {
 	int err = symbol__init(NULL);
 
@@ -1384,7 +1412,7 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 		goto out;
 
 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
-					    evlist->threads, trace__tool_process, false,
+					    evlist->core.threads, trace__tool_process, false,
 					    1);
 out:
 	if (err)
@@ -1471,29 +1499,22 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 	struct syscall *sc;
 	const char *name = syscalltbl__name(trace->sctbl, id);
 
-	if (name == NULL)
-		return -1;
-
-	if (id > trace->syscalls.max) {
-		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
-
-		if (nsyscalls == NULL)
-			return -1;
+	if (trace->syscalls.table == NULL) {
+		trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
+		if (trace->syscalls.table == NULL)
+			return -ENOMEM;
+	}
 
-		if (trace->syscalls.max != -1) {
-			memset(nsyscalls + trace->syscalls.max + 1, 0,
-			       (id - trace->syscalls.max) * sizeof(*sc));
-		} else {
-			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
-		}
+	sc = trace->syscalls.table + id;
+	if (sc->nonexistent)
+		return 0;
 
-		trace->syscalls.table = nsyscalls;
-		trace->syscalls.max = id;
+	if (name == NULL) {
+		sc->nonexistent = true;
+		return 0;
 	}
 
-	sc = trace->syscalls.table + id;
 	sc->name = name;
-
 	sc->fmt = syscall_fmt__find(sc->name);
 
 	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
@@ -1505,10 +1526,10 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 	}
 
 	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
-		return -1;
+		return -ENOMEM;
 
 	if (IS_ERR(sc->tp_format))
-		return -1;
+		return PTR_ERR(sc->tp_format);
 
 	sc->args = sc->tp_format->format.fields;
 	/*
@@ -1527,6 +1548,13 @@
 	return syscall__set_arg_fmts(sc);
 }
 
+static int intcmp(const void *a, const void *b)
+{
+	const int *one = a, *another = b;
+
+	return *one - *another;
+}
+
 static int trace__validate_ev_qualifier(struct trace *trace)
 {
 	int err = 0;
@@ -1590,6 +1618,7 @@ matches:
 	}
 
 	trace->ev_qualifier_ids.nr = nr_used;
+	qsort(trace->ev_qualifier_ids.entries, nr_used, sizeof(int), intcmp);
 out:
 	if (printed_invalid_prefix)
 		pr_debug("\n");
@@ -1600,6 +1629,22 @@ out_free:
 	goto out;
 }
 
+static __maybe_unused bool trace__syscall_enabled(struct trace *trace, int id)
+{
+	bool in_ev_qualifier;
+
+	if (trace->ev_qualifier_ids.nr == 0)
+		return true;
+
+	in_ev_qualifier = bsearch(&id, trace->ev_qualifier_ids.entries,
+				  trace->ev_qualifier_ids.nr, sizeof(int), intcmp) != NULL;
+
+	if (in_ev_qualifier)
+		return !trace->not_ev_qualifier;
+
+	return trace->not_ev_qualifier;
+}
+
 /*
  * args is to be interpreted as a series of longs but we need to handle
  * 8-byte unaligned accesses. args points to raw_data within the event
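[Editor's note] trace__syscall_enabled() above relies on trace__validate_ev_qualifier() having qsort()ed ev_qualifier_ids with the very comparator that bsearch() uses: bsearch() is only defined on arrays sorted under the same ordering. A self-contained sketch of the pattern (the syscall ids are illustrative):

```c
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Same comparator for sorting and searching; syscall ids are small,
 * so the subtraction cannot overflow. */
static int intcmp(const void *a, const void *b)
{
	const int *one = a, *another = b;

	return *one - *another;
}

static bool id_in_set(const int *entries, size_t nr, int id)
{
	return bsearch(&id, entries, nr, sizeof(int), intcmp) != NULL;
}

int main(void)
{
	int ids[] = { 257, 3, 2, 42 };	/* illustrative syscall ids */
	size_t nr = sizeof(ids) / sizeof(ids[0]);

	qsort(ids, nr, sizeof(int), intcmp);	/* must precede bsearch() */
	printf("3 in set: %d\n", id_in_set(ids, nr, 3));	/* 1 */
	printf("5 in set: %d\n", id_in_set(ids, nr, 5));	/* 0 */
	return 0;
}
```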
@@ -1738,13 +1783,14 @@ next_arg:
 	return printed;
 }
 
-typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
+typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel,
 				  union perf_event *event,
 				  struct perf_sample *sample);
 
 static struct syscall *trace__syscall_info(struct trace *trace,
-					   struct perf_evsel *evsel, int id)
+					   struct evsel *evsel, int id)
 {
+	int err = 0;
 
 	if (id < 0) {
 
@@ -1766,19 +1812,28 @@ static struct syscall *trace__syscall_info(struct trace *trace,
 		return NULL;
 	}
 
-	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
-	    trace__read_syscall_info(trace, id))
+	err = -EINVAL;
+
+	if (id > trace->sctbl->syscalls.max_id)
+		goto out_cant_read;
+
+	if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
+	    (err = trace__read_syscall_info(trace, id)) != 0)
 		goto out_cant_read;
 
-	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
+	if (trace->syscalls.table[id].name == NULL) {
+		if (trace->syscalls.table[id].nonexistent)
+			return NULL;
 		goto out_cant_read;
+	}
 
 	return &trace->syscalls.table[id];
 
 out_cant_read:
 	if (verbose > 0) {
-		fprintf(trace->output, "Problems reading syscall %d", id);
-		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
+		char sbuf[STRERR_BUFSIZE];
+		fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf)));
+		if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL)
 			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
 		fputs(" information\n", trace->output);
 	}
@@ -1839,7 +1894,7 @@ static int trace__printf_interrupted_entry(struct trace *trace)
 	return printed;
 }
 
-static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
+static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
 				 struct perf_sample *sample, struct thread *thread)
 {
 	int printed = 0;
@@ -1882,7 +1937,7 @@ static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sam
 	return augmented_args;
 }
 
-static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
+static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
 			    union perf_event *event __maybe_unused,
 			    struct perf_sample *sample)
 {
@@ -1961,7 +2016,7 @@ out_put:
 	return err;
 }
 
-static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evsel,
+static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
 				    struct perf_sample *sample)
 {
 	struct thread_trace *ttrace;
@@ -1994,13 +2049,13 @@ out_put:
 	return err;
 }
 
-static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
+static int trace__resolve_callchain(struct trace *trace, struct evsel *evsel,
 				    struct perf_sample *sample,
 				    struct callchain_cursor *cursor)
 {
 	struct addr_location al;
-	int max_stack = evsel->attr.sample_max_stack ?
-			evsel->attr.sample_max_stack :
+	int max_stack = evsel->core.attr.sample_max_stack ?
+			evsel->core.attr.sample_max_stack :
 			trace->max_stack;
 	int err;
 
@@ -2022,7 +2077,7 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam
 	return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
 }
 
-static const char *errno_to_name(struct perf_evsel *evsel, int err)
+static const char *errno_to_name(struct evsel *evsel, int err)
 {
 	struct perf_env *env = perf_evsel__env(evsel);
 	const char *arch_name = perf_env__arch(env);
@@ -2030,7 +2085,7 @@ static const char *errno_to_name(struct perf_evsel *evsel, int err)
 	return arch_syscalls__strerrno(arch_name, err);
 }
 
-static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
+static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
 			   union perf_event *event __maybe_unused,
 			   struct perf_sample *sample)
 {
@@ -2058,7 +2113,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 
 	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
 
-	if (sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
+	if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
 		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
 		ttrace->filename.pending_open = false;
 		++trace->stats.vfs_getname;
@@ -2164,7 +2219,7 @@ out_put:
 	return err;
 }
 
-static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
+static int trace__vfs_getname(struct trace *trace, struct evsel *evsel,
 			      union perf_event *event __maybe_unused,
 			      struct perf_sample *sample)
 {
@@ -2225,7 +2280,7 @@ out:
 	return 0;
 }
 
-static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
+static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
 				     union perf_event *event __maybe_unused,
 				     struct perf_sample *sample)
 {
@@ -2287,7 +2342,7 @@ static void bpf_output__fprintf(struct trace *trace,
 	++trace->nr_events_printed;
 }
 
-static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
+static int trace__event_handler(struct trace *trace, struct evsel *evsel,
 				union perf_event *event __maybe_unused,
 				struct perf_sample *sample)
 {
@@ -2353,8 +2408,8 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 		++trace->nr_events_printed;
 
 		if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
-			perf_evsel__disable(evsel);
-			perf_evsel__close(evsel);
+			evsel__disable(evsel);
+			evsel__close(evsel);
 		}
 	}
 }
@@ -2389,7 +2444,7 @@ static void print_location(FILE *f, struct perf_sample *sample,
 }
 
 static int trace__pgfault(struct trace *trace,
-			  struct perf_evsel *evsel,
+			  struct evsel *evsel,
 			  union perf_event *event __maybe_unused,
 			  struct perf_sample *sample)
 {
@@ -2415,7 +2470,7 @@ static int trace__pgfault(struct trace *trace,
 	if (ttrace == NULL)
 		goto out_put;
 
-	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
+	if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
 		ttrace->pfmaj++;
 	else
 		ttrace->pfmin++;
@@ -2428,7 +2483,7 @@
 	trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
 
 	fprintf(trace->output, "%sfault [",
-		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
+		evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
 		"maj" : "min");
 
 	print_location(trace->output, sample, &al, false, true);
@@ -2464,7 +2519,7 @@ out_put:
 }
 
 static void trace__set_base_time(struct trace *trace,
-				 struct perf_evsel *evsel,
+				 struct evsel *evsel,
 				 struct perf_sample *sample)
 {
 	/*
@@ -2476,14 +2531,14 @@ static void trace__set_base_time(struct trace *trace,
 	 * appears in our event stream (vfs_getname comes to mind).
 	 */
 	if (trace->base_time == 0 && !trace->full_time &&
-	    (evsel->attr.sample_type & PERF_SAMPLE_TIME))
+	    (evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
 		trace->base_time = sample->time;
 }
 
 static int trace__process_sample(struct perf_tool *tool,
 				 union perf_event *event,
 				 struct perf_sample *sample,
-				 struct perf_evsel *evsel,
+				 struct evsel *evsel,
 				 struct machine *machine __maybe_unused)
 {
 	struct trace *trace = container_of(tool, struct trace, tool);
@@ -2569,10 +2624,10 @@ static int trace__record(struct trace *trace, int argc, const char **argv)
 
 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
 
-static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
+static bool evlist__add_vfs_getname(struct evlist *evlist)
 {
 	bool found = false;
-	struct perf_evsel *evsel, *tmp;
+	struct evsel *evsel, *tmp;
 	struct parse_events_error err = { .idx = 0, };
 	int ret = parse_events(evlist, "probe:vfs_getname*", &err);
 
@@ -2589,17 +2644,17 @@ static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
 			continue;
 		}
 
-		list_del_init(&evsel->node);
+		list_del_init(&evsel->core.node);
 		evsel->evlist = NULL;
-		perf_evsel__delete(evsel);
+		evsel__delete(evsel);
 	}
 
 	return found;
 }
 
-static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
+static struct evsel *perf_evsel__new_pgfault(u64 config)
 {
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 	struct perf_event_attr attr = {
 		.type = PERF_TYPE_SOFTWARE,
 		.mmap_data = 1,
@@ -2610,7 +2665,7 @@ static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
 
 	event_attr_init(&attr);
 
-	evsel = perf_evsel__new(&attr);
+	evsel = evsel__new(&attr);
 	if (evsel)
 		evsel->handler = trace__pgfault;
 
@@ -2620,7 +2675,7 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
 {
 	const u32 type = event->header.type;
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 
 	if (type != PERF_RECORD_SAMPLE) {
 		trace__process_event(trace, trace->host, event, sample);
 		return;
@@ -2633,9 +2688,12 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
 		return;
 	}
 
+	if (evswitch__discard(&trace->evswitch, evsel))
+		return;
+
 	trace__set_base_time(trace, evsel, sample);
 
-	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
+	if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
 	    sample->raw_data == NULL) {
 		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
 			perf_evsel__name(evsel), sample->tid,
@@ -2652,8 +2710,8 @@
 static int trace__add_syscall_newtp(struct trace *trace)
 {
 	int ret = -1;
-	struct perf_evlist *evlist = trace->evlist;
-	struct perf_evsel *sys_enter, *sys_exit;
+	struct evlist *evlist = trace->evlist;
+	struct evsel *sys_enter, *sys_exit;
 
 	sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
 	if (sys_enter == NULL)
@@ -2672,8 +2730,8 @@ static int trace__add_syscall_newtp(struct trace *trace)
 	perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
 	perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
 
-	perf_evlist__add(evlist, sys_enter);
-	perf_evlist__add(evlist, sys_exit);
+	evlist__add(evlist, sys_enter);
+	evlist__add(evlist, sys_exit);
 
 	if (callchain_param.enabled && !trace->kernel_syscallchains) {
 		/*
@@ -2681,7 +2739,7 @@
 		 * leading to the syscall, allow overriding that for
 		 * debugging reasons using --kernel_syscall_callchains
 		 */
-		sys_exit->attr.exclude_callchain_kernel = 1;
+		sys_exit->core.attr.exclude_callchain_kernel = 1;
 	}
 
 	trace->syscalls.events.sys_enter = sys_enter;
@@ -2692,16 +2750,16 @@ out:
 	return ret;
 
 out_delete_sys_exit:
-	perf_evsel__delete_priv(sys_exit);
+	evsel__delete_priv(sys_exit);
 out_delete_sys_enter:
-	perf_evsel__delete_priv(sys_enter);
+	evsel__delete_priv(sys_enter);
 	goto out;
 }
 
 static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
 {
 	int err = -1;
-	struct perf_evsel *sys_exit;
+	struct evsel *sys_exit;
 	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
 						trace->ev_qualifier_ids.nr,
 						trace->ev_qualifier_ids.entries);
@@ -2724,6 +2782,70 @@ out_enomem:
 }
 
 #ifdef HAVE_LIBBPF_SUPPORT
+static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
+{
+	if (trace->bpf_obj == NULL)
+		return NULL;
+
+	return bpf_object__find_program_by_title(trace->bpf_obj, name);
+}
+
+static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc,
+							const char *prog_name, const char *type)
+{
+	struct bpf_program *prog;
+
+	if (prog_name == NULL) {
+		char default_prog_name[256];
+		scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name);
+		prog = trace__find_bpf_program_by_title(trace, default_prog_name);
+		if (prog != NULL)
+			goto out_found;
+		if (sc->fmt && sc->fmt->alias) {
+			scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias);
+			prog = trace__find_bpf_program_by_title(trace, default_prog_name);
+			if (prog != NULL)
+				goto out_found;
+		}
+		goto out_unaugmented;
+	}
+
+	prog = trace__find_bpf_program_by_title(trace, prog_name);
+
+	if (prog != NULL) {
+out_found:
+		return prog;
+	}
+
+	pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
+		 prog_name, type, sc->name);
+out_unaugmented:
+	return trace->syscalls.unaugmented_prog;
+}
+
+static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
+{
+	struct syscall *sc = trace__syscall_info(trace, NULL, id);
+
+	if (sc == NULL)
+		return;
+
+	sc->bpf_prog.sys_enter = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_enter : NULL, "enter");
+	sc->bpf_prog.sys_exit  = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_exit : NULL, "exit");
+}
+
+static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
+{
+	struct syscall *sc = trace__syscall_info(trace, NULL, id);
+	return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog);
+}
+
+static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
+{
+	struct syscall *sc = trace__syscall_info(trace, NULL, id);
+	return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
+}
+
 static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
 {
 	struct syscall *sc = trace__syscall_info(trace, NULL, id);
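[Editor's note] trace__find_syscall_bpf_prog() above resolves augmenters by ELF section title, composing names such as "!syscalls:sys_enter_open" and falling back first to the syscall's alias, then to the unaugmented program. A hedged user-space sketch of that lookup using libbpf calls of that era; the object path and section name here are assumptions for illustration:

```c
// Sketch: locating a BPF program by its section title, as
// trace__find_bpf_program_by_title() does via libbpf.
#include <bpf/libbpf.h>
#include <stdio.h>

int main(void)
{
	struct bpf_object *obj = bpf_object__open("augmented_raw_syscalls.o");
	struct bpf_program *prog;

	if (libbpf_get_error(obj))
		return 1;

	/* The '!' prefix marks programs that should not be auto-attached;
	 * they are reached only through the prog array tail calls. */
	prog = bpf_object__find_program_by_title(obj, "!syscalls:sys_enter_open");
	printf("augmenter %sfound\n", prog ? "" : "not ");

	bpf_object__close(obj);
	return 0;
}
```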
@@ -2755,8 +2877,10 @@ static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
 	for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
 		int key = trace->ev_qualifier_ids.entries[i];
 
-		if (value.enabled)
+		if (value.enabled) {
 			trace__init_bpf_map_syscall_args(trace, key, &value);
+			trace__init_syscall_bpf_progs(trace, key);
+		}
 
 		err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
 		if (err)
@@ -2795,6 +2919,186 @@
 
 	return __trace__init_syscalls_bpf_map(trace, enabled);
 }
+
+static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
+{
+	struct tep_format_field *field, *candidate_field;
+	int id;
+
+	/*
+	 * We're only interested in syscalls that have a pointer:
+	 */
+	for (field = sc->args; field; field = field->next) {
+		if (field->flags & TEP_FIELD_IS_POINTER)
+			goto try_to_find_pair;
+	}
+
+	return NULL;
+
+try_to_find_pair:
+	for (id = 0; id < trace->sctbl->syscalls.nr_entries; ++id) {
+		struct syscall *pair = trace__syscall_info(trace, NULL, id);
+		struct bpf_program *pair_prog;
+		bool is_candidate = false;
+
+		if (pair == NULL || pair == sc ||
+		    pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
+			continue;
+
+		for (field = sc->args, candidate_field = pair->args;
+		     field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
+			bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
+			     candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;
+
+			if (is_pointer) {
+				if (!candidate_is_pointer) {
+					// The candidate just doesn't copy our pointer arg, might copy other pointers we want.
+					continue;
+				}
+			} else {
+				if (candidate_is_pointer) {
+					// The candidate might copy a pointer we don't have, skip it.
+					goto next_candidate;
+				}
+				continue;
+			}
+
+			if (strcmp(field->type, candidate_field->type))
+				goto next_candidate;
+
+			is_candidate = true;
+		}
+
+		if (!is_candidate)
+			goto next_candidate;
+
+		/*
+		 * Check if the tentative pair syscall augmenter has more pointers, if it has,
+		 * then it may be collecting that and we then can't use it, as it would collect
+		 * more than what is common to the two syscalls.
+		 */
+		if (candidate_field) {
+			for (candidate_field = candidate_field->next; candidate_field; candidate_field = candidate_field->next)
+				if (candidate_field->flags & TEP_FIELD_IS_POINTER)
+					goto next_candidate;
+		}
+
+		pair_prog = pair->bpf_prog.sys_enter;
+		/*
+		 * If the pair isn't enabled, then its bpf_prog.sys_enter will not
+		 * have been searched for, so search it here and if it returns the
+		 * unaugmented one, then ignore it, otherwise we'll reuse that BPF
+		 * program for a filtered syscall on a non-filtered one.
+		 *
+		 * For instance, we have "!syscalls:sys_enter_renameat" and that is
+		 * useful for "renameat2".
+		 */
+		if (pair_prog == NULL) {
+			pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
+			if (pair_prog == trace->syscalls.unaugmented_prog)
+				goto next_candidate;
+		}
+
+		pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
+		return pair_prog;
+next_candidate:
+		continue;
+	}
+
+	return NULL;
+}
+
+static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
+{
+	int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
+	    map_exit_fd  = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
+	int err = 0, key;
+
+	for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
+		int prog_fd;
+
+		if (!trace__syscall_enabled(trace, key))
+			continue;
+
+		trace__init_syscall_bpf_progs(trace, key);
+
+		// It'll get at least the "!raw_syscalls:unaugmented"
+		prog_fd = trace__bpf_prog_sys_enter_fd(trace, key);
+		err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
+		if (err)
+			break;
+		prog_fd = trace__bpf_prog_sys_exit_fd(trace, key);
+		err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY);
+		if (err)
+			break;
+	}
+
+	/*
+	 * Now lets do a second pass looking for enabled syscalls without
+	 * an augmenter that have a signature that is a superset of another
+	 * syscall with an augmenter so that we can auto-reuse it.
+	 *
+	 * I.e. if we have an augmenter for the "open" syscall that has
+	 * this signature:
+	 *
+	 *   int open(const char *pathname, int flags, mode_t mode);
+	 *
+	 * I.e. that will collect just the first string argument, then we
+	 * can reuse it for the 'creat' syscall, that has this signature:
+	 *
+	 *   int creat(const char *pathname, mode_t mode);
+	 *
+	 * and for:
+	 *
+	 *   int stat(const char *pathname, struct stat *statbuf);
+	 *   int lstat(const char *pathname, struct stat *statbuf);
+	 *
+	 * Because the 'open' augmenter will collect the first arg as a string,
+	 * and leave alone all the other args, which already helps with
+	 * beautifying 'stat' and 'lstat''s pathname arg.
+	 *
+	 * Then, in time, when 'stat' gets an augmenter that collects both
+	 * first and second arg (this one on the raw_syscalls:sys_exit prog
+	 * array tail call), then that one will be used.
+	 */
+	for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
+		struct syscall *sc = trace__syscall_info(trace, NULL, key);
+		struct bpf_program *pair_prog;
+		int prog_fd;
+
+		if (sc == NULL || sc->bpf_prog.sys_enter == NULL)
+			continue;
+
+		/*
+		 * For now we're just reusing the sys_enter prog, and if it
+		 * already has an augmenter, we don't need to find one.
+		 */
+		if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
+			continue;
+
+		/*
+		 * Look at all the other syscalls for one that has a signature
+		 * that is close enough that we can share:
+		 */
+		pair_prog = trace__find_usable_bpf_prog_entry(trace, sc);
+		if (pair_prog == NULL)
+			continue;
+
+		sc->bpf_prog.sys_enter = pair_prog;
+
+		/*
+		 * Update the BPF_MAP_TYPE_PROG_ARRAY for raw_syscalls:sys_enter
+		 * with the fd for the program we're reusing:
+		 */
+		prog_fd = bpf_program__fd(sc->bpf_prog.sys_enter);
+		err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
+		if (err)
+			break;
+	}
+
+
+	return err;
+}
 #else
 static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
 {
@@ -2805,6 +3109,17 @@ static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused)
 {
 	return 0;
 }
+
+static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused,
+							    const char *name __maybe_unused)
+{
+	return NULL;
+}
+
+static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
+{
+	return 0;
+}
 #endif // HAVE_LIBBPF_SUPPORT
 
 static int trace__set_ev_qualifier_filter(struct trace *trace)
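[Editor's note] The pairing rules in trace__find_usable_bpf_prog_entry() can be restated over simplified argument descriptors. In the sketch below, struct field is a hypothetical stand-in for struct tep_format_field; it shows why, per the comment in the hunk above, the "open" augmenter (which copies only the pathname) is reusable for "creat":

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct field { const char *type; bool is_pointer; };

/* Reuse rule: every pointer the candidate copies must line up, by position
 * and C type, with a pointer arg of the syscall we want to augment, and the
 * candidate must copy no pointers beyond the common prefix. */
static bool can_reuse_augmenter(const struct field *sc, size_t sc_n,
				const struct field *cand, size_t cand_n)
{
	size_t i, n = sc_n < cand_n ? sc_n : cand_n;
	bool usable = false;

	for (i = 0; i < n; i++) {
		if (sc[i].is_pointer && !cand[i].is_pointer)
			continue;	/* candidate copies less here: fine */
		if (!sc[i].is_pointer && cand[i].is_pointer)
			return false;	/* it would copy a pointer we don't have */
		if (!sc[i].is_pointer)
			continue;	/* neither side copies anything here */
		if (strcmp(sc[i].type, cand[i].type))
			return false;	/* pointer args must share the C type */
		usable = true;
	}

	for (; i < cand_n; i++)		/* trailing pointers would over-collect */
		if (cand[i].is_pointer)
			return false;

	return usable;
}

int main(void)
{
	struct field open_args[]  = { { "const char *", true }, { "int", false }, { "umode_t", false } };
	struct field creat_args[] = { { "const char *", true }, { "umode_t", false } };

	printf("creat can reuse open's augmenter: %d\n",
	       can_reuse_augmenter(creat_args, 2, open_args, 3));	/* prints 1 */
	return 0;
}
```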
@@ -2879,7 +3194,7 @@ static int trace__set_filter_pids(struct trace *trace) | |||
2879 | err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr, | 3194 | err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr, |
2880 | trace->filter_pids.entries); | 3195 | trace->filter_pids.entries); |
2881 | } | 3196 | } |
2882 | } else if (thread_map__pid(trace->evlist->threads, 0) == -1) { | 3197 | } else if (perf_thread_map__pid(trace->evlist->core.threads, 0) == -1) { |
2883 | err = trace__set_filter_loop_pids(trace); | 3198 | err = trace__set_filter_loop_pids(trace); |
2884 | } | 3199 | } |
2885 | 3200 | ||
@@ -2888,7 +3203,7 @@ static int trace__set_filter_pids(struct trace *trace) | |||
2888 | 3203 | ||
2889 | static int __trace__deliver_event(struct trace *trace, union perf_event *event) | 3204 | static int __trace__deliver_event(struct trace *trace, union perf_event *event) |
2890 | { | 3205 | { |
2891 | struct perf_evlist *evlist = trace->evlist; | 3206 | struct evlist *evlist = trace->evlist; |
2892 | struct perf_sample sample; | 3207 | struct perf_sample sample; |
2893 | int err; | 3208 | int err; |
2894 | 3209 | ||
@@ -2946,8 +3261,8 @@ static int ordered_events__deliver_event(struct ordered_events *oe, | |||
2946 | 3261 | ||
2947 | static int trace__run(struct trace *trace, int argc, const char **argv) | 3262 | static int trace__run(struct trace *trace, int argc, const char **argv) |
2948 | { | 3263 | { |
2949 | struct perf_evlist *evlist = trace->evlist; | 3264 | struct evlist *evlist = trace->evlist; |
2950 | struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL; | 3265 | struct evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL; |
2951 | int err = -1, i; | 3266 | int err = -1, i; |
2952 | unsigned long before; | 3267 | unsigned long before; |
2953 | const bool forks = argc > 0; | 3268 | const bool forks = argc > 0; |
@@ -2960,7 +3275,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) | |||
2960 | goto out_error_raw_syscalls; | 3275 | goto out_error_raw_syscalls; |
2961 | 3276 | ||
2962 | if (trace->trace_syscalls) | 3277 | if (trace->trace_syscalls) |
2963 | trace->vfs_getname = perf_evlist__add_vfs_getname(evlist); | 3278 | trace->vfs_getname = evlist__add_vfs_getname(evlist); |
2964 | } | 3279 | } |
2965 | 3280 | ||
2966 | if ((trace->trace_pgfaults & TRACE_PFMAJ)) { | 3281 | if ((trace->trace_pgfaults & TRACE_PFMAJ)) { |
@@ -2968,7 +3283,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) | |||
2968 | if (pgfault_maj == NULL) | 3283 | if (pgfault_maj == NULL) |
2969 | goto out_error_mem; | 3284 | goto out_error_mem; |
2970 | perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); | 3285 | perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); |
2971 | perf_evlist__add(evlist, pgfault_maj); | 3286 | evlist__add(evlist, pgfault_maj); |
2972 | } | 3287 | } |
2973 | 3288 | ||
2974 | if ((trace->trace_pgfaults & TRACE_PFMIN)) { | 3289 | if ((trace->trace_pgfaults & TRACE_PFMIN)) { |
@@ -2976,7 +3291,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) | |||
2976 | if (pgfault_min == NULL) | 3291 | if (pgfault_min == NULL) |
2977 | goto out_error_mem; | 3292 | goto out_error_mem; |
2978 | perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); | 3293 | perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); |
2979 | perf_evlist__add(evlist, pgfault_min); | 3294 | evlist__add(evlist, pgfault_min); |
2980 | } | 3295 | } |
2981 | 3296 | ||
2982 | if (trace->sched && | 3297 | if (trace->sched && |
@@ -3038,7 +3353,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) | |||
3038 | } | 3353 | } |
3039 | } | 3354 | } |
3040 | 3355 | ||
3041 | err = perf_evlist__open(evlist); | 3356 | err = evlist__open(evlist); |
3042 | if (err < 0) | 3357 | if (err < 0) |
3043 | goto out_error_open; | 3358 | goto out_error_open; |
3044 | 3359 | ||
@@ -3059,6 +3374,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv) | |||
3059 | if (trace->syscalls.map) | 3374 | if (trace->syscalls.map) |
3060 | trace__init_syscalls_bpf_map(trace); | 3375 | trace__init_syscalls_bpf_map(trace); |
3061 | 3376 | ||
3377 | if (trace->syscalls.prog_array.sys_enter) | ||
3378 | trace__init_syscalls_bpf_prog_array_maps(trace); | ||
3379 | |||
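
trace__init_syscalls_bpf_prog_array_maps() fills the new per-syscall BPF_MAP_TYPE_PROG_ARRAY maps. The idea, sketched below under stated assumptions, is tail-call dispatch: the kernel-side sys_enter program jumps to whatever program is installed at the syscall-id slot, and falls through to the unaugmented default when the slot is empty. Map names and the args layout are illustrative, not copied from augmented_raw_syscalls.c:

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>	/* SEC(), bpf_tail_call(); assuming the
					 * modern libbpf header, perf's own BPF
					 * headers of the time differed a bit */

	struct syscall_enter_args {	/* illustrative tracepoint layout */
		unsigned long long common_tp_fields;
		long		   syscall_nr;
		unsigned long	   args[6];
	};

	struct bpf_map_def SEC("maps") syscalls_sys_enter = {
		.type	      = BPF_MAP_TYPE_PROG_ARRAY,
		.key_size     = sizeof(int),
		.value_size   = sizeof(int),
		.max_entries  = 512,
	};

	SEC("raw_syscalls:sys_enter")
	int sys_enter(struct syscall_enter_args *args)
	{
		/* Jump to the program installed for this syscall id, if any. */
		bpf_tail_call(args, &syscalls_sys_enter, args->syscall_nr);
		/* Only reached when the slot is empty: the unaugmented default. */
		return 0;
	}
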
3062 | if (trace->ev_qualifier_ids.nr > 0) { | 3380 | if (trace->ev_qualifier_ids.nr > 0) { |
3063 | err = trace__set_ev_qualifier_filter(trace); | 3381 | err = trace__set_ev_qualifier_filter(trace); |
3064 | if (err < 0) | 3382 | if (err < 0) |
@@ -3070,6 +3388,19 @@ static int trace__run(struct trace *trace, int argc, const char **argv) | |||
3070 | } | 3388 | } |
3071 | } | 3389 | } |
3072 | 3390 | ||
3391 | /* | ||
3392 | * If the "close" syscall is not traced, then we will not have the | ||
3393 | * opportunity, in syscall_arg__scnprintf_close_fd(), to invalidate the | ||
3394 | * fd->pathname table, and we would end up showing the last value set by | ||
3395 | * syscalls opening a pathname and associating it with a descriptor or | ||
3396 | * reading it from /proc/pid/fd/ in cases where that doesn't make | ||
3397 | * sense. | ||
3398 | * | ||
3399 | * So just disable this beautifier (SCA_FD, SCA_FDAT) when 'close' is | ||
3400 | * not in use. | ||
3401 | */ | ||
3402 | trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close")); | ||
3403 | |||
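
For illustration, a self-contained sketch of the invalidation that comment describes: the close beautifier prints the cached pathname for the fd and then drops the entry, so a later fd reusing the same number is not shown with a stale path. All types and helpers below are illustrative stand-ins, not perf's per-thread fd table:

	#include <stdio.h>
	#include <stdlib.h>

	struct fd_cache {		/* stand-in for the fd->pathname table */
		char **paths;
		int    nr;
	};

	static const char *fd_cache__path(struct fd_cache *c, int fd)
	{
		return fd >= 0 && fd < c->nr && c->paths[fd] ? c->paths[fd] : "?";
	}

	static void fd_cache__invalidate(struct fd_cache *c, int fd)
	{
		if (fd >= 0 && fd < c->nr) {
			free(c->paths[fd]);
			c->paths[fd] = NULL;
		}
	}

	/* The close beautifier prints the cached path, then drops the entry. */
	static int scnprintf_close_fd(char *bf, size_t size, struct fd_cache *c, int fd)
	{
		int printed = snprintf(bf, size, "%d<%s>", fd, fd_cache__path(c, fd));

		fd_cache__invalidate(c, fd);	/* the number may be reused at once */
		return printed;
	}
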
3073 | err = perf_evlist__apply_filters(evlist, &evsel); | 3404 | err = perf_evlist__apply_filters(evlist, &evsel); |
3074 | if (err < 0) | 3405 | if (err < 0) |
3075 | goto out_error_apply_filters; | 3406 | goto out_error_apply_filters; |
@@ -3082,30 +3413,30 @@ static int trace__run(struct trace *trace, int argc, const char **argv) | |||
3082 | goto out_error_mmap; | 3413 | goto out_error_mmap; |
3083 | 3414 | ||
3084 | if (!target__none(&trace->opts.target) && !trace->opts.initial_delay) | 3415 | if (!target__none(&trace->opts.target) && !trace->opts.initial_delay) |
3085 | perf_evlist__enable(evlist); | 3416 | evlist__enable(evlist); |
3086 | 3417 | ||
3087 | if (forks) | 3418 | if (forks) |
3088 | perf_evlist__start_workload(evlist); | 3419 | perf_evlist__start_workload(evlist); |
3089 | 3420 | ||
3090 | if (trace->opts.initial_delay) { | 3421 | if (trace->opts.initial_delay) { |
3091 | usleep(trace->opts.initial_delay * 1000); | 3422 | usleep(trace->opts.initial_delay * 1000); |
3092 | perf_evlist__enable(evlist); | 3423 | evlist__enable(evlist); |
3093 | } | 3424 | } |
3094 | 3425 | ||
3095 | trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 || | 3426 | trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 || |
3096 | evlist->threads->nr > 1 || | 3427 | evlist->core.threads->nr > 1 || |
3097 | perf_evlist__first(evlist)->attr.inherit; | 3428 | perf_evlist__first(evlist)->core.attr.inherit; |
3098 | 3429 | ||
3099 | /* | 3430 | /* |
3100 | * Now that we have already used evsel->attr to ask the kernel to set up the | 3431 | * Now that we have already used evsel->core.attr to ask the kernel to set up the |
3101 | * events, let's reuse evsel->attr.sample_max_stack as the limit in | 3432 | * events, let's reuse evsel->core.attr.sample_max_stack as the limit in |
3102 | * trace__resolve_callchain(), allowing per-event max-stack settings | 3433 | * trace__resolve_callchain(), allowing per-event max-stack settings |
3103 | * to override an explicitly set --max-stack global setting. | 3434 | * to override an explicitly set --max-stack global setting. |
3104 | */ | 3435 | */ |
3105 | evlist__for_each_entry(evlist, evsel) { | 3436 | evlist__for_each_entry(evlist, evsel) { |
3106 | if (evsel__has_callchain(evsel) && | 3437 | if (evsel__has_callchain(evsel) && |
3107 | evsel->attr.sample_max_stack == 0) | 3438 | evsel->core.attr.sample_max_stack == 0) |
3108 | evsel->attr.sample_max_stack = trace->max_stack; | 3439 | evsel->core.attr.sample_max_stack = trace->max_stack; |
3109 | } | 3440 | } |
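
With per-event config terms this means that, for example, 'perf trace --max-stack=16 -e probe:vfs_getname/max-stack=2/' (an illustrative invocation, assuming perf's per-event "max-stack" term) keeps the smaller per-event limit, while events that set no term inherit trace->max_stack from the loop above.
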
3110 | again: | 3441 | again: |
3111 | before = trace->nr_events; | 3442 | before = trace->nr_events; |
@@ -3131,7 +3462,7 @@ again: | |||
3131 | goto out_disable; | 3462 | goto out_disable; |
3132 | 3463 | ||
3133 | if (done && !draining) { | 3464 | if (done && !draining) { |
3134 | perf_evlist__disable(evlist); | 3465 | evlist__disable(evlist); |
3135 | draining = true; | 3466 | draining = true; |
3136 | } | 3467 | } |
3137 | } | 3468 | } |
@@ -3157,7 +3488,7 @@ again: | |||
3157 | out_disable: | 3488 | out_disable: |
3158 | thread__zput(trace->current); | 3489 | thread__zput(trace->current); |
3159 | 3490 | ||
3160 | perf_evlist__disable(evlist); | 3491 | evlist__disable(evlist); |
3161 | 3492 | ||
3162 | if (trace->sort_events) | 3493 | if (trace->sort_events) |
3163 | ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL); | 3494 | ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL); |
@@ -3178,7 +3509,7 @@ out_disable: | |||
3178 | out_delete_evlist: | 3509 | out_delete_evlist: |
3179 | trace__symbols__exit(trace); | 3510 | trace__symbols__exit(trace); |
3180 | 3511 | ||
3181 | perf_evlist__delete(evlist); | 3512 | evlist__delete(evlist); |
3182 | cgroup__put(trace->cgroup); | 3513 | cgroup__put(trace->cgroup); |
3183 | trace->evlist = NULL; | 3514 | trace->evlist = NULL; |
3184 | trace->live = false; | 3515 | trace->live = false; |
@@ -3223,7 +3554,7 @@ out_errno: | |||
3223 | 3554 | ||
3224 | static int trace__replay(struct trace *trace) | 3555 | static int trace__replay(struct trace *trace) |
3225 | { | 3556 | { |
3226 | const struct perf_evsel_str_handler handlers[] = { | 3557 | const struct evsel_str_handler handlers[] = { |
3227 | { "probe:vfs_getname", trace__vfs_getname, }, | 3558 | { "probe:vfs_getname", trace__vfs_getname, }, |
3228 | }; | 3559 | }; |
3229 | struct perf_data data = { | 3560 | struct perf_data data = { |
@@ -3232,7 +3563,7 @@ static int trace__replay(struct trace *trace) | |||
3232 | .force = trace->force, | 3563 | .force = trace->force, |
3233 | }; | 3564 | }; |
3234 | struct perf_session *session; | 3565 | struct perf_session *session; |
3235 | struct perf_evsel *evsel; | 3566 | struct evsel *evsel; |
3236 | int err = -1; | 3567 | int err = -1; |
3237 | 3568 | ||
3238 | trace->tool.sample = trace__process_sample; | 3569 | trace->tool.sample = trace__process_sample; |
@@ -3298,10 +3629,10 @@ static int trace__replay(struct trace *trace) | |||
3298 | } | 3629 | } |
3299 | 3630 | ||
3300 | evlist__for_each_entry(session->evlist, evsel) { | 3631 | evlist__for_each_entry(session->evlist, evsel) { |
3301 | if (evsel->attr.type == PERF_TYPE_SOFTWARE && | 3632 | if (evsel->core.attr.type == PERF_TYPE_SOFTWARE && |
3302 | (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ || | 3633 | (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ || |
3303 | evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN || | 3634 | evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN || |
3304 | evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS)) | 3635 | evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS)) |
3305 | evsel->handler = trace__pgfault; | 3636 | evsel->handler = trace__pgfault; |
3306 | } | 3637 | } |
3307 | 3638 | ||
@@ -3523,17 +3854,17 @@ static int parse_pagefaults(const struct option *opt, const char *str, | |||
3523 | return 0; | 3854 | return 0; |
3524 | } | 3855 | } |
3525 | 3856 | ||
3526 | static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) | 3857 | static void evlist__set_evsel_handler(struct evlist *evlist, void *handler) |
3527 | { | 3858 | { |
3528 | struct perf_evsel *evsel; | 3859 | struct evsel *evsel; |
3529 | 3860 | ||
3530 | evlist__for_each_entry(evlist, evsel) | 3861 | evlist__for_each_entry(evlist, evsel) |
3531 | evsel->handler = handler; | 3862 | evsel->handler = handler; |
3532 | } | 3863 | } |
3533 | 3864 | ||
3534 | static int evlist__set_syscall_tp_fields(struct perf_evlist *evlist) | 3865 | static int evlist__set_syscall_tp_fields(struct evlist *evlist) |
3535 | { | 3866 | { |
3536 | struct perf_evsel *evsel; | 3867 | struct evsel *evsel; |
3537 | 3868 | ||
3538 | evlist__for_each_entry(evlist, evsel) { | 3869 | evlist__for_each_entry(evlist, evsel) { |
3539 | if (evsel->priv || !evsel->tp_format) | 3870 | if (evsel->priv || !evsel->tp_format) |
@@ -3660,7 +3991,7 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u | |||
3660 | { | 3991 | { |
3661 | struct trace *trace = opt->value; | 3992 | struct trace *trace = opt->value; |
3662 | 3993 | ||
3663 | if (!list_empty(&trace->evlist->entries)) | 3994 | if (!list_empty(&trace->evlist->core.entries)) |
3664 | return parse_cgroups(opt, str, unset); | 3995 | return parse_cgroups(opt, str, unset); |
3665 | 3996 | ||
3666 | trace->cgroup = evlist__findnew_cgroup(trace->evlist, str); | 3997 | trace->cgroup = evlist__findnew_cgroup(trace->evlist, str); |
@@ -3668,28 +3999,24 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u | |||
3668 | return 0; | 3999 | return 0; |
3669 | } | 4000 | } |
3670 | 4001 | ||
3671 | static struct bpf_map *bpf__find_map_by_name(const char *name) | 4002 | static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name) |
3672 | { | 4003 | { |
3673 | struct bpf_object *obj, *tmp; | 4004 | if (trace->bpf_obj == NULL) |
3674 | 4005 | return NULL; | |
3675 | bpf_object__for_each_safe(obj, tmp) { | ||
3676 | struct bpf_map *map = bpf_object__find_map_by_name(obj, name); | ||
3677 | if (map) | ||
3678 | return map; | ||
3679 | |||
3680 | } | ||
3681 | 4006 | ||
3682 | return NULL; | 4007 | return bpf_object__find_map_by_name(trace->bpf_obj, name); |
3683 | } | 4008 | } |
3684 | 4009 | ||
3685 | static void trace__set_bpf_map_filtered_pids(struct trace *trace) | 4010 | static void trace__set_bpf_map_filtered_pids(struct trace *trace) |
3686 | { | 4011 | { |
3687 | trace->filter_pids.map = bpf__find_map_by_name("pids_filtered"); | 4012 | trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered"); |
3688 | } | 4013 | } |
3689 | 4014 | ||
3690 | static void trace__set_bpf_map_syscalls(struct trace *trace) | 4015 | static void trace__set_bpf_map_syscalls(struct trace *trace) |
3691 | { | 4016 | { |
3692 | trace->syscalls.map = bpf__find_map_by_name("syscalls"); | 4017 | trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls"); |
4018 | trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter"); | ||
4019 | trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit"); | ||
3693 | } | 4020 | } |
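
Both lookups above go through trace__find_bpf_map_by_name(), which now searches only trace->bpf_obj instead of iterating over every loaded BPF object. Underneath it is libbpf's by-name lookup; a small sketch of the pattern, with a helper name of our own:

	#include <bpf/libbpf.h>

	/* Resolve a map by name within one object and hand back its fd for
	 * later bpf_map_update_elem()/bpf_map_lookup_elem() calls. */
	static int find_map_fd(struct bpf_object *obj, const char *name)
	{
		struct bpf_map *map = obj ? bpf_object__find_map_by_name(obj, name) : NULL;

		return map ? bpf_map__fd(map) : -1;
	}
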
3694 | 4021 | ||
3695 | static int trace__config(const char *var, const char *value, void *arg) | 4022 | static int trace__config(const char *var, const char *value, void *arg) |
@@ -3745,9 +4072,6 @@ int cmd_trace(int argc, const char **argv) | |||
3745 | NULL | 4072 | NULL |
3746 | }; | 4073 | }; |
3747 | struct trace trace = { | 4074 | struct trace trace = { |
3748 | .syscalls = { | ||
3749 | .max = -1, | ||
3750 | }, | ||
3751 | .opts = { | 4075 | .opts = { |
3752 | .target = { | 4076 | .target = { |
3753 | .uid = UINT_MAX, | 4077 | .uid = UINT_MAX, |
@@ -3844,11 +4168,12 @@ int cmd_trace(int argc, const char **argv) | |||
3844 | OPT_UINTEGER('D', "delay", &trace.opts.initial_delay, | 4168 | OPT_UINTEGER('D', "delay", &trace.opts.initial_delay, |
3845 | "ms to wait before starting measurement after program " | 4169 | "ms to wait before starting measurement after program " |
3846 | "start"), | 4170 | "start"), |
4171 | OPTS_EVSWITCH(&trace.evswitch), | ||
3847 | OPT_END() | 4172 | OPT_END() |
3848 | }; | 4173 | }; |
3849 | bool __maybe_unused max_stack_user_set = true; | 4174 | bool __maybe_unused max_stack_user_set = true; |
3850 | bool mmap_pages_user_set = true; | 4175 | bool mmap_pages_user_set = true; |
3851 | struct perf_evsel *evsel; | 4176 | struct evsel *evsel; |
3852 | const char * const trace_subcommands[] = { "record", NULL }; | 4177 | const char * const trace_subcommands[] = { "record", NULL }; |
3853 | int err = -1; | 4178 | int err = -1; |
3854 | char bf[BUFSIZ]; | 4179 | char bf[BUFSIZ]; |
@@ -3856,7 +4181,7 @@ int cmd_trace(int argc, const char **argv) | |||
3856 | signal(SIGSEGV, sighandler_dump_stack); | 4181 | signal(SIGSEGV, sighandler_dump_stack); |
3857 | signal(SIGFPE, sighandler_dump_stack); | 4182 | signal(SIGFPE, sighandler_dump_stack); |
3858 | 4183 | ||
3859 | trace.evlist = perf_evlist__new(); | 4184 | trace.evlist = evlist__new(); |
3860 | trace.sctbl = syscalltbl__new(); | 4185 | trace.sctbl = syscalltbl__new(); |
3861 | 4186 | ||
3862 | if (trace.evlist == NULL || trace.sctbl == NULL) { | 4187 | if (trace.evlist == NULL || trace.sctbl == NULL) { |
@@ -3895,8 +4220,23 @@ int cmd_trace(int argc, const char **argv) | |||
3895 | 4220 | ||
3896 | if (evsel) { | 4221 | if (evsel) { |
3897 | trace.syscalls.events.augmented = evsel; | 4222 | trace.syscalls.events.augmented = evsel; |
4223 | |||
4224 | evsel = perf_evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter"); | ||
4225 | if (evsel == NULL) { | ||
4226 | pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n"); | ||
4227 | goto out; | ||
4228 | } | ||
4229 | |||
4230 | if (evsel->bpf_obj == NULL) { | ||
4231 | pr_err("ERROR: raw_syscalls:sys_enter not associated to a BPF object\n"); | ||
4232 | goto out; | ||
4233 | } | ||
4234 | |||
4235 | trace.bpf_obj = evsel->bpf_obj; | ||
4236 | |||
3898 | trace__set_bpf_map_filtered_pids(&trace); | 4237 | trace__set_bpf_map_filtered_pids(&trace); |
3899 | trace__set_bpf_map_syscalls(&trace); | 4238 | trace__set_bpf_map_syscalls(&trace); |
4239 | trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented"); | ||
3900 | } | 4240 | } |
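
trace__find_bpf_program_by_title() itself is not shown in this hunk; a plausible sketch, assuming it wraps the libbpf lookup by ELF section title that existed at the time, scoped to the object found above:

	static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace,
								    const char *title)
	{
		if (trace->bpf_obj == NULL)
			return NULL;

		return bpf_object__find_program_by_title(trace->bpf_obj, title);
	}
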
3901 | 4241 | ||
3902 | err = bpf__setup_stdout(trace.evlist); | 4242 | err = bpf__setup_stdout(trace.evlist); |
@@ -3909,7 +4249,7 @@ int cmd_trace(int argc, const char **argv) | |||
3909 | err = -1; | 4249 | err = -1; |
3910 | 4250 | ||
3911 | if (map_dump_str) { | 4251 | if (map_dump_str) { |
3912 | trace.dump.map = bpf__find_map_by_name(map_dump_str); | 4252 | trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str); |
3913 | if (trace.dump.map == NULL) { | 4253 | if (trace.dump.map == NULL) { |
3914 | pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str); | 4254 | pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str); |
3915 | goto out; | 4255 | goto out; |
@@ -3942,7 +4282,7 @@ int cmd_trace(int argc, const char **argv) | |||
3942 | symbol_conf.use_callchain = true; | 4282 | symbol_conf.use_callchain = true; |
3943 | } | 4283 | } |
3944 | 4284 | ||
3945 | if (trace.evlist->nr_entries > 0) { | 4285 | if (trace.evlist->core.nr_entries > 0) { |
3946 | evlist__set_evsel_handler(trace.evlist, trace__event_handler); | 4286 | evlist__set_evsel_handler(trace.evlist, trace__event_handler); |
3947 | if (evlist__set_syscall_tp_fields(trace.evlist)) { | 4287 | if (evlist__set_syscall_tp_fields(trace.evlist)) { |
3948 | perror("failed to set syscalls:* tracepoint fields"); | 4288 | perror("failed to set syscalls:* tracepoint fields"); |
@@ -3977,11 +4317,26 @@ int cmd_trace(int argc, const char **argv) | |||
3977 | 4317 | ||
3978 | if (trace.syscalls.events.augmented->priv == NULL && | 4318 | if (trace.syscalls.events.augmented->priv == NULL && |
3979 | strstr(perf_evsel__name(evsel), "syscalls:sys_enter")) { | 4319 | strstr(perf_evsel__name(evsel), "syscalls:sys_enter")) { |
3980 | struct perf_evsel *augmented = trace.syscalls.events.augmented; | 4320 | struct evsel *augmented = trace.syscalls.events.augmented; |
3981 | if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) || | 4321 | if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) || |
3982 | perf_evsel__init_augmented_syscall_tp_args(augmented)) | 4322 | perf_evsel__init_augmented_syscall_tp_args(augmented)) |
3983 | goto out; | 4323 | goto out; |
4324 | /* | ||
4325 | * "augmented" is the __augmented_syscalls__ BPF_OUTPUT event. | ||
4326 | * Above we made sure we can get from its payload the tp fields | ||
4327 | * described in the syscalls:sys_enter tracefs format file. | ||
4328 | */ | ||
3984 | augmented->handler = trace__sys_enter; | 4329 | augmented->handler = trace__sys_enter; |
4330 | /* | ||
4331 | * Now we do the same for the syscalls:sys_enter_* event, so that | ||
4332 | * if we handle it directly, i.e. if the BPF prog returns 0 so | ||
4333 | * as not to filter it out, then we'll handle it just like we | ||
4334 | * handle the BPF_OUTPUT one: | ||
4335 | */ | ||
4336 | if (perf_evsel__init_augmented_syscall_tp(evsel, evsel) || | ||
4337 | perf_evsel__init_augmented_syscall_tp_args(evsel)) | ||
4338 | goto out; | ||
4339 | evsel->handler = trace__sys_enter; | ||
3985 | } | 4340 | } |
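
Sketching the BPF side of the scheme those comments describe, under stated assumptions: the per-syscall program either emits an augmented record through the __augmented_syscalls__ BPF_OUTPUT map, or returns 0 so that (per the comment above) the plain tracepoint sample is delivered and handled by the very same trace__sys_enter. Struct layouts, the copied argument, and the map sizing are illustrative, not copied from augmented_raw_syscalls.c:

	#include <stddef.h>
	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>	/* SEC() and helpers; assumed, as above */

	struct syscall_enter_args {	/* illustrative tracepoint layout */
		unsigned long long common_tp_fields;
		long		   syscall_nr;
		unsigned long	   args[6];
	};

	struct augmented_enter {	/* the tp fields plus one copied string */
		struct syscall_enter_args args;
		char			  filename[256];
	};

	struct bpf_map_def SEC("maps") __augmented_syscalls__ = {
		.type	      = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
		.key_size     = sizeof(int),
		.value_size   = sizeof(unsigned int),
		.max_entries  = 0,	/* sized to the number of CPUs at load time */
	};

	SEC("syscalls:sys_enter_openat")
	int sys_enter_openat(struct syscall_enter_args *args)
	{
		struct augmented_enter payload = {};

		bpf_probe_read(&payload.args, sizeof(payload.args), args);
		if (bpf_probe_read_str(payload.filename, sizeof(payload.filename),
				       (const void *)args->args[1]) <= 0)
			return 0;	/* no augmentation: per the comment above,
					 * the plain tracepoint sample goes through
					 * and trace__sys_enter handles it too */

		return bpf_perf_event_output(args, &__augmented_syscalls__,
					     BPF_F_CURRENT_CPU,
					     &payload, sizeof(payload));
	}
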
3986 | 4341 | ||
3987 | if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) { | 4342 | if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) { |
@@ -4025,7 +4380,7 @@ init_augmented_syscall_tp: | |||
4025 | trace.summary = trace.summary_only; | 4380 | trace.summary = trace.summary_only; |
4026 | 4381 | ||
4027 | if (!trace.trace_syscalls && !trace.trace_pgfaults && | 4382 | if (!trace.trace_syscalls && !trace.trace_pgfaults && |
4028 | trace.evlist->nr_entries == 0 /* Was --events used? */) { | 4383 | trace.evlist->core.nr_entries == 0 /* Was --events used? */) { |
4029 | trace.trace_syscalls = true; | 4384 | trace.trace_syscalls = true; |
4030 | } | 4385 | } |
4031 | 4386 | ||
@@ -4037,6 +4392,10 @@ init_augmented_syscall_tp: | |||
4037 | } | 4392 | } |
4038 | } | 4393 | } |
4039 | 4394 | ||
4395 | err = evswitch__init(&trace.evswitch, trace.evlist, stderr); | ||
4396 | if (err) | ||
4397 | goto out_close; | ||
4398 | |||
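
evswitch__init() resolves the --switch-on/--switch-off event names registered via OPTS_EVSWITCH() into evsels. A hedged sketch of the gating this enables in the sample loop; the field names follow our reading of util/evswitch.h and the discard logic is illustrative, not the code in evswitch.c:

	#include <stdbool.h>

	struct evsel;

	struct evswitch {		/* as we read util/evswitch.h */
		struct evsel *on, *off;
		bool	      discarding;	/* starts true when an on event is set */
	};

	/* Discard samples until the on event shows up, then show everything
	 * until the off event is seen, after which discard again. */
	static bool evswitch__discard_sketch(struct evswitch *es, struct evsel *evsel)
	{
		if (es->on && es->discarding) {
			if (es->on != evsel)
				return true;	/* still waiting for the on event */
			es->discarding = false;
		} else if (es->off && !es->discarding && es->off == evsel) {
			es->discarding = true;
		}
		return es->discarding;
	}
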
4040 | err = target__validate(&trace.opts.target); | 4399 | err = target__validate(&trace.opts.target); |
4041 | if (err) { | 4400 | if (err) { |
4042 | target__strerror(&trace.opts.target, err, bf, sizeof(bf)); | 4401 | target__strerror(&trace.opts.target, err, bf, sizeof(bf)); |