summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorArnaldo Carvalho de Melo <acme@redhat.com>2018-12-19 16:54:36 -0500
committerArnaldo Carvalho de Melo <acme@redhat.com>2018-12-21 07:42:46 -0500
commitb9b6a2ea2baf69204a6e5f311e0d24fe3b956f2e (patch)
treecc33bfffc4765548bae52631551455958d486653 /tools
parent14541b1e7e723859ff2c75c6fc10cdbbec6b8c34 (diff)
perf trace: Do not hardcode the size of the tracepoint common_ fields
We shouldn't hardcode the size of the tracepoint common_ fields, use the offset of the 'id'/'__syscall_nr' field in the sys_enter event instead. This caused the augmented syscalls code to fail on a particular build of a PREEMPT_RT_FULL kernel where these extra 'common_migrate_disable' and 'common_padding' fields were before the syscall id one: # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/format name: sys_enter ID: 22 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:unsigned short common_migrate_disable; offset:8; size:2; signed:0; field:unsigned short common_padding; offset:10; size:2; signed:0; field:long id; offset:16; size:8; signed:1; field:unsigned long args[6]; offset:24; size:48; signed:0; print fmt: "NR %ld (%lx, %lx, %lx, %lx, %lx, %lx)", REC->id, REC->args[0], REC->args[1], REC->args[2], REC->args[3], REC->args[4], REC->args[5] # All those 'common_' prefixed fields are zeroed when they hit a BPF tracepoint hook, we better just discard those, i.e. somehow pass an offset to the BPF program from the start of the ctx and make adjustments in the 'perf trace' handlers to adjust the offset of the syscall arg offsets obtained from tracefs. 
Till then, fix it the quick way and add this to the augmented_raw_syscalls.c to get it to work in such kernels: diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 53c233370fae..1f746f931e13 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -38,12 +38,14 @@ struct bpf_map SEC("maps") syscalls = { struct syscall_enter_args { unsigned long long common_tp_fields; + long rt_common_tp_fields; long syscall_nr; unsigned long args[6]; }; struct syscall_exit_args { unsigned long long common_tp_fields; + long rt_common_tp_fields; long syscall_nr; long ret; }; Just to check that this was the case. Fix it properly later, for now remove the hardcoding of the offset in the 'perf trace' side and document the situation with this patch. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Luis Cláudio Gonçalves <lclaudio@redhat.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-2pqavrktqkliu5b9nzouio21@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/builtin-trace.c73
1 files changed, 52 insertions, 21 deletions
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 6689c1a114fe..1e9e886b2811 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -112,8 +112,9 @@ struct trace {
112 } stats; 112 } stats;
113 unsigned int max_stack; 113 unsigned int max_stack;
114 unsigned int min_stack; 114 unsigned int min_stack;
115 bool sort_events; 115 int raw_augmented_syscalls_args_size;
116 bool raw_augmented_syscalls; 116 bool raw_augmented_syscalls;
117 bool sort_events;
117 bool not_ev_qualifier; 118 bool not_ev_qualifier;
118 bool live; 119 bool live;
119 bool full_time; 120 bool full_time;
@@ -283,12 +284,17 @@ out_delete:
283 return -ENOENT; 284 return -ENOENT;
284} 285}
285 286
286static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel) 287static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel, struct perf_evsel *tp)
287{ 288{
288 struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp)); 289 struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
289 290
290 if (evsel->priv != NULL) { /* field, sizeof_field, offsetof_field */ 291 if (evsel->priv != NULL) {
291 if (__tp_field__init_uint(&sc->id, sizeof(long), sizeof(long long), evsel->needs_swap)) 292 struct tep_format_field *syscall_id = perf_evsel__field(tp, "id");
293 if (syscall_id == NULL)
294 syscall_id = perf_evsel__field(tp, "__syscall_nr");
295 if (syscall_id == NULL)
296 goto out_delete;
297 if (__tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
292 goto out_delete; 298 goto out_delete;
293 299
294 return 0; 300 return 0;
@@ -1768,16 +1774,16 @@ static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
1768 return printed; 1774 return printed;
1769} 1775}
1770 1776
1771static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, bool raw_augmented) 1777static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size)
1772{ 1778{
1773 void *augmented_args = NULL; 1779 void *augmented_args = NULL;
1774 /* 1780 /*
1775 * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter 1781 * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter
1776 * and there we get all 6 syscall args plus the tracepoint common 1782 * and there we get all 6 syscall args plus the tracepoint common fields
1777 * fields (sizeof(long)) and the syscall_nr (another long). So we check 1783 * that gets calculated at the start and the syscall_nr (another long).
1778 * if that is the case and if so don't look after the sc->args_size, 1784 * So we check if that is the case and if so don't look after the
1779 * but always after the full raw_syscalls:sys_enter payload, which is 1785 * sc->args_size but always after the full raw_syscalls:sys_enter payload,
1780 * fixed. 1786 * which is fixed.
1781 * 1787 *
1782 * We'll revisit this later to pass s->args_size to the BPF augmenter 1788 * We'll revisit this later to pass s->args_size to the BPF augmenter
1783 * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it 1789 * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it
@@ -1785,7 +1791,7 @@ static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sam
1785 * use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace 1791 * use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace
1786 * traffic to just what is needed for each syscall. 1792 * traffic to just what is needed for each syscall.
1787 */ 1793 */
1788 int args_size = raw_augmented ? (8 * (int)sizeof(long)) : sc->args_size; 1794 int args_size = raw_augmented_args_size ?: sc->args_size;
1789 1795
1790 *augmented_args_size = sample->raw_size - args_size; 1796 *augmented_args_size = sample->raw_size - args_size;
1791 if (*augmented_args_size > 0) 1797 if (*augmented_args_size > 0)
@@ -1839,7 +1845,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1839 * here and avoid using augmented syscalls when the evsel is the raw_syscalls one. 1845 * here and avoid using augmented syscalls when the evsel is the raw_syscalls one.
1840 */ 1846 */
1841 if (evsel != trace->syscalls.events.sys_enter) 1847 if (evsel != trace->syscalls.events.sys_enter)
1842 augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls); 1848 augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
1843 ttrace->entry_time = sample->time; 1849 ttrace->entry_time = sample->time;
1844 msg = ttrace->entry_str; 1850 msg = ttrace->entry_str;
1845 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name); 1851 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
@@ -1897,7 +1903,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evse
1897 goto out_put; 1903 goto out_put;
1898 1904
1899 args = perf_evsel__sc_tp_ptr(evsel, args, sample); 1905 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1900 augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls); 1906 augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
1901 syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread); 1907 syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
1902 fprintf(trace->output, "%s", msg); 1908 fprintf(trace->output, "%s", msg);
1903 err = 0; 1909 err = 0;
@@ -3814,13 +3820,6 @@ int cmd_trace(int argc, const char **argv)
3814 * syscall. 3820 * syscall.
3815 */ 3821 */
3816 if (trace.syscalls.events.augmented) { 3822 if (trace.syscalls.events.augmented) {
3817 evsel = trace.syscalls.events.augmented;
3818
3819 if (perf_evsel__init_augmented_syscall_tp(evsel) ||
3820 perf_evsel__init_augmented_syscall_tp_args(evsel))
3821 goto out;
3822 evsel->handler = trace__sys_enter;
3823
3824 evlist__for_each_entry(trace.evlist, evsel) { 3823 evlist__for_each_entry(trace.evlist, evsel) {
3825 bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0; 3824 bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
3826 3825
@@ -3829,9 +3828,41 @@ int cmd_trace(int argc, const char **argv)
3829 goto init_augmented_syscall_tp; 3828 goto init_augmented_syscall_tp;
3830 } 3829 }
3831 3830
3831 if (strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_enter") == 0) {
3832 struct perf_evsel *augmented = trace.syscalls.events.augmented;
3833 if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) ||
3834 perf_evsel__init_augmented_syscall_tp_args(augmented))
3835 goto out;
3836 augmented->handler = trace__sys_enter;
3837 }
3838
3832 if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) { 3839 if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
3840 struct syscall_tp *sc;
3833init_augmented_syscall_tp: 3841init_augmented_syscall_tp:
3834 perf_evsel__init_augmented_syscall_tp(evsel); 3842 if (perf_evsel__init_augmented_syscall_tp(evsel, evsel))
3843 goto out;
3844 sc = evsel->priv;
3845 /*
3846 * For now with BPF raw_augmented we hook into
3847 * raw_syscalls:sys_enter and there we get all
3848 * 6 syscall args plus the tracepoint common
3849 * fields and the syscall_nr (another long).
3850 * So we check if that is the case and if so
3851 * don't look after the sc->args_size but
3852 * always after the full raw_syscalls:sys_enter
3853 * payload, which is fixed.
3854 *
3855 * We'll revisit this later to pass
3856 * s->args_size to the BPF augmenter (now
3857 * tools/perf/examples/bpf/augmented_raw_syscalls.c,
3858 * so that it copies only what we need for each
3859 * syscall, like what happens when we use
3860 * syscalls:sys_enter_NAME, so that we reduce
3861 * the kernel/userspace traffic to just what is
3862 * needed for each syscall.
3863 */
3864 if (trace.raw_augmented_syscalls)
3865 trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset;
3835 perf_evsel__init_augmented_syscall_tp_ret(evsel); 3866 perf_evsel__init_augmented_syscall_tp_ret(evsel);
3836 evsel->handler = trace__sys_exit; 3867 evsel->handler = trace__sys_exit;
3837 } 3868 }