aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArnaldo Carvalho de Melo <acme@redhat.com>2018-11-05 10:23:40 -0500
committerArnaldo Carvalho de Melo <acme@redhat.com>2018-11-05 10:41:10 -0500
commit79ef68c7e1f665578e005b454480b6eca60edabe (patch)
treef75551a4b0800654f63192946f84169c4e6ad40e
parentcd26ea6d50a207ee37e0364ecc2d196d6c9671e8 (diff)
perf augmented_syscalls: Start collecting pathnames in the BPF program
This is the start of having the raw_syscalls:sys_enter BPF handler collecting pointer arguments, namely pathnames, and with two syscalls that have that pointer in different arguments, "open" as it as its first argument, "openat" as the second. With this in place the existing beautifiers in 'perf trace' works, those args are shown instead of just the pointer that comes with the syscalls tracepoints. This also serves to show and document pitfalls in the process of using just that place in the kernel (raw_syscalls:sys_enter) plus tables provided by userspace to collect syscall pointer arguments. One is the need to use a barrier, as suggested by Edward, to avoid clang optimizations that make the kernel BPF verifier to refuse loading our pointer contents collector. The end result should be a generic eBPF program that works in all architectures, with the differences amongst archs resolved by the userspace component, 'perf trace', that should get all its tables created automatically from the kernel components where they are defined, via string table constructors for things not expressed in BTF/DWARF (enums, structs, etc), and otherwise using those observability files (BTF). Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: Edward Cree <ecree@solarflare.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Martin KaFai Lau <kafai@fb.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Cc: Yonghong Song <yhs@fb.com> Link: https://lkml.kernel.org/n/tip-37dz54pmotgpnwg9tb6zuk9j@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--tools/perf/examples/bpf/augmented_raw_syscalls.c72
1 files changed, 72 insertions, 0 deletions
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
index cde91c34b101..90a19336310b 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -37,15 +37,87 @@ struct syscall_exit_args {
37 long ret; 37 long ret;
38}; 38};
39 39
/*
 * Fixed-size record appended after the raw sys_enter payload to carry a
 * pathname copied from one of the syscall's pointer arguments.
 */
40struct augmented_filename {
41	unsigned int	size;		/* bytes stored by probe_read_str() into value[] */
42	int		reserved;	/* zeroed before output; keeps value[] 8-byte aligned */
43	char		value[256];	/* NUL-terminated pathname (possibly truncated) */
44};
45
/*
 * Syscall numbers used to pick which argument holds the pathname.
 * NOTE(review): these match x86_64 (open=2, openat=257) — arch-specific;
 * confirm before using on other architectures.
 */
46#define SYS_OPEN 2
47#define SYS_OPENAT 257
48
/*
 * raw_syscalls:sys_enter handler: copies the tracepoint args, and for the
 * syscalls listed in the switch below additionally copies the pathname the
 * syscall receives by pointer, then emits the (possibly trimmed) record via
 * perf_event_output() to the __augmented_syscalls__ map.
 */
40SEC("raw_syscalls:sys_enter") 49SEC("raw_syscalls:sys_enter")
41int sys_enter(struct syscall_enter_args *args) 50int sys_enter(struct syscall_enter_args *args)
42{ 51{
43	struct { 52	struct {
44		struct syscall_enter_args args; 53		struct syscall_enter_args args;
54		struct augmented_filename filename;
45	} augmented_args; 55	} augmented_args;
46	unsigned int len = sizeof(augmented_args); 56	unsigned int len = sizeof(augmented_args);
57	const void *filename_arg = NULL;
47 58
48	probe_read(&augmented_args.args, sizeof(augmented_args.args), args); 59	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
60	/*
61	 * Yonghong and Edward Cree sayz:
62	 *
63	 * https://www.spinics.net/lists/netdev/msg531645.html
64	 *
65	 * >> R0=inv(id=0) R1=inv2 R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1
66	 * >> 10: (bf) r1 = r6
67	 * >> 11: (07) r1 += 16
68	 * >> 12: (05) goto pc+2
69	 * >> 15: (79) r3 = *(u64 *)(r1 +0)
70	 * >> dereference of modified ctx ptr R1 off=16 disallowed
71	 * > Aha, we at least got a different error message this time.
72	 * > And indeed llvm has done that optimisation, rather than the more obvious
73	 * > 11: r3 = *(u64 *)(r1 +16)
74	 * > because it wants to have lots of reads share a single insn. You may be able
75	 * > to defeat that optimisation by adding compiler barriers, idk. Maybe someone
76	 * > with llvm knowledge can figure out how to stop it (ideally, llvm would know
77	 * > when it's generating for bpf backend and not do that). -O0? ¯\_(ツ)_/¯
78	 *
79	 * The optimization mostly likes below:
80	 *
81	 *	br1:
82	 *	...
83	 *	r1 += 16
84	 *	goto merge
85	 *	br2:
86	 *	...
87	 *	r1 += 20
88	 *	goto merge
89	 *	merge:
90	 *	*(u64 *)(r1 + 0)
91	 *
92	 * The compiler tries to merge common loads. There is no easy way to
93	 * stop this compiler optimization without turning off a lot of other
94	 * optimizations. The easiest way is to add barriers:
95	 *
96	 *	 __asm__ __volatile__("": : :"memory")
97	 *
98	 * after the ctx memory access to prevent their down stream merging.
99	 */
	/*
	 * Pick the argument slot that carries the pathname pointer:
	 * open(const char *pathname, ...)      -> args[0]
	 * openat(int dfd, const char *path...) -> args[1]
	 * The barrier after the SYS_OPEN ctx access defeats the load-merging
	 * optimization described above; see the quoted discussion.
	 * NOTE(review): only the first case has a barrier — presumably merging
	 * was only observed for that pair of ctx loads; confirm the verifier
	 * still accepts the program if more cases are added.
	 */
100	switch (augmented_args.args.syscall_nr) {
101	case SYS_OPEN: filename_arg = (const void *)args->args[0];
102			__asm__ __volatile__("": : :"memory");
103			break;
104	case SYS_OPENAT: filename_arg = (const void *)args->args[1];
105			break;
106	}
107
	/* Have a pathname pointer: append its contents after the raw args. */
108	if (filename_arg != NULL) {
109		augmented_args.filename.reserved = 0;
110		augmented_args.filename.size = probe_read_str(&augmented_args.filename.value,
111							      sizeof(augmented_args.filename.value),
112							      filename_arg);
		/*
		 * Trim the output record to the bytes actually used by the
		 * string. NOTE(review): probe_read_str() may return a negative
		 * error; stored into the unsigned 'size' it becomes huge, this
		 * branch is then skipped and the full 256-byte value[] is
		 * emitted — confirm that is the intended fallback.
		 * The 'len &=' mask — presumably there to give the BPF
		 * verifier a provable upper bound on len — TODO confirm.
		 */
113		if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) {
114			len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;
115			len &= sizeof(augmented_args.filename.value) - 1;
116		}
117	} else {
		/* Syscall we don't augment: send only the raw tracepoint args. */
118		len = sizeof(augmented_args.args);
119	}
120
49	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); 121	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
50	return 0; 122	return 0;
51} 123}