diff options
| author | Alexei Starovoitov <ast@plumgrid.com> | 2015-05-19 19:59:05 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2015-05-21 17:07:59 -0400 |
| commit | 5bacd7805ab4f07a69c7ef4b1d45ce553d2b1c3a (patch) | |
| tree | 6e2b12a280e4431e2a5c3d971a8d3ebad4c0b5bc /samples | |
| parent | b52f00e6a7154308a08d0a2edab621f277801a2c (diff) | |
samples/bpf: bpf_tail_call example for tracing
kprobe example that demonstrates what future seccomp programs may look like.
It attaches to seccomp_phase1() function and tail-calls other BPF programs
depending on syscall number.
Existing optimized classic BPF seccomp programs generated by Chrome look like:
if (sd.nr < 121) {
  if (sd.nr < 57) {
    if (sd.nr < 22) {
      if (sd.nr < 7) {
        if (sd.nr < 4) {
          if (sd.nr < 1) {
            check sys_read
          } else {
            if (sd.nr < 3) {
              check sys_write and sys_open
            } else {
              check sys_close
            }
          }
        } else {
          ...
        }
      } else {
        ...
      }
    } else {
      ...
    }
  } else {
    ...
  }
} else {
  ...
}
the future seccomp using native eBPF may look like:
bpf_tail_call(&sd, &syscall_jmp_table, sd.nr);
which is simpler, faster and leaves more room for per-syscall checks.
Usage:
$ sudo ./tracex5
<...>-366 [001] d... 4.870033: : read(fd=1, buf=00007f6d5bebf000, size=771)
<...>-369 [003] d... 4.870066: : mmap
<...>-369 [003] d... 4.870077: : syscall=110 (one of get/set uid/pid/gid)
<...>-369 [003] d... 4.870089: : syscall=107 (one of get/set uid/pid/gid)
sh-369 [000] d... 4.891740: : read(fd=0, buf=00000000023d1000, size=512)
sh-369 [000] d... 4.891747: : write(fd=1, buf=00000000023d3000, size=512)
sh-369 [000] d... 4.891747: : read(fd=1, buf=00000000023d3000, size=512)
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples')
| -rw-r--r-- | samples/bpf/Makefile | 4 | ||||
| -rw-r--r-- | samples/bpf/bpf_helpers.h | 2 | ||||
| -rw-r--r-- | samples/bpf/bpf_load.c | 57 | ||||
| -rw-r--r-- | samples/bpf/tracex5_kern.c | 75 | ||||
| -rw-r--r-- | samples/bpf/tracex5_user.c | 46 |
5 files changed, 172 insertions, 12 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 8fdbd73429dd..ded10d05617e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile | |||
| @@ -10,6 +10,7 @@ hostprogs-y += tracex1 | |||
| 10 | hostprogs-y += tracex2 | 10 | hostprogs-y += tracex2 |
| 11 | hostprogs-y += tracex3 | 11 | hostprogs-y += tracex3 |
| 12 | hostprogs-y += tracex4 | 12 | hostprogs-y += tracex4 |
| 13 | hostprogs-y += tracex5 | ||
| 13 | 14 | ||
| 14 | test_verifier-objs := test_verifier.o libbpf.o | 15 | test_verifier-objs := test_verifier.o libbpf.o |
| 15 | test_maps-objs := test_maps.o libbpf.o | 16 | test_maps-objs := test_maps.o libbpf.o |
| @@ -20,6 +21,7 @@ tracex1-objs := bpf_load.o libbpf.o tracex1_user.o | |||
| 20 | tracex2-objs := bpf_load.o libbpf.o tracex2_user.o | 21 | tracex2-objs := bpf_load.o libbpf.o tracex2_user.o |
| 21 | tracex3-objs := bpf_load.o libbpf.o tracex3_user.o | 22 | tracex3-objs := bpf_load.o libbpf.o tracex3_user.o |
| 22 | tracex4-objs := bpf_load.o libbpf.o tracex4_user.o | 23 | tracex4-objs := bpf_load.o libbpf.o tracex4_user.o |
| 24 | tracex5-objs := bpf_load.o libbpf.o tracex5_user.o | ||
| 23 | 25 | ||
| 24 | # Tell kbuild to always build the programs | 26 | # Tell kbuild to always build the programs |
| 25 | always := $(hostprogs-y) | 27 | always := $(hostprogs-y) |
| @@ -29,6 +31,7 @@ always += tracex1_kern.o | |||
| 29 | always += tracex2_kern.o | 31 | always += tracex2_kern.o |
| 30 | always += tracex3_kern.o | 32 | always += tracex3_kern.o |
| 31 | always += tracex4_kern.o | 33 | always += tracex4_kern.o |
| 34 | always += tracex5_kern.o | ||
| 32 | always += tcbpf1_kern.o | 35 | always += tcbpf1_kern.o |
| 33 | 36 | ||
| 34 | HOSTCFLAGS += -I$(objtree)/usr/include | 37 | HOSTCFLAGS += -I$(objtree)/usr/include |
| @@ -40,6 +43,7 @@ HOSTLOADLIBES_tracex1 += -lelf | |||
| 40 | HOSTLOADLIBES_tracex2 += -lelf | 43 | HOSTLOADLIBES_tracex2 += -lelf |
| 41 | HOSTLOADLIBES_tracex3 += -lelf | 44 | HOSTLOADLIBES_tracex3 += -lelf |
| 42 | HOSTLOADLIBES_tracex4 += -lelf -lrt | 45 | HOSTLOADLIBES_tracex4 += -lelf -lrt |
| 46 | HOSTLOADLIBES_tracex5 += -lelf | ||
| 43 | 47 | ||
| 44 | # point this to your LLVM backend with bpf support | 48 | # point this to your LLVM backend with bpf support |
| 45 | LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc | 49 | LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc |
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index f960b5fb3ed8..699ed8dbdd64 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h | |||
| @@ -21,6 +21,8 @@ static unsigned long long (*bpf_ktime_get_ns)(void) = | |||
| 21 | (void *) BPF_FUNC_ktime_get_ns; | 21 | (void *) BPF_FUNC_ktime_get_ns; |
| 22 | static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = | 22 | static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = |
| 23 | (void *) BPF_FUNC_trace_printk; | 23 | (void *) BPF_FUNC_trace_printk; |
| 24 | static void (*bpf_tail_call)(void *ctx, void *map, int index) = | ||
| 25 | (void *) BPF_FUNC_tail_call; | ||
| 24 | 26 | ||
| 25 | /* llvm builtin functions that eBPF C program may use to | 27 | /* llvm builtin functions that eBPF C program may use to |
| 26 | * emit BPF_LD_ABS and BPF_LD_IND instructions | 28 | * emit BPF_LD_ABS and BPF_LD_IND instructions |
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 38dac5a53b51..da86a8e0a95a 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <sys/ioctl.h> | 16 | #include <sys/ioctl.h> |
| 17 | #include <sys/mman.h> | 17 | #include <sys/mman.h> |
| 18 | #include <poll.h> | 18 | #include <poll.h> |
| 19 | #include <ctype.h> | ||
| 19 | #include "libbpf.h" | 20 | #include "libbpf.h" |
| 20 | #include "bpf_helpers.h" | 21 | #include "bpf_helpers.h" |
| 21 | #include "bpf_load.h" | 22 | #include "bpf_load.h" |
| @@ -29,6 +30,19 @@ int map_fd[MAX_MAPS]; | |||
| 29 | int prog_fd[MAX_PROGS]; | 30 | int prog_fd[MAX_PROGS]; |
| 30 | int event_fd[MAX_PROGS]; | 31 | int event_fd[MAX_PROGS]; |
| 31 | int prog_cnt; | 32 | int prog_cnt; |
| 33 | int prog_array_fd = -1; | ||
| 34 | |||
| 35 | static int populate_prog_array(const char *event, int prog_fd) | ||
| 36 | { | ||
| 37 | int ind = atoi(event), err; | ||
| 38 | |||
| 39 | err = bpf_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY); | ||
| 40 | if (err < 0) { | ||
| 41 | printf("failed to store prog_fd in prog_array\n"); | ||
| 42 | return -1; | ||
| 43 | } | ||
| 44 | return 0; | ||
| 45 | } | ||
| 32 | 46 | ||
| 33 | static int load_and_attach(const char *event, struct bpf_insn *prog, int size) | 47 | static int load_and_attach(const char *event, struct bpf_insn *prog, int size) |
| 34 | { | 48 | { |
| @@ -54,12 +68,40 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) | |||
| 54 | return -1; | 68 | return -1; |
| 55 | } | 69 | } |
| 56 | 70 | ||
| 71 | fd = bpf_prog_load(prog_type, prog, size, license, kern_version); | ||
| 72 | if (fd < 0) { | ||
| 73 | printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf); | ||
| 74 | return -1; | ||
| 75 | } | ||
| 76 | |||
| 77 | prog_fd[prog_cnt++] = fd; | ||
| 78 | |||
| 79 | if (is_socket) { | ||
| 80 | event += 6; | ||
| 81 | if (*event != '/') | ||
| 82 | return 0; | ||
| 83 | event++; | ||
| 84 | if (!isdigit(*event)) { | ||
| 85 | printf("invalid prog number\n"); | ||
| 86 | return -1; | ||
| 87 | } | ||
| 88 | return populate_prog_array(event, fd); | ||
| 89 | } | ||
| 90 | |||
| 57 | if (is_kprobe || is_kretprobe) { | 91 | if (is_kprobe || is_kretprobe) { |
| 58 | if (is_kprobe) | 92 | if (is_kprobe) |
| 59 | event += 7; | 93 | event += 7; |
| 60 | else | 94 | else |
| 61 | event += 10; | 95 | event += 10; |
| 62 | 96 | ||
| 97 | if (*event == 0) { | ||
| 98 | printf("event name cannot be empty\n"); | ||
| 99 | return -1; | ||
| 100 | } | ||
| 101 | |||
| 102 | if (isdigit(*event)) | ||
| 103 | return populate_prog_array(event, fd); | ||
| 104 | |||
| 63 | snprintf(buf, sizeof(buf), | 105 | snprintf(buf, sizeof(buf), |
| 64 | "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events", | 106 | "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events", |
| 65 | is_kprobe ? 'p' : 'r', event, event); | 107 | is_kprobe ? 'p' : 'r', event, event); |
| @@ -71,18 +113,6 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) | |||
| 71 | } | 113 | } |
| 72 | } | 114 | } |
| 73 | 115 | ||
| 74 | fd = bpf_prog_load(prog_type, prog, size, license, kern_version); | ||
| 75 | |||
| 76 | if (fd < 0) { | ||
| 77 | printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf); | ||
| 78 | return -1; | ||
| 79 | } | ||
| 80 | |||
| 81 | prog_fd[prog_cnt++] = fd; | ||
| 82 | |||
| 83 | if (is_socket) | ||
| 84 | return 0; | ||
| 85 | |||
| 86 | strcpy(buf, DEBUGFS); | 116 | strcpy(buf, DEBUGFS); |
| 87 | strcat(buf, "events/kprobes/"); | 117 | strcat(buf, "events/kprobes/"); |
| 88 | strcat(buf, event); | 118 | strcat(buf, event); |
| @@ -130,6 +160,9 @@ static int load_maps(struct bpf_map_def *maps, int len) | |||
| 130 | maps[i].max_entries); | 160 | maps[i].max_entries); |
| 131 | if (map_fd[i] < 0) | 161 | if (map_fd[i] < 0) |
| 132 | return 1; | 162 | return 1; |
| 163 | |||
| 164 | if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) | ||
| 165 | prog_array_fd = map_fd[i]; | ||
| 133 | } | 166 | } |
| 134 | return 0; | 167 | return 0; |
| 135 | } | 168 | } |
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c new file mode 100644 index 000000000000..b71fe07a7a7a --- /dev/null +++ b/samples/bpf/tracex5_kern.c | |||
| @@ -0,0 +1,75 @@ | |||
| 1 | /* Copyright (c) 2015 PLUMgrid, http://plumgrid.com | ||
| 2 | * | ||
| 3 | * This program is free software; you can redistribute it and/or | ||
| 4 | * modify it under the terms of version 2 of the GNU General Public | ||
| 5 | * License as published by the Free Software Foundation. | ||
| 6 | */ | ||
| 7 | #include <linux/ptrace.h> | ||
| 8 | #include <linux/version.h> | ||
| 9 | #include <uapi/linux/bpf.h> | ||
| 10 | #include <uapi/linux/seccomp.h> | ||
| 11 | #include "bpf_helpers.h" | ||
| 12 | |||
| 13 | #define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F | ||
| 14 | |||
| 15 | struct bpf_map_def SEC("maps") progs = { | ||
| 16 | .type = BPF_MAP_TYPE_PROG_ARRAY, | ||
| 17 | .key_size = sizeof(u32), | ||
| 18 | .value_size = sizeof(u32), | ||
| 19 | .max_entries = 1024, | ||
| 20 | }; | ||
| 21 | |||
| 22 | SEC("kprobe/seccomp_phase1") | ||
| 23 | int bpf_prog1(struct pt_regs *ctx) | ||
| 24 | { | ||
| 25 | struct seccomp_data sd = {}; | ||
| 26 | |||
| 27 | bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); | ||
| 28 | |||
| 29 | /* dispatch into next BPF program depending on syscall number */ | ||
| 30 | bpf_tail_call(ctx, &progs, sd.nr); | ||
| 31 | |||
| 32 | /* fall through -> unknown syscall */ | ||
| 33 | if (sd.nr >= __NR_getuid && sd.nr <= __NR_getsid) { | ||
| 34 | char fmt[] = "syscall=%d (one of get/set uid/pid/gid)\n"; | ||
| 35 | bpf_trace_printk(fmt, sizeof(fmt), sd.nr); | ||
| 36 | } | ||
| 37 | return 0; | ||
| 38 | } | ||
| 39 | |||
| 40 | /* we jump here when syscall number == __NR_write */ | ||
| 41 | PROG(__NR_write)(struct pt_regs *ctx) | ||
| 42 | { | ||
| 43 | struct seccomp_data sd = {}; | ||
| 44 | |||
| 45 | bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); | ||
| 46 | if (sd.args[2] == 512) { | ||
| 47 | char fmt[] = "write(fd=%d, buf=%p, size=%d)\n"; | ||
| 48 | bpf_trace_printk(fmt, sizeof(fmt), | ||
| 49 | sd.args[0], sd.args[1], sd.args[2]); | ||
| 50 | } | ||
| 51 | return 0; | ||
| 52 | } | ||
| 53 | |||
| 54 | PROG(__NR_read)(struct pt_regs *ctx) | ||
| 55 | { | ||
| 56 | struct seccomp_data sd = {}; | ||
| 57 | |||
| 58 | bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); | ||
| 59 | if (sd.args[2] > 128 && sd.args[2] <= 1024) { | ||
| 60 | char fmt[] = "read(fd=%d, buf=%p, size=%d)\n"; | ||
| 61 | bpf_trace_printk(fmt, sizeof(fmt), | ||
| 62 | sd.args[0], sd.args[1], sd.args[2]); | ||
| 63 | } | ||
| 64 | return 0; | ||
| 65 | } | ||
| 66 | |||
| 67 | PROG(__NR_mmap)(struct pt_regs *ctx) | ||
| 68 | { | ||
| 69 | char fmt[] = "mmap\n"; | ||
| 70 | bpf_trace_printk(fmt, sizeof(fmt)); | ||
| 71 | return 0; | ||
| 72 | } | ||
| 73 | |||
| 74 | char _license[] SEC("license") = "GPL"; | ||
| 75 | u32 _version SEC("version") = LINUX_VERSION_CODE; | ||
diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c new file mode 100644 index 000000000000..a04dd3cd4358 --- /dev/null +++ b/samples/bpf/tracex5_user.c | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | #include <stdio.h> | ||
| 2 | #include <linux/bpf.h> | ||
| 3 | #include <unistd.h> | ||
| 4 | #include <linux/filter.h> | ||
| 5 | #include <linux/seccomp.h> | ||
| 6 | #include <sys/prctl.h> | ||
| 7 | #include "libbpf.h" | ||
| 8 | #include "bpf_load.h" | ||
| 9 | |||
| 10 | /* install fake seccomp program to enable seccomp code path inside the kernel, | ||
| 11 | * so that our kprobe attached to seccomp_phase1() can be triggered | ||
| 12 | */ | ||
| 13 | static void install_accept_all_seccomp(void) | ||
| 14 | { | ||
| 15 | struct sock_filter filter[] = { | ||
| 16 | BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), | ||
| 17 | }; | ||
| 18 | struct sock_fprog prog = { | ||
| 19 | .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])), | ||
| 20 | .filter = filter, | ||
| 21 | }; | ||
| 22 | if (prctl(PR_SET_SECCOMP, 2, &prog)) | ||
| 23 | perror("prctl"); | ||
| 24 | } | ||
| 25 | |||
| 26 | int main(int ac, char **argv) | ||
| 27 | { | ||
| 28 | FILE *f; | ||
| 29 | char filename[256]; | ||
| 30 | |||
| 31 | snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); | ||
| 32 | |||
| 33 | if (load_bpf_file(filename)) { | ||
| 34 | printf("%s", bpf_log_buf); | ||
| 35 | return 1; | ||
| 36 | } | ||
| 37 | |||
| 38 | install_accept_all_seccomp(); | ||
| 39 | |||
| 40 | f = popen("dd if=/dev/zero of=/dev/null count=5", "r"); | ||
| 41 | (void) f; | ||
| 42 | |||
| 43 | read_trace_pipe(); | ||
| 44 | |||
| 45 | return 0; | ||
| 46 | } | ||
