diff options
| author | Alexei Starovoitov <ast@plumgrid.com> | 2015-10-20 23:02:35 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2015-10-22 09:42:15 -0400 |
| commit | 39111695b1b88a21e81983a38786d877e770da81 (patch) | |
| tree | 33c57207f5ed18a5408ed9c25ec0ab169ec0fb5d /samples | |
| parent | a43eec304259a6c637f4014a6d4767159b6a3aa3 (diff) | |
samples: bpf: add bpf_perf_event_output example
Performance test and example of bpf_perf_event_output().
A kprobe is attached to sys_write() and a trivial bpf program streams
pid+cookie into userspace via the PERF_COUNT_SW_BPF_OUTPUT event.
Usage:
$ sudo ./bld_x64/samples/bpf/trace_output
recv 2968913 events per sec
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples')
| -rw-r--r-- | samples/bpf/Makefile | 7 | ||||
| -rw-r--r-- | samples/bpf/bpf_helpers.h | 2 | ||||
| -rw-r--r-- | samples/bpf/trace_output_kern.c | 31 | ||||
| -rw-r--r-- | samples/bpf/trace_output_user.c | 196 |
4 files changed, 236 insertions, 0 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 63e7d50e6a4f..b30514514e37 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
| @@ -13,6 +13,7 @@ hostprogs-y += tracex3 | |||
| 13 | hostprogs-y += tracex4 | 13 | hostprogs-y += tracex4 |
| 14 | hostprogs-y += tracex5 | 14 | hostprogs-y += tracex5 |
| 15 | hostprogs-y += tracex6 | 15 | hostprogs-y += tracex6 |
| 16 | hostprogs-y += trace_output | ||
| 16 | hostprogs-y += lathist | 17 | hostprogs-y += lathist |
| 17 | 18 | ||
| 18 | test_verifier-objs := test_verifier.o libbpf.o | 19 | test_verifier-objs := test_verifier.o libbpf.o |
| @@ -27,6 +28,7 @@ tracex3-objs := bpf_load.o libbpf.o tracex3_user.o | |||
| 27 | tracex4-objs := bpf_load.o libbpf.o tracex4_user.o | 28 | tracex4-objs := bpf_load.o libbpf.o tracex4_user.o |
| 28 | tracex5-objs := bpf_load.o libbpf.o tracex5_user.o | 29 | tracex5-objs := bpf_load.o libbpf.o tracex5_user.o |
| 29 | tracex6-objs := bpf_load.o libbpf.o tracex6_user.o | 30 | tracex6-objs := bpf_load.o libbpf.o tracex6_user.o |
| 31 | trace_output-objs := bpf_load.o libbpf.o trace_output_user.o | ||
| 30 | lathist-objs := bpf_load.o libbpf.o lathist_user.o | 32 | lathist-objs := bpf_load.o libbpf.o lathist_user.o |
| 31 | 33 | ||
| 32 | # Tell kbuild to always build the programs | 34 | # Tell kbuild to always build the programs |
| @@ -40,6 +42,7 @@ always += tracex3_kern.o | |||
| 40 | always += tracex4_kern.o | 42 | always += tracex4_kern.o |
| 41 | always += tracex5_kern.o | 43 | always += tracex5_kern.o |
| 42 | always += tracex6_kern.o | 44 | always += tracex6_kern.o |
| 45 | always += trace_output_kern.o | ||
| 43 | always += tcbpf1_kern.o | 46 | always += tcbpf1_kern.o |
| 44 | always += lathist_kern.o | 47 | always += lathist_kern.o |
| 45 | 48 | ||
| @@ -55,6 +58,7 @@ HOSTLOADLIBES_tracex3 += -lelf | |||
| 55 | HOSTLOADLIBES_tracex4 += -lelf -lrt | 58 | HOSTLOADLIBES_tracex4 += -lelf -lrt |
| 56 | HOSTLOADLIBES_tracex5 += -lelf | 59 | HOSTLOADLIBES_tracex5 += -lelf |
| 57 | HOSTLOADLIBES_tracex6 += -lelf | 60 | HOSTLOADLIBES_tracex6 += -lelf |
| 61 | HOSTLOADLIBES_trace_output += -lelf -lrt | ||
| 58 | HOSTLOADLIBES_lathist += -lelf | 62 | HOSTLOADLIBES_lathist += -lelf |
| 59 | 63 | ||
| 60 | # point this to your LLVM backend with bpf support | 64 | # point this to your LLVM backend with bpf support |
| @@ -64,3 +68,6 @@ $(obj)/%.o: $(src)/%.c | |||
| 64 | clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ | 68 | clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ |
| 65 | -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ | 69 | -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ |
| 66 | -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ | 70 | -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ |
| 71 | clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ | ||
| 72 | -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ | ||
| 73 | -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=asm -o $@.s | ||
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 21aa1b44c30c..b35c21e0b43f 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
| @@ -37,6 +37,8 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = | |||
| 37 | (void *) BPF_FUNC_clone_redirect; | 37 | (void *) BPF_FUNC_clone_redirect; |
| 38 | static int (*bpf_redirect)(int ifindex, int flags) = | 38 | static int (*bpf_redirect)(int ifindex, int flags) = |
| 39 | (void *) BPF_FUNC_redirect; | 39 | (void *) BPF_FUNC_redirect; |
| 40 | static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) = | ||
| 41 | (void *) BPF_FUNC_perf_event_output; | ||
| 40 | 42 | ||
| 41 | /* llvm builtin functions that eBPF C program may use to | 43 | /* llvm builtin functions that eBPF C program may use to |
| 42 | * emit BPF_LD_ABS and BPF_LD_IND instructions | 44 | * emit BPF_LD_ABS and BPF_LD_IND instructions |
diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c
new file mode 100644
index 000000000000..8d8d1ec429eb
--- /dev/null
+++ b/samples/bpf/trace_output_kern.c
| @@ -0,0 +1,31 @@ | |||
| 1 | #include <linux/ptrace.h> | ||
| 2 | #include <linux/version.h> | ||
| 3 | #include <uapi/linux/bpf.h> | ||
| 4 | #include "bpf_helpers.h" | ||
| 5 | |||
/*
 * Perf event array that bpf_prog1() passes to bpf_perf_event_output().
 * The user-space loader (trace_output_user.c) stores a perf event fd
 * under key 0.
 */
struct bpf_map_def SEC("maps") my_map = {
	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(u32),
	.max_entries = 2,
};
| 12 | |||
| 13 | SEC("kprobe/sys_write") | ||
| 14 | int bpf_prog1(struct pt_regs *ctx) | ||
| 15 | { | ||
| 16 | struct S { | ||
| 17 | u64 pid; | ||
| 18 | u64 cookie; | ||
| 19 | } data; | ||
| 20 | |||
| 21 | memset(&data, 0, sizeof(data)); | ||
| 22 | data.pid = bpf_get_current_pid_tgid(); | ||
| 23 | data.cookie = 0x12345678; | ||
| 24 | |||
| 25 | bpf_perf_event_output(ctx, &my_map, 0, &data, sizeof(data)); | ||
| 26 | |||
| 27 | return 0; | ||
| 28 | } | ||
| 29 | |||
/* license string, placed in the "license" section of the object file */
char _license[] SEC("license") = "GPL";
/* LINUX_VERSION_CODE of the build tree, placed in the "version" section */
u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c
new file mode 100644
index 000000000000..661a7d052f2c
--- /dev/null
+++ b/samples/bpf/trace_output_user.c
| @@ -0,0 +1,196 @@ | |||
| 1 | /* This program is free software; you can redistribute it and/or | ||
| 2 | * modify it under the terms of version 2 of the GNU General Public | ||
| 3 | * License as published by the Free Software Foundation. | ||
| 4 | */ | ||
| 5 | #include <stdio.h> | ||
| 6 | #include <unistd.h> | ||
| 7 | #include <stdlib.h> | ||
| 8 | #include <stdbool.h> | ||
| 9 | #include <string.h> | ||
| 10 | #include <fcntl.h> | ||
| 11 | #include <poll.h> | ||
| 12 | #include <sys/ioctl.h> | ||
| 13 | #include <linux/perf_event.h> | ||
| 14 | #include <linux/bpf.h> | ||
| 15 | #include <errno.h> | ||
| 16 | #include <assert.h> | ||
| 17 | #include <sys/syscall.h> | ||
| 18 | #include <sys/ioctl.h> | ||
| 19 | #include <sys/mman.h> | ||
| 20 | #include <time.h> | ||
| 21 | #include <signal.h> | ||
| 22 | #include "libbpf.h" | ||
| 23 | #include "bpf_load.h" | ||
| 24 | |||
/* perf event fd; assigned in test_bpf_perf_event(), mmap'ed and polled from main() */
static int pmu_fd;

/* system page size, set by perf_event_mmap() via getpagesize() */
int page_size;
/* number of data pages that follow the first (header) page of the mapping */
int page_cnt = 8;
/* start of the mmap'ed perf buffer; set by perf_event_mmap() */
volatile struct perf_event_mmap_page *header;

/* callback invoked by perf_event_read() with the payload of each sample record */
typedef void (*print_fn)(void *data, int size);
| 32 | |||
| 33 | static int perf_event_mmap(int fd) | ||
| 34 | { | ||
| 35 | void *base; | ||
| 36 | int mmap_size; | ||
| 37 | |||
| 38 | page_size = getpagesize(); | ||
| 39 | mmap_size = page_size * (page_cnt + 1); | ||
| 40 | |||
| 41 | base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | ||
| 42 | if (base == MAP_FAILED) { | ||
| 43 | printf("mmap err\n"); | ||
| 44 | return -1; | ||
| 45 | } | ||
| 46 | |||
| 47 | header = base; | ||
| 48 | return 0; | ||
| 49 | } | ||
| 50 | |||
/*
 * Wait up to one second (1000 ms) for @fd to become readable.
 * Returns whatever poll() returns.
 */
static int perf_event_poll(int fd)
{
	struct pollfd waiter;

	waiter.fd = fd;
	waiter.events = POLLIN;
	waiter.revents = 0;

	return poll(&waiter, 1, 1000);
}
| 57 | |||
/*
 * Layout used by perf_event_read() to interpret a record in the ring
 * buffer: the generic perf header, a 32-bit size field, then a flexible
 * payload array. For PERF_RECORD_SAMPLE records 'data' and 'size' are
 * handed to the callback.
 */
struct perf_event_sample {
	struct perf_event_header header;
	__u32 size;
	char data[];
};
| 63 | |||
| 64 | void perf_event_read(print_fn fn) | ||
| 65 | { | ||
| 66 | __u64 data_tail = header->data_tail; | ||
| 67 | __u64 data_head = header->data_head; | ||
| 68 | __u64 buffer_size = page_cnt * page_size; | ||
| 69 | void *base, *begin, *end; | ||
| 70 | char buf[256]; | ||
| 71 | |||
| 72 | asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ | ||
| 73 | if (data_head == data_tail) | ||
| 74 | return; | ||
| 75 | |||
| 76 | base = ((char *)header) + page_size; | ||
| 77 | |||
| 78 | begin = base + data_tail % buffer_size; | ||
| 79 | end = base + data_head % buffer_size; | ||
| 80 | |||
| 81 | while (begin != end) { | ||
| 82 | struct perf_event_sample *e; | ||
| 83 | |||
| 84 | e = begin; | ||
| 85 | if (begin + e->header.size > base + buffer_size) { | ||
| 86 | long len = base + buffer_size - begin; | ||
| 87 | |||
| 88 | assert(len < e->header.size); | ||
| 89 | memcpy(buf, begin, len); | ||
| 90 | memcpy(buf + len, base, e->header.size - len); | ||
| 91 | e = (void *) buf; | ||
| 92 | begin = base + e->header.size - len; | ||
| 93 | } else if (begin + e->header.size == base + buffer_size) { | ||
| 94 | begin = base; | ||
| 95 | } else { | ||
| 96 | begin += e->header.size; | ||
| 97 | } | ||
| 98 | |||
| 99 | if (e->header.type == PERF_RECORD_SAMPLE) { | ||
| 100 | fn(e->data, e->size); | ||
| 101 | } else if (e->header.type == PERF_RECORD_LOST) { | ||
| 102 | struct { | ||
| 103 | struct perf_event_header header; | ||
| 104 | __u64 id; | ||
| 105 | __u64 lost; | ||
| 106 | } *lost = (void *) e; | ||
| 107 | printf("lost %lld events\n", lost->lost); | ||
| 108 | } else { | ||
| 109 | printf("unknown event type=%d size=%d\n", | ||
| 110 | e->header.type, e->header.size); | ||
| 111 | } | ||
| 112 | } | ||
| 113 | |||
| 114 | __sync_synchronize(); /* smp_mb() */ | ||
| 115 | header->data_tail = data_head; | ||
| 116 | } | ||
| 117 | |||
/* Current CLOCK_MONOTONIC reading, expressed in nanoseconds. */
static __u64 time_get_ns(void)
{
	struct timespec now;
	__u64 ns;

	clock_gettime(CLOCK_MONOTONIC, &now);
	ns = now.tv_sec * 1000000000ull;
	ns += now.tv_nsec;
	return ns;
}
| 125 | |||
/* time_get_ns() value taken in main() just before the read loop starts */
static __u64 start_time;

/* event count at which print_bpf_output() prints the rate and sends SIGINT */
#define MAX_CNT 100000ll
| 129 | |||
| 130 | static void print_bpf_output(void *data, int size) | ||
| 131 | { | ||
| 132 | static __u64 cnt; | ||
| 133 | struct { | ||
| 134 | __u64 pid; | ||
| 135 | __u64 cookie; | ||
| 136 | } *e = data; | ||
| 137 | |||
| 138 | if (e->cookie != 0x12345678) { | ||
| 139 | printf("BUG pid %llx cookie %llx sized %d\n", | ||
| 140 | e->pid, e->cookie, size); | ||
| 141 | kill(0, SIGINT); | ||
| 142 | } | ||
| 143 | |||
| 144 | cnt++; | ||
| 145 | |||
| 146 | if (cnt == MAX_CNT) { | ||
| 147 | printf("recv %lld events per sec\n", | ||
| 148 | MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); | ||
| 149 | kill(0, SIGINT); | ||
| 150 | } | ||
| 151 | } | ||
| 152 | |||
| 153 | static void test_bpf_perf_event(void) | ||
| 154 | { | ||
| 155 | struct perf_event_attr attr = { | ||
| 156 | .sample_type = PERF_SAMPLE_RAW, | ||
| 157 | .type = PERF_TYPE_SOFTWARE, | ||
| 158 | .config = PERF_COUNT_SW_BPF_OUTPUT, | ||
| 159 | }; | ||
| 160 | int key = 0; | ||
| 161 | |||
| 162 | pmu_fd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); | ||
| 163 | |||
| 164 | assert(pmu_fd >= 0); | ||
| 165 | assert(bpf_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0); | ||
| 166 | ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); | ||
| 167 | } | ||
| 168 | |||
| 169 | int main(int argc, char **argv) | ||
| 170 | { | ||
| 171 | char filename[256]; | ||
| 172 | FILE *f; | ||
| 173 | |||
| 174 | snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); | ||
| 175 | |||
| 176 | if (load_bpf_file(filename)) { | ||
| 177 | printf("%s", bpf_log_buf); | ||
| 178 | return 1; | ||
| 179 | } | ||
| 180 | |||
| 181 | test_bpf_perf_event(); | ||
| 182 | |||
| 183 | if (perf_event_mmap(pmu_fd) < 0) | ||
| 184 | return 1; | ||
| 185 | |||
| 186 | f = popen("taskset 1 dd if=/dev/zero of=/dev/null", "r"); | ||
| 187 | (void) f; | ||
| 188 | |||
| 189 | start_time = time_get_ns(); | ||
| 190 | for (;;) { | ||
| 191 | perf_event_poll(pmu_fd); | ||
| 192 | perf_event_read(print_bpf_output); | ||
| 193 | } | ||
| 194 | |||
| 195 | return 0; | ||
| 196 | } | ||
