diff options
author | Alexei Starovoitov <ast@plumgrid.com> | 2015-10-20 23:02:35 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-10-22 09:42:15 -0400 |
commit | 39111695b1b88a21e81983a38786d877e770da81 (patch) | |
tree | 33c57207f5ed18a5408ed9c25ec0ab169ec0fb5d /samples/bpf | |
parent | a43eec304259a6c637f4014a6d4767159b6a3aa3 (diff) |
samples: bpf: add bpf_perf_event_output example
Performance test and example of bpf_perf_event_output().
A kprobe is attached to sys_write() and a trivial bpf program streams
pid+cookie into userspace via the PERF_COUNT_SW_BPF_OUTPUT event.
Usage:
$ sudo ./bld_x64/samples/bpf/trace_output
recv 2968913 events per sec
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples/bpf')
-rw-r--r-- | samples/bpf/Makefile | 7 | ||||
-rw-r--r-- | samples/bpf/bpf_helpers.h | 2 | ||||
-rw-r--r-- | samples/bpf/trace_output_kern.c | 31 | ||||
-rw-r--r-- | samples/bpf/trace_output_user.c | 196 |
4 files changed, 236 insertions, 0 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 63e7d50e6a4f..b30514514e37 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile | |||
@@ -13,6 +13,7 @@ hostprogs-y += tracex3 | |||
13 | hostprogs-y += tracex4 | 13 | hostprogs-y += tracex4 |
14 | hostprogs-y += tracex5 | 14 | hostprogs-y += tracex5 |
15 | hostprogs-y += tracex6 | 15 | hostprogs-y += tracex6 |
16 | hostprogs-y += trace_output | ||
16 | hostprogs-y += lathist | 17 | hostprogs-y += lathist |
17 | 18 | ||
18 | test_verifier-objs := test_verifier.o libbpf.o | 19 | test_verifier-objs := test_verifier.o libbpf.o |
@@ -27,6 +28,7 @@ tracex3-objs := bpf_load.o libbpf.o tracex3_user.o | |||
27 | tracex4-objs := bpf_load.o libbpf.o tracex4_user.o | 28 | tracex4-objs := bpf_load.o libbpf.o tracex4_user.o |
28 | tracex5-objs := bpf_load.o libbpf.o tracex5_user.o | 29 | tracex5-objs := bpf_load.o libbpf.o tracex5_user.o |
29 | tracex6-objs := bpf_load.o libbpf.o tracex6_user.o | 30 | tracex6-objs := bpf_load.o libbpf.o tracex6_user.o |
31 | trace_output-objs := bpf_load.o libbpf.o trace_output_user.o | ||
30 | lathist-objs := bpf_load.o libbpf.o lathist_user.o | 32 | lathist-objs := bpf_load.o libbpf.o lathist_user.o |
31 | 33 | ||
32 | # Tell kbuild to always build the programs | 34 | # Tell kbuild to always build the programs |
@@ -40,6 +42,7 @@ always += tracex3_kern.o | |||
40 | always += tracex4_kern.o | 42 | always += tracex4_kern.o |
41 | always += tracex5_kern.o | 43 | always += tracex5_kern.o |
42 | always += tracex6_kern.o | 44 | always += tracex6_kern.o |
45 | always += trace_output_kern.o | ||
43 | always += tcbpf1_kern.o | 46 | always += tcbpf1_kern.o |
44 | always += lathist_kern.o | 47 | always += lathist_kern.o |
45 | 48 | ||
@@ -55,6 +58,7 @@ HOSTLOADLIBES_tracex3 += -lelf | |||
55 | HOSTLOADLIBES_tracex4 += -lelf -lrt | 58 | HOSTLOADLIBES_tracex4 += -lelf -lrt |
56 | HOSTLOADLIBES_tracex5 += -lelf | 59 | HOSTLOADLIBES_tracex5 += -lelf |
57 | HOSTLOADLIBES_tracex6 += -lelf | 60 | HOSTLOADLIBES_tracex6 += -lelf |
61 | HOSTLOADLIBES_trace_output += -lelf -lrt | ||
58 | HOSTLOADLIBES_lathist += -lelf | 62 | HOSTLOADLIBES_lathist += -lelf |
59 | 63 | ||
60 | # point this to your LLVM backend with bpf support | 64 | # point this to your LLVM backend with bpf support |
@@ -64,3 +68,6 @@ $(obj)/%.o: $(src)/%.c | |||
64 | clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ | 68 | clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ |
65 | -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ | 69 | -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ |
66 | -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ | 70 | -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ |
71 | clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ | ||
72 | -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ | ||
73 | -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=asm -o $@.s | ||
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 21aa1b44c30c..b35c21e0b43f 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h | |||
@@ -37,6 +37,8 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = | |||
37 | (void *) BPF_FUNC_clone_redirect; | 37 | (void *) BPF_FUNC_clone_redirect; |
38 | static int (*bpf_redirect)(int ifindex, int flags) = | 38 | static int (*bpf_redirect)(int ifindex, int flags) = |
39 | (void *) BPF_FUNC_redirect; | 39 | (void *) BPF_FUNC_redirect; |
40 | static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) = | ||
41 | (void *) BPF_FUNC_perf_event_output; | ||
40 | 42 | ||
41 | /* llvm builtin functions that eBPF C program may use to | 43 | /* llvm builtin functions that eBPF C program may use to |
42 | * emit BPF_LD_ABS and BPF_LD_IND instructions | 44 | * emit BPF_LD_ABS and BPF_LD_IND instructions |
diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c new file mode 100644 index 000000000000..8d8d1ec429eb --- /dev/null +++ b/samples/bpf/trace_output_kern.c | |||
@@ -0,0 +1,31 @@ | |||
1 | #include <linux/ptrace.h> | ||
2 | #include <linux/version.h> | ||
3 | #include <uapi/linux/bpf.h> | ||
4 | #include "bpf_helpers.h" | ||
5 | |||
/* Perf event array map; bpf_prog1 passes it to bpf_perf_event_output()
 * as the destination for its samples.
 */
struct bpf_map_def SEC("maps") my_map = {
	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(u32),
	.max_entries = 2,
};
12 | |||
13 | SEC("kprobe/sys_write") | ||
14 | int bpf_prog1(struct pt_regs *ctx) | ||
15 | { | ||
16 | struct S { | ||
17 | u64 pid; | ||
18 | u64 cookie; | ||
19 | } data; | ||
20 | |||
21 | memset(&data, 0, sizeof(data)); | ||
22 | data.pid = bpf_get_current_pid_tgid(); | ||
23 | data.cookie = 0x12345678; | ||
24 | |||
25 | bpf_perf_event_output(ctx, &my_map, 0, &data, sizeof(data)); | ||
26 | |||
27 | return 0; | ||
28 | } | ||
29 | |||
/* License string and kernel version code, tagged with SEC("license") and
 * SEC("version") respectively.
 */
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c new file mode 100644 index 000000000000..661a7d052f2c --- /dev/null +++ b/samples/bpf/trace_output_user.c | |||
@@ -0,0 +1,196 @@ | |||
1 | /* This program is free software; you can redistribute it and/or | ||
2 | * modify it under the terms of version 2 of the GNU General Public | ||
3 | * License as published by the Free Software Foundation. | ||
4 | */ | ||
5 | #include <stdio.h> | ||
6 | #include <unistd.h> | ||
7 | #include <stdlib.h> | ||
8 | #include <stdbool.h> | ||
9 | #include <string.h> | ||
10 | #include <fcntl.h> | ||
11 | #include <poll.h> | ||
12 | #include <sys/ioctl.h> | ||
13 | #include <linux/perf_event.h> | ||
14 | #include <linux/bpf.h> | ||
15 | #include <errno.h> | ||
16 | #include <assert.h> | ||
17 | #include <sys/syscall.h> | ||
18 | #include <sys/ioctl.h> | ||
19 | #include <sys/mman.h> | ||
20 | #include <time.h> | ||
21 | #include <signal.h> | ||
22 | #include "libbpf.h" | ||
23 | #include "bpf_load.h" | ||
24 | |||
/* fd of the perf event opened in test_bpf_perf_event() */
static int pmu_fd;

/* page size as returned by getpagesize(); filled in by perf_event_mmap() */
int page_size;
/* number of data pages in the mapping; one extra page precedes them */
int page_cnt = 8;
/* start of the mmap()ed region; set by perf_event_mmap() */
volatile struct perf_event_mmap_page *header;

/* callback invoked by perf_event_read() with the payload of each
 * PERF_RECORD_SAMPLE record
 */
typedef void (*print_fn)(void *data, int size);
32 | |||
33 | static int perf_event_mmap(int fd) | ||
34 | { | ||
35 | void *base; | ||
36 | int mmap_size; | ||
37 | |||
38 | page_size = getpagesize(); | ||
39 | mmap_size = page_size * (page_cnt + 1); | ||
40 | |||
41 | base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | ||
42 | if (base == MAP_FAILED) { | ||
43 | printf("mmap err\n"); | ||
44 | return -1; | ||
45 | } | ||
46 | |||
47 | header = base; | ||
48 | return 0; | ||
49 | } | ||
50 | |||
/* Wait up to one second for 'fd' to become readable.
 *
 * Returns whatever poll() returns: the number of ready descriptors,
 * 0 on timeout, or -1 on error.
 */
static int perf_event_poll(int fd)
{
	const int timeout_ms = 1000;
	struct pollfd waiter;

	waiter.fd = fd;
	waiter.events = POLLIN;
	waiter.revents = 0;

	return poll(&waiter, 1, timeout_ms);
}
57 | |||
/* Layout perf_event_read() uses to interpret a record in the ring buffer:
 * the generic perf header, a 32-bit payload size, then the payload bytes.
 */
struct perf_event_sample {
	struct perf_event_header header;
	__u32 size;
	char data[];
};
63 | |||
64 | void perf_event_read(print_fn fn) | ||
65 | { | ||
66 | __u64 data_tail = header->data_tail; | ||
67 | __u64 data_head = header->data_head; | ||
68 | __u64 buffer_size = page_cnt * page_size; | ||
69 | void *base, *begin, *end; | ||
70 | char buf[256]; | ||
71 | |||
72 | asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ | ||
73 | if (data_head == data_tail) | ||
74 | return; | ||
75 | |||
76 | base = ((char *)header) + page_size; | ||
77 | |||
78 | begin = base + data_tail % buffer_size; | ||
79 | end = base + data_head % buffer_size; | ||
80 | |||
81 | while (begin != end) { | ||
82 | struct perf_event_sample *e; | ||
83 | |||
84 | e = begin; | ||
85 | if (begin + e->header.size > base + buffer_size) { | ||
86 | long len = base + buffer_size - begin; | ||
87 | |||
88 | assert(len < e->header.size); | ||
89 | memcpy(buf, begin, len); | ||
90 | memcpy(buf + len, base, e->header.size - len); | ||
91 | e = (void *) buf; | ||
92 | begin = base + e->header.size - len; | ||
93 | } else if (begin + e->header.size == base + buffer_size) { | ||
94 | begin = base; | ||
95 | } else { | ||
96 | begin += e->header.size; | ||
97 | } | ||
98 | |||
99 | if (e->header.type == PERF_RECORD_SAMPLE) { | ||
100 | fn(e->data, e->size); | ||
101 | } else if (e->header.type == PERF_RECORD_LOST) { | ||
102 | struct { | ||
103 | struct perf_event_header header; | ||
104 | __u64 id; | ||
105 | __u64 lost; | ||
106 | } *lost = (void *) e; | ||
107 | printf("lost %lld events\n", lost->lost); | ||
108 | } else { | ||
109 | printf("unknown event type=%d size=%d\n", | ||
110 | e->header.type, e->header.size); | ||
111 | } | ||
112 | } | ||
113 | |||
114 | __sync_synchronize(); /* smp_mb() */ | ||
115 | header->data_tail = data_head; | ||
116 | } | ||
117 | |||
/* Return the current CLOCK_MONOTONIC reading expressed in nanoseconds. */
static __u64 time_get_ns(void)
{
	struct timespec now;
	__u64 ns;

	clock_gettime(CLOCK_MONOTONIC, &now);

	ns = now.tv_sec * 1000000000ull;
	ns += now.tv_nsec;
	return ns;
}
125 | |||
/* time_get_ns() value taken in main() right before the receive loop starts */
static __u64 start_time;

/* number of received events after which print_bpf_output() reports the rate */
#define MAX_CNT 100000ll
129 | |||
130 | static void print_bpf_output(void *data, int size) | ||
131 | { | ||
132 | static __u64 cnt; | ||
133 | struct { | ||
134 | __u64 pid; | ||
135 | __u64 cookie; | ||
136 | } *e = data; | ||
137 | |||
138 | if (e->cookie != 0x12345678) { | ||
139 | printf("BUG pid %llx cookie %llx sized %d\n", | ||
140 | e->pid, e->cookie, size); | ||
141 | kill(0, SIGINT); | ||
142 | } | ||
143 | |||
144 | cnt++; | ||
145 | |||
146 | if (cnt == MAX_CNT) { | ||
147 | printf("recv %lld events per sec\n", | ||
148 | MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); | ||
149 | kill(0, SIGINT); | ||
150 | } | ||
151 | } | ||
152 | |||
153 | static void test_bpf_perf_event(void) | ||
154 | { | ||
155 | struct perf_event_attr attr = { | ||
156 | .sample_type = PERF_SAMPLE_RAW, | ||
157 | .type = PERF_TYPE_SOFTWARE, | ||
158 | .config = PERF_COUNT_SW_BPF_OUTPUT, | ||
159 | }; | ||
160 | int key = 0; | ||
161 | |||
162 | pmu_fd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); | ||
163 | |||
164 | assert(pmu_fd >= 0); | ||
165 | assert(bpf_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0); | ||
166 | ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); | ||
167 | } | ||
168 | |||
169 | int main(int argc, char **argv) | ||
170 | { | ||
171 | char filename[256]; | ||
172 | FILE *f; | ||
173 | |||
174 | snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); | ||
175 | |||
176 | if (load_bpf_file(filename)) { | ||
177 | printf("%s", bpf_log_buf); | ||
178 | return 1; | ||
179 | } | ||
180 | |||
181 | test_bpf_perf_event(); | ||
182 | |||
183 | if (perf_event_mmap(pmu_fd) < 0) | ||
184 | return 1; | ||
185 | |||
186 | f = popen("taskset 1 dd if=/dev/zero of=/dev/null", "r"); | ||
187 | (void) f; | ||
188 | |||
189 | start_time = time_get_ns(); | ||
190 | for (;;) { | ||
191 | perf_event_poll(pmu_fd); | ||
192 | perf_event_read(print_bpf_output); | ||
193 | } | ||
194 | |||
195 | return 0; | ||
196 | } | ||