aboutsummaryrefslogtreecommitdiffstats
path: root/samples/bpf
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@plumgrid.com>2015-10-20 23:02:35 -0400
committerDavid S. Miller <davem@davemloft.net>2015-10-22 09:42:15 -0400
commit39111695b1b88a21e81983a38786d877e770da81 (patch)
tree33c57207f5ed18a5408ed9c25ec0ab169ec0fb5d /samples/bpf
parenta43eec304259a6c637f4014a6d4767159b6a3aa3 (diff)
samples: bpf: add bpf_perf_event_output example
Performance test and example of bpf_perf_event_output(). kprobe is attached to sys_write() and trivial bpf program streams pid+cookie into userspace via PERF_COUNT_SW_BPF_OUTPUT event. Usage: $ sudo ./bld_x64/samples/bpf/trace_output recv 2968913 events per sec Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples/bpf')
-rw-r--r--samples/bpf/Makefile7
-rw-r--r--samples/bpf/bpf_helpers.h2
-rw-r--r--samples/bpf/trace_output_kern.c31
-rw-r--r--samples/bpf/trace_output_user.c196
4 files changed, 236 insertions, 0 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 63e7d50e6a4f..b30514514e37 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -13,6 +13,7 @@ hostprogs-y += tracex3
13hostprogs-y += tracex4 13hostprogs-y += tracex4
14hostprogs-y += tracex5 14hostprogs-y += tracex5
15hostprogs-y += tracex6 15hostprogs-y += tracex6
16hostprogs-y += trace_output
16hostprogs-y += lathist 17hostprogs-y += lathist
17 18
18test_verifier-objs := test_verifier.o libbpf.o 19test_verifier-objs := test_verifier.o libbpf.o
@@ -27,6 +28,7 @@ tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
27tracex4-objs := bpf_load.o libbpf.o tracex4_user.o 28tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
28tracex5-objs := bpf_load.o libbpf.o tracex5_user.o 29tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
29tracex6-objs := bpf_load.o libbpf.o tracex6_user.o 30tracex6-objs := bpf_load.o libbpf.o tracex6_user.o
31trace_output-objs := bpf_load.o libbpf.o trace_output_user.o
30lathist-objs := bpf_load.o libbpf.o lathist_user.o 32lathist-objs := bpf_load.o libbpf.o lathist_user.o
31 33
32# Tell kbuild to always build the programs 34# Tell kbuild to always build the programs
@@ -40,6 +42,7 @@ always += tracex3_kern.o
40always += tracex4_kern.o 42always += tracex4_kern.o
41always += tracex5_kern.o 43always += tracex5_kern.o
42always += tracex6_kern.o 44always += tracex6_kern.o
45always += trace_output_kern.o
43always += tcbpf1_kern.o 46always += tcbpf1_kern.o
44always += lathist_kern.o 47always += lathist_kern.o
45 48
@@ -55,6 +58,7 @@ HOSTLOADLIBES_tracex3 += -lelf
55HOSTLOADLIBES_tracex4 += -lelf -lrt 58HOSTLOADLIBES_tracex4 += -lelf -lrt
56HOSTLOADLIBES_tracex5 += -lelf 59HOSTLOADLIBES_tracex5 += -lelf
57HOSTLOADLIBES_tracex6 += -lelf 60HOSTLOADLIBES_tracex6 += -lelf
61HOSTLOADLIBES_trace_output += -lelf -lrt
58HOSTLOADLIBES_lathist += -lelf 62HOSTLOADLIBES_lathist += -lelf
59 63
60# point this to your LLVM backend with bpf support 64# point this to your LLVM backend with bpf support
@@ -64,3 +68,6 @@ $(obj)/%.o: $(src)/%.c
64 clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ 68 clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
65 -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ 69 -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
66 -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ 70 -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
71 clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
72 -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
73 -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=asm -o $@.s
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 21aa1b44c30c..b35c21e0b43f 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -37,6 +37,8 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
37 (void *) BPF_FUNC_clone_redirect; 37 (void *) BPF_FUNC_clone_redirect;
38static int (*bpf_redirect)(int ifindex, int flags) = 38static int (*bpf_redirect)(int ifindex, int flags) =
39 (void *) BPF_FUNC_redirect; 39 (void *) BPF_FUNC_redirect;
40static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) =
41 (void *) BPF_FUNC_perf_event_output;
40 42
41/* llvm builtin functions that eBPF C program may use to 43/* llvm builtin functions that eBPF C program may use to
42 * emit BPF_LD_ABS and BPF_LD_IND instructions 44 * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c
new file mode 100644
index 000000000000..8d8d1ec429eb
--- /dev/null
+++ b/samples/bpf/trace_output_kern.c
@@ -0,0 +1,31 @@
1#include <linux/ptrace.h>
2#include <linux/version.h>
3#include <uapi/linux/bpf.h>
4#include "bpf_helpers.h"
5
/* Perf-event array map: userspace stores a perf event FD into slot 0;
 * bpf_perf_event_output() below emits samples through that FD.
 * max_entries = 2 leaves room for a second CPU slot — TODO confirm intent.
 */
struct bpf_map_def SEC("maps") my_map = {
	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(u32),
	.max_entries = 2,
};
12
SEC("kprobe/sys_write")
int bpf_prog1(struct pt_regs *ctx)
{
	/* 16-byte record streamed to userspace; the fixed cookie lets the
	 * reader sanity-check each sample.
	 */
	struct S {
		u64 pid;
		u64 cookie;
	} data;

	/* memset (rather than an initializer) so no uninitialized stack
	 * bytes are passed out of the program.
	 */
	memset(&data, 0, sizeof(data));
	data.pid = bpf_get_current_pid_tgid();
	data.cookie = 0x12345678;

	/* index 0 selects slot 0 of my_map (the FD userspace stored there) */
	bpf_perf_event_output(ctx, &my_map, 0, &data, sizeof(data));

	return 0;
}
29
/* GPL license string is required to use GPL-only BPF helpers */
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c
new file mode 100644
index 000000000000..661a7d052f2c
--- /dev/null
+++ b/samples/bpf/trace_output_user.c
@@ -0,0 +1,196 @@
1/* This program is free software; you can redistribute it and/or
2 * modify it under the terms of version 2 of the GNU General Public
3 * License as published by the Free Software Foundation.
4 */
5#include <stdio.h>
6#include <unistd.h>
7#include <stdlib.h>
8#include <stdbool.h>
9#include <string.h>
10#include <fcntl.h>
11#include <poll.h>
12#include <sys/ioctl.h>
13#include <linux/perf_event.h>
14#include <linux/bpf.h>
15#include <errno.h>
16#include <assert.h>
17#include <sys/syscall.h>
18#include <sys/ioctl.h>
19#include <sys/mman.h>
20#include <time.h>
21#include <signal.h>
22#include "libbpf.h"
23#include "bpf_load.h"
24
/* FD of the perf event created in test_bpf_perf_event() */
static int pmu_fd;

int page_size;
int page_cnt = 8;	/* number of data pages in the perf mmap ring */
volatile struct perf_event_mmap_page *header;	/* ring control page */

/* callback invoked for each PERF_RECORD_SAMPLE payload */
typedef void (*print_fn)(void *data, int size);
32
33static int perf_event_mmap(int fd)
34{
35 void *base;
36 int mmap_size;
37
38 page_size = getpagesize();
39 mmap_size = page_size * (page_cnt + 1);
40
41 base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
42 if (base == MAP_FAILED) {
43 printf("mmap err\n");
44 return -1;
45 }
46
47 header = base;
48 return 0;
49}
50
/* Wait up to 1s for @fd to become readable; returns poll()'s result. */
static int perf_event_poll(int fd)
{
	struct pollfd p;

	p.fd = fd;
	p.events = POLLIN;
	p.revents = 0;

	return poll(&p, 1, 1000);
}
57
/* Layout of a PERF_RECORD_SAMPLE with PERF_SAMPLE_RAW: the raw payload
 * follows a 32-bit size field.
 */
struct perf_event_sample {
	struct perf_event_header header;
	__u32 size;
	char data[];
};
63
64void perf_event_read(print_fn fn)
65{
66 __u64 data_tail = header->data_tail;
67 __u64 data_head = header->data_head;
68 __u64 buffer_size = page_cnt * page_size;
69 void *base, *begin, *end;
70 char buf[256];
71
72 asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
73 if (data_head == data_tail)
74 return;
75
76 base = ((char *)header) + page_size;
77
78 begin = base + data_tail % buffer_size;
79 end = base + data_head % buffer_size;
80
81 while (begin != end) {
82 struct perf_event_sample *e;
83
84 e = begin;
85 if (begin + e->header.size > base + buffer_size) {
86 long len = base + buffer_size - begin;
87
88 assert(len < e->header.size);
89 memcpy(buf, begin, len);
90 memcpy(buf + len, base, e->header.size - len);
91 e = (void *) buf;
92 begin = base + e->header.size - len;
93 } else if (begin + e->header.size == base + buffer_size) {
94 begin = base;
95 } else {
96 begin += e->header.size;
97 }
98
99 if (e->header.type == PERF_RECORD_SAMPLE) {
100 fn(e->data, e->size);
101 } else if (e->header.type == PERF_RECORD_LOST) {
102 struct {
103 struct perf_event_header header;
104 __u64 id;
105 __u64 lost;
106 } *lost = (void *) e;
107 printf("lost %lld events\n", lost->lost);
108 } else {
109 printf("unknown event type=%d size=%d\n",
110 e->header.type, e->header.size);
111 }
112 }
113
114 __sync_synchronize(); /* smp_mb() */
115 header->data_tail = data_head;
116}
117
118static __u64 time_get_ns(void)
119{
120 struct timespec ts;
121
122 clock_gettime(CLOCK_MONOTONIC, &ts);
123 return ts.tv_sec * 1000000000ull + ts.tv_nsec;
124}
125
/* timestamp taken just before the receive loop starts */
static __u64 start_time;

#define MAX_CNT 100000ll

/* Per-sample callback for perf_event_read(): validates the payload
 * emitted by the kernel program and, after MAX_CNT samples, prints the
 * event rate and stops the whole process group (including the dd load
 * generator) via SIGINT.
 */
static void print_bpf_output(void *data, int size)
{
	static __u64 cnt;
	struct {
		__u64 pid;
		__u64 cookie;
	} *e = data;

	if (e->cookie != 0x12345678) {
		printf("BUG pid %llx cookie %llx sized %d\n",
		       e->pid, e->cookie, size);
		kill(0, SIGINT);
	}

	if (++cnt == MAX_CNT) {
		printf("recv %lld events per sec\n",
		       MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
		kill(0, SIGINT);
	}
}
152
153static void test_bpf_perf_event(void)
154{
155 struct perf_event_attr attr = {
156 .sample_type = PERF_SAMPLE_RAW,
157 .type = PERF_TYPE_SOFTWARE,
158 .config = PERF_COUNT_SW_BPF_OUTPUT,
159 };
160 int key = 0;
161
162 pmu_fd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
163
164 assert(pmu_fd >= 0);
165 assert(bpf_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0);
166 ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
167}
168
169int main(int argc, char **argv)
170{
171 char filename[256];
172 FILE *f;
173
174 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
175
176 if (load_bpf_file(filename)) {
177 printf("%s", bpf_log_buf);
178 return 1;
179 }
180
181 test_bpf_perf_event();
182
183 if (perf_event_mmap(pmu_fd) < 0)
184 return 1;
185
186 f = popen("taskset 1 dd if=/dev/zero of=/dev/null", "r");
187 (void) f;
188
189 start_time = time_get_ns();
190 for (;;) {
191 perf_event_poll(pmu_fd);
192 perf_event_read(print_bpf_output);
193 }
194
195 return 0;
196}