diff options
author | Alexei Starovoitov <ast@fb.com> | 2016-09-01 21:37:25 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-09-02 13:46:45 -0400 |
commit | 1c47910ef80135ac89e4d0b471d123572cee5535 (patch) | |
tree | 95066aef056b16a4e5e477c7f277cc69db418e5c /samples | |
parent | aa6a5f3cb2b2edc5b9aab0b4fdfdfa9c3b5096a8 (diff) |
samples/bpf: add perf_event+bpf example
The bpf program is called 50 times a second and does hashmap[kern&user_stackid]++
Its primary purpose is to check that key bpf helpers like map lookup, update,
get_stackid, trace_printk and ctx access are all working.
It checks:
- PERF_COUNT_HW_CPU_CYCLES on all cpus
- PERF_COUNT_HW_CPU_CYCLES for current process and inherited perf_events to children
- PERF_COUNT_SW_CPU_CLOCK on all cpus
- PERF_COUNT_SW_CPU_CLOCK for current process
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples')
-rw-r--r-- | samples/bpf/Makefile | 4 | ||||
-rw-r--r-- | samples/bpf/bpf_helpers.h | 2 | ||||
-rw-r--r-- | samples/bpf/bpf_load.c | 7 | ||||
-rw-r--r-- | samples/bpf/trace_event_kern.c | 65 | ||||
-rw-r--r-- | samples/bpf/trace_event_user.c | 213 |
5 files changed, 290 insertions, 1 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index db3cb061bfcd..a69cf9045285 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile | |||
@@ -25,6 +25,7 @@ hostprogs-y += test_cgrp2_array_pin | |||
25 | hostprogs-y += xdp1 | 25 | hostprogs-y += xdp1 |
26 | hostprogs-y += xdp2 | 26 | hostprogs-y += xdp2 |
27 | hostprogs-y += test_current_task_under_cgroup | 27 | hostprogs-y += test_current_task_under_cgroup |
28 | hostprogs-y += trace_event | ||
28 | 29 | ||
29 | test_verifier-objs := test_verifier.o libbpf.o | 30 | test_verifier-objs := test_verifier.o libbpf.o |
30 | test_maps-objs := test_maps.o libbpf.o | 31 | test_maps-objs := test_maps.o libbpf.o |
@@ -52,6 +53,7 @@ xdp1-objs := bpf_load.o libbpf.o xdp1_user.o | |||
52 | xdp2-objs := bpf_load.o libbpf.o xdp1_user.o | 53 | xdp2-objs := bpf_load.o libbpf.o xdp1_user.o |
53 | test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \ | 54 | test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \ |
54 | test_current_task_under_cgroup_user.o | 55 | test_current_task_under_cgroup_user.o |
56 | trace_event-objs := bpf_load.o libbpf.o trace_event_user.o | ||
55 | 57 | ||
56 | # Tell kbuild to always build the programs | 58 | # Tell kbuild to always build the programs |
57 | always := $(hostprogs-y) | 59 | always := $(hostprogs-y) |
@@ -79,6 +81,7 @@ always += test_cgrp2_tc_kern.o | |||
79 | always += xdp1_kern.o | 81 | always += xdp1_kern.o |
80 | always += xdp2_kern.o | 82 | always += xdp2_kern.o |
81 | always += test_current_task_under_cgroup_kern.o | 83 | always += test_current_task_under_cgroup_kern.o |
84 | always += trace_event_kern.o | ||
82 | 85 | ||
83 | HOSTCFLAGS += -I$(objtree)/usr/include | 86 | HOSTCFLAGS += -I$(objtree)/usr/include |
84 | 87 | ||
@@ -103,6 +106,7 @@ HOSTLOADLIBES_test_overhead += -lelf -lrt | |||
103 | HOSTLOADLIBES_xdp1 += -lelf | 106 | HOSTLOADLIBES_xdp1 += -lelf |
104 | HOSTLOADLIBES_xdp2 += -lelf | 107 | HOSTLOADLIBES_xdp2 += -lelf |
105 | HOSTLOADLIBES_test_current_task_under_cgroup += -lelf | 108 | HOSTLOADLIBES_test_current_task_under_cgroup += -lelf |
109 | HOSTLOADLIBES_trace_event += -lelf | ||
106 | 110 | ||
107 | # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: | 111 | # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: |
108 | # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang | 112 | # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang |
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index bbdf62a1e45e..90f44bd2045e 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h | |||
@@ -55,6 +55,8 @@ static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) = | |||
55 | (void *) BPF_FUNC_skb_get_tunnel_opt; | 55 | (void *) BPF_FUNC_skb_get_tunnel_opt; |
56 | static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) = | 56 | static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) = |
57 | (void *) BPF_FUNC_skb_set_tunnel_opt; | 57 | (void *) BPF_FUNC_skb_set_tunnel_opt; |
58 | static unsigned long long (*bpf_get_prandom_u32)(void) = | ||
59 | (void *) BPF_FUNC_get_prandom_u32; | ||
58 | 60 | ||
59 | /* llvm builtin functions that eBPF C program may use to | 61 | /* llvm builtin functions that eBPF C program may use to |
60 | * emit BPF_LD_ABS and BPF_LD_IND instructions | 62 | * emit BPF_LD_ABS and BPF_LD_IND instructions |
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 0cfda2320320..97913e109b14 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c | |||
@@ -51,6 +51,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) | |||
51 | bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; | 51 | bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; |
52 | bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; | 52 | bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; |
53 | bool is_xdp = strncmp(event, "xdp", 3) == 0; | 53 | bool is_xdp = strncmp(event, "xdp", 3) == 0; |
54 | bool is_perf_event = strncmp(event, "perf_event", 10) == 0; | ||
54 | enum bpf_prog_type prog_type; | 55 | enum bpf_prog_type prog_type; |
55 | char buf[256]; | 56 | char buf[256]; |
56 | int fd, efd, err, id; | 57 | int fd, efd, err, id; |
@@ -69,6 +70,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) | |||
69 | prog_type = BPF_PROG_TYPE_TRACEPOINT; | 70 | prog_type = BPF_PROG_TYPE_TRACEPOINT; |
70 | } else if (is_xdp) { | 71 | } else if (is_xdp) { |
71 | prog_type = BPF_PROG_TYPE_XDP; | 72 | prog_type = BPF_PROG_TYPE_XDP; |
73 | } else if (is_perf_event) { | ||
74 | prog_type = BPF_PROG_TYPE_PERF_EVENT; | ||
72 | } else { | 75 | } else { |
73 | printf("Unknown event '%s'\n", event); | 76 | printf("Unknown event '%s'\n", event); |
74 | return -1; | 77 | return -1; |
@@ -82,7 +85,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) | |||
82 | 85 | ||
83 | prog_fd[prog_cnt++] = fd; | 86 | prog_fd[prog_cnt++] = fd; |
84 | 87 | ||
85 | if (is_xdp) | 88 | if (is_xdp || is_perf_event) |
86 | return 0; | 89 | return 0; |
87 | 90 | ||
88 | if (is_socket) { | 91 | if (is_socket) { |
@@ -326,6 +329,7 @@ int load_bpf_file(char *path) | |||
326 | memcmp(shname_prog, "kretprobe/", 10) == 0 || | 329 | memcmp(shname_prog, "kretprobe/", 10) == 0 || |
327 | memcmp(shname_prog, "tracepoint/", 11) == 0 || | 330 | memcmp(shname_prog, "tracepoint/", 11) == 0 || |
328 | memcmp(shname_prog, "xdp", 3) == 0 || | 331 | memcmp(shname_prog, "xdp", 3) == 0 || |
332 | memcmp(shname_prog, "perf_event", 10) == 0 || | ||
329 | memcmp(shname_prog, "socket", 6) == 0) | 333 | memcmp(shname_prog, "socket", 6) == 0) |
330 | load_and_attach(shname_prog, insns, data_prog->d_size); | 334 | load_and_attach(shname_prog, insns, data_prog->d_size); |
331 | } | 335 | } |
@@ -344,6 +348,7 @@ int load_bpf_file(char *path) | |||
344 | memcmp(shname, "kretprobe/", 10) == 0 || | 348 | memcmp(shname, "kretprobe/", 10) == 0 || |
345 | memcmp(shname, "tracepoint/", 11) == 0 || | 349 | memcmp(shname, "tracepoint/", 11) == 0 || |
346 | memcmp(shname, "xdp", 3) == 0 || | 350 | memcmp(shname, "xdp", 3) == 0 || |
351 | memcmp(shname, "perf_event", 10) == 0 || | ||
347 | memcmp(shname, "socket", 6) == 0) | 352 | memcmp(shname, "socket", 6) == 0) |
348 | load_and_attach(shname, data->d_buf, data->d_size); | 353 | load_and_attach(shname, data->d_buf, data->d_size); |
349 | } | 354 | } |
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c new file mode 100644 index 000000000000..71a8ed32823e --- /dev/null +++ b/samples/bpf/trace_event_kern.c | |||
@@ -0,0 +1,65 @@ | |||
1 | /* Copyright (c) 2016 Facebook | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or | ||
4 | * modify it under the terms of version 2 of the GNU General Public | ||
5 | * License as published by the Free Software Foundation. | ||
6 | */ | ||
7 | #include <linux/ptrace.h> | ||
8 | #include <linux/version.h> | ||
9 | #include <uapi/linux/bpf.h> | ||
10 | #include <uapi/linux/bpf_perf_event.h> | ||
11 | #include <uapi/linux/perf_event.h> | ||
12 | #include "bpf_helpers.h" | ||
13 | |||
14 | struct key_t { | ||
15 | char comm[TASK_COMM_LEN]; | ||
16 | u32 kernstack; | ||
17 | u32 userstack; | ||
18 | }; | ||
19 | |||
20 | struct bpf_map_def SEC("maps") counts = { | ||
21 | .type = BPF_MAP_TYPE_HASH, | ||
22 | .key_size = sizeof(struct key_t), | ||
23 | .value_size = sizeof(u64), | ||
24 | .max_entries = 10000, | ||
25 | }; | ||
26 | |||
27 | struct bpf_map_def SEC("maps") stackmap = { | ||
28 | .type = BPF_MAP_TYPE_STACK_TRACE, | ||
29 | .key_size = sizeof(u32), | ||
30 | .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64), | ||
31 | .max_entries = 10000, | ||
32 | }; | ||
33 | |||
34 | #define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP) | ||
35 | #define USER_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK) | ||
36 | |||
37 | SEC("perf_event") | ||
38 | int bpf_prog1(struct bpf_perf_event_data *ctx) | ||
39 | { | ||
40 | char fmt[] = "CPU-%d period %lld ip %llx"; | ||
41 | u32 cpu = bpf_get_smp_processor_id(); | ||
42 | struct key_t key; | ||
43 | u64 *val, one = 1; | ||
44 | |||
45 | if (ctx->sample_period < 10000) | ||
46 | /* ignore warmup */ | ||
47 | return 0; | ||
48 | bpf_get_current_comm(&key.comm, sizeof(key.comm)); | ||
49 | key.kernstack = bpf_get_stackid(ctx, &stackmap, KERN_STACKID_FLAGS); | ||
50 | key.userstack = bpf_get_stackid(ctx, &stackmap, USER_STACKID_FLAGS); | ||
51 | if ((int)key.kernstack < 0 && (int)key.userstack < 0) { | ||
52 | bpf_trace_printk(fmt, sizeof(fmt), cpu, ctx->sample_period, | ||
53 | ctx->regs.ip); | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | val = bpf_map_lookup_elem(&counts, &key); | ||
58 | if (val) | ||
59 | (*val)++; | ||
60 | else | ||
61 | bpf_map_update_elem(&counts, &key, &one, BPF_NOEXIST); | ||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | char _license[] SEC("license") = "GPL"; | ||
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c new file mode 100644 index 000000000000..9a130d31ecf2 --- /dev/null +++ b/samples/bpf/trace_event_user.c | |||
@@ -0,0 +1,213 @@ | |||
1 | /* Copyright (c) 2016 Facebook | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or | ||
4 | * modify it under the terms of version 2 of the GNU General Public | ||
5 | * License as published by the Free Software Foundation. | ||
6 | */ | ||
7 | #include <stdio.h> | ||
8 | #include <unistd.h> | ||
9 | #include <stdlib.h> | ||
10 | #include <stdbool.h> | ||
11 | #include <string.h> | ||
12 | #include <fcntl.h> | ||
13 | #include <poll.h> | ||
14 | #include <sys/ioctl.h> | ||
15 | #include <linux/perf_event.h> | ||
16 | #include <linux/bpf.h> | ||
17 | #include <signal.h> | ||
18 | #include <assert.h> | ||
19 | #include <errno.h> | ||
20 | #include <sys/resource.h> | ||
21 | #include "libbpf.h" | ||
22 | #include "bpf_load.h" | ||
23 | |||
24 | #define SAMPLE_FREQ 50 | ||
25 | |||
26 | static bool sys_read_seen, sys_write_seen; | ||
27 | |||
28 | static void print_ksym(__u64 addr) | ||
29 | { | ||
30 | struct ksym *sym; | ||
31 | |||
32 | if (!addr) | ||
33 | return; | ||
34 | sym = ksym_search(addr); | ||
35 | printf("%s;", sym->name); | ||
36 | if (!strcmp(sym->name, "sys_read")) | ||
37 | sys_read_seen = true; | ||
38 | else if (!strcmp(sym->name, "sys_write")) | ||
39 | sys_write_seen = true; | ||
40 | } | ||
41 | |||
/* Print a non-zero user-space address in hex followed by ';'; skip zero. */
static void print_addr(__u64 addr)
{
	if (addr)
		printf("%llx;", addr);
}
48 | |||
/* size of the task name buffer stored in the map key */
#define TASK_COMM_LEN 16

/*
 * User-space view of the key of the "counts" map; the layout has to match
 * struct key_t in trace_event_kern.c.
 */
struct key_t {
	char comm[TASK_COMM_LEN];
	__u32 kernstack;
	__u32 userstack;
};
56 | |||
57 | static void print_stack(struct key_t *key, __u64 count) | ||
58 | { | ||
59 | __u64 ip[PERF_MAX_STACK_DEPTH] = {}; | ||
60 | static bool warned; | ||
61 | int i; | ||
62 | |||
63 | printf("%3lld %s;", count, key->comm); | ||
64 | if (bpf_lookup_elem(map_fd[1], &key->kernstack, ip) != 0) { | ||
65 | printf("---;"); | ||
66 | } else { | ||
67 | for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) | ||
68 | print_ksym(ip[i]); | ||
69 | } | ||
70 | printf("-;"); | ||
71 | if (bpf_lookup_elem(map_fd[1], &key->userstack, ip) != 0) { | ||
72 | printf("---;"); | ||
73 | } else { | ||
74 | for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) | ||
75 | print_addr(ip[i]); | ||
76 | } | ||
77 | printf("\n"); | ||
78 | |||
79 | if (key->kernstack == -EEXIST && !warned) { | ||
80 | printf("stackmap collisions seen. Consider increasing size\n"); | ||
81 | warned = true; | ||
82 | } else if ((int)key->kernstack < 0 && (int)key->userstack < 0) { | ||
83 | printf("err stackid %d %d\n", key->kernstack, key->userstack); | ||
84 | } | ||
85 | } | ||
86 | |||
/*
 * SIGINT handler and common exit path: SIGKILL everything in the caller's
 * process group (kill() with pid 0), which includes the child forked in
 * main() to read the trace pipe, then exit with status 0.
 */
static void int_exit(int sig)
{
	(void)sig;	/* the signal number is not used */

	kill(0, SIGKILL);
	exit(0);
}
92 | |||
93 | static void print_stacks(void) | ||
94 | { | ||
95 | struct key_t key = {}, next_key; | ||
96 | __u64 value; | ||
97 | __u32 stackid = 0, next_id; | ||
98 | int fd = map_fd[0], stack_map = map_fd[1]; | ||
99 | |||
100 | sys_read_seen = sys_write_seen = false; | ||
101 | while (bpf_get_next_key(fd, &key, &next_key) == 0) { | ||
102 | bpf_lookup_elem(fd, &next_key, &value); | ||
103 | print_stack(&next_key, value); | ||
104 | bpf_delete_elem(fd, &next_key); | ||
105 | key = next_key; | ||
106 | } | ||
107 | |||
108 | if (!sys_read_seen || !sys_write_seen) { | ||
109 | printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n"); | ||
110 | int_exit(0); | ||
111 | } | ||
112 | |||
113 | /* clear stack map */ | ||
114 | while (bpf_get_next_key(stack_map, &stackid, &next_id) == 0) { | ||
115 | bpf_delete_elem(stack_map, &next_id); | ||
116 | stackid = next_id; | ||
117 | } | ||
118 | } | ||
119 | |||
120 | static void test_perf_event_all_cpu(struct perf_event_attr *attr) | ||
121 | { | ||
122 | int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); | ||
123 | int *pmu_fd = malloc(nr_cpus * sizeof(int)); | ||
124 | int i; | ||
125 | |||
126 | /* open perf_event on all cpus */ | ||
127 | for (i = 0; i < nr_cpus; i++) { | ||
128 | pmu_fd[i] = perf_event_open(attr, -1, i, -1, 0); | ||
129 | if (pmu_fd[i] < 0) { | ||
130 | printf("perf_event_open failed\n"); | ||
131 | goto all_cpu_err; | ||
132 | } | ||
133 | assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); | ||
134 | assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0); | ||
135 | } | ||
136 | system("dd if=/dev/zero of=/dev/null count=5000k"); | ||
137 | print_stacks(); | ||
138 | all_cpu_err: | ||
139 | for (i--; i >= 0; i--) | ||
140 | close(pmu_fd[i]); | ||
141 | free(pmu_fd); | ||
142 | } | ||
143 | |||
144 | static void test_perf_event_task(struct perf_event_attr *attr) | ||
145 | { | ||
146 | int pmu_fd; | ||
147 | |||
148 | /* open task bound event */ | ||
149 | pmu_fd = perf_event_open(attr, 0, -1, -1, 0); | ||
150 | if (pmu_fd < 0) { | ||
151 | printf("perf_event_open failed\n"); | ||
152 | return; | ||
153 | } | ||
154 | assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); | ||
155 | assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0) == 0); | ||
156 | system("dd if=/dev/zero of=/dev/null count=5000k"); | ||
157 | print_stacks(); | ||
158 | close(pmu_fd); | ||
159 | } | ||
160 | |||
161 | static void test_bpf_perf_event(void) | ||
162 | { | ||
163 | struct perf_event_attr attr_type_hw = { | ||
164 | .sample_freq = SAMPLE_FREQ, | ||
165 | .freq = 1, | ||
166 | .type = PERF_TYPE_HARDWARE, | ||
167 | .config = PERF_COUNT_HW_CPU_CYCLES, | ||
168 | .inherit = 1, | ||
169 | }; | ||
170 | struct perf_event_attr attr_type_sw = { | ||
171 | .sample_freq = SAMPLE_FREQ, | ||
172 | .freq = 1, | ||
173 | .type = PERF_TYPE_SOFTWARE, | ||
174 | .config = PERF_COUNT_SW_CPU_CLOCK, | ||
175 | .inherit = 1, | ||
176 | }; | ||
177 | |||
178 | test_perf_event_all_cpu(&attr_type_hw); | ||
179 | test_perf_event_task(&attr_type_hw); | ||
180 | test_perf_event_all_cpu(&attr_type_sw); | ||
181 | test_perf_event_task(&attr_type_sw); | ||
182 | } | ||
183 | |||
184 | |||
185 | int main(int argc, char **argv) | ||
186 | { | ||
187 | struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; | ||
188 | char filename[256]; | ||
189 | |||
190 | snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); | ||
191 | setrlimit(RLIMIT_MEMLOCK, &r); | ||
192 | |||
193 | signal(SIGINT, int_exit); | ||
194 | |||
195 | if (load_kallsyms()) { | ||
196 | printf("failed to process /proc/kallsyms\n"); | ||
197 | return 1; | ||
198 | } | ||
199 | |||
200 | if (load_bpf_file(filename)) { | ||
201 | printf("%s", bpf_log_buf); | ||
202 | return 2; | ||
203 | } | ||
204 | |||
205 | if (fork() == 0) { | ||
206 | read_trace_pipe(); | ||
207 | return 0; | ||
208 | } | ||
209 | test_bpf_perf_event(); | ||
210 | |||
211 | int_exit(0); | ||
212 | return 0; | ||
213 | } | ||