diff options
author | Alexei Starovoitov <ast@fb.com> | 2016-04-06 21:43:31 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-04-07 21:04:27 -0400 |
commit | e3edfdec04d43aa6276db639d3721e073161d2c2 (patch) | |
tree | c7516296b2081373659aa0dfe39e61fe7598b56e /samples/bpf | |
parent | 3c9b16448cf6924c203e3c01696c87fcbfb71fc6 (diff) |
samples/bpf: add tracepoint vs kprobe performance tests
the first microbenchmark does
fd=open("/proc/self/comm");
for() {
write(fd, "test");
}
and on 4 cpus in parallel:
writes per sec
base (no tracepoints, no kprobes) 930k
with kprobe at __set_task_comm() 420k
with tracepoint at task:task_rename 730k
For kprobe, the full bpf program manually fetches oldcomm and newcomm via bpf_probe_read.
For tracepoint, the bpf program does nothing, since the arguments are copied by the tracepoint.
2nd microbenchmark does:
fd=open("/dev/urandom");
for() {
read(fd, buf);
}
and on 4 cpus in parallel:
reads per sec
base (no tracepoints, no kprobes) 300k
with kprobe at urandom_read() 279k
with tracepoint at random:urandom_read 290k
bpf progs attached to kprobe and tracepoint are noop.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples/bpf')
-rw-r--r-- | samples/bpf/Makefile | 5 | ||||
-rw-r--r-- | samples/bpf/test_overhead_kprobe_kern.c | 41 | ||||
-rw-r--r-- | samples/bpf/test_overhead_tp_kern.c | 36 | ||||
-rw-r--r-- | samples/bpf/test_overhead_user.c | 162 |
4 files changed, 244 insertions, 0 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 502c9fc8db85..9959771bf808 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile | |||
@@ -19,6 +19,7 @@ hostprogs-y += lathist | |||
19 | hostprogs-y += offwaketime | 19 | hostprogs-y += offwaketime |
20 | hostprogs-y += spintest | 20 | hostprogs-y += spintest |
21 | hostprogs-y += map_perf_test | 21 | hostprogs-y += map_perf_test |
22 | hostprogs-y += test_overhead | ||
22 | 23 | ||
23 | test_verifier-objs := test_verifier.o libbpf.o | 24 | test_verifier-objs := test_verifier.o libbpf.o |
24 | test_maps-objs := test_maps.o libbpf.o | 25 | test_maps-objs := test_maps.o libbpf.o |
@@ -38,6 +39,7 @@ lathist-objs := bpf_load.o libbpf.o lathist_user.o | |||
38 | offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o | 39 | offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o |
39 | spintest-objs := bpf_load.o libbpf.o spintest_user.o | 40 | spintest-objs := bpf_load.o libbpf.o spintest_user.o |
40 | map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o | 41 | map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o |
42 | test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o | ||
41 | 43 | ||
42 | # Tell kbuild to always build the programs | 44 | # Tell kbuild to always build the programs |
43 | always := $(hostprogs-y) | 45 | always := $(hostprogs-y) |
@@ -56,6 +58,8 @@ always += lathist_kern.o | |||
56 | always += offwaketime_kern.o | 58 | always += offwaketime_kern.o |
57 | always += spintest_kern.o | 59 | always += spintest_kern.o |
58 | always += map_perf_test_kern.o | 60 | always += map_perf_test_kern.o |
61 | always += test_overhead_tp_kern.o | ||
62 | always += test_overhead_kprobe_kern.o | ||
59 | 63 | ||
60 | HOSTCFLAGS += -I$(objtree)/usr/include | 64 | HOSTCFLAGS += -I$(objtree)/usr/include |
61 | 65 | ||
@@ -75,6 +79,7 @@ HOSTLOADLIBES_lathist += -lelf | |||
75 | HOSTLOADLIBES_offwaketime += -lelf | 79 | HOSTLOADLIBES_offwaketime += -lelf |
76 | HOSTLOADLIBES_spintest += -lelf | 80 | HOSTLOADLIBES_spintest += -lelf |
77 | HOSTLOADLIBES_map_perf_test += -lelf -lrt | 81 | HOSTLOADLIBES_map_perf_test += -lelf -lrt |
82 | HOSTLOADLIBES_test_overhead += -lelf -lrt | ||
78 | 83 | ||
79 | # point this to your LLVM backend with bpf support | 84 | # point this to your LLVM backend with bpf support |
80 | LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc | 85 | LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc |
diff --git a/samples/bpf/test_overhead_kprobe_kern.c b/samples/bpf/test_overhead_kprobe_kern.c new file mode 100644 index 000000000000..468a66a92ef9 --- /dev/null +++ b/samples/bpf/test_overhead_kprobe_kern.c | |||
@@ -0,0 +1,41 @@ | |||
1 | /* Copyright (c) 2016 Facebook | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or | ||
4 | * modify it under the terms of version 2 of the GNU General Public | ||
5 | * License as published by the Free Software Foundation. | ||
6 | */ | ||
7 | #include <linux/version.h> | ||
8 | #include <linux/ptrace.h> | ||
9 | #include <uapi/linux/bpf.h> | ||
10 | #include "bpf_helpers.h" | ||
11 | |||
12 | #define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) | ||
13 | |||
14 | SEC("kprobe/__set_task_comm") | ||
15 | int prog(struct pt_regs *ctx) | ||
16 | { | ||
17 | struct signal_struct *signal; | ||
18 | struct task_struct *tsk; | ||
19 | char oldcomm[16] = {}; | ||
20 | char newcomm[16] = {}; | ||
21 | u16 oom_score_adj; | ||
22 | u32 pid; | ||
23 | |||
24 | tsk = (void *)PT_REGS_PARM1(ctx); | ||
25 | |||
26 | pid = _(tsk->pid); | ||
27 | bpf_probe_read(oldcomm, sizeof(oldcomm), &tsk->comm); | ||
28 | bpf_probe_read(newcomm, sizeof(newcomm), (void *)PT_REGS_PARM2(ctx)); | ||
29 | signal = _(tsk->signal); | ||
30 | oom_score_adj = _(signal->oom_score_adj); | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | SEC("kprobe/urandom_read") | ||
35 | int prog2(struct pt_regs *ctx) | ||
36 | { | ||
37 | return 0; | ||
38 | } | ||
39 | |||
40 | char _license[] SEC("license") = "GPL"; | ||
41 | u32 _version SEC("version") = LINUX_VERSION_CODE; | ||
diff --git a/samples/bpf/test_overhead_tp_kern.c b/samples/bpf/test_overhead_tp_kern.c new file mode 100644 index 000000000000..38f5c0b9da9f --- /dev/null +++ b/samples/bpf/test_overhead_tp_kern.c | |||
@@ -0,0 +1,36 @@ | |||
1 | /* Copyright (c) 2016 Facebook | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or | ||
4 | * modify it under the terms of version 2 of the GNU General Public | ||
5 | * License as published by the Free Software Foundation. | ||
6 | */ | ||
7 | #include <uapi/linux/bpf.h> | ||
8 | #include "bpf_helpers.h" | ||
9 | |||
10 | /* from /sys/kernel/debug/tracing/events/task/task_rename/format */ | ||
11 | struct task_rename { | ||
12 | __u64 pad; | ||
13 | __u32 pid; | ||
14 | char oldcomm[16]; | ||
15 | char newcomm[16]; | ||
16 | __u16 oom_score_adj; | ||
17 | }; | ||
18 | SEC("tracepoint/task/task_rename") | ||
19 | int prog(struct task_rename *ctx) | ||
20 | { | ||
21 | return 0; | ||
22 | } | ||
23 | |||
24 | /* from /sys/kernel/debug/tracing/events/random/urandom_read/format */ | ||
25 | struct urandom_read { | ||
26 | __u64 pad; | ||
27 | int got_bits; | ||
28 | int pool_left; | ||
29 | int input_left; | ||
30 | }; | ||
31 | SEC("tracepoint/random/urandom_read") | ||
32 | int prog2(struct urandom_read *ctx) | ||
33 | { | ||
34 | return 0; | ||
35 | } | ||
36 | char _license[] SEC("license") = "GPL"; | ||
diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c new file mode 100644 index 000000000000..d291167fd3c7 --- /dev/null +++ b/samples/bpf/test_overhead_user.c | |||
@@ -0,0 +1,162 @@ | |||
1 | /* Copyright (c) 2016 Facebook | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or | ||
4 | * modify it under the terms of version 2 of the GNU General Public | ||
5 | * License as published by the Free Software Foundation. | ||
6 | */ | ||
7 | #define _GNU_SOURCE | ||
8 | #include <sched.h> | ||
9 | #include <stdio.h> | ||
10 | #include <sys/types.h> | ||
11 | #include <asm/unistd.h> | ||
12 | #include <fcntl.h> | ||
13 | #include <unistd.h> | ||
14 | #include <assert.h> | ||
15 | #include <sys/wait.h> | ||
16 | #include <stdlib.h> | ||
17 | #include <signal.h> | ||
18 | #include <linux/bpf.h> | ||
19 | #include <string.h> | ||
20 | #include <time.h> | ||
21 | #include <sys/resource.h> | ||
22 | #include "libbpf.h" | ||
23 | #include "bpf_load.h" | ||
24 | |||
25 | #define MAX_CNT 1000000 | ||
26 | |||
27 | static __u64 time_get_ns(void) | ||
28 | { | ||
29 | struct timespec ts; | ||
30 | |||
31 | clock_gettime(CLOCK_MONOTONIC, &ts); | ||
32 | return ts.tv_sec * 1000000000ull + ts.tv_nsec; | ||
33 | } | ||
34 | |||
35 | static void test_task_rename(int cpu) | ||
36 | { | ||
37 | __u64 start_time; | ||
38 | char buf[] = "test\n"; | ||
39 | int i, fd; | ||
40 | |||
41 | fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); | ||
42 | if (fd < 0) { | ||
43 | printf("couldn't open /proc\n"); | ||
44 | exit(1); | ||
45 | } | ||
46 | start_time = time_get_ns(); | ||
47 | for (i = 0; i < MAX_CNT; i++) | ||
48 | write(fd, buf, sizeof(buf)); | ||
49 | printf("task_rename:%d: %lld events per sec\n", | ||
50 | cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); | ||
51 | close(fd); | ||
52 | } | ||
53 | |||
54 | static void test_urandom_read(int cpu) | ||
55 | { | ||
56 | __u64 start_time; | ||
57 | char buf[4]; | ||
58 | int i, fd; | ||
59 | |||
60 | fd = open("/dev/urandom", O_RDONLY); | ||
61 | if (fd < 0) { | ||
62 | printf("couldn't open /dev/urandom\n"); | ||
63 | exit(1); | ||
64 | } | ||
65 | start_time = time_get_ns(); | ||
66 | for (i = 0; i < MAX_CNT; i++) | ||
67 | read(fd, buf, sizeof(buf)); | ||
68 | printf("urandom_read:%d: %lld events per sec\n", | ||
69 | cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); | ||
70 | close(fd); | ||
71 | } | ||
72 | |||
/*
 * Body of one benchmark worker.
 *
 * @cpu:   CPU index placed in the affinity mask and passed on to the
 *         tests as their output label.
 * @flags: bit 0 selects test_task_rename(), bit 1 selects
 *         test_urandom_read(); other bits are ignored here.
 *
 * Requests affinity to @cpu via sched_setaffinity() (result not checked),
 * then runs the selected tests in order.
 */
static void loop(int cpu, int flags)
{
	cpu_set_t mask;
	int want_rename = flags & 1;
	int want_urandom = flags & 2;

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	sched_setaffinity(0, sizeof(mask), &mask);

	if (want_rename)
		test_task_rename(cpu);
	if (want_urandom)
		test_urandom_read(cpu);
}
86 | |||
/*
 * Fork @tasks worker processes, run loop(i, @flags) in the i-th one, and
 * wait for all of them to finish.
 *
 * @tasks: number of workers; worker i passes i to loop() as its CPU index.
 * @flags: test selection bits forwarded unchanged to loop().
 *
 * Exits with status 1 if a fork() fails.  Asserts that every child is
 * reaped and reports a zero wait status.
 */
static void run_perf_test(int tasks, int flags)
{
	/*
	 * A variable-length array must have a positive size; clamp the bound
	 * so that a zero or negative @tasks is not undefined behaviour (the
	 * loops below simply do not execute in that case).
	 */
	pid_t pid[tasks > 0 ? tasks : 1];
	int i;

	for (i = 0; i < tasks; i++) {
		pid[i] = fork();
		if (pid[i] == 0) {
			loop(i, flags);
			exit(0);
		} else if (pid[i] == -1) {
			printf("couldn't spawn #%d process\n", i);
			exit(1);
		}
	}
	for (i = 0; i < tasks; i++) {
		int status;
		pid_t reaped;

		/*
		 * Call waitpid() outside assert(): with NDEBUG defined the
		 * assert expression is not evaluated, and the children would
		 * be neither waited for nor reaped.
		 */
		reaped = waitpid(pid[i], &status, 0);
		assert(reaped == pid[i]);
		assert(status == 0);
		(void)reaped;	/* silence unused warning under NDEBUG */
	}
}
109 | |||
110 | static void unload_progs(void) | ||
111 | { | ||
112 | close(prog_fd[0]); | ||
113 | close(prog_fd[1]); | ||
114 | close(event_fd[0]); | ||
115 | close(event_fd[1]); | ||
116 | } | ||
117 | |||
118 | int main(int argc, char **argv) | ||
119 | { | ||
120 | struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; | ||
121 | char filename[256]; | ||
122 | int num_cpu = 8; | ||
123 | int test_flags = ~0; | ||
124 | |||
125 | setrlimit(RLIMIT_MEMLOCK, &r); | ||
126 | |||
127 | if (argc > 1) | ||
128 | test_flags = atoi(argv[1]) ? : test_flags; | ||
129 | if (argc > 2) | ||
130 | num_cpu = atoi(argv[2]) ? : num_cpu; | ||
131 | |||
132 | if (test_flags & 0x3) { | ||
133 | printf("BASE\n"); | ||
134 | run_perf_test(num_cpu, test_flags); | ||
135 | } | ||
136 | |||
137 | if (test_flags & 0xC) { | ||
138 | snprintf(filename, sizeof(filename), | ||
139 | "%s_kprobe_kern.o", argv[0]); | ||
140 | if (load_bpf_file(filename)) { | ||
141 | printf("%s", bpf_log_buf); | ||
142 | return 1; | ||
143 | } | ||
144 | printf("w/KPROBE\n"); | ||
145 | run_perf_test(num_cpu, test_flags >> 2); | ||
146 | unload_progs(); | ||
147 | } | ||
148 | |||
149 | if (test_flags & 0x30) { | ||
150 | snprintf(filename, sizeof(filename), | ||
151 | "%s_tp_kern.o", argv[0]); | ||
152 | if (load_bpf_file(filename)) { | ||
153 | printf("%s", bpf_log_buf); | ||
154 | return 1; | ||
155 | } | ||
156 | printf("w/TRACEPOINT\n"); | ||
157 | run_perf_test(num_cpu, test_flags >> 4); | ||
158 | unload_progs(); | ||
159 | } | ||
160 | |||
161 | return 0; | ||
162 | } | ||