diff options
author | Alexei Starovoitov <ast@fb.com> | 2017-03-15 21:26:44 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-03-16 23:44:12 -0400 |
commit | 95ff141e52f84f476fcde50560f42d4f118539c0 (patch) | |
tree | f35cf082e9d34087212bff37049f505389b38d57 /samples | |
parent | 9015d2f5953590e8273392b44c2b0f864350b427 (diff) |
samples/bpf: add map_lookup microbenchmark
$ map_perf_test 128
speed of HASH bpf_map_lookup_elem() in lookups per second
w/o JIT w/JIT
before 46M 58M
after 42M 74M
perf report
before:
54.23% map_perf_test [kernel.kallsyms] [k] __htab_map_lookup_elem
14.24% map_perf_test [kernel.kallsyms] [k] lookup_elem_raw
8.84% map_perf_test [kernel.kallsyms] [k] htab_map_lookup_elem
5.93% map_perf_test [kernel.kallsyms] [k] bpf_map_lookup_elem
2.30% map_perf_test [kernel.kallsyms] [k] bpf_prog_da4fc6a3f41761a2
1.49% map_perf_test [kernel.kallsyms] [k] kprobe_ftrace_handler
after:
60.03% map_perf_test [kernel.kallsyms] [k] __htab_map_lookup_elem
18.07% map_perf_test [kernel.kallsyms] [k] lookup_elem_raw
2.91% map_perf_test [kernel.kallsyms] [k] bpf_prog_da4fc6a3f41761a2
1.94% map_perf_test [kernel.kallsyms] [k] _einittext
1.90% map_perf_test [kernel.kallsyms] [k] __audit_syscall_exit
1.72% map_perf_test [kernel.kallsyms] [k] kprobe_ftrace_handler
Notice that bpf_map_lookup_elem() and htab_map_lookup_elem() are trivial
functions, yet they take a sizeable amount of cpu time.
htab_map_gen_lookup() removes the call to bpf_map_lookup_elem() and converts
htab_map_lookup_elem() into three BPF insns, which causes the cpu time
for bpf_prog_da4fc6a3f41761a2() to increase slightly.
$ map_perf_test 256
speed of ARRAY bpf_map_lookup_elem() in lookups per second
w/o JIT w/JIT
before 97M 174M
after 64M 280M
before:
37.33% map_perf_test [kernel.kallsyms] [k] array_map_lookup_elem
13.95% map_perf_test [kernel.kallsyms] [k] bpf_map_lookup_elem
6.54% map_perf_test [kernel.kallsyms] [k] bpf_prog_da4fc6a3f41761a2
4.57% map_perf_test [kernel.kallsyms] [k] kprobe_ftrace_handler
after:
32.86% map_perf_test [kernel.kallsyms] [k] bpf_prog_da4fc6a3f41761a2
6.54% map_perf_test [kernel.kallsyms] [k] kprobe_ftrace_handler
array_map_gen_lookup() removes calls to array_map_lookup_elem()
and bpf_map_lookup_elem() and replaces them with 7 bpf insns.
Without JIT the lookups are slower, since executing the extra insns
in the interpreter is slower than running native C code,
but with JIT the performance gains are obvious,
since the native C->x86 code is replaced with fewer bpf->x86 instructions.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples')
-rw-r--r-- | samples/bpf/map_perf_test_kern.c | 33 | ||||
-rw-r--r-- | samples/bpf/map_perf_test_user.c | 32 |
2 files changed, 65 insertions, 0 deletions
diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index a91872a97742..9da2a3441b0a 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c | |||
@@ -65,6 +65,13 @@ struct bpf_map_def SEC("maps") lpm_trie_map_alloc = { | |||
65 | .map_flags = BPF_F_NO_PREALLOC, | 65 | .map_flags = BPF_F_NO_PREALLOC, |
66 | }; | 66 | }; |
67 | 67 | ||
68 | struct bpf_map_def SEC("maps") array_map = { | ||
69 | .type = BPF_MAP_TYPE_ARRAY, | ||
70 | .key_size = sizeof(u32), | ||
71 | .value_size = sizeof(long), | ||
72 | .max_entries = MAX_ENTRIES, | ||
73 | }; | ||
74 | |||
68 | SEC("kprobe/sys_getuid") | 75 | SEC("kprobe/sys_getuid") |
69 | int stress_hmap(struct pt_regs *ctx) | 76 | int stress_hmap(struct pt_regs *ctx) |
70 | { | 77 | { |
@@ -165,5 +172,31 @@ int stress_lpm_trie_map_alloc(struct pt_regs *ctx) | |||
165 | return 0; | 172 | return 0; |
166 | } | 173 | } |
167 | 174 | ||
175 | SEC("kprobe/sys_getpgid") | ||
176 | int stress_hash_map_lookup(struct pt_regs *ctx) | ||
177 | { | ||
178 | u32 key = 1, i; | ||
179 | long *value; | ||
180 | |||
181 | #pragma clang loop unroll(full) | ||
182 | for (i = 0; i < 64; ++i) | ||
183 | value = bpf_map_lookup_elem(&hash_map, &key); | ||
184 | |||
185 | return 0; | ||
186 | } | ||
187 | |||
188 | SEC("kprobe/sys_getpgrp") | ||
189 | int stress_array_map_lookup(struct pt_regs *ctx) | ||
190 | { | ||
191 | u32 key = 1, i; | ||
192 | long *value; | ||
193 | |||
194 | #pragma clang loop unroll(full) | ||
195 | for (i = 0; i < 64; ++i) | ||
196 | value = bpf_map_lookup_elem(&array_map, &key); | ||
197 | |||
198 | return 0; | ||
199 | } | ||
200 | |||
168 | char _license[] SEC("license") = "GPL"; | 201 | char _license[] SEC("license") = "GPL"; |
169 | u32 _version SEC("version") = LINUX_VERSION_CODE; | 202 | u32 _version SEC("version") = LINUX_VERSION_CODE; |
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 680260a91f50..e29ff318a793 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c | |||
@@ -38,6 +38,8 @@ static __u64 time_get_ns(void) | |||
38 | #define LRU_HASH_PREALLOC (1 << 4) | 38 | #define LRU_HASH_PREALLOC (1 << 4) |
39 | #define PERCPU_LRU_HASH_PREALLOC (1 << 5) | 39 | #define PERCPU_LRU_HASH_PREALLOC (1 << 5) |
40 | #define LPM_KMALLOC (1 << 6) | 40 | #define LPM_KMALLOC (1 << 6) |
41 | #define HASH_LOOKUP (1 << 7) | ||
42 | #define ARRAY_LOOKUP (1 << 8) | ||
41 | 43 | ||
42 | static int test_flags = ~0; | 44 | static int test_flags = ~0; |
43 | 45 | ||
@@ -125,6 +127,30 @@ static void test_lpm_kmalloc(int cpu) | |||
125 | cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); | 127 | cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); |
126 | } | 128 | } |
127 | 129 | ||
130 | static void test_hash_lookup(int cpu) | ||
131 | { | ||
132 | __u64 start_time; | ||
133 | int i; | ||
134 | |||
135 | start_time = time_get_ns(); | ||
136 | for (i = 0; i < MAX_CNT; i++) | ||
137 | syscall(__NR_getpgid, 0); | ||
138 | printf("%d:hash_lookup %lld lookups per sec\n", | ||
139 | cpu, MAX_CNT * 1000000000ll * 64 / (time_get_ns() - start_time)); | ||
140 | } | ||
141 | |||
142 | static void test_array_lookup(int cpu) | ||
143 | { | ||
144 | __u64 start_time; | ||
145 | int i; | ||
146 | |||
147 | start_time = time_get_ns(); | ||
148 | for (i = 0; i < MAX_CNT; i++) | ||
149 | syscall(__NR_getpgrp, 0); | ||
150 | printf("%d:array_lookup %lld lookups per sec\n", | ||
151 | cpu, MAX_CNT * 1000000000ll * 64 / (time_get_ns() - start_time)); | ||
152 | } | ||
153 | |||
128 | static void loop(int cpu) | 154 | static void loop(int cpu) |
129 | { | 155 | { |
130 | cpu_set_t cpuset; | 156 | cpu_set_t cpuset; |
@@ -153,6 +179,12 @@ static void loop(int cpu) | |||
153 | 179 | ||
154 | if (test_flags & LPM_KMALLOC) | 180 | if (test_flags & LPM_KMALLOC) |
155 | test_lpm_kmalloc(cpu); | 181 | test_lpm_kmalloc(cpu); |
182 | |||
183 | if (test_flags & HASH_LOOKUP) | ||
184 | test_hash_lookup(cpu); | ||
185 | |||
186 | if (test_flags & ARRAY_LOOKUP) | ||
187 | test_array_lookup(cpu); | ||
156 | } | 188 | } |
157 | 189 | ||
158 | static void run_perf_test(int tasks) | 190 | static void run_perf_test(int tasks) |