summary | refs | log | tree | commit | diff | stats
path: root/samples
diff options
context:
space:
mode:
author: Alexei Starovoitov <ast@fb.com> 2017-03-15 21:26:44 -0400
committer: David S. Miller <davem@davemloft.net> 2017-03-16 23:44:12 -0400
commit: 95ff141e52f84f476fcde50560f42d4f118539c0 (patch)
tree: f35cf082e9d34087212bff37049f505389b38d57 /samples
parent: 9015d2f5953590e8273392b44c2b0f864350b427 (diff)
samples/bpf: add map_lookup microbenchmark
$ map_perf_test 128
speed of HASH bpf_map_lookup_elem() in lookups per second
         w/o JIT   w/JIT
before   46M       58M
after    42M       74M

perf report
before:
  54.23%  map_perf_test  [kernel.kallsyms]  [k] __htab_map_lookup_elem
  14.24%  map_perf_test  [kernel.kallsyms]  [k] lookup_elem_raw
   8.84%  map_perf_test  [kernel.kallsyms]  [k] htab_map_lookup_elem
   5.93%  map_perf_test  [kernel.kallsyms]  [k] bpf_map_lookup_elem
   2.30%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
   1.49%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler
after:
  60.03%  map_perf_test  [kernel.kallsyms]  [k] __htab_map_lookup_elem
  18.07%  map_perf_test  [kernel.kallsyms]  [k] lookup_elem_raw
   2.91%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
   1.94%  map_perf_test  [kernel.kallsyms]  [k] _einittext
   1.90%  map_perf_test  [kernel.kallsyms]  [k] __audit_syscall_exit
   1.72%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler

Notice that bpf_map_lookup_elem() and htab_map_lookup_elem() are trivial functions, yet they take a sizeable amount of CPU time. htab_map_gen_lookup() removes the call to bpf_map_lookup_elem() and converts htab_map_lookup_elem() into three BPF insns, which causes the CPU time for bpf_prog_da4fc6a3f41761a2() to increase slightly.

$ map_perf_test 256
speed of ARRAY bpf_map_lookup_elem() in lookups per second
         w/o JIT   w/JIT
before   97M       174M
after    64M       280M

before:
  37.33%  map_perf_test  [kernel.kallsyms]  [k] array_map_lookup_elem
  13.95%  map_perf_test  [kernel.kallsyms]  [k] bpf_map_lookup_elem
   6.54%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
   4.57%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler
after:
  32.86%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
   6.54%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler

array_map_gen_lookup() removes the calls to array_map_lookup_elem() and bpf_map_lookup_elem() and replaces them with 7 BPF insns.
Performance without JIT is lower, since executing the extra insns in the interpreter is slower than running native C code, but with JIT the performance gains are obvious, since native C->x86 code is replaced with fewer bpf->x86 instructions.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples')
-rw-r--r--  samples/bpf/map_perf_test_kern.c | 33
-rw-r--r--  samples/bpf/map_perf_test_user.c | 32
2 files changed, 65 insertions, 0 deletions
diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
index a91872a97742..9da2a3441b0a 100644
--- a/samples/bpf/map_perf_test_kern.c
+++ b/samples/bpf/map_perf_test_kern.c
@@ -65,6 +65,13 @@ struct bpf_map_def SEC("maps") lpm_trie_map_alloc = {
65 .map_flags = BPF_F_NO_PREALLOC, 65 .map_flags = BPF_F_NO_PREALLOC,
66}; 66};
67 67
68struct bpf_map_def SEC("maps") array_map = {
69 .type = BPF_MAP_TYPE_ARRAY,
70 .key_size = sizeof(u32),
71 .value_size = sizeof(long),
72 .max_entries = MAX_ENTRIES,
73};
74
68SEC("kprobe/sys_getuid") 75SEC("kprobe/sys_getuid")
69int stress_hmap(struct pt_regs *ctx) 76int stress_hmap(struct pt_regs *ctx)
70{ 77{
@@ -165,5 +172,31 @@ int stress_lpm_trie_map_alloc(struct pt_regs *ctx)
165 return 0; 172 return 0;
166} 173}
167 174
175SEC("kprobe/sys_getpgid")
176int stress_hash_map_lookup(struct pt_regs *ctx)
177{
178 u32 key = 1, i;
179 long *value;
180
181#pragma clang loop unroll(full)
182 for (i = 0; i < 64; ++i)
183 value = bpf_map_lookup_elem(&hash_map, &key);
184
185 return 0;
186}
187
188SEC("kprobe/sys_getpgrp")
189int stress_array_map_lookup(struct pt_regs *ctx)
190{
191 u32 key = 1, i;
192 long *value;
193
194#pragma clang loop unroll(full)
195 for (i = 0; i < 64; ++i)
196 value = bpf_map_lookup_elem(&array_map, &key);
197
198 return 0;
199}
200
168char _license[] SEC("license") = "GPL"; 201char _license[] SEC("license") = "GPL";
169u32 _version SEC("version") = LINUX_VERSION_CODE; 202u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 680260a91f50..e29ff318a793 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -38,6 +38,8 @@ static __u64 time_get_ns(void)
38#define LRU_HASH_PREALLOC (1 << 4) 38#define LRU_HASH_PREALLOC (1 << 4)
39#define PERCPU_LRU_HASH_PREALLOC (1 << 5) 39#define PERCPU_LRU_HASH_PREALLOC (1 << 5)
40#define LPM_KMALLOC (1 << 6) 40#define LPM_KMALLOC (1 << 6)
41#define HASH_LOOKUP (1 << 7)
42#define ARRAY_LOOKUP (1 << 8)
41 43
42static int test_flags = ~0; 44static int test_flags = ~0;
43 45
@@ -125,6 +127,30 @@ static void test_lpm_kmalloc(int cpu)
125 cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); 127 cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
126} 128}
127 129
130static void test_hash_lookup(int cpu)
131{
132 __u64 start_time;
133 int i;
134
135 start_time = time_get_ns();
136 for (i = 0; i < MAX_CNT; i++)
137 syscall(__NR_getpgid, 0);
138 printf("%d:hash_lookup %lld lookups per sec\n",
139 cpu, MAX_CNT * 1000000000ll * 64 / (time_get_ns() - start_time));
140}
141
142static void test_array_lookup(int cpu)
143{
144 __u64 start_time;
145 int i;
146
147 start_time = time_get_ns();
148 for (i = 0; i < MAX_CNT; i++)
149 syscall(__NR_getpgrp, 0);
150 printf("%d:array_lookup %lld lookups per sec\n",
151 cpu, MAX_CNT * 1000000000ll * 64 / (time_get_ns() - start_time));
152}
153
128static void loop(int cpu) 154static void loop(int cpu)
129{ 155{
130 cpu_set_t cpuset; 156 cpu_set_t cpuset;
@@ -153,6 +179,12 @@ static void loop(int cpu)
153 179
154 if (test_flags & LPM_KMALLOC) 180 if (test_flags & LPM_KMALLOC)
155 test_lpm_kmalloc(cpu); 181 test_lpm_kmalloc(cpu);
182
183 if (test_flags & HASH_LOOKUP)
184 test_hash_lookup(cpu);
185
186 if (test_flags & ARRAY_LOOKUP)
187 test_array_lookup(cpu);
156} 188}
157 189
158static void run_perf_test(int tasks) 190static void run_perf_test(int tasks)