author		Daniel Borkmann <daniel@iogearbox.net>	2016-09-08 20:45:31 -0400
committer	David S. Miller <davem@davemloft.net>	2016-09-09 22:36:04 -0400
commit		f3694e00123802d688180e7ae90b240669910e3c (patch)
tree		321a9b95e9df3e64adbc8340a5f63a778db69e70 /kernel
parent		374fb54eeaaa6b2cb82bca73a11273687bb2a96a (diff)
bpf: add BPF_CALL_x macros for declaring helpers
This work adds BPF_CALL_<n>() macros and converts all the eBPF helper functions to use them, in a similar fashion to the SYSCALL_DEFINE<n>() macros that are used today.

The motivation is to hide all the register handling and the necessary casts from the user, so that this is done automatically in the background when a helper is declared with BPF_CALL_<n>(). This makes the current helpers easier to review, makes future helpers easier to write, avoids getting the casting mess wrong, and allows all helpers to be extended at once (e.g. build time checks, etc). It also makes it easier to spot in code reviews when a register that should be unused is accidentally used, which would break compatibility with existing programs.

BPF_CALL_<n>() internals are quite similar to the SYSCALL_DEFINE<n>() ones, with some fundamental differences; for example, when generating the actual helper function that carries all u64 regs, we need to fill in the unused regs so that we always end up with five u64 regs as arguments.

I reviewed the .i results of several generated 0-5 argument BPF_CALL_<n>() variants and they all look as expected. No sparse issues were spotted. We also let this sit for a few days with Fengguang's kbuild test robot, and no issues were seen. On s390, it barked on the "uses dynamic stack allocation" notice, which is an old one from bpf_perf_event_output{,_tp}() reappearing here due to the conversion to the call wrapper, just telling that the perf raw record/frag sits on the stack (gcc with s390's -mwarn-dynamicstack), but that's all. Various runtime tests were fine as well. All eBPF helpers are now converted to use these macros, getting rid of a good chunk of the raw castings.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
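To make the conversion pattern concrete, below is a minimal, userspace-compilable sketch of the idea. The DEMO_BPF_CALL_2() and demo_key_len() names are hypothetical and for illustration only; the real BPF_CALL_0()..BPF_CALL_5() macros added by this patch live in include/linux/filter.h and are generated generically for 0-5 arguments. The point is the same: the outer function keeps the raw five-u64 signature that the eBPF calling convention requires, while the helper body is written against typed parameters, so all casts happen in one place.

/* Hypothetical demo of the BPF_CALL_<n>() wrapper idea, not the kernel macro. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint64_t u64;

#define DEMO_BPF_CALL_2(name, t1, a1, t2, a2)				\
	static u64 ____##name(t1 a1, t2 a2);				\
	u64 name(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)		\
	{								\
		/* unused regs stay part of the ABI */			\
		(void) r3; (void) r4; (void) r5;			\
		/* raw-register casts are written once, here */		\
		return ____##name((t1)(unsigned long) r1,		\
				  (t2)(unsigned long) r2);		\
	}								\
	static u64 ____##name(t1 a1, t2 a2)

/* The helper body is then written against typed parameters: */
DEMO_BPF_CALL_2(demo_key_len, const char *, key, u64, flags)
{
	return strlen(key) + flags;
}

int main(void)
{
	const char *key = "example";

	/* callers (interpreter/JIT) only ever see the raw u64 form */
	printf("%llu\n", (unsigned long long)
	       demo_key_len((u64)(unsigned long) key, 2, 0, 0, 0));
	return 0;
}

Compiled with plain gcc this prints 9 (strlen("example") + 2); in the kernel the same split is what lets BPF_CALL_<n>() helpers such as bpf_map_lookup_elem() in the diff below drop their hand-written casts.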
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/bpf/core.c	2
-rw-r--r--	kernel/bpf/helpers.c	46
-rw-r--r--	kernel/bpf/stackmap.c	5
-rw-r--r--	kernel/trace/bpf_trace.c	75
4 files changed, 51 insertions(+), 77 deletions(-)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 03fd23d4d587..7b7baaed9ed4 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1018,7 +1018,7 @@ void bpf_user_rnd_init_once(void)
 	prandom_init_once(&bpf_user_rnd_state);
 }
 
-u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_user_rnd_u32)
 {
 	/* Should someone ever have the rather unwise idea to use some
 	 * of the registers passed into this function, then note that
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 6df73bd1ba34..a5b8bf8cfcfd 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -16,6 +16,7 @@
 #include <linux/ktime.h>
 #include <linux/sched.h>
 #include <linux/uidgid.h>
+#include <linux/filter.h>
 
 /* If kernel subsystem is allowing eBPF programs to call this function,
  * inside its own verifier_ops->get_func_proto() callback it should return
@@ -26,24 +27,10 @@
  * if program is allowed to access maps, so check rcu_read_lock_held in
  * all three functions.
  */
-static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
 {
-	/* verifier checked that R1 contains a valid pointer to bpf_map
-	 * and R2 points to a program stack and map->key_size bytes were
-	 * initialized
-	 */
-	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
-	void *key = (void *) (unsigned long) r2;
-	void *value;
-
 	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	value = map->ops->map_lookup_elem(map, key);
-
-	/* lookup() returns either pointer to element value or NULL
-	 * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type
-	 */
-	return (unsigned long) value;
+	return (unsigned long) map->ops->map_lookup_elem(map, key);
 }
 
 const struct bpf_func_proto bpf_map_lookup_elem_proto = {
@@ -54,15 +41,11 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto = {
 	.arg2_type = ARG_PTR_TO_MAP_KEY,
 };
 
-static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
+	   void *, value, u64, flags)
 {
-	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
-	void *key = (void *) (unsigned long) r2;
-	void *value = (void *) (unsigned long) r3;
-
 	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	return map->ops->map_update_elem(map, key, value, r4);
+	return map->ops->map_update_elem(map, key, value, flags);
 }
 
 const struct bpf_func_proto bpf_map_update_elem_proto = {
@@ -75,13 +58,9 @@ const struct bpf_func_proto bpf_map_update_elem_proto = {
 	.arg4_type = ARG_ANYTHING,
 };
 
-static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
 {
-	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
-	void *key = (void *) (unsigned long) r2;
-
 	WARN_ON_ONCE(!rcu_read_lock_held());
-
 	return map->ops->map_delete_elem(map, key);
 }
 
@@ -99,7 +78,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = {
 	.ret_type = RET_INTEGER,
 };
 
-static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_smp_processor_id)
 {
 	return smp_processor_id();
 }
@@ -110,7 +89,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
 	.ret_type = RET_INTEGER,
 };
 
-static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_ktime_get_ns)
 {
 	/* NMI safe access to clock monotonic */
 	return ktime_get_mono_fast_ns();
@@ -122,7 +101,7 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = {
 	.ret_type = RET_INTEGER,
 };
 
-static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_pid_tgid)
 {
 	struct task_struct *task = current;
 
@@ -138,7 +117,7 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
 	.ret_type = RET_INTEGER,
 };
 
-static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_uid_gid)
 {
 	struct task_struct *task = current;
 	kuid_t uid;
@@ -158,10 +137,9 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
 	.ret_type = RET_INTEGER,
 };
 
-static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
 {
 	struct task_struct *task = current;
-	char *buf = (char *) (long) r1;
 
 	if (unlikely(!task))
 		goto err_clear;
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index bf4495fcd25d..732ae16d12b7 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -116,10 +116,9 @@ free_smap:
 	return ERR_PTR(err);
 }
 
-u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
+BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
+	   u64, flags)
 {
-	struct pt_regs *regs = (struct pt_regs *) (long) r1;
-	struct bpf_map *map = (struct bpf_map *) (long) r2;
 	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
 	struct perf_callchain_entry *trace;
 	struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e63d7d435796..5dcb99281259 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -61,11 +61,9 @@ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
 }
 EXPORT_SYMBOL_GPL(trace_call_bpf);
 
-static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
 {
-	void *dst = (void *) (long) r1;
-	int ret, size = (int) r2;
-	void *unsafe_ptr = (void *) (long) r3;
+	int ret;
 
 	ret = probe_kernel_read(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
@@ -83,12 +81,9 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
 	.arg3_type = ARG_ANYTHING,
 };
 
-static u64 bpf_probe_write_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
+	   u32, size)
 {
-	void *unsafe_ptr = (void *) (long) r1;
-	void *src = (void *) (long) r2;
-	int size = (int) r3;
-
 	/*
 	 * Ensure we're in user context which is safe for the helper to
 	 * run. This helper has no business in a kthread.
@@ -130,9 +125,9 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
  * limited trace_printk()
  * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
  */
-static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
+BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
+	   u64, arg2, u64, arg3)
 {
-	char *fmt = (char *) (long) r1;
 	bool str_seen = false;
 	int mod[3] = {};
 	int fmt_cnt = 0;
@@ -178,16 +173,16 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
 
 			switch (fmt_cnt) {
 			case 1:
-				unsafe_addr = r3;
-				r3 = (long) buf;
+				unsafe_addr = arg1;
+				arg1 = (long) buf;
 				break;
 			case 2:
-				unsafe_addr = r4;
-				r4 = (long) buf;
+				unsafe_addr = arg2;
+				arg2 = (long) buf;
 				break;
 			case 3:
-				unsafe_addr = r5;
-				r5 = (long) buf;
+				unsafe_addr = arg3;
+				arg3 = (long) buf;
 				break;
 			}
 			buf[0] = 0;
@@ -209,9 +204,9 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
 	}
 
 	return __trace_printk(1/* fake ip will not be printed */, fmt,
-			      mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3,
-			      mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4,
-			      mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5);
+			      mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1,
+			      mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2,
+			      mod[2] == 2 ? arg3 : mod[2] == 1 ? (long) arg3 : (u32) arg3);
 }
 
 static const struct bpf_func_proto bpf_trace_printk_proto = {
@@ -233,9 +228,8 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 	return &bpf_trace_printk_proto;
 }
 
-static u64 bpf_perf_event_read(u64 r1, u64 flags, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
 {
-	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	unsigned int cpu = smp_processor_id();
 	u64 index = flags & BPF_F_INDEX_MASK;
@@ -312,11 +306,9 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
 	return 0;
 }
 
-static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
+BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
+	   u64, flags, void *, data, u64, size)
 {
-	struct pt_regs *regs = (struct pt_regs *)(long) r1;
-	struct bpf_map *map = (struct bpf_map *)(long) r2;
-	void *data = (void *)(long) r4;
 	struct perf_raw_record raw = {
 		.frag = {
 			.size = size,
@@ -367,7 +359,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 	return __bpf_perf_event_output(regs, map, flags, &raw);
 }
 
-static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_task)
 {
 	return (long) current;
 }
@@ -378,16 +370,13 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
 	.ret_type = RET_INTEGER,
 };
 
-static u64 bpf_current_task_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
 {
-	struct bpf_map *map = (struct bpf_map *)(long)r1;
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	struct cgroup *cgrp;
-	u32 idx = (u32)r2;
 
 	if (unlikely(in_interrupt()))
 		return -EINVAL;
-
 	if (unlikely(idx >= array->map.max_entries))
 		return -E2BIG;
 
@@ -481,16 +470,17 @@ static struct bpf_prog_type_list kprobe_tl = {
 	.type = BPF_PROG_TYPE_KPROBE,
 };
 
-static u64 bpf_perf_event_output_tp(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
+BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
+	   u64, flags, void *, data, u64, size)
 {
+	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
+
 	/*
 	 * r1 points to perf tracepoint buffer where first 8 bytes are hidden
 	 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
-	 * from there and call the same bpf_perf_event_output() helper
+	 * from there and call the same bpf_perf_event_output() helper inline.
 	 */
-	u64 ctx = *(long *)(uintptr_t)r1;
-
-	return bpf_perf_event_output(ctx, r2, index, r4, size);
+	return ____bpf_perf_event_output(regs, map, flags, data, size);
 }
 
 static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
@@ -504,11 +494,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
 	.arg5_type = ARG_CONST_STACK_SIZE,
 };
 
-static u64 bpf_get_stackid_tp(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
+	   u64, flags)
 {
-	u64 ctx = *(long *)(uintptr_t)r1;
+	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
 
-	return bpf_get_stackid(ctx, r2, r3, r4, r5);
+	/*
+	 * Same comment as in bpf_perf_event_output_tp(), only that this time
+	 * the other helper's function body cannot be inlined due to being
+	 * external, thus we need to call raw helper function.
+	 */
+	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
+			       flags, 0, 0);
 }
 
 static const struct bpf_func_proto bpf_get_stackid_proto_tp = {