author		Alexei Starovoitov <ast@plumgrid.com>		2014-09-26 03:17:00 -0400
committer	David S. Miller <davem@davemloft.net>		2014-09-26 15:05:14 -0400
commit		09756af46893c18839062976c3252e93a1beeba7
tree		203642a5473496ecb6ff10cd22dba39b22ed5f0a	/kernel/bpf
parent		db20fd2b01087bdfbe30bce314a198eefedcc42e
bpf: expand BPF syscall with program load/unload
eBPF programs are similar to kernel modules. They are loaded by a user
process and automatically unloaded when that process exits. Each eBPF program
is a safe, run-to-completion set of instructions. The eBPF verifier statically
determines that the program terminates and is safe to execute.
The following syscall wrapper can be used to load the program:
int bpf_prog_load(enum bpf_prog_type prog_type,
                  const struct bpf_insn *insns, int insn_cnt,
                  const char *license)
{
        union bpf_attr attr = {
                .prog_type = prog_type,
                .insns = ptr_to_u64(insns),
                .insn_cnt = insn_cnt,
                .license = ptr_to_u64(license),
        };

        return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}
where 'insns' is an array of eBPF instructions and 'license' is a string
that must be GPL-compatible for the program to call helper functions marked gpl_only.
Upon successful load the syscall returns prog_fd.
Use close(prog_fd) to unload the program.
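To make the fd lifetime concrete, here is a hypothetical, self-contained user-space
caller of the wrapper above (it repeats the wrapper so the example stands alone).
The bpf() and ptr_to_u64() helpers are assumptions of this sketch: bpf() wraps
syscall(__NR_bpf, ...), which is only usable once the syscall is wired up for the
target architecture, and BPF_PROG_TYPE_UNSPEC is merely a stand-in, since
find_prog_type() rejects any type until one is registered via
bpf_register_prog_type() in later patches.

#define _GNU_SOURCE
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static uint64_t ptr_to_u64(const void *ptr)
{
        /* widen through unsigned long so 32-bit pointers convert cleanly */
        return (uint64_t)(unsigned long)ptr;
}

static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
        /* assumes __NR_bpf is defined for this architecture */
        return syscall(__NR_bpf, cmd, attr, size);
}

static int bpf_prog_load(enum bpf_prog_type prog_type,
                         const struct bpf_insn *insns, int insn_cnt,
                         const char *license)
{
        union bpf_attr attr = {
                .prog_type = prog_type,
                .insns = ptr_to_u64(insns),
                .insn_cnt = insn_cnt,
                .license = ptr_to_u64(license),
        };

        return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}

int main(void)
{
        /* trivial two-instruction program: r0 = 0; exit */
        struct bpf_insn insns[] = {
                { .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 0 },
                { .code = BPF_JMP | BPF_EXIT },
        };
        int prog_fd;

        prog_fd = bpf_prog_load(BPF_PROG_TYPE_UNSPEC, insns,
                                sizeof(insns) / sizeof(insns[0]), "GPL");
        if (prog_fd < 0)
                return 1;

        /* ... attach prog_fd to an event here ... */

        close(prog_fd); /* drop the fd; the program is freed once unused */
        return 0;
}

Because the program is represented by an anon-inode fd, unloading follows ordinary
file-descriptor semantics: the kernel frees the program once the last fd and the
last in-kernel reference are gone.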
User-space tests and examples follow in the later patches.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf')
-rw-r--r--	kernel/bpf/core.c	 29
-rw-r--r--	kernel/bpf/syscall.c	165
2 files changed, 180 insertions, 14 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 8b7002488251..f0c30c59b317 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -27,6 +27,7 @@
 #include <linux/random.h>
 #include <linux/moduleloader.h>
 #include <asm/unaligned.h>
+#include <linux/bpf.h>
 
 /* Registers */
 #define BPF_R0  regs[BPF_REG_0]
@@ -71,7 +72,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 {
        gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
                          gfp_extra_flags;
-       struct bpf_work_struct *ws;
+       struct bpf_prog_aux *aux;
        struct bpf_prog *fp;
 
        size = round_up(size, PAGE_SIZE);
@@ -79,14 +80,14 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
        if (fp == NULL)
                return NULL;
 
-       ws = kmalloc(sizeof(*ws), GFP_KERNEL | gfp_extra_flags);
-       if (ws == NULL) {
+       aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags);
+       if (aux == NULL) {
                vfree(fp);
                return NULL;
        }
 
        fp->pages = size / PAGE_SIZE;
-       fp->work = ws;
+       fp->aux = aux;
 
        return fp;
 }
@@ -110,10 +111,10 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
        memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
        fp->pages = size / PAGE_SIZE;
 
-       /* We keep fp->work from fp_old around in the new
+       /* We keep fp->aux from fp_old around in the new
         * reallocated structure.
         */
-       fp_old->work = NULL;
+       fp_old->aux = NULL;
        __bpf_prog_free(fp_old);
 }
 
@@ -123,7 +124,7 @@ EXPORT_SYMBOL_GPL(bpf_prog_realloc);
 
 void __bpf_prog_free(struct bpf_prog *fp)
 {
-       kfree(fp->work);
+       kfree(fp->aux);
        vfree(fp);
 }
 EXPORT_SYMBOL_GPL(__bpf_prog_free);
@@ -638,19 +639,19 @@ EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
 
 static void bpf_prog_free_deferred(struct work_struct *work)
 {
-       struct bpf_work_struct *ws;
+       struct bpf_prog_aux *aux;
 
-       ws = container_of(work, struct bpf_work_struct, work);
-       bpf_jit_free(ws->prog);
+       aux = container_of(work, struct bpf_prog_aux, work);
+       bpf_jit_free(aux->prog);
 }
 
 /* Free internal BPF program */
 void bpf_prog_free(struct bpf_prog *fp)
 {
-       struct bpf_work_struct *ws = fp->work;
+       struct bpf_prog_aux *aux = fp->aux;
 
-       INIT_WORK(&ws->work, bpf_prog_free_deferred);
-       ws->prog = fp;
-       schedule_work(&ws->work);
+       INIT_WORK(&aux->work, bpf_prog_free_deferred);
+       aux->prog = fp;
+       schedule_work(&aux->work);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_free);
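The bpf_prog_aux structure that replaces bpf_work_struct is defined in
include/linux/bpf.h and is therefore outside this diffstat (limited to
kernel/bpf). Pieced together from the accesses in core.c above and syscall.c
below, its fields would look roughly like the sketch here; treat it as an
approximation, not the verbatim header change.

/* Approximate shape of the per-program auxiliary data implied by this
 * patch; the authoritative definition lives in include/linux/bpf.h.
 */
struct bpf_prog_aux {
        atomic_t refcnt;                /* program lives while refcnt > 0 */
        bool is_gpl_compatible;         /* gates gpl_only helper functions */
        enum bpf_prog_type prog_type;   /* set by find_prog_type() */
        struct bpf_verifier_ops *ops;   /* per-type callbacks */
        struct bpf_map **used_maps;     /* maps referenced by the program */
        u32 used_map_cnt;
        struct bpf_prog *prog;          /* back-pointer for deferred free */
        struct work_struct work;        /* defers bpf_jit_free() to a workqueue */
};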
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f94349ecaf61..0afb4eaa1887 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -14,6 +14,8 @@
 #include <linux/slab.h>
 #include <linux/anon_inodes.h>
 #include <linux/file.h>
+#include <linux/license.h>
+#include <linux/filter.h>
 
 static LIST_HEAD(bpf_map_types);
 
@@ -334,6 +336,166 @@ err_put:
        return err;
 }
 
+static LIST_HEAD(bpf_prog_types);
+
+static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
+{
+       struct bpf_prog_type_list *tl;
+
+       list_for_each_entry(tl, &bpf_prog_types, list_node) {
+               if (tl->type == type) {
+                       prog->aux->ops = tl->ops;
+                       prog->aux->prog_type = type;
+                       return 0;
+               }
+       }
+       return -EINVAL;
+}
+
+void bpf_register_prog_type(struct bpf_prog_type_list *tl)
+{
+       list_add(&tl->list_node, &bpf_prog_types);
+}
+
+/* drop refcnt on maps used by eBPF program and free auxilary data */
+static void free_used_maps(struct bpf_prog_aux *aux)
+{
+       int i;
+
+       for (i = 0; i < aux->used_map_cnt; i++)
+               bpf_map_put(aux->used_maps[i]);
+
+       kfree(aux->used_maps);
+}
+
+void bpf_prog_put(struct bpf_prog *prog)
+{
+       if (atomic_dec_and_test(&prog->aux->refcnt)) {
+               free_used_maps(prog->aux);
+               bpf_prog_free(prog);
+       }
+}
+
+static int bpf_prog_release(struct inode *inode, struct file *filp)
+{
+       struct bpf_prog *prog = filp->private_data;
+
+       bpf_prog_put(prog);
+       return 0;
+}
+
+static const struct file_operations bpf_prog_fops = {
+       .release = bpf_prog_release,
+};
+
+static struct bpf_prog *get_prog(struct fd f)
+{
+       struct bpf_prog *prog;
+
+       if (!f.file)
+               return ERR_PTR(-EBADF);
+
+       if (f.file->f_op != &bpf_prog_fops) {
+               fdput(f);
+               return ERR_PTR(-EINVAL);
+       }
+
+       prog = f.file->private_data;
+
+       return prog;
+}
+
+/* called by sockets/tracing/seccomp before attaching program to an event
+ * pairs with bpf_prog_put()
+ */
+struct bpf_prog *bpf_prog_get(u32 ufd)
+{
+       struct fd f = fdget(ufd);
+       struct bpf_prog *prog;
+
+       prog = get_prog(f);
+
+       if (IS_ERR(prog))
+               return prog;
+
+       atomic_inc(&prog->aux->refcnt);
+       fdput(f);
+       return prog;
+}
+
+/* last field in 'union bpf_attr' used by this command */
+#define BPF_PROG_LOAD_LAST_FIELD license
+
+static int bpf_prog_load(union bpf_attr *attr)
+{
+       enum bpf_prog_type type = attr->prog_type;
+       struct bpf_prog *prog;
+       int err;
+       char license[128];
+       bool is_gpl;
+
+       if (CHECK_ATTR(BPF_PROG_LOAD))
+               return -EINVAL;
+
+       /* copy eBPF program license from user space */
+       if (strncpy_from_user(license, u64_to_ptr(attr->license),
+                             sizeof(license) - 1) < 0)
+               return -EFAULT;
+       license[sizeof(license) - 1] = 0;
+
+       /* eBPF programs must be GPL compatible to use GPL-ed functions */
+       is_gpl = license_is_gpl_compatible(license);
+
+       if (attr->insn_cnt >= BPF_MAXINSNS)
+               return -EINVAL;
+
+       /* plain bpf_prog allocation */
+       prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
+       if (!prog)
+               return -ENOMEM;
+
+       prog->len = attr->insn_cnt;
+
+       err = -EFAULT;
+       if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
+                          prog->len * sizeof(struct bpf_insn)) != 0)
+               goto free_prog;
+
+       prog->orig_prog = NULL;
+       prog->jited = false;
+
+       atomic_set(&prog->aux->refcnt, 1);
+       prog->aux->is_gpl_compatible = is_gpl;
+
+       /* find program type: socket_filter vs tracing_filter */
+       err = find_prog_type(type, prog);
+       if (err < 0)
+               goto free_prog;
+
+       /* run eBPF verifier */
+       /* err = bpf_check(prog, tb); */
+
+       if (err < 0)
+               goto free_used_maps;
+
+       /* eBPF program is ready to be JITed */
+       bpf_prog_select_runtime(prog);
+
+       err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC);
+
+       if (err < 0)
+               /* failed to allocate fd */
+               goto free_used_maps;
+
+       return err;
+
+free_used_maps:
+       free_used_maps(prog->aux);
+free_prog:
+       bpf_prog_free(prog);
+       return err;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
        union bpf_attr attr = {};
@@ -395,6 +557,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
        case BPF_MAP_GET_NEXT_KEY:
                err = map_get_next_key(&attr);
                break;
+       case BPF_PROG_LOAD:
+               err = bpf_prog_load(&attr);
+               break;
        default:
                err = -EINVAL;
                break;
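The comment above bpf_prog_get() spells out the intended pairing: a subsystem
(sockets, tracing, seccomp) resolves the fd to a program before attaching it to
an event and drops its reference on detach. The sketch below is a hypothetical
consumer written only to illustrate that pairing; struct some_event and the
attach point are invented for the example and are not part of this patch.

/* Hypothetical kernel-side consumer of the new API. */
struct some_event {
        struct bpf_prog *prog;
        /* ... */
};

static int example_attach_prog(struct some_event *ev, u32 prog_fd)
{
        struct bpf_prog *prog;

        prog = bpf_prog_get(prog_fd);   /* takes a reference on success */
        if (IS_ERR(prog))
                return PTR_ERR(prog);

        ev->prog = prog;                /* hold the reference while attached */
        return 0;
}

static void example_detach_prog(struct some_event *ev)
{
        bpf_prog_put(ev->prog);         /* last put frees maps and the program */
        ev->prog = NULL;
}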