author    David S. Miller <davem@davemloft.net>  2014-09-26 15:05:40 -0400
committer David S. Miller <davem@davemloft.net>  2014-09-26 15:05:40 -0400
commit    b4fc1a460f3017e958e6a8ea560ea0afd91bf6fe (patch)
tree      ad7927653cfca896fd60e2e7b2fb12750e46fd2e
parent    4a8e320c929991c9480a7b936512c57ea02d87b2 (diff)
parent    3c731eba48e1b0650decfc91a839b80f0e05ce8f (diff)
Merge branch 'bpf-next'
Alexei Starovoitov says:

====================
eBPF syscall, verifier, testsuite

v14 -> v15:
- got rid of macros with hidden control flow (suggested by David):
  replaced macro with explicit goto or return and simplified where
  possible (affected patches #9 and #10)
- rebased, retested

v13 -> v14:
- small change to 1st patch to ease 'new userspace with old kernel'
  problem (done similar to perf_copy_attr()) (suggested by Daniel)
- the rest unchanged

v12 -> v13:
- replaced 'foo __user *' pointers with __aligned_u64 (suggested by David)
- added __attribute__((aligned(8))) to 'union bpf_attr' to keep constant
  alignment between patches
- updated manpage and syscall wrappers due to __aligned_u64
- rebased, retested on x64 with 32-bit and 64-bit userspace and on i386,
  build tested on arm32, sparc64

v11 -> v12:
- dropped patch 11 and copied few macros to libbpf.h (suggested by Daniel)
- replaced 'enum bpf_prog_type' with u32 to be safe in compat (.. Andy)
- implemented and tested compat support (not part of this set) (.. Daniel)
- changed 'void *log_buf' to 'char *' (.. Daniel)
- combined struct bpf_work_struct and bpf_prog_info (.. Daniel)
- added better return value explanation to manpage (.. Andy)
- added log_buf/log_size explanation to manpage (.. Andy & Daniel)
- added a lot more info about prog_type and map_type to manpage (.. Andy)
- rebased, tweaked test_stubs

Patches 1-4 establish the BPF syscall shell for maps and programs.
Patches 5-10 add the verifier step by step.
Patch 11 adds test stubs for the 'unspec' program type and a verifier
testsuite run from user space.

Note that patches 1, 3, 4, 7 add commands and attributes to the syscall
while remaining backwards compatible with each other, which should
demonstrate how other commands can be added in the future.

After this set the programs can be loaded for testing only. They cannot
be attached to any events. Though the manpage talks about tracing and
sockets, those will be the subject of future patches.

Please take a look at the manpage:

BPF(2)                  Linux Programmer's Manual                  BPF(2)

NAME
    bpf - perform a command on eBPF map or program

SYNOPSIS
    #include <linux/bpf.h>

    int bpf(int cmd, union bpf_attr *attr, unsigned int size);

DESCRIPTION
    The bpf() syscall is a multiplexor for a range of different operations
    on eBPF, which can be characterized as a "universal in-kernel virtual
    machine". eBPF is similar to the original Berkeley Packet Filter (or
    "classic BPF") used to filter network packets. Both statically analyze
    the programs before loading them into the kernel to ensure that
    programs cannot harm the running system.

    eBPF extends classic BPF in multiple ways, including the ability to
    call in-kernel helper functions and to access shared data structures
    like eBPF maps. The programs can be written in a restricted C that is
    compiled into eBPF bytecode and executed on the eBPF virtual machine
    or JITed into the native instruction set.

  eBPF Design/Architecture

    eBPF maps are a generic storage of different types. A user process can
    create multiple maps (with key/value being opaque bytes of data) and
    access them via file descriptor. In parallel, eBPF programs can access
    maps from inside the kernel. It's up to the user process and the eBPF
    program to decide what they store inside maps.

    eBPF programs are similar to kernel modules. They are loaded by the
    user process and automatically unloaded when the process exits. Each
    eBPF program is a safe run-to-completion set of instructions. The eBPF
    verifier statically determines that the program terminates and is safe
    to execute.
    During verification the program takes a hold of the maps that it
    intends to use, so selected maps cannot be removed until the program
    is unloaded.

    The program can be attached to different events. These events can be
    packets, tracepoint events and other types in the future. A new event
    triggers execution of the program, which may store information about
    the event in the maps.

    Beyond storing data the programs may call into in-kernel helper
    functions which may, for example, dump stack, do trace_printk or other
    forms of live kernel debugging. The same program can be attached to
    multiple events. Different programs can access the same map:

      tracepoint  tracepoint  tracepoint    sk_buff    sk_buff
       event A     event B     event C      on eth0    on eth1
         |             |           |           |          |
         |             |           |           |          |
         --> tracing <--        tracing      socket     socket
              prog_1             prog_2      prog_3     prog_4
              |  |                  |           |          |
           |---  -----|         |-------|     map_3        |
         map_1       map_2

  Syscall Arguments

    bpf() syscall operation is determined by cmd, which can be one of the
    following:

    BPF_MAP_CREATE
        Create a map with given type and attributes and return map FD

    BPF_MAP_LOOKUP_ELEM
        Lookup element by key in a given map and return its value

    BPF_MAP_UPDATE_ELEM
        Create or update element (key/value pair) in a given map

    BPF_MAP_DELETE_ELEM
        Lookup and delete element by key in a given map

    BPF_MAP_GET_NEXT_KEY
        Lookup element by key in a given map and return key of next element

    BPF_PROG_LOAD
        Verify and load eBPF program

    attr is a pointer to a union of type bpf_attr as defined below.
    size is the size of the union.

    union bpf_attr {
        struct { /* anonymous struct used by BPF_MAP_CREATE command */
            __u32         map_type;
            __u32         key_size;    /* size of key in bytes */
            __u32         value_size;  /* size of value in bytes */
            __u32         max_entries; /* max number of entries in a map */
        };

        struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
            __u32         map_fd;
            __aligned_u64 key;
            union {
                __aligned_u64 value;
                __aligned_u64 next_key;
            };
        };

        struct { /* anonymous struct used by BPF_PROG_LOAD command */
            __u32         prog_type;
            __u32         insn_cnt;
            __aligned_u64 insns;      /* 'const struct bpf_insn *' */
            __aligned_u64 license;    /* 'const char *' */
            __u32         log_level;  /* verbosity level of eBPF verifier */
            __u32         log_size;   /* size of user buffer */
            __aligned_u64 log_buf;    /* user supplied 'char *' buffer */
        };
    } __attribute__((aligned(8)));

  eBPF maps

    maps is a generic storage of different types for sharing data between
    kernel and userspace. Any map type has the following attributes:
      . type
      . max number of elements
      . key size in bytes
      . value size in bytes

    The following wrapper functions demonstrate how this syscall can be
    used to access the maps. The functions use the cmd argument to invoke
    different operations.

    BPF_MAP_CREATE

        int bpf_create_map(enum bpf_map_type map_type, int key_size,
                           int value_size, int max_entries)
        {
            union bpf_attr attr = {
                .map_type = map_type,
                .key_size = key_size,
                .value_size = value_size,
                .max_entries = max_entries
            };

            return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
        }

    bpf() syscall creates a map of map_type type and given attributes
    key_size, value_size, max_entries. On success it returns a
    process-local file descriptor. On error, -1 is returned and errno is
    set to EINVAL or EPERM or ENOMEM.

    The attributes key_size and value_size will be used by the verifier
    during program loading to check that the program is calling
    bpf_map_*_elem() helper functions with a correctly initialized key and
    that the program doesn't access map element values beyond the
    specified value_size.
    For example, when a map is created with key_size = 8 and the program
    does:

        bpf_map_lookup_elem(map_fd, fp - 4)

    such a program will be rejected, since the in-kernel helper function
    bpf_map_lookup_elem(map_fd, void *key) expects to read 8 bytes from
    the 'key' pointer, but the 'fp - 4' starting address will cause an out
    of bounds stack access.

    Similarly, when a map is created with value_size = 1 and the program
    does:

        value = bpf_map_lookup_elem(...);
        *(u32 *)value = 1;

    such a program will be rejected, since it accesses the value pointer
    beyond the specified 1 byte value_size limit.

    Currently only the hash table map_type is supported:

        enum bpf_map_type {
            BPF_MAP_TYPE_UNSPEC,
            BPF_MAP_TYPE_HASH,
        };

    map_type selects one of the available map implementations in the
    kernel. For all map_types eBPF programs access maps with the same
    bpf_map_lookup_elem()/bpf_map_update_elem() helper functions.

    BPF_MAP_LOOKUP_ELEM

        int bpf_lookup_elem(int fd, void *key, void *value)
        {
            union bpf_attr attr = {
                .map_fd = fd,
                .key = ptr_to_u64(key),
                .value = ptr_to_u64(value),
            };

            return bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
        }

    bpf() syscall looks up an element with the given key in the map fd.
    If the element is found it returns zero and stores the element's value
    into value. If the element is not found it returns -1 and sets errno
    to ENOENT.

    BPF_MAP_UPDATE_ELEM

        int bpf_update_elem(int fd, void *key, void *value)
        {
            union bpf_attr attr = {
                .map_fd = fd,
                .key = ptr_to_u64(key),
                .value = ptr_to_u64(value),
            };

            return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
        }

    The call creates or updates the element with the given key/value in
    the map fd. On success it returns zero. On error, -1 is returned and
    errno is set to EINVAL or EPERM or ENOMEM or E2BIG. E2BIG indicates
    that the number of elements in the map reached the max_entries limit
    specified at map creation time.

    BPF_MAP_DELETE_ELEM

        int bpf_delete_elem(int fd, void *key)
        {
            union bpf_attr attr = {
                .map_fd = fd,
                .key = ptr_to_u64(key),
            };

            return bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
        }

    The call deletes the element in the map fd with the given key. It
    returns zero on success. If the element is not found it returns -1
    and sets errno to ENOENT.

    BPF_MAP_GET_NEXT_KEY

        int bpf_get_next_key(int fd, void *key, void *next_key)
        {
            union bpf_attr attr = {
                .map_fd = fd,
                .key = ptr_to_u64(key),
                .next_key = ptr_to_u64(next_key),
            };

            return bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
        }

    The call looks up an element by key in the given map fd and returns
    the key of the next element into the next_key pointer. If key is not
    found, it returns zero and returns the key of the first element into
    next_key. If key is the last element, it returns -1 and sets errno to
    ENOENT. Other possible errno values are ENOMEM, EFAULT, EPERM, EINVAL.
    This method can be used to iterate over all elements of the map.

    close(map_fd) will delete the map map_fd. An exiting process will
    delete all of its maps automatically.
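    The wrappers above call bpf() and ptr_to_u64() without defining them;
    glibc provides no wrapper for this syscall, so it has to be invoked
    through syscall(2). The sketch below is illustrative only (it is not
    part of the patch set): it assumes __NR_bpf is available from the
    patched headers and that the bpf_get_next_key()/bpf_lookup_elem()
    wrappers shown above are in scope, and it uses BPF_MAP_GET_NEXT_KEY to
    walk every element of a map.

        #include <stdio.h>
        #include <unistd.h>
        #include <sys/syscall.h>
        #include <linux/bpf.h>

        /* pack a user pointer into the __aligned_u64 fields of bpf_attr */
        static __u64 ptr_to_u64(const void *ptr)
        {
            return (__u64) (unsigned long) ptr;
        }

        /* no glibc wrapper exists; go through syscall(2) directly
         * (__NR_bpf is only defined once these patches are applied)
         */
        static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
        {
            return syscall(__NR_bpf, cmd, attr, size);
        }

        /* iterate over a map that (by assumption) uses 8-byte keys and
         * 8-byte values, reusing the wrapper functions defined above
         */
        static void dump_map(int map_fd)
        {
            long long key = -1;   /* any key not present in the map */
            long long next_key, value;

            while (bpf_get_next_key(map_fd, &key, &next_key) == 0) {
                if (bpf_lookup_elem(map_fd, &next_key, &value) == 0)
                    printf("key %lld -> value %lld\n", next_key, value);
                key = next_key;
            }
        }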
  eBPF programs

    BPF_PROG_LOAD
        This cmd is used to load an eBPF program into the kernel.

        char bpf_log_buf[LOG_BUF_SIZE];

        int bpf_prog_load(enum bpf_prog_type prog_type,
                          const struct bpf_insn *insns, int insn_cnt,
                          const char *license)
        {
            union bpf_attr attr = {
                .prog_type = prog_type,
                .insns = ptr_to_u64(insns),
                .insn_cnt = insn_cnt,
                .license = ptr_to_u64(license),
                .log_buf = ptr_to_u64(bpf_log_buf),
                .log_size = LOG_BUF_SIZE,
                .log_level = 1,
            };

            return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
        }

    prog_type is one of the available program types:

        enum bpf_prog_type {
            BPF_PROG_TYPE_UNSPEC,
            BPF_PROG_TYPE_SOCKET,
            BPF_PROG_TYPE_TRACING,
        };

    By picking prog_type the program author selects a set of helper
    functions callable from the eBPF program and the corresponding format
    of struct bpf_context (which is the data blob passed into the program
    as the first argument). For example, programs loaded with prog_type =
    TYPE_TRACING may call the bpf_printk() helper, whereas TYPE_SOCKET
    programs may not. The set of functions available to programs under a
    given type may increase in the future.

    Currently the set of functions for TYPE_TRACING is:

        bpf_map_lookup_elem(map_fd, void *key)               // lookup key in a map_fd
        bpf_map_update_elem(map_fd, void *key, void *value)  // update key/value
        bpf_map_delete_elem(map_fd, void *key)                // delete key in a map_fd
        bpf_ktime_get_ns(void)                                // returns current ktime
        bpf_printk(char *fmt, int fmt_size, ...)              // prints into trace buffer
        bpf_memcmp(void *ptr1, void *ptr2, int size)          // non-faulting memcmp
        bpf_fetch_ptr(void *ptr)   // non-faulting load pointer from any address
        bpf_fetch_u8(void *ptr)    // non-faulting 1 byte load
        bpf_fetch_u16(void *ptr)   // other non-faulting loads
        bpf_fetch_u32(void *ptr)
        bpf_fetch_u64(void *ptr)

    and bpf_context is defined as:

        struct bpf_context {
            /* argN fields match one to one to arguments passed to trace events */
            u64 arg1, arg2, arg3, arg4, arg5, arg6;
            /* return value from kretprobe event or from syscall_exit event */
            u64 ret;
        };

    The set of helper functions for TYPE_SOCKET is TBD.

    More program types may be added in the future, like
    BPF_PROG_TYPE_USER_TRACING for unprivileged programs.
    BPF_PROG_TYPE_UNSPEC is used for testing only. Such programs cannot be
    attached to events.

    insns      array of "struct bpf_insn" instructions

    insn_cnt   number of instructions in the program

    license    license string, which must be GPL compatible to call helper
               functions marked gpl_only

    log_buf    user supplied buffer that the in-kernel verifier uses to
               store the verification log. The log is a multi-line string
               that should be used by the program author to understand how
               the verifier came to the conclusion that the program is
               unsafe. The format of the output can change at any time as
               the verifier evolves.

    log_size   size of the user buffer. If the buffer is not large enough
               to store all verifier messages, -1 is returned and errno is
               set to ENOSPC.

    log_level  verbosity level of the eBPF verifier, where zero means no
               logs provided

    close(prog_fd) will unload the eBPF program.

    The maps are accessible from programs and generally tie the two
    together. Programs process various events (like tracepoints, kprobes,
    packets) and store the data into maps. User space fetches data from
    maps. Either the same or a different map may be used by user space as
    configuration space to alter program behavior on the fly.
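    When BPF_PROG_LOAD fails with EACCES, the verifier log is the only way
    to find out why. A minimal, illustrative sketch (not part of the patch
    set; it assumes the bpf_prog_load() wrapper and bpf_log_buf declared
    above) of loading a program and printing the log on rejection:

        #include <errno.h>
        #include <stdio.h>
        #include <string.h>
        #include <linux/bpf.h>

        static int load_or_explain(enum bpf_prog_type type,
                                   const struct bpf_insn *insns,
                                   int insn_cnt)
        {
            /* bpf_prog_load() above already passes log_level = 1, so the
             * verifier fills bpf_log_buf when it rejects the program
             */
            int prog_fd = bpf_prog_load(type, insns, insn_cnt, "GPL");

            if (prog_fd < 0) {
                fprintf(stderr, "BPF_PROG_LOAD failed: %s\n",
                        strerror(errno));
                /* multi-line trace of the verifier's reasoning */
                fprintf(stderr, "%s", bpf_log_buf);
            }
            return prog_fd;
        }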
  Events

    Once an eBPF program is loaded, it can be attached to an event.
    Various kernel subsystems have different ways to do so. For example:

        setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, sizeof(prog_fd));

    will attach the program prog_fd to the socket sock which was received
    by a prior call to socket().

        ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);

    will attach the program prog_fd to the perf event event_fd which was
    received by a prior call to perf_event_open().

    Another way to attach the program to a tracing event is:

        event_fd = open("/sys/kernel/debug/tracing/events/skb/kfree_skb/filter");
        write(event_fd, "bpf-123"); /* where 123 is eBPF program FD */
        /* here program is attached and will be triggered by events */
        close(event_fd); /* to detach from event */

EXAMPLES

    /* eBPF+sockets example:
     * 1. create map with maximum of 2 elements
     * 2. set map[6] = 0 and map[17] = 0
     * 3. load eBPF program that counts number of TCP and UDP packets
     *    received via map[skb->ip->proto]++
     * 4. attach prog_fd to raw socket via setsockopt()
     * 5. print number of received TCP/UDP packets every second
     */
    int main(int ac, char **av)
    {
        int sock, map_fd, prog_fd, key;
        long long value = 0, tcp_cnt, udp_cnt;

        map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key),
                                sizeof(value), 2);
        if (map_fd < 0) {
            printf("failed to create map '%s'\n", strerror(errno));
            /* likely not run as root */
            return 1;
        }

        key = 6; /* ip->proto == tcp */
        assert(bpf_update_elem(map_fd, &key, &value) == 0);

        key = 17; /* ip->proto == udp */
        assert(bpf_update_elem(map_fd, &key, &value) == 0);

        struct bpf_insn prog[] = {
            BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),          /* r6 = r1 */
            BPF_LD_ABS(BPF_B, 14 + 9),                    /* r0 = ip->proto */
            BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4),/* *(u32 *)(fp - 4) = r0 */
            BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),         /* r2 = fp */
            BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),        /* r2 = r2 - 4 */
            BPF_LD_MAP_FD(BPF_REG_1, map_fd),             /* r1 = map_fd */
            BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),      /* r0 = map_lookup(r1, r2) */
            BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),        /* if (r0 == 0) goto pc+2 */
            BPF_MOV64_IMM(BPF_REG_1, 1),                  /* r1 = 1 */
            BPF_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* lock *(u64 *)r0 += r1 */
            BPF_MOV64_IMM(BPF_REG_0, 0),                  /* r0 = 0 */
            BPF_EXIT_INSN(),                              /* return r0 */
        };

        prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET, prog, sizeof(prog),
                                "GPL");
        assert(prog_fd >= 0);

        sock = open_raw_sock("lo");

        assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd,
                          sizeof(prog_fd)) == 0);

        for (;;) {
            key = 6;
            assert(bpf_lookup_elem(map_fd, &key, &tcp_cnt) == 0);
            key = 17;
            assert(bpf_lookup_elem(map_fd, &key, &udp_cnt) == 0);
            printf("TCP %lld UDP %lld packets\n", tcp_cnt, udp_cnt);
            sleep(1);
        }

        return 0;
    }
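    The example calls open_raw_sock(), which is not defined in this page.
    A plausible sketch (an assumption for illustration, not part of the
    patch set; the samples added by this series carry their own version)
    is a packet socket bound to one interface:

        #include <string.h>
        #include <unistd.h>
        #include <arpa/inet.h>        /* htons() */
        #include <linux/if_ether.h>   /* ETH_P_ALL */
        #include <linux/if_packet.h>  /* struct sockaddr_ll */
        #include <net/if.h>           /* if_nametoindex() */
        #include <sys/socket.h>

        /* raw packet socket delivering all protocols seen on 'name' */
        static int open_raw_sock(const char *name)
        {
            struct sockaddr_ll sll;
            int sock;

            sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC,
                          htons(ETH_P_ALL));
            if (sock < 0)
                return -1;

            memset(&sll, 0, sizeof(sll));
            sll.sll_family = AF_PACKET;
            sll.sll_ifindex = if_nametoindex(name);
            sll.sll_protocol = htons(ETH_P_ALL);
            if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
                close(sock);
                return -1;
            }

            return sock;
        }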
RETURN VALUE

    For a successful call, the return value depends on the operation:

    BPF_MAP_CREATE
        The new file descriptor associated with the eBPF map.

    BPF_PROG_LOAD
        The new file descriptor associated with the eBPF program.

    All other commands
        Zero.

    On error, -1 is returned, and errno is set appropriately.

ERRORS

    EPERM  bpf() syscall was made without sufficient privilege (without
           the CAP_SYS_ADMIN capability).

    ENOMEM Cannot allocate sufficient memory.

    EBADF  fd is not an open file descriptor.

    EFAULT One of the pointers (key or value or log_buf or insns) is
           outside the accessible address space.

    EINVAL The value specified in cmd is not recognized by this kernel.

    EINVAL For BPF_MAP_CREATE, either map_type or attributes are invalid.

    EINVAL For BPF_MAP_*_ELEM commands, some of the fields of
           "union bpf_attr" unused by this command are not set to zero.

    EINVAL For BPF_PROG_LOAD, attempt to load an invalid program
           (unrecognized instruction, or uses reserved fields, or jumps
           out of range, or loop detected, or calls unknown function).

    EACCES For BPF_PROG_LOAD, though the program has valid instructions,
           it was rejected, since it was deemed unsafe (may access a
           disallowed memory region or uninitialized stack/register, or
           function constraints don't match actual types, or misaligned
           access). In such a case it is recommended to call bpf() again
           with log_level = 1 and examine log_buf for the specific reason
           provided by the verifier.

    ENOENT For BPF_MAP_LOOKUP_ELEM or BPF_MAP_DELETE_ELEM, indicates that
           the element with the given key was not found.

    E2BIG  The program is too large or a map reached the max_entries limit
           (max number of elements).

NOTES
    These commands may be used only by a privileged process (one having
    the CAP_SYS_ADMIN capability).

SEE ALSO
    The eBPF architecture and instruction set are explained in
    Documentation/networking/filter.txt

Linux                          2014-09-16                          BPF(2)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  Documentation/networking/filter.txt    263
-rw-r--r--  arch/x86/syscalls/syscall_32.tbl         1
-rw-r--r--  arch/x86/syscalls/syscall_64.tbl         1
-rw-r--r--  include/linux/bpf.h                    136
-rw-r--r--  include/linux/filter.h                  14
-rw-r--r--  include/linux/syscalls.h                 3
-rw-r--r--  include/uapi/asm-generic/unistd.h        4
-rw-r--r--  include/uapi/linux/bpf.h                90
-rw-r--r--  kernel/bpf/Makefile                      6
-rw-r--r--  kernel/bpf/core.c                       29
-rw-r--r--  kernel/bpf/syscall.c                   606
-rw-r--r--  kernel/bpf/test_stub.c                 116
-rw-r--r--  kernel/bpf/verifier.c                 1777
-rw-r--r--  kernel/sys_ni.c                          3
-rw-r--r--  lib/Kconfig.debug                        3
-rw-r--r--  samples/bpf/Makefile                    12
-rw-r--r--  samples/bpf/libbpf.c                    94
-rw-r--r--  samples/bpf/libbpf.h                   172
-rw-r--r--  samples/bpf/test_verifier.c            548
19 files changed, 3854 insertions, 24 deletions
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 014e0319a5c4..5ce4d07406a5 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -1001,6 +1001,269 @@ instruction that loads 64-bit immediate value into a dst_reg.
1001Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads 1001Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads
100232-bit immediate value into a register. 100232-bit immediate value into a register.
1003 1003
1004eBPF verifier
1005-------------
1006The safety of the eBPF program is determined in two steps.
1007
1008First step does DAG check to disallow loops and other CFG validation.
1009In particular it will detect programs that have unreachable instructions.
1010(though classic BPF checker allows them)
1011
1012Second step starts from the first insn and descends all possible paths.
1013It simulates execution of every insn and observes the state change of
1014registers and stack.
1015
1016At the start of the program the register R1 contains a pointer to context
1017and has type PTR_TO_CTX.
1018If verifier sees an insn that does R2=R1, then R2 has now type
1019PTR_TO_CTX as well and can be used on the right hand side of expression.
1020If R1=PTR_TO_CTX and insn is R2=R1+R1, then R2=UNKNOWN_VALUE,
1021since addition of two valid pointers makes invalid pointer.
1022(In 'secure' mode verifier will reject any type of pointer arithmetic to make
1023sure that kernel addresses don't leak to unprivileged users)
1024
1025If register was never written to, it's not readable:
1026 bpf_mov R0 = R2
1027 bpf_exit
1028will be rejected, since R2 is unreadable at the start of the program.
1029
1030After kernel function call, R1-R5 are reset to unreadable and
1031R0 has a return type of the function.
1032
1033Since R6-R9 are callee saved, their state is preserved across the call.
1034 bpf_mov R6 = 1
1035 bpf_call foo
1036 bpf_mov R0 = R6
1037 bpf_exit
1038is a correct program. If there was R1 instead of R6, it would have
1039been rejected.
1040
1041load/store instructions are allowed only with registers of valid types, which
1042are PTR_TO_CTX, PTR_TO_MAP, FRAME_PTR. They are bounds and alignment checked.
1043For example:
1044 bpf_mov R1 = 1
1045 bpf_mov R2 = 2
1046 bpf_xadd *(u32 *)(R1 + 3) += R2
1047 bpf_exit
1048will be rejected, since R1 doesn't have a valid pointer type at the time of
1049execution of instruction bpf_xadd.
1050
1051At the start R1 type is PTR_TO_CTX (a pointer to generic 'struct bpf_context')
1052A callback is used to customize verifier to restrict eBPF program access to only
1053certain fields within ctx structure with specified size and alignment.
1054
1055For example, the following insn:
1056 bpf_ld R0 = *(u32 *)(R6 + 8)
1057intends to load a word from address R6 + 8 and store it into R0
1058If R6=PTR_TO_CTX, via is_valid_access() callback the verifier will know
1059that offset 8 of size 4 bytes can be accessed for reading, otherwise
1060the verifier will reject the program.
1061If R6=FRAME_PTR, then access should be aligned and be within
1062stack bounds, which are [-MAX_BPF_STACK, 0). In this example offset is 8,
1063so it will fail verification, since it's out of bounds.
1064
1065The verifier will allow eBPF program to read data from stack only after
1066it wrote into it.
1067Classic BPF verifier does similar check with M[0-15] memory slots.
1068For example:
1069 bpf_ld R0 = *(u32 *)(R10 - 4)
1070 bpf_exit
1071is invalid program.
1072Though R10 is correct read-only register and has type FRAME_PTR
1073and R10 - 4 is within stack bounds, there were no stores into that location.
1074
1075Pointer register spill/fill is tracked as well, since four (R6-R9)
1076callee saved registers may not be enough for some programs.
1077
1078Allowed function calls are customized with bpf_verifier_ops->get_func_proto()
1079The eBPF verifier will check that registers match argument constraints.
1080After the call register R0 will be set to return type of the function.
1081
1082Function calls is a main mechanism to extend functionality of eBPF programs.
1083Socket filters may let programs to call one set of functions, whereas tracing
1084filters may allow completely different set.
1085
1086If a function made accessible to eBPF program, it needs to be thought through
1087from safety point of view. The verifier will guarantee that the function is
1088called with valid arguments.
1089
1090seccomp vs socket filters have different security restrictions for classic BPF.
1091Seccomp solves this by two stage verifier: classic BPF verifier is followed
1092by seccomp verifier. In case of eBPF one configurable verifier is shared for
1093all use cases.
1094
1095See details of eBPF verifier in kernel/bpf/verifier.c
1096
1097eBPF maps
1098---------
1099'maps' is a generic storage of different types for sharing data between kernel
1100and userspace.
1101
1102The maps are accessed from user space via BPF syscall, which has commands:
1103- create a map with given type and attributes
1104 map_fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)
1105 using attr->map_type, attr->key_size, attr->value_size, attr->max_entries
1106 returns process-local file descriptor or negative error
1107
1108- lookup key in a given map
1109 err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
1110 using attr->map_fd, attr->key, attr->value
1111 returns zero and stores found elem into value or negative error
1112
1113- create or update key/value pair in a given map
1114 err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
1115 using attr->map_fd, attr->key, attr->value
1116 returns zero or negative error
1117
1118- find and delete element by key in a given map
1119 err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
1120 using attr->map_fd, attr->key
1121
1122- to delete map: close(fd)
1123 Exiting process will delete maps automatically
1124
1125userspace programs use this syscall to create/access maps that eBPF programs
1126are concurrently updating.
1127
1128maps can have different types: hash, array, bloom filter, radix-tree, etc.
1129
1130The map is defined by:
1131 . type
1132 . max number of elements
1133 . key size in bytes
1134 . value size in bytes
1135
1136Understanding eBPF verifier messages
1137------------------------------------
1138
1139The following are few examples of invalid eBPF programs and verifier error
1140messages as seen in the log:
1141
1142Program with unreachable instructions:
1143static struct bpf_insn prog[] = {
1144 BPF_EXIT_INSN(),
1145 BPF_EXIT_INSN(),
1146};
1147Error:
1148 unreachable insn 1
1149
1150Program that reads uninitialized register:
1151 BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
1152 BPF_EXIT_INSN(),
1153Error:
1154 0: (bf) r0 = r2
1155 R2 !read_ok
1156
1157Program that doesn't initialize R0 before exiting:
1158 BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
1159 BPF_EXIT_INSN(),
1160Error:
1161 0: (bf) r2 = r1
1162 1: (95) exit
1163 R0 !read_ok
1164
1165Program that accesses stack out of bounds:
1166 BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
1167 BPF_EXIT_INSN(),
1168Error:
1169 0: (7a) *(u64 *)(r10 +8) = 0
1170 invalid stack off=8 size=8
1171
1172Program that doesn't initialize stack before passing its address into function:
1173 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
1174 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
1175 BPF_LD_MAP_FD(BPF_REG_1, 0),
1176 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
1177 BPF_EXIT_INSN(),
1178Error:
1179 0: (bf) r2 = r10
1180 1: (07) r2 += -8
1181 2: (b7) r1 = 0x0
1182 3: (85) call 1
1183 invalid indirect read from stack off -8+0 size 8
1184
1185Program that uses invalid map_fd=0 while calling to map_lookup_elem() function:
1186 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
1187 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
1188 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
1189 BPF_LD_MAP_FD(BPF_REG_1, 0),
1190 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
1191 BPF_EXIT_INSN(),
1192Error:
1193 0: (7a) *(u64 *)(r10 -8) = 0
1194 1: (bf) r2 = r10
1195 2: (07) r2 += -8
1196 3: (b7) r1 = 0x0
1197 4: (85) call 1
1198 fd 0 is not pointing to valid bpf_map
1199
1200Program that doesn't check return value of map_lookup_elem() before accessing
1201map element:
1202 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
1203 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
1204 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
1205 BPF_LD_MAP_FD(BPF_REG_1, 0),
1206 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
1207 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
1208 BPF_EXIT_INSN(),
1209Error:
1210 0: (7a) *(u64 *)(r10 -8) = 0
1211 1: (bf) r2 = r10
1212 2: (07) r2 += -8
1213 3: (b7) r1 = 0x0
1214 4: (85) call 1
1215 5: (7a) *(u64 *)(r0 +0) = 0
1216 R0 invalid mem access 'map_value_or_null'
1217
1218Program that correctly checks map_lookup_elem() returned value for NULL, but
1219accesses the memory with incorrect alignment:
1220 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
1221 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
1222 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
1223 BPF_LD_MAP_FD(BPF_REG_1, 0),
1224 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
1225 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
1226 BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
1227 BPF_EXIT_INSN(),
1228Error:
1229 0: (7a) *(u64 *)(r10 -8) = 0
1230 1: (bf) r2 = r10
1231 2: (07) r2 += -8
1232 3: (b7) r1 = 1
1233 4: (85) call 1
1234 5: (15) if r0 == 0x0 goto pc+1
1235 R0=map_ptr R10=fp
1236 6: (7a) *(u64 *)(r0 +4) = 0
1237 misaligned access off 4 size 8
1238
1239Program that correctly checks map_lookup_elem() returned value for NULL and
1240accesses memory with correct alignment in one side of 'if' branch, but fails
1241to do so in the other side of 'if' branch:
1242 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
1243 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
1244 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
1245 BPF_LD_MAP_FD(BPF_REG_1, 0),
1246 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
1247 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
1248 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
1249 BPF_EXIT_INSN(),
1250 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
1251 BPF_EXIT_INSN(),
1252Error:
1253 0: (7a) *(u64 *)(r10 -8) = 0
1254 1: (bf) r2 = r10
1255 2: (07) r2 += -8
1256 3: (b7) r1 = 1
1257 4: (85) call 1
1258 5: (15) if r0 == 0x0 goto pc+2
1259 R0=map_ptr R10=fp
1260 6: (7a) *(u64 *)(r0 +0) = 0
1261 7: (95) exit
1262
1263 from 5 to 8: R0=imm0 R10=fp
1264 8: (7a) *(u64 *)(r0 +0) = 1
1265 R0 invalid mem access 'imm'
1266
1004Testing 1267Testing
1005------- 1268-------
1006 1269
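The error listings above can be reproduced from user space by feeding the
same instruction arrays to BPF_PROG_LOAD with a non-zero log_level and
reading back the log; samples/bpf/test_verifier.c in this series automates
exactly that. A hedged, minimal sketch (illustrative only: it reuses the
bpf_prog_load() wrapper and bpf_log_buf from the manpage in the commit
message, and it assumes the struct bpf_insn helper macros are available to
user space, e.g. via the libbpf.h helpers this series adds to samples/bpf):

    #include <stdio.h>
    #include <linux/bpf.h>
    /* BPF_MOV64_REG()/BPF_EXIT_INSN() assumed available, see above */

    int main(void)
    {
        /* "Program that reads uninitialized register" from the text above */
        struct bpf_insn prog[] = {
            BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
            BPF_EXIT_INSN(),
        };
        int fd = bpf_prog_load(BPF_PROG_TYPE_UNSPEC, prog,
                               sizeof(prog) / sizeof(prog[0]), "GPL");

        if (fd < 0)
            /* the log should end with "R2 !read_ok" as documented above */
            printf("verifier says:\n%s\n", bpf_log_buf);
        return 0;
    }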
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 028b78168d85..9fe1b5d002f0 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -363,3 +363,4 @@
363354 i386 seccomp sys_seccomp 363354 i386 seccomp sys_seccomp
364355 i386 getrandom sys_getrandom 364355 i386 getrandom sys_getrandom
365356 i386 memfd_create sys_memfd_create 365356 i386 memfd_create sys_memfd_create
366357 i386 bpf sys_bpf
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 35dd922727b9..281150b539a2 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -327,6 +327,7 @@
327318 common getrandom sys_getrandom 327318 common getrandom sys_getrandom
328319 common memfd_create sys_memfd_create 328319 common memfd_create sys_memfd_create
329320 common kexec_file_load sys_kexec_file_load 329320 common kexec_file_load sys_kexec_file_load
330321 common bpf sys_bpf
330 331
331# 332#
332# x32-specific system call numbers start at 512 to avoid cache impact 333# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
new file mode 100644
index 000000000000..3cf91754a957
--- /dev/null
+++ b/include/linux/bpf.h
@@ -0,0 +1,136 @@
1/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 */
7#ifndef _LINUX_BPF_H
8#define _LINUX_BPF_H 1
9
10#include <uapi/linux/bpf.h>
11#include <linux/workqueue.h>
12#include <linux/file.h>
13
14struct bpf_map;
15
16/* map is generic key/value storage optionally accesible by eBPF programs */
17struct bpf_map_ops {
18 /* funcs callable from userspace (via syscall) */
19 struct bpf_map *(*map_alloc)(union bpf_attr *attr);
20 void (*map_free)(struct bpf_map *);
21 int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
22
23 /* funcs callable from userspace and from eBPF programs */
24 void *(*map_lookup_elem)(struct bpf_map *map, void *key);
25 int (*map_update_elem)(struct bpf_map *map, void *key, void *value);
26 int (*map_delete_elem)(struct bpf_map *map, void *key);
27};
28
29struct bpf_map {
30 atomic_t refcnt;
31 enum bpf_map_type map_type;
32 u32 key_size;
33 u32 value_size;
34 u32 max_entries;
35 struct bpf_map_ops *ops;
36 struct work_struct work;
37};
38
39struct bpf_map_type_list {
40 struct list_head list_node;
41 struct bpf_map_ops *ops;
42 enum bpf_map_type type;
43};
44
45void bpf_register_map_type(struct bpf_map_type_list *tl);
46void bpf_map_put(struct bpf_map *map);
47struct bpf_map *bpf_map_get(struct fd f);
48
49/* function argument constraints */
50enum bpf_arg_type {
51 ARG_ANYTHING = 0, /* any argument is ok */
52
53 /* the following constraints used to prototype
54 * bpf_map_lookup/update/delete_elem() functions
55 */
56 ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */
57 ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
58 ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */
59
60 /* the following constraints used to prototype bpf_memcmp() and other
61 * functions that access data on eBPF program stack
62 */
63 ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */
64 ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */
65};
66
67/* type of values returned from helper functions */
68enum bpf_return_type {
69 RET_INTEGER, /* function returns integer */
70 RET_VOID, /* function doesn't return anything */
71 RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
72};
73
74/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
75 * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
76 * instructions after verifying
77 */
78struct bpf_func_proto {
79 u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
80 bool gpl_only;
81 enum bpf_return_type ret_type;
82 enum bpf_arg_type arg1_type;
83 enum bpf_arg_type arg2_type;
84 enum bpf_arg_type arg3_type;
85 enum bpf_arg_type arg4_type;
86 enum bpf_arg_type arg5_type;
87};
88
89/* bpf_context is intentionally undefined structure. Pointer to bpf_context is
90 * the first argument to eBPF programs.
91 * For socket filters: 'struct bpf_context *' == 'struct sk_buff *'
92 */
93struct bpf_context;
94
95enum bpf_access_type {
96 BPF_READ = 1,
97 BPF_WRITE = 2
98};
99
100struct bpf_verifier_ops {
101 /* return eBPF function prototype for verification */
102 const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
103
104 /* return true if 'size' wide access at offset 'off' within bpf_context
105 * with 'type' (read or write) is allowed
106 */
107 bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
108};
109
110struct bpf_prog_type_list {
111 struct list_head list_node;
112 struct bpf_verifier_ops *ops;
113 enum bpf_prog_type type;
114};
115
116void bpf_register_prog_type(struct bpf_prog_type_list *tl);
117
118struct bpf_prog;
119
120struct bpf_prog_aux {
121 atomic_t refcnt;
122 bool is_gpl_compatible;
123 enum bpf_prog_type prog_type;
124 struct bpf_verifier_ops *ops;
125 struct bpf_map **used_maps;
126 u32 used_map_cnt;
127 struct bpf_prog *prog;
128 struct work_struct work;
129};
130
131void bpf_prog_put(struct bpf_prog *prog);
132struct bpf_prog *bpf_prog_get(u32 ufd);
133/* verify correctness of eBPF program */
134int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
135
136#endif /* _LINUX_BPF_H */
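The structures above define how map implementations plug into the syscall
layer: a backend fills in struct bpf_map_ops and registers a struct
bpf_map_type_list at boot. A hedged skeleton of such a registration
(illustrative only; the real hash map backend arrives in a later patch,
and kernel/bpf/test_stub.c in this series registers stub ops the same way):

    /* sketch of a map backend registering itself with the bpf syscall layer */
    #include <linux/bpf.h>
    #include <linux/err.h>
    #include <linux/init.h>
    #include <linux/slab.h>

    static struct bpf_map *dummy_map_alloc(union bpf_attr *attr)
    {
        struct bpf_map *map;

        if (attr->key_size == 0 || attr->value_size == 0)
            return ERR_PTR(-EINVAL);

        map = kzalloc(sizeof(*map), GFP_USER);
        if (!map)
            return ERR_PTR(-ENOMEM);

        /* syscall.c fills in map->ops and map->map_type after this
         * returns; the backend records the sizes it was asked for
         */
        map->key_size = attr->key_size;
        map->value_size = attr->value_size;
        map->max_entries = attr->max_entries;
        return map;
    }

    static void dummy_map_free(struct bpf_map *map)
    {
        kfree(map);
    }

    static struct bpf_map_ops dummy_map_ops = {
        .map_alloc = dummy_map_alloc,
        .map_free = dummy_map_free,
        /* a real backend must also provide map_lookup_elem,
         * map_update_elem, map_delete_elem and map_get_next_key
         */
    };

    static struct bpf_map_type_list dummy_map_type = {
        .ops = &dummy_map_ops,
        .type = BPF_MAP_TYPE_UNSPEC,
    };

    static int __init register_dummy_map(void)
    {
        bpf_register_map_type(&dummy_map_type);
        return 0;
    }
    late_initcall(register_dummy_map);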
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1a0bc6d134d7..ca95abd2bed1 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -21,6 +21,7 @@
21struct sk_buff; 21struct sk_buff;
22struct sock; 22struct sock;
23struct seccomp_data; 23struct seccomp_data;
24struct bpf_prog_aux;
24 25
25/* ArgX, context and stack frame pointer register positions. Note, 26/* ArgX, context and stack frame pointer register positions. Note,
26 * Arg1, Arg2, Arg3, etc are used as argument mappings of function 27 * Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -144,6 +145,12 @@ struct seccomp_data;
144 .off = 0, \ 145 .off = 0, \
145 .imm = ((__u64) (IMM)) >> 32 }) 146 .imm = ((__u64) (IMM)) >> 32 })
146 147
148#define BPF_PSEUDO_MAP_FD 1
149
150/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
151#define BPF_LD_MAP_FD(DST, MAP_FD) \
152 BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
153
147/* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */ 154/* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */
148 155
149#define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \ 156#define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \
@@ -300,17 +307,12 @@ struct bpf_binary_header {
300 u8 image[]; 307 u8 image[];
301}; 308};
302 309
303struct bpf_work_struct {
304 struct bpf_prog *prog;
305 struct work_struct work;
306};
307
308struct bpf_prog { 310struct bpf_prog {
309 u16 pages; /* Number of allocated pages */ 311 u16 pages; /* Number of allocated pages */
310 bool jited; /* Is our filter JIT'ed? */ 312 bool jited; /* Is our filter JIT'ed? */
311 u32 len; /* Number of filter blocks */ 313 u32 len; /* Number of filter blocks */
312 struct sock_fprog_kern *orig_prog; /* Original BPF program */ 314 struct sock_fprog_kern *orig_prog; /* Original BPF program */
313 struct bpf_work_struct *work; /* Deferred free work struct */ 315 struct bpf_prog_aux *aux; /* Auxiliary fields */
314 unsigned int (*bpf_func)(const struct sk_buff *skb, 316 unsigned int (*bpf_func)(const struct sk_buff *skb,
315 const struct bpf_insn *filter); 317 const struct bpf_insn *filter);
316 /* Instructions for interpreter */ 318 /* Instructions for interpreter */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 0f86d85a9ce4..bda9b81357cc 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -65,6 +65,7 @@ struct old_linux_dirent;
65struct perf_event_attr; 65struct perf_event_attr;
66struct file_handle; 66struct file_handle;
67struct sigaltstack; 67struct sigaltstack;
68union bpf_attr;
68 69
69#include <linux/types.h> 70#include <linux/types.h>
70#include <linux/aio_abi.h> 71#include <linux/aio_abi.h>
@@ -875,5 +876,5 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
875 const char __user *uargs); 876 const char __user *uargs);
876asmlinkage long sys_getrandom(char __user *buf, size_t count, 877asmlinkage long sys_getrandom(char __user *buf, size_t count,
877 unsigned int flags); 878 unsigned int flags);
878 879asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
879#endif 880#endif
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 11d11bc5c78f..22749c134117 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -705,9 +705,11 @@ __SYSCALL(__NR_seccomp, sys_seccomp)
705__SYSCALL(__NR_getrandom, sys_getrandom) 705__SYSCALL(__NR_getrandom, sys_getrandom)
706#define __NR_memfd_create 279 706#define __NR_memfd_create 279
707__SYSCALL(__NR_memfd_create, sys_memfd_create) 707__SYSCALL(__NR_memfd_create, sys_memfd_create)
708#define __NR_bpf 280
709__SYSCALL(__NR_bpf, sys_bpf)
708 710
709#undef __NR_syscalls 711#undef __NR_syscalls
710#define __NR_syscalls 280 712#define __NR_syscalls 281
711 713
712/* 714/*
713 * All syscalls below here should go away really, 715 * All syscalls below here should go away really,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 479ed0b6be16..31b0ac208a52 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -62,4 +62,94 @@ struct bpf_insn {
62 __s32 imm; /* signed immediate constant */ 62 __s32 imm; /* signed immediate constant */
63}; 63};
64 64
65/* BPF syscall commands */
66enum bpf_cmd {
67 /* create a map with given type and attributes
68 * fd = bpf(BPF_MAP_CREATE, union bpf_attr *, u32 size)
69 * returns fd or negative error
70 * map is deleted when fd is closed
71 */
72 BPF_MAP_CREATE,
73
74 /* lookup key in a given map
75 * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
76 * Using attr->map_fd, attr->key, attr->value
77 * returns zero and stores found elem into value
78 * or negative error
79 */
80 BPF_MAP_LOOKUP_ELEM,
81
82 /* create or update key/value pair in a given map
83 * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
84 * Using attr->map_fd, attr->key, attr->value
85 * returns zero or negative error
86 */
87 BPF_MAP_UPDATE_ELEM,
88
89 /* find and delete elem by key in a given map
90 * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
91 * Using attr->map_fd, attr->key
92 * returns zero or negative error
93 */
94 BPF_MAP_DELETE_ELEM,
95
96 /* lookup key in a given map and return next key
97 * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
98 * Using attr->map_fd, attr->key, attr->next_key
99 * returns zero and stores next key or negative error
100 */
101 BPF_MAP_GET_NEXT_KEY,
102
103 /* verify and load eBPF program
104 * prog_fd = bpf(BPF_PROG_LOAD, union bpf_attr *attr, u32 size)
105 * Using attr->prog_type, attr->insns, attr->license
106 * returns fd or negative error
107 */
108 BPF_PROG_LOAD,
109};
110
111enum bpf_map_type {
112 BPF_MAP_TYPE_UNSPEC,
113};
114
115enum bpf_prog_type {
116 BPF_PROG_TYPE_UNSPEC,
117};
118
119union bpf_attr {
120 struct { /* anonymous struct used by BPF_MAP_CREATE command */
121 __u32 map_type; /* one of enum bpf_map_type */
122 __u32 key_size; /* size of key in bytes */
123 __u32 value_size; /* size of value in bytes */
124 __u32 max_entries; /* max number of entries in a map */
125 };
126
127 struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
128 __u32 map_fd;
129 __aligned_u64 key;
130 union {
131 __aligned_u64 value;
132 __aligned_u64 next_key;
133 };
134 };
135
136 struct { /* anonymous struct used by BPF_PROG_LOAD command */
137 __u32 prog_type; /* one of enum bpf_prog_type */
138 __u32 insn_cnt;
139 __aligned_u64 insns;
140 __aligned_u64 license;
141 __u32 log_level; /* verbosity level of verifier */
142 __u32 log_size; /* size of user buffer */
143 __aligned_u64 log_buf; /* user supplied buffer */
144 };
145} __attribute__((aligned(8)));
146
147/* integer value in 'imm' field of BPF_CALL instruction selects which helper
148 * function eBPF program intends to call
149 */
150enum bpf_func_id {
151 BPF_FUNC_unspec,
152 __BPF_FUNC_MAX_ID,
153};
154
65#endif /* _UAPI__LINUX_BPF_H__ */ 155#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 6a71145e2769..45427239f375 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1 +1,5 @@
1obj-y := core.o 1obj-y := core.o syscall.o verifier.o
2
3ifdef CONFIG_TEST_BPF
4obj-y += test_stub.o
5endif
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 8b7002488251..f0c30c59b317 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -27,6 +27,7 @@
27#include <linux/random.h> 27#include <linux/random.h>
28#include <linux/moduleloader.h> 28#include <linux/moduleloader.h>
29#include <asm/unaligned.h> 29#include <asm/unaligned.h>
30#include <linux/bpf.h>
30 31
31/* Registers */ 32/* Registers */
32#define BPF_R0 regs[BPF_REG_0] 33#define BPF_R0 regs[BPF_REG_0]
@@ -71,7 +72,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
71{ 72{
72 gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | 73 gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
73 gfp_extra_flags; 74 gfp_extra_flags;
74 struct bpf_work_struct *ws; 75 struct bpf_prog_aux *aux;
75 struct bpf_prog *fp; 76 struct bpf_prog *fp;
76 77
77 size = round_up(size, PAGE_SIZE); 78 size = round_up(size, PAGE_SIZE);
@@ -79,14 +80,14 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
79 if (fp == NULL) 80 if (fp == NULL)
80 return NULL; 81 return NULL;
81 82
82 ws = kmalloc(sizeof(*ws), GFP_KERNEL | gfp_extra_flags); 83 aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags);
83 if (ws == NULL) { 84 if (aux == NULL) {
84 vfree(fp); 85 vfree(fp);
85 return NULL; 86 return NULL;
86 } 87 }
87 88
88 fp->pages = size / PAGE_SIZE; 89 fp->pages = size / PAGE_SIZE;
89 fp->work = ws; 90 fp->aux = aux;
90 91
91 return fp; 92 return fp;
92} 93}
@@ -110,10 +111,10 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
110 memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); 111 memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
111 fp->pages = size / PAGE_SIZE; 112 fp->pages = size / PAGE_SIZE;
112 113
113 /* We keep fp->work from fp_old around in the new 114 /* We keep fp->aux from fp_old around in the new
114 * reallocated structure. 115 * reallocated structure.
115 */ 116 */
116 fp_old->work = NULL; 117 fp_old->aux = NULL;
117 __bpf_prog_free(fp_old); 118 __bpf_prog_free(fp_old);
118 } 119 }
119 120
@@ -123,7 +124,7 @@ EXPORT_SYMBOL_GPL(bpf_prog_realloc);
123 124
124void __bpf_prog_free(struct bpf_prog *fp) 125void __bpf_prog_free(struct bpf_prog *fp)
125{ 126{
126 kfree(fp->work); 127 kfree(fp->aux);
127 vfree(fp); 128 vfree(fp);
128} 129}
129EXPORT_SYMBOL_GPL(__bpf_prog_free); 130EXPORT_SYMBOL_GPL(__bpf_prog_free);
@@ -638,19 +639,19 @@ EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
638 639
639static void bpf_prog_free_deferred(struct work_struct *work) 640static void bpf_prog_free_deferred(struct work_struct *work)
640{ 641{
641 struct bpf_work_struct *ws; 642 struct bpf_prog_aux *aux;
642 643
643 ws = container_of(work, struct bpf_work_struct, work); 644 aux = container_of(work, struct bpf_prog_aux, work);
644 bpf_jit_free(ws->prog); 645 bpf_jit_free(aux->prog);
645} 646}
646 647
647/* Free internal BPF program */ 648/* Free internal BPF program */
648void bpf_prog_free(struct bpf_prog *fp) 649void bpf_prog_free(struct bpf_prog *fp)
649{ 650{
650 struct bpf_work_struct *ws = fp->work; 651 struct bpf_prog_aux *aux = fp->aux;
651 652
652 INIT_WORK(&ws->work, bpf_prog_free_deferred); 653 INIT_WORK(&aux->work, bpf_prog_free_deferred);
653 ws->prog = fp; 654 aux->prog = fp;
654 schedule_work(&ws->work); 655 schedule_work(&aux->work);
655} 656}
656EXPORT_SYMBOL_GPL(bpf_prog_free); 657EXPORT_SYMBOL_GPL(bpf_prog_free);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
new file mode 100644
index 000000000000..ba61c8c16032
--- /dev/null
+++ b/kernel/bpf/syscall.c
@@ -0,0 +1,606 @@
1/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful, but
8 * WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 * General Public License for more details.
11 */
12#include <linux/bpf.h>
13#include <linux/syscalls.h>
14#include <linux/slab.h>
15#include <linux/anon_inodes.h>
16#include <linux/file.h>
17#include <linux/license.h>
18#include <linux/filter.h>
19
20static LIST_HEAD(bpf_map_types);
21
22static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
23{
24 struct bpf_map_type_list *tl;
25 struct bpf_map *map;
26
27 list_for_each_entry(tl, &bpf_map_types, list_node) {
28 if (tl->type == attr->map_type) {
29 map = tl->ops->map_alloc(attr);
30 if (IS_ERR(map))
31 return map;
32 map->ops = tl->ops;
33 map->map_type = attr->map_type;
34 return map;
35 }
36 }
37 return ERR_PTR(-EINVAL);
38}
39
40/* boot time registration of different map implementations */
41void bpf_register_map_type(struct bpf_map_type_list *tl)
42{
43 list_add(&tl->list_node, &bpf_map_types);
44}
45
46/* called from workqueue */
47static void bpf_map_free_deferred(struct work_struct *work)
48{
49 struct bpf_map *map = container_of(work, struct bpf_map, work);
50
51 /* implementation dependent freeing */
52 map->ops->map_free(map);
53}
54
55/* decrement map refcnt and schedule it for freeing via workqueue
56 * (unrelying map implementation ops->map_free() might sleep)
57 */
58void bpf_map_put(struct bpf_map *map)
59{
60 if (atomic_dec_and_test(&map->refcnt)) {
61 INIT_WORK(&map->work, bpf_map_free_deferred);
62 schedule_work(&map->work);
63 }
64}
65
66static int bpf_map_release(struct inode *inode, struct file *filp)
67{
68 struct bpf_map *map = filp->private_data;
69
70 bpf_map_put(map);
71 return 0;
72}
73
74static const struct file_operations bpf_map_fops = {
75 .release = bpf_map_release,
76};
77
78/* helper macro to check that unused fields 'union bpf_attr' are zero */
79#define CHECK_ATTR(CMD) \
80 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
81 sizeof(attr->CMD##_LAST_FIELD), 0, \
82 sizeof(*attr) - \
83 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
84 sizeof(attr->CMD##_LAST_FIELD)) != NULL
85
86#define BPF_MAP_CREATE_LAST_FIELD max_entries
87/* called via syscall */
88static int map_create(union bpf_attr *attr)
89{
90 struct bpf_map *map;
91 int err;
92
93 err = CHECK_ATTR(BPF_MAP_CREATE);
94 if (err)
95 return -EINVAL;
96
97 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
98 map = find_and_alloc_map(attr);
99 if (IS_ERR(map))
100 return PTR_ERR(map);
101
102 atomic_set(&map->refcnt, 1);
103
104 err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC);
105
106 if (err < 0)
107 /* failed to allocate fd */
108 goto free_map;
109
110 return err;
111
112free_map:
113 map->ops->map_free(map);
114 return err;
115}
116
117/* if error is returned, fd is released.
118 * On success caller should complete fd access with matching fdput()
119 */
120struct bpf_map *bpf_map_get(struct fd f)
121{
122 struct bpf_map *map;
123
124 if (!f.file)
125 return ERR_PTR(-EBADF);
126
127 if (f.file->f_op != &bpf_map_fops) {
128 fdput(f);
129 return ERR_PTR(-EINVAL);
130 }
131
132 map = f.file->private_data;
133
134 return map;
135}
136
137/* helper to convert user pointers passed inside __aligned_u64 fields */
138static void __user *u64_to_ptr(__u64 val)
139{
140 return (void __user *) (unsigned long) val;
141}
142
143/* last field in 'union bpf_attr' used by this command */
144#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
145
146static int map_lookup_elem(union bpf_attr *attr)
147{
148 void __user *ukey = u64_to_ptr(attr->key);
149 void __user *uvalue = u64_to_ptr(attr->value);
150 int ufd = attr->map_fd;
151 struct fd f = fdget(ufd);
152 struct bpf_map *map;
153 void *key, *value;
154 int err;
155
156 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
157 return -EINVAL;
158
159 map = bpf_map_get(f);
160 if (IS_ERR(map))
161 return PTR_ERR(map);
162
163 err = -ENOMEM;
164 key = kmalloc(map->key_size, GFP_USER);
165 if (!key)
166 goto err_put;
167
168 err = -EFAULT;
169 if (copy_from_user(key, ukey, map->key_size) != 0)
170 goto free_key;
171
172 err = -ESRCH;
173 rcu_read_lock();
174 value = map->ops->map_lookup_elem(map, key);
175 if (!value)
176 goto err_unlock;
177
178 err = -EFAULT;
179 if (copy_to_user(uvalue, value, map->value_size) != 0)
180 goto err_unlock;
181
182 err = 0;
183
184err_unlock:
185 rcu_read_unlock();
186free_key:
187 kfree(key);
188err_put:
189 fdput(f);
190 return err;
191}
192
193#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value
194
195static int map_update_elem(union bpf_attr *attr)
196{
197 void __user *ukey = u64_to_ptr(attr->key);
198 void __user *uvalue = u64_to_ptr(attr->value);
199 int ufd = attr->map_fd;
200 struct fd f = fdget(ufd);
201 struct bpf_map *map;
202 void *key, *value;
203 int err;
204
205 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
206 return -EINVAL;
207
208 map = bpf_map_get(f);
209 if (IS_ERR(map))
210 return PTR_ERR(map);
211
212 err = -ENOMEM;
213 key = kmalloc(map->key_size, GFP_USER);
214 if (!key)
215 goto err_put;
216
217 err = -EFAULT;
218 if (copy_from_user(key, ukey, map->key_size) != 0)
219 goto free_key;
220
221 err = -ENOMEM;
222 value = kmalloc(map->value_size, GFP_USER);
223 if (!value)
224 goto free_key;
225
226 err = -EFAULT;
227 if (copy_from_user(value, uvalue, map->value_size) != 0)
228 goto free_value;
229
230 /* eBPF program that use maps are running under rcu_read_lock(),
231 * therefore all map accessors rely on this fact, so do the same here
232 */
233 rcu_read_lock();
234 err = map->ops->map_update_elem(map, key, value);
235 rcu_read_unlock();
236
237free_value:
238 kfree(value);
239free_key:
240 kfree(key);
241err_put:
242 fdput(f);
243 return err;
244}
245
246#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
247
248static int map_delete_elem(union bpf_attr *attr)
249{
250 void __user *ukey = u64_to_ptr(attr->key);
251 int ufd = attr->map_fd;
252 struct fd f = fdget(ufd);
253 struct bpf_map *map;
254 void *key;
255 int err;
256
257 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
258 return -EINVAL;
259
260 map = bpf_map_get(f);
261 if (IS_ERR(map))
262 return PTR_ERR(map);
263
264 err = -ENOMEM;
265 key = kmalloc(map->key_size, GFP_USER);
266 if (!key)
267 goto err_put;
268
269 err = -EFAULT;
270 if (copy_from_user(key, ukey, map->key_size) != 0)
271 goto free_key;
272
273 rcu_read_lock();
274 err = map->ops->map_delete_elem(map, key);
275 rcu_read_unlock();
276
277free_key:
278 kfree(key);
279err_put:
280 fdput(f);
281 return err;
282}
283
284/* last field in 'union bpf_attr' used by this command */
285#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
286
287static int map_get_next_key(union bpf_attr *attr)
288{
289 void __user *ukey = u64_to_ptr(attr->key);
290 void __user *unext_key = u64_to_ptr(attr->next_key);
291 int ufd = attr->map_fd;
292 struct fd f = fdget(ufd);
293 struct bpf_map *map;
294 void *key, *next_key;
295 int err;
296
297 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
298 return -EINVAL;
299
300 map = bpf_map_get(f);
301 if (IS_ERR(map))
302 return PTR_ERR(map);
303
304 err = -ENOMEM;
305 key = kmalloc(map->key_size, GFP_USER);
306 if (!key)
307 goto err_put;
308
309 err = -EFAULT;
310 if (copy_from_user(key, ukey, map->key_size) != 0)
311 goto free_key;
312
313 err = -ENOMEM;
314 next_key = kmalloc(map->key_size, GFP_USER);
315 if (!next_key)
316 goto free_key;
317
318 rcu_read_lock();
319 err = map->ops->map_get_next_key(map, key, next_key);
320 rcu_read_unlock();
321 if (err)
322 goto free_next_key;
323
324 err = -EFAULT;
325 if (copy_to_user(unext_key, next_key, map->key_size) != 0)
326 goto free_next_key;
327
328 err = 0;
329
330free_next_key:
331 kfree(next_key);
332free_key:
333 kfree(key);
334err_put:
335 fdput(f);
336 return err;
337}
338
339static LIST_HEAD(bpf_prog_types);
340
341static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
342{
343 struct bpf_prog_type_list *tl;
344
345 list_for_each_entry(tl, &bpf_prog_types, list_node) {
346 if (tl->type == type) {
347 prog->aux->ops = tl->ops;
348 prog->aux->prog_type = type;
349 return 0;
350 }
351 }
352 return -EINVAL;
353}
354
355void bpf_register_prog_type(struct bpf_prog_type_list *tl)
356{
357 list_add(&tl->list_node, &bpf_prog_types);
358}
359
360/* fixup insn->imm field of bpf_call instructions:
361 * if (insn->imm == BPF_FUNC_map_lookup_elem)
362 * insn->imm = bpf_map_lookup_elem - __bpf_call_base;
363 * else if (insn->imm == BPF_FUNC_map_update_elem)
364 * insn->imm = bpf_map_update_elem - __bpf_call_base;
365 * else ...
366 *
367 * this function is called after eBPF program passed verification
368 */
369static void fixup_bpf_calls(struct bpf_prog *prog)
370{
371 const struct bpf_func_proto *fn;
372 int i;
373
374 for (i = 0; i < prog->len; i++) {
375 struct bpf_insn *insn = &prog->insnsi[i];
376
377 if (insn->code == (BPF_JMP | BPF_CALL)) {
378 /* we reach here when program has bpf_call instructions
379 * and it passed bpf_check(), means that
380 * ops->get_func_proto must have been supplied, check it
381 */
382 BUG_ON(!prog->aux->ops->get_func_proto);
383
384 fn = prog->aux->ops->get_func_proto(insn->imm);
385 /* all functions that have prototype and verifier allowed
386 * programs to call them, must be real in-kernel functions
387 */
388 BUG_ON(!fn->func);
389 insn->imm = fn->func - __bpf_call_base;
390 }
391 }
392}
393
394/* drop refcnt on maps used by eBPF program and free auxilary data */
395static void free_used_maps(struct bpf_prog_aux *aux)
396{
397 int i;
398
399 for (i = 0; i < aux->used_map_cnt; i++)
400 bpf_map_put(aux->used_maps[i]);
401
402 kfree(aux->used_maps);
403}
404
405void bpf_prog_put(struct bpf_prog *prog)
406{
407 if (atomic_dec_and_test(&prog->aux->refcnt)) {
408 free_used_maps(prog->aux);
409 bpf_prog_free(prog);
410 }
411}
412
413static int bpf_prog_release(struct inode *inode, struct file *filp)
414{
415 struct bpf_prog *prog = filp->private_data;
416
417 bpf_prog_put(prog);
418 return 0;
419}
420
421static const struct file_operations bpf_prog_fops = {
422 .release = bpf_prog_release,
423};
424
425static struct bpf_prog *get_prog(struct fd f)
426{
427 struct bpf_prog *prog;
428
429 if (!f.file)
430 return ERR_PTR(-EBADF);
431
432 if (f.file->f_op != &bpf_prog_fops) {
433 fdput(f);
434 return ERR_PTR(-EINVAL);
435 }
436
437 prog = f.file->private_data;
438
439 return prog;
440}
441
442/* called by sockets/tracing/seccomp before attaching program to an event
443 * pairs with bpf_prog_put()
444 */
445struct bpf_prog *bpf_prog_get(u32 ufd)
446{
447 struct fd f = fdget(ufd);
448 struct bpf_prog *prog;
449
450 prog = get_prog(f);
451
452 if (IS_ERR(prog))
453 return prog;
454
455 atomic_inc(&prog->aux->refcnt);
456 fdput(f);
457 return prog;
458}
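The attach points themselves come in later patches, but the intended reference pairing can be sketched; my_attach()/my_detach() and the static pointer below are hypothetical names, shown only to illustrate how bpf_prog_get() and bpf_prog_put() are meant to be used.

/* hypothetical subsystem holding one program; illustration only */
static struct bpf_prog *attached_prog;

static int my_attach(u32 prog_fd)
{
	struct bpf_prog *prog;

	prog = bpf_prog_get(prog_fd);	/* takes a reference on success */
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	attached_prog = prog;
	return 0;
}

static void my_detach(void)
{
	if (!attached_prog)
		return;

	bpf_prog_put(attached_prog);	/* drops the reference from my_attach() */
	attached_prog = NULL;
}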
459
460/* last field in 'union bpf_attr' used by this command */
461#define BPF_PROG_LOAD_LAST_FIELD log_buf
462
463static int bpf_prog_load(union bpf_attr *attr)
464{
465 enum bpf_prog_type type = attr->prog_type;
466 struct bpf_prog *prog;
467 int err;
468 char license[128];
469 bool is_gpl;
470
471 if (CHECK_ATTR(BPF_PROG_LOAD))
472 return -EINVAL;
473
474 /* copy eBPF program license from user space */
475 if (strncpy_from_user(license, u64_to_ptr(attr->license),
476 sizeof(license) - 1) < 0)
477 return -EFAULT;
478 license[sizeof(license) - 1] = 0;
479
480 /* eBPF programs must be GPL compatible to use GPL-ed functions */
481 is_gpl = license_is_gpl_compatible(license);
482
483 if (attr->insn_cnt >= BPF_MAXINSNS)
484 return -EINVAL;
485
486 /* plain bpf_prog allocation */
487 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
488 if (!prog)
489 return -ENOMEM;
490
491 prog->len = attr->insn_cnt;
492
493 err = -EFAULT;
494 if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
495 prog->len * sizeof(struct bpf_insn)) != 0)
496 goto free_prog;
497
498 prog->orig_prog = NULL;
499 prog->jited = false;
500
501 atomic_set(&prog->aux->refcnt, 1);
502 prog->aux->is_gpl_compatible = is_gpl;
503
504 /* find program type: socket_filter vs tracing_filter */
505 err = find_prog_type(type, prog);
506 if (err < 0)
507 goto free_prog;
508
509 /* run eBPF verifier */
510 err = bpf_check(prog, attr);
511
512 if (err < 0)
513 goto free_used_maps;
514
515 /* fixup BPF_CALL->imm field */
516 fixup_bpf_calls(prog);
517
518 /* eBPF program is ready to be JITed */
519 bpf_prog_select_runtime(prog);
520
521 err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC);
522
523 if (err < 0)
524 /* failed to allocate fd */
525 goto free_used_maps;
526
527 return err;
528
529free_used_maps:
530 free_used_maps(prog->aux);
531free_prog:
532 bpf_prog_free(prog);
533 return err;
534}
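From user space, a minimal BPF_PROG_LOAD request built against the attributes checked above might look like this hedged sketch: it loads the trivial program "r0 = 0; exit", assumes __NR_bpf is defined for the architecture, and requests a verification trace via log_buf.

#include <linux/bpf.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

static char verifier_log[4096];

/* hedged sketch: load "r0 = 0; exit" and capture the verifier log */
static int load_trivial_prog(void)
{
	struct bpf_insn insns[] = {
		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0 },	/* r0 = 0 */
		{ .code = BPF_JMP | BPF_EXIT },					/* exit */
	};
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_UNSPEC;
	attr.insns = (__u64)(unsigned long)insns;
	attr.insn_cnt = sizeof(insns) / sizeof(insns[0]);
	attr.license = (__u64)(unsigned long)"GPL";
	attr.log_buf = (__u64)(unsigned long)verifier_log;
	attr.log_size = sizeof(verifier_log);
	attr.log_level = 1;

	/* returns a program fd on success; verifier_log holds the trace */
	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}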
535
536SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
537{
538 union bpf_attr attr = {};
539 int err;
540
541 /* the syscall is limited to root temporarily. This restriction will be
542 * lifted when security audit is clean. Note that eBPF+tracing must have
543 * this restriction, since it may pass kernel data to user space
544 */
545 if (!capable(CAP_SYS_ADMIN))
546 return -EPERM;
547
548 if (!access_ok(VERIFY_READ, uattr, 1))
549 return -EFAULT;
550
551 if (size > PAGE_SIZE) /* silly large */
552 return -E2BIG;
553
554 /* If we're handed a bigger struct than we know of,
555 * ensure all the unknown bits are 0 - i.e. new
556 * user-space does not rely on any kernel feature
557	 * extensions we don't know about yet.
558 */
559 if (size > sizeof(attr)) {
560 unsigned char __user *addr;
561 unsigned char __user *end;
562 unsigned char val;
563
564 addr = (void __user *)uattr + sizeof(attr);
565 end = (void __user *)uattr + size;
566
567 for (; addr < end; addr++) {
568 err = get_user(val, addr);
569 if (err)
570 return err;
571 if (val)
572 return -E2BIG;
573 }
574 size = sizeof(attr);
575 }
576
577 /* copy attributes from user space, may be less than sizeof(bpf_attr) */
578 if (copy_from_user(&attr, uattr, size) != 0)
579 return -EFAULT;
580
581 switch (cmd) {
582 case BPF_MAP_CREATE:
583 err = map_create(&attr);
584 break;
585 case BPF_MAP_LOOKUP_ELEM:
586 err = map_lookup_elem(&attr);
587 break;
588 case BPF_MAP_UPDATE_ELEM:
589 err = map_update_elem(&attr);
590 break;
591 case BPF_MAP_DELETE_ELEM:
592 err = map_delete_elem(&attr);
593 break;
594 case BPF_MAP_GET_NEXT_KEY:
595 err = map_get_next_key(&attr);
596 break;
597 case BPF_PROG_LOAD:
598 err = bpf_prog_load(&attr);
599 break;
600 default:
601 err = -EINVAL;
602 break;
603 }
604
605 return err;
606}
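As a hedged user-space counterpart to the command dispatch above, creating a map only needs the first four attributes; __NR_bpf and the choice of BPF_MAP_TYPE_UNSPEC (the test stub type added below) are illustrative assumptions.

#include <linux/bpf.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

/* hedged sketch: create a map through the bpf() multiplexing syscall */
static int create_map(__u32 key_size, __u32 value_size, __u32 max_entries)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_UNSPEC;
	attr.key_size = key_size;
	attr.value_size = value_size;
	attr.max_entries = max_entries;

	/* returns a new map fd on success, a negative error otherwise */
	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}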
diff --git a/kernel/bpf/test_stub.c b/kernel/bpf/test_stub.c
new file mode 100644
index 000000000000..fcaddff4003e
--- /dev/null
+++ b/kernel/bpf/test_stub.c
@@ -0,0 +1,116 @@
1/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 */
7#include <linux/kernel.h>
8#include <linux/types.h>
9#include <linux/slab.h>
10#include <linux/err.h>
11#include <linux/bpf.h>
12
13/* test stubs for BPF_MAP_TYPE_UNSPEC and for BPF_PROG_TYPE_UNSPEC
14 * to be used by user space verifier testsuite
15 */
16struct bpf_context {
17 u64 arg1;
18 u64 arg2;
19};
20
21static u64 test_func(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
22{
23 return 0;
24}
25
26static struct bpf_func_proto test_funcs[] = {
27 [BPF_FUNC_unspec] = {
28 .func = test_func,
29 .gpl_only = true,
30 .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
31 .arg1_type = ARG_CONST_MAP_PTR,
32 .arg2_type = ARG_PTR_TO_MAP_KEY,
33 },
34};
35
36static const struct bpf_func_proto *test_func_proto(enum bpf_func_id func_id)
37{
38 if (func_id < 0 || func_id >= ARRAY_SIZE(test_funcs))
39 return NULL;
40 return &test_funcs[func_id];
41}
42
43static const struct bpf_context_access {
44 int size;
45 enum bpf_access_type type;
46} test_ctx_access[] = {
47 [offsetof(struct bpf_context, arg1)] = {
48 FIELD_SIZEOF(struct bpf_context, arg1),
49 BPF_READ
50 },
51 [offsetof(struct bpf_context, arg2)] = {
52 FIELD_SIZEOF(struct bpf_context, arg2),
53 BPF_READ
54 },
55};
56
57static bool test_is_valid_access(int off, int size, enum bpf_access_type type)
58{
59 const struct bpf_context_access *access;
60
61 if (off < 0 || off >= ARRAY_SIZE(test_ctx_access))
62 return false;
63
64 access = &test_ctx_access[off];
65 if (access->size == size && (access->type & type))
66 return true;
67
68 return false;
69}
70
71static struct bpf_verifier_ops test_ops = {
72 .get_func_proto = test_func_proto,
73 .is_valid_access = test_is_valid_access,
74};
75
76static struct bpf_prog_type_list tl_prog = {
77 .ops = &test_ops,
78 .type = BPF_PROG_TYPE_UNSPEC,
79};
80
81static struct bpf_map *test_map_alloc(union bpf_attr *attr)
82{
83 struct bpf_map *map;
84
85 map = kzalloc(sizeof(*map), GFP_USER);
86 if (!map)
87 return ERR_PTR(-ENOMEM);
88
89 map->key_size = attr->key_size;
90 map->value_size = attr->value_size;
91 map->max_entries = attr->max_entries;
92 return map;
93}
94
95static void test_map_free(struct bpf_map *map)
96{
97 kfree(map);
98}
99
100static struct bpf_map_ops test_map_ops = {
101 .map_alloc = test_map_alloc,
102 .map_free = test_map_free,
103};
104
105static struct bpf_map_type_list tl_map = {
106 .ops = &test_map_ops,
107 .type = BPF_MAP_TYPE_UNSPEC,
108};
109
110static int __init register_test_ops(void)
111{
112 bpf_register_map_type(&tl_map);
113 bpf_register_prog_type(&tl_prog);
114 return 0;
115}
116late_initcall(register_test_ops);
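A user-space testcase exercising these stubs could be as small as reading arg1 from the context, which test_is_valid_access() permits as an 8-byte read at offset 0. The encoding below is a hedged sketch using the insn-building macros from linux/filter.h (mirrored into the testsuite's libbpf.h).

/* hedged sketch of a BPF_PROG_TYPE_UNSPEC testcase: r2 = ctx->arg1; return 0 */
struct bpf_insn ctx_read_test[] = {
	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 0),	/* 8-byte read of arg1 */
	BPF_MOV64_IMM(BPF_REG_0, 0),			/* r0 = 0 */
	BPF_EXIT_INSN(),
};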
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
new file mode 100644
index 000000000000..a086dd3210a8
--- /dev/null
+++ b/kernel/bpf/verifier.c
@@ -0,0 +1,1777 @@
1/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful, but
8 * WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 * General Public License for more details.
11 */
12#include <linux/kernel.h>
13#include <linux/types.h>
14#include <linux/slab.h>
15#include <linux/bpf.h>
16#include <linux/filter.h>
17#include <net/netlink.h>
18#include <linux/file.h>
19#include <linux/vmalloc.h>
20
21/* bpf_check() is a static code analyzer that walks eBPF program
22 * instruction by instruction and updates register/stack state.
23 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
24 *
25 * The first pass is depth-first-search to check that the program is a DAG.
26 * It rejects the following programs:
27 * - larger than BPF_MAXINSNS insns
28 * - if loop is present (detected via back-edge)
29 * - unreachable insns exist (shouldn't be a forest. program = one function)
30 * - out of bounds or malformed jumps
31 * The second pass is all possible path descent from the 1st insn.
32 * Since it's analyzing all paths through the program, the length of the
33 * analysis is limited to 32k insn, which may be hit even if total number of
34 * insn is less than 4K, but there are too many branches that change stack/regs.
35 * Number of 'branches to be analyzed' is limited to 1k
36 *
37 * On entry to each instruction, each register has a type, and the instruction
38 * changes the types of the registers depending on instruction semantics.
39 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
40 * copied to R1.
41 *
42 * All registers are 64-bit.
43 * R0 - return register
44 * R1-R5 argument passing registers
45 * R6-R9 callee saved registers
46 * R10 - frame pointer read-only
47 *
48 * At the start of BPF program the register R1 contains a pointer to bpf_context
49 * and has type PTR_TO_CTX.
50 *
51 * Verifier tracks arithmetic operations on pointers in case:
52 * BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
53 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
54 * 1st insn copies R10 (which has FRAME_PTR) type into R1
55 * and 2nd arithmetic instruction is pattern matched to recognize
56 * that it wants to construct a pointer to some element within stack.
57 * So after 2nd insn, the register R1 has type PTR_TO_STACK
58 * (and -20 constant is saved for further stack bounds checking).
59 * Meaning that this reg is a pointer to stack plus known immediate constant.
60 *
61 * Most of the time the registers have UNKNOWN_VALUE type, which
62 * means the register has some value, but it's not a valid pointer.
63 * (like pointer plus pointer becomes UNKNOWN_VALUE type)
64 *
65 * When verifier sees load or store instructions the type of base register
66 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, FRAME_PTR. These are three pointer
67 * types recognized by check_mem_access() function.
68 *
69 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
70 * and the range of [ptr, ptr + map's value_size) is accessible.
71 *
72 * registers used to pass values to function calls are checked against
73 * function argument constraints.
74 *
75 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
76 * It means that the register type passed to this function must be
77 * PTR_TO_STACK and it will be used inside the function as
78 * 'pointer to map element key'
79 *
80 * For example the argument constraints for bpf_map_lookup_elem():
81 * .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
82 * .arg1_type = ARG_CONST_MAP_PTR,
83 * .arg2_type = ARG_PTR_TO_MAP_KEY,
84 *
85 * ret_type says that this function returns 'pointer to map elem value or null'
86 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
87 * 2nd argument should be a pointer to stack, which will be used inside
88 * the helper function as a pointer to map element key.
89 *
90 * On the kernel side the helper function looks like:
91 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
92 * {
93 * struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
94 * void *key = (void *) (unsigned long) r2;
95 * void *value;
96 *
97 * here kernel can access 'key' and 'map' pointers safely, knowing that
98 * [key, key + map->key_size) bytes are valid and were initialized on
99 * the stack of eBPF program.
100 * }
101 *
102 * Corresponding eBPF program may look like:
103 * BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
104 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
105 * BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
106 * BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
107 * here verifier looks at prototype of map_lookup_elem() and sees:
108 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
109 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
110 *
111 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
112 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
113 * and were initialized prior to this call.
114 * If it's ok, then verifier allows this BPF_CALL insn and looks at
115 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
116 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
117 * returns either a pointer to map value or NULL.
118 *
119 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
120 * insn, the register holding that pointer in the true branch changes state to
121 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
122 * branch. See check_cond_jmp_op().
123 *
124 * After the call R0 is set to return type of the function and registers R1-R5
125 * are set to NOT_INIT to indicate that they are no longer readable.
126 */
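/* A complete, hedged sketch of such a program (map_fd, the 4-byte key and the
 * 8-byte value size are illustrative; insn macros as in linux/filter.h):
 *
 *   BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),             // key = 0 stored at fp - 4
 *   BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),             // R2 type is FRAME_PTR
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),            // R2 type is PTR_TO_STACK
 *   BPF_LD_MAP_FD(BPF_REG_1, map_fd),                 // R1 type is CONST_PTR_TO_MAP
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *   BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),            // R0 is PTR_TO_MAP_VALUE_OR_NULL
 *   BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),              // only reached when R0 != NULL,
 *                                                     // so R0 is PTR_TO_MAP_VALUE here
 *   BPF_MOV64_IMM(BPF_REG_0, 0),
 *   BPF_EXIT_INSN(),
 */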
127
128/* types of values stored in eBPF registers */
129enum bpf_reg_type {
130 NOT_INIT = 0, /* nothing was written into register */
131 UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */
132 PTR_TO_CTX, /* reg points to bpf_context */
133 CONST_PTR_TO_MAP, /* reg points to struct bpf_map */
134 PTR_TO_MAP_VALUE, /* reg points to map element value */
135 PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
136 FRAME_PTR, /* reg == frame_pointer */
137 PTR_TO_STACK, /* reg == frame_pointer + imm */
138 CONST_IMM, /* constant integer value */
139};
140
141struct reg_state {
142 enum bpf_reg_type type;
143 union {
144 /* valid when type == CONST_IMM | PTR_TO_STACK */
145 int imm;
146
147 /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
148 * PTR_TO_MAP_VALUE_OR_NULL
149 */
150 struct bpf_map *map_ptr;
151 };
152};
153
154enum bpf_stack_slot_type {
155 STACK_INVALID, /* nothing was stored in this stack slot */
156 STACK_SPILL, /* 1st byte of register spilled into stack */
157 STACK_SPILL_PART, /* other 7 bytes of register spill */
158 STACK_MISC /* BPF program wrote some data into this slot */
159};
160
161struct bpf_stack_slot {
162 enum bpf_stack_slot_type stype;
163 struct reg_state reg_st;
164};
165
166/* state of the program:
167 * type of all registers and stack info
168 */
169struct verifier_state {
170 struct reg_state regs[MAX_BPF_REG];
171 struct bpf_stack_slot stack[MAX_BPF_STACK];
172};
173
174/* linked list of verifier states used to prune search */
175struct verifier_state_list {
176 struct verifier_state state;
177 struct verifier_state_list *next;
178};
179
180/* verifier_state + insn_idx are pushed to stack when branch is encountered */
181struct verifier_stack_elem {
182	/* verifier state is 'st'
183 * before processing instruction 'insn_idx'
184 * and after processing instruction 'prev_insn_idx'
185 */
186 struct verifier_state st;
187 int insn_idx;
188 int prev_insn_idx;
189 struct verifier_stack_elem *next;
190};
191
192#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
193
194/* single container for all structs
195 * one verifier_env per bpf_check() call
196 */
197struct verifier_env {
198 struct bpf_prog *prog; /* eBPF program being verified */
199 struct verifier_stack_elem *head; /* stack of verifier states to be processed */
200 int stack_size; /* number of states to be processed */
201 struct verifier_state cur_state; /* current verifier state */
202	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of maps used by eBPF program */
203 u32 used_map_cnt; /* number of used maps */
204};
205
206/* verbose verifier prints what it's seeing
207 * bpf_check() is called under lock, so no race to access these global vars
208 */
209static u32 log_level, log_size, log_len;
210static char *log_buf;
211
212static DEFINE_MUTEX(bpf_verifier_lock);
213
214/* log_level controls verbosity level of eBPF verifier.
215 * verbose() is used to dump the verification trace to the log, so the user
216 * can figure out what's wrong with the program
217 */
218static void verbose(const char *fmt, ...)
219{
220 va_list args;
221
222 if (log_level == 0 || log_len >= log_size - 1)
223 return;
224
225 va_start(args, fmt);
226 log_len += vscnprintf(log_buf + log_len, log_size - log_len, fmt, args);
227 va_end(args);
228}
229
230/* string representation of 'enum bpf_reg_type' */
231static const char * const reg_type_str[] = {
232 [NOT_INIT] = "?",
233 [UNKNOWN_VALUE] = "inv",
234 [PTR_TO_CTX] = "ctx",
235 [CONST_PTR_TO_MAP] = "map_ptr",
236 [PTR_TO_MAP_VALUE] = "map_value",
237 [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
238 [FRAME_PTR] = "fp",
239 [PTR_TO_STACK] = "fp",
240 [CONST_IMM] = "imm",
241};
242
243static void print_verifier_state(struct verifier_env *env)
244{
245 enum bpf_reg_type t;
246 int i;
247
248 for (i = 0; i < MAX_BPF_REG; i++) {
249 t = env->cur_state.regs[i].type;
250 if (t == NOT_INIT)
251 continue;
252 verbose(" R%d=%s", i, reg_type_str[t]);
253 if (t == CONST_IMM || t == PTR_TO_STACK)
254 verbose("%d", env->cur_state.regs[i].imm);
255 else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
256 t == PTR_TO_MAP_VALUE_OR_NULL)
257 verbose("(ks=%d,vs=%d)",
258 env->cur_state.regs[i].map_ptr->key_size,
259 env->cur_state.regs[i].map_ptr->value_size);
260 }
261 for (i = 0; i < MAX_BPF_STACK; i++) {
262 if (env->cur_state.stack[i].stype == STACK_SPILL)
263 verbose(" fp%d=%s", -MAX_BPF_STACK + i,
264 reg_type_str[env->cur_state.stack[i].reg_st.type]);
265 }
266 verbose("\n");
267}
268
269static const char *const bpf_class_string[] = {
270 [BPF_LD] = "ld",
271 [BPF_LDX] = "ldx",
272 [BPF_ST] = "st",
273 [BPF_STX] = "stx",
274 [BPF_ALU] = "alu",
275 [BPF_JMP] = "jmp",
276 [BPF_RET] = "BUG",
277 [BPF_ALU64] = "alu64",
278};
279
280static const char *const bpf_alu_string[] = {
281 [BPF_ADD >> 4] = "+=",
282 [BPF_SUB >> 4] = "-=",
283 [BPF_MUL >> 4] = "*=",
284 [BPF_DIV >> 4] = "/=",
285 [BPF_OR >> 4] = "|=",
286 [BPF_AND >> 4] = "&=",
287 [BPF_LSH >> 4] = "<<=",
288 [BPF_RSH >> 4] = ">>=",
289 [BPF_NEG >> 4] = "neg",
290 [BPF_MOD >> 4] = "%=",
291 [BPF_XOR >> 4] = "^=",
292 [BPF_MOV >> 4] = "=",
293 [BPF_ARSH >> 4] = "s>>=",
294 [BPF_END >> 4] = "endian",
295};
296
297static const char *const bpf_ldst_string[] = {
298 [BPF_W >> 3] = "u32",
299 [BPF_H >> 3] = "u16",
300 [BPF_B >> 3] = "u8",
301 [BPF_DW >> 3] = "u64",
302};
303
304static const char *const bpf_jmp_string[] = {
305 [BPF_JA >> 4] = "jmp",
306 [BPF_JEQ >> 4] = "==",
307 [BPF_JGT >> 4] = ">",
308 [BPF_JGE >> 4] = ">=",
309 [BPF_JSET >> 4] = "&",
310 [BPF_JNE >> 4] = "!=",
311 [BPF_JSGT >> 4] = "s>",
312 [BPF_JSGE >> 4] = "s>=",
313 [BPF_CALL >> 4] = "call",
314 [BPF_EXIT >> 4] = "exit",
315};
316
317static void print_bpf_insn(struct bpf_insn *insn)
318{
319 u8 class = BPF_CLASS(insn->code);
320
321 if (class == BPF_ALU || class == BPF_ALU64) {
322 if (BPF_SRC(insn->code) == BPF_X)
323 verbose("(%02x) %sr%d %s %sr%d\n",
324 insn->code, class == BPF_ALU ? "(u32) " : "",
325 insn->dst_reg,
326 bpf_alu_string[BPF_OP(insn->code) >> 4],
327 class == BPF_ALU ? "(u32) " : "",
328 insn->src_reg);
329 else
330 verbose("(%02x) %sr%d %s %s%d\n",
331 insn->code, class == BPF_ALU ? "(u32) " : "",
332 insn->dst_reg,
333 bpf_alu_string[BPF_OP(insn->code) >> 4],
334 class == BPF_ALU ? "(u32) " : "",
335 insn->imm);
336 } else if (class == BPF_STX) {
337 if (BPF_MODE(insn->code) == BPF_MEM)
338 verbose("(%02x) *(%s *)(r%d %+d) = r%d\n",
339 insn->code,
340 bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
341 insn->dst_reg,
342 insn->off, insn->src_reg);
343 else if (BPF_MODE(insn->code) == BPF_XADD)
344 verbose("(%02x) lock *(%s *)(r%d %+d) += r%d\n",
345 insn->code,
346 bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
347 insn->dst_reg, insn->off,
348 insn->src_reg);
349 else
350 verbose("BUG_%02x\n", insn->code);
351 } else if (class == BPF_ST) {
352 if (BPF_MODE(insn->code) != BPF_MEM) {
353 verbose("BUG_st_%02x\n", insn->code);
354 return;
355 }
356 verbose("(%02x) *(%s *)(r%d %+d) = %d\n",
357 insn->code,
358 bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
359 insn->dst_reg,
360 insn->off, insn->imm);
361 } else if (class == BPF_LDX) {
362 if (BPF_MODE(insn->code) != BPF_MEM) {
363 verbose("BUG_ldx_%02x\n", insn->code);
364 return;
365 }
366 verbose("(%02x) r%d = *(%s *)(r%d %+d)\n",
367 insn->code, insn->dst_reg,
368 bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
369 insn->src_reg, insn->off);
370 } else if (class == BPF_LD) {
371 if (BPF_MODE(insn->code) == BPF_ABS) {
372 verbose("(%02x) r0 = *(%s *)skb[%d]\n",
373 insn->code,
374 bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
375 insn->imm);
376 } else if (BPF_MODE(insn->code) == BPF_IND) {
377 verbose("(%02x) r0 = *(%s *)skb[r%d + %d]\n",
378 insn->code,
379 bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
380 insn->src_reg, insn->imm);
381 } else if (BPF_MODE(insn->code) == BPF_IMM) {
382 verbose("(%02x) r%d = 0x%x\n",
383 insn->code, insn->dst_reg, insn->imm);
384 } else {
385 verbose("BUG_ld_%02x\n", insn->code);
386 return;
387 }
388 } else if (class == BPF_JMP) {
389 u8 opcode = BPF_OP(insn->code);
390
391 if (opcode == BPF_CALL) {
392 verbose("(%02x) call %d\n", insn->code, insn->imm);
393 } else if (insn->code == (BPF_JMP | BPF_JA)) {
394 verbose("(%02x) goto pc%+d\n",
395 insn->code, insn->off);
396 } else if (insn->code == (BPF_JMP | BPF_EXIT)) {
397 verbose("(%02x) exit\n", insn->code);
398 } else if (BPF_SRC(insn->code) == BPF_X) {
399 verbose("(%02x) if r%d %s r%d goto pc%+d\n",
400 insn->code, insn->dst_reg,
401 bpf_jmp_string[BPF_OP(insn->code) >> 4],
402 insn->src_reg, insn->off);
403 } else {
404 verbose("(%02x) if r%d %s 0x%x goto pc%+d\n",
405 insn->code, insn->dst_reg,
406 bpf_jmp_string[BPF_OP(insn->code) >> 4],
407 insn->imm, insn->off);
408 }
409 } else {
410 verbose("(%02x) %s\n", insn->code, bpf_class_string[class]);
411 }
412}
413
414static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
415{
416 struct verifier_stack_elem *elem;
417 int insn_idx;
418
419 if (env->head == NULL)
420 return -1;
421
422 memcpy(&env->cur_state, &env->head->st, sizeof(env->cur_state));
423 insn_idx = env->head->insn_idx;
424 if (prev_insn_idx)
425 *prev_insn_idx = env->head->prev_insn_idx;
426 elem = env->head->next;
427 kfree(env->head);
428 env->head = elem;
429 env->stack_size--;
430 return insn_idx;
431}
432
433static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx,
434 int prev_insn_idx)
435{
436 struct verifier_stack_elem *elem;
437
438 elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL);
439 if (!elem)
440 goto err;
441
442 memcpy(&elem->st, &env->cur_state, sizeof(env->cur_state));
443 elem->insn_idx = insn_idx;
444 elem->prev_insn_idx = prev_insn_idx;
445 elem->next = env->head;
446 env->head = elem;
447 env->stack_size++;
448 if (env->stack_size > 1024) {
449 verbose("BPF program is too complex\n");
450 goto err;
451 }
452 return &elem->st;
453err:
454 /* pop all elements and return */
455 while (pop_stack(env, NULL) >= 0);
456 return NULL;
457}
458
459#define CALLER_SAVED_REGS 6
460static const int caller_saved[CALLER_SAVED_REGS] = {
461 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
462};
463
464static void init_reg_state(struct reg_state *regs)
465{
466 int i;
467
468 for (i = 0; i < MAX_BPF_REG; i++) {
469 regs[i].type = NOT_INIT;
470 regs[i].imm = 0;
471 regs[i].map_ptr = NULL;
472 }
473
474 /* frame pointer */
475 regs[BPF_REG_FP].type = FRAME_PTR;
476
477 /* 1st arg to a function */
478 regs[BPF_REG_1].type = PTR_TO_CTX;
479}
480
481static void mark_reg_unknown_value(struct reg_state *regs, u32 regno)
482{
483 BUG_ON(regno >= MAX_BPF_REG);
484 regs[regno].type = UNKNOWN_VALUE;
485 regs[regno].imm = 0;
486 regs[regno].map_ptr = NULL;
487}
488
489enum reg_arg_type {
490 SRC_OP, /* register is used as source operand */
491 DST_OP, /* register is used as destination operand */
492 DST_OP_NO_MARK /* same as above, check only, don't mark */
493};
494
495static int check_reg_arg(struct reg_state *regs, u32 regno,
496 enum reg_arg_type t)
497{
498 if (regno >= MAX_BPF_REG) {
499 verbose("R%d is invalid\n", regno);
500 return -EINVAL;
501 }
502
503 if (t == SRC_OP) {
504 /* check whether register used as source operand can be read */
505 if (regs[regno].type == NOT_INIT) {
506 verbose("R%d !read_ok\n", regno);
507 return -EACCES;
508 }
509 } else {
510 /* check whether register used as dest operand can be written to */
511 if (regno == BPF_REG_FP) {
512 verbose("frame pointer is read only\n");
513 return -EACCES;
514 }
515 if (t == DST_OP)
516 mark_reg_unknown_value(regs, regno);
517 }
518 return 0;
519}
520
521static int bpf_size_to_bytes(int bpf_size)
522{
523 if (bpf_size == BPF_W)
524 return 4;
525 else if (bpf_size == BPF_H)
526 return 2;
527 else if (bpf_size == BPF_B)
528 return 1;
529 else if (bpf_size == BPF_DW)
530 return 8;
531 else
532 return -EINVAL;
533}
534
535/* check_stack_read/write functions track spill/fill of registers,
536 * stack boundary and alignment are checked in check_mem_access()
537 */
538static int check_stack_write(struct verifier_state *state, int off, int size,
539 int value_regno)
540{
541 struct bpf_stack_slot *slot;
542 int i;
543
544 if (value_regno >= 0 &&
545 (state->regs[value_regno].type == PTR_TO_MAP_VALUE ||
546 state->regs[value_regno].type == PTR_TO_STACK ||
547 state->regs[value_regno].type == PTR_TO_CTX)) {
548
549 /* register containing pointer is being spilled into stack */
550 if (size != 8) {
551 verbose("invalid size of register spill\n");
552 return -EACCES;
553 }
554
555 slot = &state->stack[MAX_BPF_STACK + off];
556 slot->stype = STACK_SPILL;
557 /* save register state */
558 slot->reg_st = state->regs[value_regno];
559 for (i = 1; i < 8; i++) {
560 slot = &state->stack[MAX_BPF_STACK + off + i];
561 slot->stype = STACK_SPILL_PART;
562 slot->reg_st.type = UNKNOWN_VALUE;
563 slot->reg_st.map_ptr = NULL;
564 }
565 } else {
566
567 /* regular write of data into stack */
568 for (i = 0; i < size; i++) {
569 slot = &state->stack[MAX_BPF_STACK + off + i];
570 slot->stype = STACK_MISC;
571 slot->reg_st.type = UNKNOWN_VALUE;
572 slot->reg_st.map_ptr = NULL;
573 }
574 }
575 return 0;
576}
577
578static int check_stack_read(struct verifier_state *state, int off, int size,
579 int value_regno)
580{
581 int i;
582 struct bpf_stack_slot *slot;
583
584 slot = &state->stack[MAX_BPF_STACK + off];
585
586 if (slot->stype == STACK_SPILL) {
587 if (size != 8) {
588 verbose("invalid size of register spill\n");
589 return -EACCES;
590 }
591 for (i = 1; i < 8; i++) {
592 if (state->stack[MAX_BPF_STACK + off + i].stype !=
593 STACK_SPILL_PART) {
594 verbose("corrupted spill memory\n");
595 return -EACCES;
596 }
597 }
598
599 if (value_regno >= 0)
600 /* restore register state from stack */
601 state->regs[value_regno] = slot->reg_st;
602 return 0;
603 } else {
604 for (i = 0; i < size; i++) {
605 if (state->stack[MAX_BPF_STACK + off + i].stype !=
606 STACK_MISC) {
607 verbose("invalid read from stack off %d+%d size %d\n",
608 off, i, size);
609 return -EACCES;
610 }
611 }
612 if (value_regno >= 0)
613 /* have read misc data from the stack */
614 mark_reg_unknown_value(state->regs, value_regno);
615 return 0;
616 }
617}
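/* Example of the spill/fill pattern tracked above (hedged sketch, insn macros
 * as in linux/filter.h):
 *
 *   BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),   // spill R1 (PTR_TO_CTX) to fp - 8
 *   ...                                               // R1 may be clobbered, e.g. by a call
 *   BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8),   // fill: R1 is PTR_TO_CTX again
 *
 * The slot at fp - 8 becomes STACK_SPILL holding the saved register state, the
 * next 7 bytes become STACK_SPILL_PART, and a pointer spill or fill with any
 * size other than 8 is rejected with "invalid size of register spill".
 */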
618
619/* check read/write into map element returned by bpf_map_lookup_elem() */
620static int check_map_access(struct verifier_env *env, u32 regno, int off,
621 int size)
622{
623 struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
624
625 if (off < 0 || off + size > map->value_size) {
626 verbose("invalid access to map value, value_size=%d off=%d size=%d\n",
627 map->value_size, off, size);
628 return -EACCES;
629 }
630 return 0;
631}
632
633/* check access to 'struct bpf_context' fields */
634static int check_ctx_access(struct verifier_env *env, int off, int size,
635 enum bpf_access_type t)
636{
637 if (env->prog->aux->ops->is_valid_access &&
638 env->prog->aux->ops->is_valid_access(off, size, t))
639 return 0;
640
641 verbose("invalid bpf_context access off=%d size=%d\n", off, size);
642 return -EACCES;
643}
644
645/* check whether memory at (regno + off) is accessible for t = (read | write)
646 * if t==write, value_regno is a register whose value is stored into memory
647 * if t==read, value_regno is a register which will receive the value from memory
648 * if t==write && value_regno==-1, some unknown value is stored into memory
649 * if t==read && value_regno==-1, don't care what we read from memory
650 */
651static int check_mem_access(struct verifier_env *env, u32 regno, int off,
652 int bpf_size, enum bpf_access_type t,
653 int value_regno)
654{
655 struct verifier_state *state = &env->cur_state;
656 int size, err = 0;
657
658 size = bpf_size_to_bytes(bpf_size);
659 if (size < 0)
660 return size;
661
662 if (off % size != 0) {
663 verbose("misaligned access off %d size %d\n", off, size);
664 return -EACCES;
665 }
666
667 if (state->regs[regno].type == PTR_TO_MAP_VALUE) {
668 err = check_map_access(env, regno, off, size);
669 if (!err && t == BPF_READ && value_regno >= 0)
670 mark_reg_unknown_value(state->regs, value_regno);
671
672 } else if (state->regs[regno].type == PTR_TO_CTX) {
673 err = check_ctx_access(env, off, size, t);
674 if (!err && t == BPF_READ && value_regno >= 0)
675 mark_reg_unknown_value(state->regs, value_regno);
676
677 } else if (state->regs[regno].type == FRAME_PTR) {
678 if (off >= 0 || off < -MAX_BPF_STACK) {
679 verbose("invalid stack off=%d size=%d\n", off, size);
680 return -EACCES;
681 }
682 if (t == BPF_WRITE)
683 err = check_stack_write(state, off, size, value_regno);
684 else
685 err = check_stack_read(state, off, size, value_regno);
686 } else {
687 verbose("R%d invalid mem access '%s'\n",
688 regno, reg_type_str[state->regs[regno].type]);
689 return -EACCES;
690 }
691 return err;
692}
693
694static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
695{
696 struct reg_state *regs = env->cur_state.regs;
697 int err;
698
699 if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
700 insn->imm != 0) {
701 verbose("BPF_XADD uses reserved fields\n");
702 return -EINVAL;
703 }
704
705 /* check src1 operand */
706 err = check_reg_arg(regs, insn->src_reg, SRC_OP);
707 if (err)
708 return err;
709
710 /* check src2 operand */
711 err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
712 if (err)
713 return err;
714
715 /* check whether atomic_add can read the memory */
716 err = check_mem_access(env, insn->dst_reg, insn->off,
717 BPF_SIZE(insn->code), BPF_READ, -1);
718 if (err)
719 return err;
720
721 /* check whether atomic_add can write into the same memory */
722 return check_mem_access(env, insn->dst_reg, insn->off,
723 BPF_SIZE(insn->code), BPF_WRITE, -1);
724}
725
726/* when register 'regno' is passed into function that will read 'access_size'
727 * bytes from that pointer, make sure that it's within stack boundary
728 * and all elements of stack are initialized
729 */
730static int check_stack_boundary(struct verifier_env *env,
731 int regno, int access_size)
732{
733 struct verifier_state *state = &env->cur_state;
734 struct reg_state *regs = state->regs;
735 int off, i;
736
737 if (regs[regno].type != PTR_TO_STACK)
738 return -EACCES;
739
740 off = regs[regno].imm;
741 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
742 access_size <= 0) {
743 verbose("invalid stack type R%d off=%d access_size=%d\n",
744 regno, off, access_size);
745 return -EACCES;
746 }
747
748 for (i = 0; i < access_size; i++) {
749 if (state->stack[MAX_BPF_STACK + off + i].stype != STACK_MISC) {
750 verbose("invalid indirect read from stack off %d+%d size %d\n",
751 off, i, access_size);
752 return -EACCES;
753 }
754 }
755 return 0;
756}
757
758static int check_func_arg(struct verifier_env *env, u32 regno,
759 enum bpf_arg_type arg_type, struct bpf_map **mapp)
760{
761 struct reg_state *reg = env->cur_state.regs + regno;
762 enum bpf_reg_type expected_type;
763 int err = 0;
764
765 if (arg_type == ARG_ANYTHING)
766 return 0;
767
768 if (reg->type == NOT_INIT) {
769 verbose("R%d !read_ok\n", regno);
770 return -EACCES;
771 }
772
773 if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY ||
774 arg_type == ARG_PTR_TO_MAP_VALUE) {
775 expected_type = PTR_TO_STACK;
776 } else if (arg_type == ARG_CONST_STACK_SIZE) {
777 expected_type = CONST_IMM;
778 } else if (arg_type == ARG_CONST_MAP_PTR) {
779 expected_type = CONST_PTR_TO_MAP;
780 } else {
781 verbose("unsupported arg_type %d\n", arg_type);
782 return -EFAULT;
783 }
784
785 if (reg->type != expected_type) {
786 verbose("R%d type=%s expected=%s\n", regno,
787 reg_type_str[reg->type], reg_type_str[expected_type]);
788 return -EACCES;
789 }
790
791 if (arg_type == ARG_CONST_MAP_PTR) {
792 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
793 *mapp = reg->map_ptr;
794
795 } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
796 /* bpf_map_xxx(..., map_ptr, ..., key) call:
797 * check that [key, key + map->key_size) are within
798 * stack limits and initialized
799 */
800 if (!*mapp) {
801 /* in function declaration map_ptr must come before
802 * map_key, so that it's verified and known before
803 * we have to check map_key here. Otherwise it means
804 * that kernel subsystem misconfigured verifier
805 */
806 verbose("invalid map_ptr to access map->key\n");
807 return -EACCES;
808 }
809 err = check_stack_boundary(env, regno, (*mapp)->key_size);
810
811 } else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
812 /* bpf_map_xxx(..., map_ptr, ..., value) call:
813 * check [value, value + map->value_size) validity
814 */
815 if (!*mapp) {
816 /* kernel subsystem misconfigured verifier */
817 verbose("invalid map_ptr to access map->value\n");
818 return -EACCES;
819 }
820 err = check_stack_boundary(env, regno, (*mapp)->value_size);
821
822 } else if (arg_type == ARG_CONST_STACK_SIZE) {
823 /* bpf_xxx(..., buf, len) call will access 'len' bytes
824 * from stack pointer 'buf'. Check it
825 * note: regno == len, regno - 1 == buf
826 */
827 if (regno == 0) {
828 /* kernel subsystem misconfigured verifier */
829 verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
830 return -EACCES;
831 }
832 err = check_stack_boundary(env, regno - 1, reg->imm);
833 }
834
835 return err;
836}
837
838static int check_call(struct verifier_env *env, int func_id)
839{
840 struct verifier_state *state = &env->cur_state;
841 const struct bpf_func_proto *fn = NULL;
842 struct reg_state *regs = state->regs;
843 struct bpf_map *map = NULL;
844 struct reg_state *reg;
845 int i, err;
846
847 /* find function prototype */
848 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
849 verbose("invalid func %d\n", func_id);
850 return -EINVAL;
851 }
852
853 if (env->prog->aux->ops->get_func_proto)
854 fn = env->prog->aux->ops->get_func_proto(func_id);
855
856 if (!fn) {
857 verbose("unknown func %d\n", func_id);
858 return -EINVAL;
859 }
860
861 /* eBPF programs must be GPL compatible to use GPL-ed functions */
862 if (!env->prog->aux->is_gpl_compatible && fn->gpl_only) {
863 verbose("cannot call GPL only function from proprietary program\n");
864 return -EINVAL;
865 }
866
867 /* check args */
868 err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &map);
869 if (err)
870 return err;
871 err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &map);
872 if (err)
873 return err;
874 err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &map);
875 if (err)
876 return err;
877 err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &map);
878 if (err)
879 return err;
880 err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &map);
881 if (err)
882 return err;
883
884 /* reset caller saved regs */
885 for (i = 0; i < CALLER_SAVED_REGS; i++) {
886 reg = regs + caller_saved[i];
887 reg->type = NOT_INIT;
888 reg->imm = 0;
889 }
890
891 /* update return register */
892 if (fn->ret_type == RET_INTEGER) {
893 regs[BPF_REG_0].type = UNKNOWN_VALUE;
894 } else if (fn->ret_type == RET_VOID) {
895 regs[BPF_REG_0].type = NOT_INIT;
896 } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
897 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
898 /* remember map_ptr, so that check_map_access()
899 * can check 'value_size' boundary of memory access
900 * to map element returned from bpf_map_lookup_elem()
901 */
902 if (map == NULL) {
903 verbose("kernel subsystem misconfigured verifier\n");
904 return -EINVAL;
905 }
906 regs[BPF_REG_0].map_ptr = map;
907 } else {
908 verbose("unknown return type %d of func %d\n",
909 fn->ret_type, func_id);
910 return -EINVAL;
911 }
912 return 0;
913}
914
915/* check validity of 32-bit and 64-bit arithmetic operations */
916static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
917{
918 u8 opcode = BPF_OP(insn->code);
919 int err;
920
921 if (opcode == BPF_END || opcode == BPF_NEG) {
922 if (opcode == BPF_NEG) {
923 if (BPF_SRC(insn->code) != 0 ||
924 insn->src_reg != BPF_REG_0 ||
925 insn->off != 0 || insn->imm != 0) {
926 verbose("BPF_NEG uses reserved fields\n");
927 return -EINVAL;
928 }
929 } else {
930 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
931 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64)) {
932 verbose("BPF_END uses reserved fields\n");
933 return -EINVAL;
934 }
935 }
936
937 /* check src operand */
938 err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
939 if (err)
940 return err;
941
942 /* check dest operand */
943 err = check_reg_arg(regs, insn->dst_reg, DST_OP);
944 if (err)
945 return err;
946
947 } else if (opcode == BPF_MOV) {
948
949 if (BPF_SRC(insn->code) == BPF_X) {
950 if (insn->imm != 0 || insn->off != 0) {
951 verbose("BPF_MOV uses reserved fields\n");
952 return -EINVAL;
953 }
954
955 /* check src operand */
956 err = check_reg_arg(regs, insn->src_reg, SRC_OP);
957 if (err)
958 return err;
959 } else {
960 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
961 verbose("BPF_MOV uses reserved fields\n");
962 return -EINVAL;
963 }
964 }
965
966 /* check dest operand */
967 err = check_reg_arg(regs, insn->dst_reg, DST_OP);
968 if (err)
969 return err;
970
971 if (BPF_SRC(insn->code) == BPF_X) {
972 if (BPF_CLASS(insn->code) == BPF_ALU64) {
973 /* case: R1 = R2
974 * copy register state to dest reg
975 */
976 regs[insn->dst_reg] = regs[insn->src_reg];
977 } else {
978 regs[insn->dst_reg].type = UNKNOWN_VALUE;
979 regs[insn->dst_reg].map_ptr = NULL;
980 }
981 } else {
982 /* case: R = imm
983 * remember the value we stored into this reg
984 */
985 regs[insn->dst_reg].type = CONST_IMM;
986 regs[insn->dst_reg].imm = insn->imm;
987 }
988
989 } else if (opcode > BPF_END) {
990 verbose("invalid BPF_ALU opcode %x\n", opcode);
991 return -EINVAL;
992
993 } else { /* all other ALU ops: and, sub, xor, add, ... */
994
995 bool stack_relative = false;
996
997 if (BPF_SRC(insn->code) == BPF_X) {
998 if (insn->imm != 0 || insn->off != 0) {
999 verbose("BPF_ALU uses reserved fields\n");
1000 return -EINVAL;
1001 }
1002 /* check src1 operand */
1003 err = check_reg_arg(regs, insn->src_reg, SRC_OP);
1004 if (err)
1005 return err;
1006 } else {
1007 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
1008 verbose("BPF_ALU uses reserved fields\n");
1009 return -EINVAL;
1010 }
1011 }
1012
1013 /* check src2 operand */
1014 err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
1015 if (err)
1016 return err;
1017
1018 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
1019 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
1020 verbose("div by zero\n");
1021 return -EINVAL;
1022 }
1023
1024 /* pattern match 'bpf_add Rx, imm' instruction */
1025 if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 &&
1026 regs[insn->dst_reg].type == FRAME_PTR &&
1027 BPF_SRC(insn->code) == BPF_K)
1028 stack_relative = true;
1029
1030 /* check dest operand */
1031 err = check_reg_arg(regs, insn->dst_reg, DST_OP);
1032 if (err)
1033 return err;
1034
1035 if (stack_relative) {
1036 regs[insn->dst_reg].type = PTR_TO_STACK;
1037 regs[insn->dst_reg].imm = insn->imm;
1038 }
1039 }
1040
1041 return 0;
1042}
1043
1044static int check_cond_jmp_op(struct verifier_env *env,
1045 struct bpf_insn *insn, int *insn_idx)
1046{
1047 struct reg_state *regs = env->cur_state.regs;
1048 struct verifier_state *other_branch;
1049 u8 opcode = BPF_OP(insn->code);
1050 int err;
1051
1052 if (opcode > BPF_EXIT) {
1053 verbose("invalid BPF_JMP opcode %x\n", opcode);
1054 return -EINVAL;
1055 }
1056
1057 if (BPF_SRC(insn->code) == BPF_X) {
1058 if (insn->imm != 0) {
1059 verbose("BPF_JMP uses reserved fields\n");
1060 return -EINVAL;
1061 }
1062
1063 /* check src1 operand */
1064 err = check_reg_arg(regs, insn->src_reg, SRC_OP);
1065 if (err)
1066 return err;
1067 } else {
1068 if (insn->src_reg != BPF_REG_0) {
1069 verbose("BPF_JMP uses reserved fields\n");
1070 return -EINVAL;
1071 }
1072 }
1073
1074 /* check src2 operand */
1075 err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
1076 if (err)
1077 return err;
1078
1079	/* detect if R == imm where R is already known to hold that imm (e.g. R was set to 0 earlier) */
1080 if (BPF_SRC(insn->code) == BPF_K &&
1081 (opcode == BPF_JEQ || opcode == BPF_JNE) &&
1082 regs[insn->dst_reg].type == CONST_IMM &&
1083 regs[insn->dst_reg].imm == insn->imm) {
1084 if (opcode == BPF_JEQ) {
1085 /* if (imm == imm) goto pc+off;
1086 * only follow the goto, ignore fall-through
1087 */
1088 *insn_idx += insn->off;
1089 return 0;
1090 } else {
1091 /* if (imm != imm) goto pc+off;
1092 * only follow fall-through branch, since
1093 * that's where the program will go
1094 */
1095 return 0;
1096 }
1097 }
1098
1099 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
1100 if (!other_branch)
1101 return -EFAULT;
1102
1103	/* detect if R == 0 where R is the value returned from bpf_map_lookup_elem() */
1104 if (BPF_SRC(insn->code) == BPF_K &&
1105 insn->imm == 0 && (opcode == BPF_JEQ ||
1106 opcode == BPF_JNE) &&
1107 regs[insn->dst_reg].type == PTR_TO_MAP_VALUE_OR_NULL) {
1108 if (opcode == BPF_JEQ) {
1109 /* next fallthrough insn can access memory via
1110 * this register
1111 */
1112 regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
1113			/* branch target cannot access it, since reg == 0 */
1114 other_branch->regs[insn->dst_reg].type = CONST_IMM;
1115 other_branch->regs[insn->dst_reg].imm = 0;
1116 } else {
1117 other_branch->regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
1118 regs[insn->dst_reg].type = CONST_IMM;
1119 regs[insn->dst_reg].imm = 0;
1120 }
1121 } else if (BPF_SRC(insn->code) == BPF_K &&
1122 (opcode == BPF_JEQ || opcode == BPF_JNE)) {
1123
1124 if (opcode == BPF_JEQ) {
1125 /* detect if (R == imm) goto
1126 * and in the target state recognize that R = imm
1127 */
1128 other_branch->regs[insn->dst_reg].type = CONST_IMM;
1129 other_branch->regs[insn->dst_reg].imm = insn->imm;
1130 } else {
1131 /* detect if (R != imm) goto
1132 * and in the fall-through state recognize that R = imm
1133 */
1134 regs[insn->dst_reg].type = CONST_IMM;
1135 regs[insn->dst_reg].imm = insn->imm;
1136 }
1137 }
1138 if (log_level)
1139 print_verifier_state(env);
1140 return 0;
1141}
1142
1143/* return the map pointer stored inside BPF_LD_IMM64 instruction */
1144static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
1145{
1146 u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
1147
1148 return (struct bpf_map *) (unsigned long) imm64;
1149}
1150
1151/* verify BPF_LD_IMM64 instruction */
1152static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
1153{
1154 struct reg_state *regs = env->cur_state.regs;
1155 int err;
1156
1157 if (BPF_SIZE(insn->code) != BPF_DW) {
1158 verbose("invalid BPF_LD_IMM insn\n");
1159 return -EINVAL;
1160 }
1161 if (insn->off != 0) {
1162 verbose("BPF_LD_IMM64 uses reserved fields\n");
1163 return -EINVAL;
1164 }
1165
1166 err = check_reg_arg(regs, insn->dst_reg, DST_OP);
1167 if (err)
1168 return err;
1169
1170 if (insn->src_reg == 0)
1171 /* generic move 64-bit immediate into a register */
1172 return 0;
1173
1174 /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
1175 BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
1176
1177 regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
1178 regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
1179 return 0;
1180}
1181
1182/* non-recursive DFS pseudo code
1183 * 1 procedure DFS-iterative(G,v):
1184 * 2 label v as discovered
1185 * 3 let S be a stack
1186 * 4 S.push(v)
1187 * 5 while S is not empty
1188 * 6 t <- S.pop()
1189 * 7 if t is what we're looking for:
1190 * 8 return t
1191 * 9 for all edges e in G.adjacentEdges(t) do
1192 * 10 if edge e is already labelled
1193 * 11 continue with the next edge
1194 * 12 w <- G.adjacentVertex(t,e)
1195 * 13 if vertex w is not discovered and not explored
1196 * 14 label e as tree-edge
1197 * 15 label w as discovered
1198 * 16 S.push(w)
1199 * 17 continue at 5
1200 * 18 else if vertex w is discovered
1201 * 19 label e as back-edge
1202 * 20 else
1203 * 21 // vertex w is explored
1204 * 22 label e as forward- or cross-edge
1205 * 23 label t as explored
1206 * 24 S.pop()
1207 *
1208 * convention:
1209 * 0x10 - discovered
1210 * 0x11 - discovered and fall-through edge labelled
1211 * 0x12 - discovered and fall-through and branch edges labelled
1212 * 0x20 - explored
1213 */
1214
1215enum {
1216 DISCOVERED = 0x10,
1217 EXPLORED = 0x20,
1218 FALLTHROUGH = 1,
1219 BRANCH = 2,
1220};
1221
1222static int *insn_stack; /* stack of insns to process */
1223static int cur_stack; /* current stack index */
1224static int *insn_state;
1225
1226/* t, w, e - match pseudo-code above:
1227 * t - index of current instruction
1228 * w - next instruction
1229 * e - edge
1230 */
1231static int push_insn(int t, int w, int e, struct verifier_env *env)
1232{
1233 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
1234 return 0;
1235
1236 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
1237 return 0;
1238
1239 if (w < 0 || w >= env->prog->len) {
1240 verbose("jump out of range from insn %d to %d\n", t, w);
1241 return -EINVAL;
1242 }
1243
1244 if (insn_state[w] == 0) {
1245 /* tree-edge */
1246 insn_state[t] = DISCOVERED | e;
1247 insn_state[w] = DISCOVERED;
1248 if (cur_stack >= env->prog->len)
1249 return -E2BIG;
1250 insn_stack[cur_stack++] = w;
1251 return 1;
1252 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
1253 verbose("back-edge from insn %d to %d\n", t, w);
1254 return -EINVAL;
1255 } else if (insn_state[w] == EXPLORED) {
1256 /* forward- or cross-edge */
1257 insn_state[t] = DISCOVERED | e;
1258 } else {
1259 verbose("insn state internal bug\n");
1260 return -EFAULT;
1261 }
1262 return 0;
1263}
1264
1265/* non-recursive depth-first-search to detect loops in BPF program
1266 * loop == back-edge in directed graph
1267 */
1268static int check_cfg(struct verifier_env *env)
1269{
1270 struct bpf_insn *insns = env->prog->insnsi;
1271 int insn_cnt = env->prog->len;
1272 int ret = 0;
1273 int i, t;
1274
1275 insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
1276 if (!insn_state)
1277 return -ENOMEM;
1278
1279 insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
1280 if (!insn_stack) {
1281 kfree(insn_state);
1282 return -ENOMEM;
1283 }
1284
1285 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
1286 insn_stack[0] = 0; /* 0 is the first instruction */
1287 cur_stack = 1;
1288
1289peek_stack:
1290 if (cur_stack == 0)
1291 goto check_state;
1292 t = insn_stack[cur_stack - 1];
1293
1294 if (BPF_CLASS(insns[t].code) == BPF_JMP) {
1295 u8 opcode = BPF_OP(insns[t].code);
1296
1297 if (opcode == BPF_EXIT) {
1298 goto mark_explored;
1299 } else if (opcode == BPF_CALL) {
1300 ret = push_insn(t, t + 1, FALLTHROUGH, env);
1301 if (ret == 1)
1302 goto peek_stack;
1303 else if (ret < 0)
1304 goto err_free;
1305 } else if (opcode == BPF_JA) {
1306 if (BPF_SRC(insns[t].code) != BPF_K) {
1307 ret = -EINVAL;
1308 goto err_free;
1309 }
1310 /* unconditional jump with single edge */
1311 ret = push_insn(t, t + insns[t].off + 1,
1312 FALLTHROUGH, env);
1313 if (ret == 1)
1314 goto peek_stack;
1315 else if (ret < 0)
1316 goto err_free;
1317 } else {
1318 /* conditional jump with two edges */
1319 ret = push_insn(t, t + 1, FALLTHROUGH, env);
1320 if (ret == 1)
1321 goto peek_stack;
1322 else if (ret < 0)
1323 goto err_free;
1324
1325 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
1326 if (ret == 1)
1327 goto peek_stack;
1328 else if (ret < 0)
1329 goto err_free;
1330 }
1331 } else {
1332 /* all other non-branch instructions with single
1333 * fall-through edge
1334 */
1335 ret = push_insn(t, t + 1, FALLTHROUGH, env);
1336 if (ret == 1)
1337 goto peek_stack;
1338 else if (ret < 0)
1339 goto err_free;
1340 }
1341
1342mark_explored:
1343 insn_state[t] = EXPLORED;
1344 if (cur_stack-- <= 0) {
1345 verbose("pop stack internal bug\n");
1346 ret = -EFAULT;
1347 goto err_free;
1348 }
1349 goto peek_stack;
1350
1351check_state:
1352 for (i = 0; i < insn_cnt; i++) {
1353 if (insn_state[i] != EXPLORED) {
1354 verbose("unreachable insn %d\n", i);
1355 ret = -EINVAL;
1356 goto err_free;
1357 }
1358 }
1359 ret = 0; /* cfg looks good */
1360
1361err_free:
1362 kfree(insn_state);
1363 kfree(insn_stack);
1364 return ret;
1365}
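/* For example, the two-insn program
 *
 *   BPF_JMP_IMM(BPF_JA, 0, 0, -1),   // jumps back to itself
 *   BPF_EXIT_INSN(),
 *
 * is rejected here with "back-edge from insn 0 to 0" (and the exit would also
 * be unreachable), so do_check() below only ever analyzes loop-free programs.
 */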
1366
1367static int do_check(struct verifier_env *env)
1368{
1369 struct verifier_state *state = &env->cur_state;
1370 struct bpf_insn *insns = env->prog->insnsi;
1371 struct reg_state *regs = state->regs;
1372 int insn_cnt = env->prog->len;
1373 int insn_idx, prev_insn_idx = 0;
1374 int insn_processed = 0;
1375 bool do_print_state = false;
1376
1377 init_reg_state(regs);
1378 insn_idx = 0;
1379 for (;;) {
1380 struct bpf_insn *insn;
1381 u8 class;
1382 int err;
1383
1384 if (insn_idx >= insn_cnt) {
1385 verbose("invalid insn idx %d insn_cnt %d\n",
1386 insn_idx, insn_cnt);
1387 return -EFAULT;
1388 }
1389
1390 insn = &insns[insn_idx];
1391 class = BPF_CLASS(insn->code);
1392
1393 if (++insn_processed > 32768) {
1394			verbose("BPF program is too large. Processed %d insn\n",
1395 insn_processed);
1396 return -E2BIG;
1397 }
1398
1399 if (log_level && do_print_state) {
1400 verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx);
1401 print_verifier_state(env);
1402 do_print_state = false;
1403 }
1404
1405 if (log_level) {
1406 verbose("%d: ", insn_idx);
1407 print_bpf_insn(insn);
1408 }
1409
1410 if (class == BPF_ALU || class == BPF_ALU64) {
1411 err = check_alu_op(regs, insn);
1412 if (err)
1413 return err;
1414
1415 } else if (class == BPF_LDX) {
1416 if (BPF_MODE(insn->code) != BPF_MEM ||
1417 insn->imm != 0) {
1418 verbose("BPF_LDX uses reserved fields\n");
1419 return -EINVAL;
1420 }
1421 /* check src operand */
1422 err = check_reg_arg(regs, insn->src_reg, SRC_OP);
1423 if (err)
1424 return err;
1425
1426 err = check_reg_arg(regs, insn->dst_reg, DST_OP_NO_MARK);
1427 if (err)
1428 return err;
1429
1430 /* check that memory (src_reg + off) is readable,
1431 * the state of dst_reg will be updated by this func
1432 */
1433 err = check_mem_access(env, insn->src_reg, insn->off,
1434 BPF_SIZE(insn->code), BPF_READ,
1435 insn->dst_reg);
1436 if (err)
1437 return err;
1438
1439 } else if (class == BPF_STX) {
1440 if (BPF_MODE(insn->code) == BPF_XADD) {
1441 err = check_xadd(env, insn);
1442 if (err)
1443 return err;
1444 insn_idx++;
1445 continue;
1446 }
1447
1448 if (BPF_MODE(insn->code) != BPF_MEM ||
1449 insn->imm != 0) {
1450 verbose("BPF_STX uses reserved fields\n");
1451 return -EINVAL;
1452 }
1453 /* check src1 operand */
1454 err = check_reg_arg(regs, insn->src_reg, SRC_OP);
1455 if (err)
1456 return err;
1457 /* check src2 operand */
1458 err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
1459 if (err)
1460 return err;
1461
1462 /* check that memory (dst_reg + off) is writeable */
1463 err = check_mem_access(env, insn->dst_reg, insn->off,
1464 BPF_SIZE(insn->code), BPF_WRITE,
1465 insn->src_reg);
1466 if (err)
1467 return err;
1468
1469 } else if (class == BPF_ST) {
1470 if (BPF_MODE(insn->code) != BPF_MEM ||
1471 insn->src_reg != BPF_REG_0) {
1472 verbose("BPF_ST uses reserved fields\n");
1473 return -EINVAL;
1474 }
1475 /* check src operand */
1476 err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
1477 if (err)
1478 return err;
1479
1480 /* check that memory (dst_reg + off) is writeable */
1481 err = check_mem_access(env, insn->dst_reg, insn->off,
1482 BPF_SIZE(insn->code), BPF_WRITE,
1483 -1);
1484 if (err)
1485 return err;
1486
1487 } else if (class == BPF_JMP) {
1488 u8 opcode = BPF_OP(insn->code);
1489
1490 if (opcode == BPF_CALL) {
1491 if (BPF_SRC(insn->code) != BPF_K ||
1492 insn->off != 0 ||
1493 insn->src_reg != BPF_REG_0 ||
1494 insn->dst_reg != BPF_REG_0) {
1495 verbose("BPF_CALL uses reserved fields\n");
1496 return -EINVAL;
1497 }
1498
1499 err = check_call(env, insn->imm);
1500 if (err)
1501 return err;
1502
1503 } else if (opcode == BPF_JA) {
1504 if (BPF_SRC(insn->code) != BPF_K ||
1505 insn->imm != 0 ||
1506 insn->src_reg != BPF_REG_0 ||
1507 insn->dst_reg != BPF_REG_0) {
1508 verbose("BPF_JA uses reserved fields\n");
1509 return -EINVAL;
1510 }
1511
1512 insn_idx += insn->off + 1;
1513 continue;
1514
1515 } else if (opcode == BPF_EXIT) {
1516 if (BPF_SRC(insn->code) != BPF_K ||
1517 insn->imm != 0 ||
1518 insn->src_reg != BPF_REG_0 ||
1519 insn->dst_reg != BPF_REG_0) {
1520 verbose("BPF_EXIT uses reserved fields\n");
1521 return -EINVAL;
1522 }
1523
1524				/* eBPF calling convention is such that R0 is used
1525 * to return the value from eBPF program.
1526				 * Make sure that it's readable at the time
1527				 * of bpf_exit, which means that the program wrote
1528				 * something into it earlier
1529 */
1530 err = check_reg_arg(regs, BPF_REG_0, SRC_OP);
1531 if (err)
1532 return err;
1533
1534 insn_idx = pop_stack(env, &prev_insn_idx);
1535 if (insn_idx < 0) {
1536 break;
1537 } else {
1538 do_print_state = true;
1539 continue;
1540 }
1541 } else {
1542 err = check_cond_jmp_op(env, insn, &insn_idx);
1543 if (err)
1544 return err;
1545 }
1546 } else if (class == BPF_LD) {
1547 u8 mode = BPF_MODE(insn->code);
1548
1549 if (mode == BPF_ABS || mode == BPF_IND) {
1550 verbose("LD_ABS is not supported yet\n");
1551 return -EINVAL;
1552 } else if (mode == BPF_IMM) {
1553 err = check_ld_imm(env, insn);
1554 if (err)
1555 return err;
1556
1557 insn_idx++;
1558 } else {
1559 verbose("invalid BPF_LD mode\n");
1560 return -EINVAL;
1561 }
1562 } else {
1563 verbose("unknown insn class %d\n", class);
1564 return -EINVAL;
1565 }
1566
1567 insn_idx++;
1568 }
1569
1570 return 0;
1571}
1572
1573/* look for pseudo eBPF instructions that access map FDs and
1574 * replace them with actual map pointers
1575 */
1576static int replace_map_fd_with_map_ptr(struct verifier_env *env)
1577{
1578 struct bpf_insn *insn = env->prog->insnsi;
1579 int insn_cnt = env->prog->len;
1580 int i, j;
1581
1582 for (i = 0; i < insn_cnt; i++, insn++) {
1583 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
1584 struct bpf_map *map;
1585 struct fd f;
1586
1587 if (i == insn_cnt - 1 || insn[1].code != 0 ||
1588 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
1589 insn[1].off != 0) {
1590 verbose("invalid bpf_ld_imm64 insn\n");
1591 return -EINVAL;
1592 }
1593
1594 if (insn->src_reg == 0)
1595 /* valid generic load 64-bit imm */
1596 goto next_insn;
1597
1598 if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
1599 verbose("unrecognized bpf_ld_imm64 insn\n");
1600 return -EINVAL;
1601 }
1602
1603 f = fdget(insn->imm);
1604
1605 map = bpf_map_get(f);
1606 if (IS_ERR(map)) {
1607 verbose("fd %d is not pointing to valid bpf_map\n",
1608 insn->imm);
1609 fdput(f);
1610 return PTR_ERR(map);
1611 }
1612
1613 /* store map pointer inside BPF_LD_IMM64 instruction */
1614 insn[0].imm = (u32) (unsigned long) map;
1615 insn[1].imm = ((u64) (unsigned long) map) >> 32;
1616
1617 /* check whether we recorded this map already */
1618 for (j = 0; j < env->used_map_cnt; j++)
1619 if (env->used_maps[j] == map) {
1620 fdput(f);
1621 goto next_insn;
1622 }
1623
1624 if (env->used_map_cnt >= MAX_USED_MAPS) {
1625 fdput(f);
1626 return -E2BIG;
1627 }
1628
1629 /* remember this map */
1630 env->used_maps[env->used_map_cnt++] = map;
1631
1632 /* hold the map. If the program is rejected by verifier,
1633 * the map will be released by release_maps() or it
1634 * will be used by the valid program until it's unloaded
1635 * and all maps are released in free_bpf_prog_info()
1636 */
1637 atomic_inc(&map->refcnt);
1638
1639 fdput(f);
1640next_insn:
1641 insn++;
1642 i++;
1643 }
1644 }
1645
1646 /* now all pseudo BPF_LD_IMM64 instructions load valid
1647 * 'struct bpf_map *' into a register instead of user map_fd.
1648 * These pointers will be used later by verifier to validate map access.
1649 */
1650 return 0;
1651}
1652
1653/* drop refcnt of maps used by the rejected program */
1654static void release_maps(struct verifier_env *env)
1655{
1656 int i;
1657
1658 for (i = 0; i < env->used_map_cnt; i++)
1659 bpf_map_put(env->used_maps[i]);
1660}
1661
1662/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
1663static void convert_pseudo_ld_imm64(struct verifier_env *env)
1664{
1665 struct bpf_insn *insn = env->prog->insnsi;
1666 int insn_cnt = env->prog->len;
1667 int i;
1668
1669 for (i = 0; i < insn_cnt; i++, insn++)
1670 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
1671 insn->src_reg = 0;
1672}
1673
1674int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
1675{
1676 char __user *log_ubuf = NULL;
1677 struct verifier_env *env;
1678 int ret = -EINVAL;
1679
1680 if (prog->len <= 0 || prog->len > BPF_MAXINSNS)
1681 return -E2BIG;
1682
1683 /* 'struct verifier_env' can be global, but since it's not small,
1684 * allocate/free it every time bpf_check() is called
1685 */
1686 env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL);
1687 if (!env)
1688 return -ENOMEM;
1689
1690 env->prog = prog;
1691
1692 /* grab the mutex to protect a few globals used by verifier */
1693 mutex_lock(&bpf_verifier_lock);
1694
1695 if (attr->log_level || attr->log_buf || attr->log_size) {
1696 /* user requested verbose verifier output
1697 * and supplied buffer to store the verification trace
1698 */
1699 log_level = attr->log_level;
1700 log_ubuf = (char __user *) (unsigned long) attr->log_buf;
1701 log_size = attr->log_size;
1702 log_len = 0;
1703
1704 ret = -EINVAL;
1705 /* log_* values have to be sane */
1706 if (log_size < 128 || log_size > UINT_MAX >> 8 ||
1707 log_level == 0 || log_ubuf == NULL)
1708 goto free_env;
1709
1710 ret = -ENOMEM;
1711 log_buf = vmalloc(log_size);
1712 if (!log_buf)
1713 goto free_env;
1714 } else {
1715 log_level = 0;
1716 }
1717
1718 ret = replace_map_fd_with_map_ptr(env);
1719 if (ret < 0)
1720 goto skip_full_check;
1721
1722 ret = check_cfg(env);
1723 if (ret < 0)
1724 goto skip_full_check;
1725
1726 ret = do_check(env);
1727
1728skip_full_check:
1729 while (pop_stack(env, NULL) >= 0);
1730
1731 if (log_level && log_len >= log_size - 1) {
1732 BUG_ON(log_len >= log_size);
1733 /* verifier log exceeded user supplied buffer */
1734 ret = -ENOSPC;
1735 /* fall through to return what was recorded */
1736 }
1737
1738 /* copy verifier log back to user space including trailing zero */
1739 if (log_level && copy_to_user(log_ubuf, log_buf, log_len + 1) != 0) {
1740 ret = -EFAULT;
1741 goto free_log_buf;
1742 }
1743
1744 if (ret == 0 && env->used_map_cnt) {
1745 /* if program passed verifier, update used_maps in bpf_prog_info */
1746 prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
1747 sizeof(env->used_maps[0]),
1748 GFP_KERNEL);
1749
1750 if (!prog->aux->used_maps) {
1751 ret = -ENOMEM;
1752 goto free_log_buf;
1753 }
1754
1755 memcpy(prog->aux->used_maps, env->used_maps,
1756 sizeof(env->used_maps[0]) * env->used_map_cnt);
1757 prog->aux->used_map_cnt = env->used_map_cnt;
1758
1759 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
1760 * bpf_ld_imm64 instructions
1761 */
1762 convert_pseudo_ld_imm64(env);
1763 }
1764
1765free_log_buf:
1766 if (log_level)
1767 vfree(log_buf);
1768free_env:
1769 if (!prog->aux->used_maps)
1770 /* if we didn't copy map pointers into bpf_prog_info, release
1771 * them now. Otherwise free_bpf_prog_info() will release them.
1772 */
1773 release_maps(env);
1774 kfree(env);
1775 mutex_unlock(&bpf_verifier_lock);
1776 return ret;
1777}
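For reference, a minimal userspace sketch of supplying the log_buf/log_size/log_level attributes that bpf_check() validates above. This is illustrative only and not part of the patch set: BPF_PROG_LOAD, union bpf_attr and __NR_bpf come from these patches, while the helper name and buffer size are made up. The comments mirror the sanity checks in bpf_check().

/* sketch: load a program and capture the verifier trace */
#include <linux/bpf.h>
#include <linux/unistd.h>
#include <string.h>
#include <unistd.h>

static char verifier_log[8192];

static int load_with_log(const struct bpf_insn *insns, int insn_cnt)
{
	union bpf_attr attr;

	/* unused attr fields must stay zero */
	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_UNSPEC;
	attr.insns = (__u64) (unsigned long) insns;
	attr.insn_cnt = insn_cnt;
	attr.license = (__u64) (unsigned long) "GPL";
	attr.log_buf = (__u64) (unsigned long) verifier_log; /* must be non-NULL */
	attr.log_size = sizeof(verifier_log);                /* must be >= 128 */
	attr.log_level = 1;                                   /* must be non-zero */

	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}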
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 391d4ddb6f4b..b4b5083f5f5e 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -218,3 +218,6 @@ cond_syscall(sys_kcmp);
218
219/* operate on Secure Computing state */
220cond_syscall(sys_seccomp);
221
222/* access BPF programs and maps */
223cond_syscall(sys_bpf);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a28590083622..3ac43f34437b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1672,7 +1672,8 @@ config TEST_BPF
1672 against the BPF interpreter or BPF JIT compiler depending on the
1673 current setting. This is in particular useful for BPF JIT compiler
1674 development, but also to run regression tests against changes in
1675 the interpreter code. It also enables test stubs for eBPF maps and
1676 verifier used by user space verifier testsuite.
1677
1678 If unsure, say N.
1679
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
new file mode 100644
index 000000000000..634391797856
--- /dev/null
+++ b/samples/bpf/Makefile
@@ -0,0 +1,12 @@
1# kbuild trick to avoid linker error. Can be omitted if a module is built.
2obj- := dummy.o
3
4# List of programs to build
5hostprogs-y := test_verifier
6
7test_verifier-objs := test_verifier.o libbpf.o
8
9# Tell kbuild to always build the programs
10always := $(hostprogs-y)
11
12HOSTCFLAGS += -I$(objtree)/usr/include
diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c
new file mode 100644
index 000000000000..ff6504420738
--- /dev/null
+++ b/samples/bpf/libbpf.c
@@ -0,0 +1,94 @@
1/* eBPF mini library */
2#include <stdlib.h>
3#include <stdio.h>
4#include <linux/unistd.h>
5#include <unistd.h>
6#include <string.h>
7#include <linux/netlink.h>
8#include <linux/bpf.h>
9#include <errno.h>
10#include "libbpf.h"
11
12static __u64 ptr_to_u64(void *ptr)
13{
14 return (__u64) (unsigned long) ptr;
15}
16
17int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
18 int max_entries)
19{
20 union bpf_attr attr = {
21 .map_type = map_type,
22 .key_size = key_size,
23 .value_size = value_size,
24 .max_entries = max_entries
25 };
26
27 return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
28}
29
30int bpf_update_elem(int fd, void *key, void *value)
31{
32 union bpf_attr attr = {
33 .map_fd = fd,
34 .key = ptr_to_u64(key),
35 .value = ptr_to_u64(value),
36 };
37
38 return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
39}
40
41int bpf_lookup_elem(int fd, void *key, void *value)
42{
43 union bpf_attr attr = {
44 .map_fd = fd,
45 .key = ptr_to_u64(key),
46 .value = ptr_to_u64(value),
47 };
48
49 return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
50}
51
52int bpf_delete_elem(int fd, void *key)
53{
54 union bpf_attr attr = {
55 .map_fd = fd,
56 .key = ptr_to_u64(key),
57 };
58
59 return syscall(__NR_bpf, BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
60}
61
62int bpf_get_next_key(int fd, void *key, void *next_key)
63{
64 union bpf_attr attr = {
65 .map_fd = fd,
66 .key = ptr_to_u64(key),
67 .next_key = ptr_to_u64(next_key),
68 };
69
70 return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
71}
72
73#define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u))
74
75char bpf_log_buf[LOG_BUF_SIZE];
76
77int bpf_prog_load(enum bpf_prog_type prog_type,
78 const struct bpf_insn *insns, int prog_len,
79 const char *license)
80{
81 union bpf_attr attr = {
82 .prog_type = prog_type,
83 .insns = ptr_to_u64((void *) insns),
84 .insn_cnt = prog_len / sizeof(struct bpf_insn),
85 .license = ptr_to_u64((void *) license),
86 .log_buf = ptr_to_u64(bpf_log_buf),
87 .log_size = LOG_BUF_SIZE,
88 .log_level = 1,
89 };
90
91 bpf_log_buf[0] = 0;
92
93 return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
94}
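The map wrappers above are meant to be combined roughly as in the following sketch. It is illustrative only and not part of the patch set; whether the test-stub 'unspec' map type accepts updates and lookups depends on the stubs enabled by CONFIG_TEST_BPF, so only the calling pattern matters here.

#include <stdio.h>
#include <linux/bpf.h>
#include "libbpf.h"

int main(void)
{
	long long key = 1, value = 42, out = 0;
	int map_fd;

	/* key/value sizes and max_entries are arbitrary for this sketch */
	map_fd = bpf_create_map(BPF_MAP_TYPE_UNSPEC, sizeof(key),
				sizeof(value), 1024);
	if (map_fd < 0)
		return 1;

	/* store one element and read it back */
	if (bpf_update_elem(map_fd, &key, &value) == 0 &&
	    bpf_lookup_elem(map_fd, &key, &out) == 0)
		printf("key %lld -> %lld\n", key, out);

	return 0;
}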
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
new file mode 100644
index 000000000000..8a31babeca5d
--- /dev/null
+++ b/samples/bpf/libbpf.h
@@ -0,0 +1,172 @@
1/* eBPF mini library */
2#ifndef __LIBBPF_H
3#define __LIBBPF_H
4
5struct bpf_insn;
6
7int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
8 int max_entries);
9int bpf_update_elem(int fd, void *key, void *value);
10int bpf_lookup_elem(int fd, void *key, void *value);
11int bpf_delete_elem(int fd, void *key);
12int bpf_get_next_key(int fd, void *key, void *next_key);
13
14int bpf_prog_load(enum bpf_prog_type prog_type,
15 const struct bpf_insn *insns, int insn_len,
16 const char *license);
17
18#define LOG_BUF_SIZE 8192
19extern char bpf_log_buf[LOG_BUF_SIZE];
20
21/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
22
23#define BPF_ALU64_REG(OP, DST, SRC) \
24 ((struct bpf_insn) { \
25 .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
26 .dst_reg = DST, \
27 .src_reg = SRC, \
28 .off = 0, \
29 .imm = 0 })
30
31#define BPF_ALU32_REG(OP, DST, SRC) \
32 ((struct bpf_insn) { \
33 .code = BPF_ALU | BPF_OP(OP) | BPF_X, \
34 .dst_reg = DST, \
35 .src_reg = SRC, \
36 .off = 0, \
37 .imm = 0 })
38
39/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
40
41#define BPF_ALU64_IMM(OP, DST, IMM) \
42 ((struct bpf_insn) { \
43 .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
44 .dst_reg = DST, \
45 .src_reg = 0, \
46 .off = 0, \
47 .imm = IMM })
48
49#define BPF_ALU32_IMM(OP, DST, IMM) \
50 ((struct bpf_insn) { \
51 .code = BPF_ALU | BPF_OP(OP) | BPF_K, \
52 .dst_reg = DST, \
53 .src_reg = 0, \
54 .off = 0, \
55 .imm = IMM })
56
57/* Short form of mov, dst_reg = src_reg */
58
59#define BPF_MOV64_REG(DST, SRC) \
60 ((struct bpf_insn) { \
61 .code = BPF_ALU64 | BPF_MOV | BPF_X, \
62 .dst_reg = DST, \
63 .src_reg = SRC, \
64 .off = 0, \
65 .imm = 0 })
66
67/* Short form of mov, dst_reg = imm32 */
68
69#define BPF_MOV64_IMM(DST, IMM) \
70 ((struct bpf_insn) { \
71 .code = BPF_ALU64 | BPF_MOV | BPF_K, \
72 .dst_reg = DST, \
73 .src_reg = 0, \
74 .off = 0, \
75 .imm = IMM })
76
77/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
78#define BPF_LD_IMM64(DST, IMM) \
79 BPF_LD_IMM64_RAW(DST, 0, IMM)
80
81#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
82 ((struct bpf_insn) { \
83 .code = BPF_LD | BPF_DW | BPF_IMM, \
84 .dst_reg = DST, \
85 .src_reg = SRC, \
86 .off = 0, \
87 .imm = (__u32) (IMM) }), \
88 ((struct bpf_insn) { \
89 .code = 0, /* zero is reserved opcode */ \
90 .dst_reg = 0, \
91 .src_reg = 0, \
92 .off = 0, \
93 .imm = ((__u64) (IMM)) >> 32 })
94
95#define BPF_PSEUDO_MAP_FD 1
96
97/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
98#define BPF_LD_MAP_FD(DST, MAP_FD) \
99 BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
100
101
102/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
103
104#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
105 ((struct bpf_insn) { \
106 .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
107 .dst_reg = DST, \
108 .src_reg = SRC, \
109 .off = OFF, \
110 .imm = 0 })
111
112/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
113
114#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
115 ((struct bpf_insn) { \
116 .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
117 .dst_reg = DST, \
118 .src_reg = SRC, \
119 .off = OFF, \
120 .imm = 0 })
121
122/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
123
124#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
125 ((struct bpf_insn) { \
126 .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
127 .dst_reg = DST, \
128 .src_reg = 0, \
129 .off = OFF, \
130 .imm = IMM })
131
132/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
133
134#define BPF_JMP_REG(OP, DST, SRC, OFF) \
135 ((struct bpf_insn) { \
136 .code = BPF_JMP | BPF_OP(OP) | BPF_X, \
137 .dst_reg = DST, \
138 .src_reg = SRC, \
139 .off = OFF, \
140 .imm = 0 })
141
142/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
143
144#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
145 ((struct bpf_insn) { \
146 .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
147 .dst_reg = DST, \
148 .src_reg = 0, \
149 .off = OFF, \
150 .imm = IMM })
151
152/* Raw code statement block */
153
154#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
155 ((struct bpf_insn) { \
156 .code = CODE, \
157 .dst_reg = DST, \
158 .src_reg = SRC, \
159 .off = OFF, \
160 .imm = IMM })
161
162/* Program exit */
163
164#define BPF_EXIT_INSN() \
165 ((struct bpf_insn) { \
166 .code = BPF_JMP | BPF_EXIT, \
167 .dst_reg = 0, \
168 .src_reg = 0, \
169 .off = 0, \
170 .imm = 0 })
171
172#endif
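Put together, these macros build instruction arrays that bpf_prog_load() accepts directly; the testsuite below follows the same pattern. A minimal illustrative program (not part of the patch) that sets R0 and exits, which is the smallest shape the verifier accepts since R0 must be written before BPF_EXIT:

#include <linux/bpf.h>
#include "libbpf.h"

int main(void)
{
	/* r0 = 0; exit */
	struct bpf_insn prog[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};

	/* prog_len is in bytes; bpf_prog_load() divides by sizeof(struct bpf_insn) */
	return bpf_prog_load(BPF_PROG_TYPE_UNSPEC, prog, sizeof(prog), "GPL") < 0;
}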
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
new file mode 100644
index 000000000000..d10992e2740e
--- /dev/null
+++ b/samples/bpf/test_verifier.c
@@ -0,0 +1,548 @@
1/*
2 * Testsuite for eBPF verifier
3 *
4 * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 */
10#include <stdio.h>
11#include <unistd.h>
12#include <linux/bpf.h>
13#include <errno.h>
14#include <linux/unistd.h>
15#include <string.h>
16#include <linux/filter.h>
17#include "libbpf.h"
18
19#define MAX_INSNS 512
20#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
21
22struct bpf_test {
23 const char *descr;
24 struct bpf_insn insns[MAX_INSNS];
25 int fixup[32];
26 const char *errstr;
27 enum {
28 ACCEPT,
29 REJECT
30 } result;
31};
32
33static struct bpf_test tests[] = {
34 {
35 "add+sub+mul",
36 .insns = {
37 BPF_MOV64_IMM(BPF_REG_1, 1),
38 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 2),
39 BPF_MOV64_IMM(BPF_REG_2, 3),
40 BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_2),
41 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1),
42 BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 3),
43 BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
44 BPF_EXIT_INSN(),
45 },
46 .result = ACCEPT,
47 },
48 {
49 "unreachable",
50 .insns = {
51 BPF_EXIT_INSN(),
52 BPF_EXIT_INSN(),
53 },
54 .errstr = "unreachable",
55 .result = REJECT,
56 },
57 {
58 "unreachable2",
59 .insns = {
60 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
61 BPF_JMP_IMM(BPF_JA, 0, 0, 0),
62 BPF_EXIT_INSN(),
63 },
64 .errstr = "unreachable",
65 .result = REJECT,
66 },
67 {
68 "out of range jump",
69 .insns = {
70 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
71 BPF_EXIT_INSN(),
72 },
73 .errstr = "jump out of range",
74 .result = REJECT,
75 },
76 {
77 "out of range jump2",
78 .insns = {
79 BPF_JMP_IMM(BPF_JA, 0, 0, -2),
80 BPF_EXIT_INSN(),
81 },
82 .errstr = "jump out of range",
83 .result = REJECT,
84 },
85 {
86 "test1 ld_imm64",
87 .insns = {
88 BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
89 BPF_LD_IMM64(BPF_REG_0, 0),
90 BPF_LD_IMM64(BPF_REG_0, 0),
91 BPF_LD_IMM64(BPF_REG_0, 1),
92 BPF_LD_IMM64(BPF_REG_0, 1),
93 BPF_MOV64_IMM(BPF_REG_0, 2),
94 BPF_EXIT_INSN(),
95 },
96 .errstr = "invalid BPF_LD_IMM insn",
97 .result = REJECT,
98 },
99 {
100 "test2 ld_imm64",
101 .insns = {
102 BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
103 BPF_LD_IMM64(BPF_REG_0, 0),
104 BPF_LD_IMM64(BPF_REG_0, 0),
105 BPF_LD_IMM64(BPF_REG_0, 1),
106 BPF_LD_IMM64(BPF_REG_0, 1),
107 BPF_EXIT_INSN(),
108 },
109 .errstr = "invalid BPF_LD_IMM insn",
110 .result = REJECT,
111 },
112 {
113 "test3 ld_imm64",
114 .insns = {
115 BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
116 BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
117 BPF_LD_IMM64(BPF_REG_0, 0),
118 BPF_LD_IMM64(BPF_REG_0, 0),
119 BPF_LD_IMM64(BPF_REG_0, 1),
120 BPF_LD_IMM64(BPF_REG_0, 1),
121 BPF_EXIT_INSN(),
122 },
123 .errstr = "invalid bpf_ld_imm64 insn",
124 .result = REJECT,
125 },
126 {
127 "test4 ld_imm64",
128 .insns = {
129 BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
130 BPF_EXIT_INSN(),
131 },
132 .errstr = "invalid bpf_ld_imm64 insn",
133 .result = REJECT,
134 },
135 {
136 "test5 ld_imm64",
137 .insns = {
138 BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
139 },
140 .errstr = "invalid bpf_ld_imm64 insn",
141 .result = REJECT,
142 },
143 {
144 "no bpf_exit",
145 .insns = {
146 BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2),
147 },
148 .errstr = "jump out of range",
149 .result = REJECT,
150 },
151 {
152 "loop (back-edge)",
153 .insns = {
154 BPF_JMP_IMM(BPF_JA, 0, 0, -1),
155 BPF_EXIT_INSN(),
156 },
157 .errstr = "back-edge",
158 .result = REJECT,
159 },
160 {
161 "loop2 (back-edge)",
162 .insns = {
163 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
164 BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
165 BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
166 BPF_JMP_IMM(BPF_JA, 0, 0, -4),
167 BPF_EXIT_INSN(),
168 },
169 .errstr = "back-edge",
170 .result = REJECT,
171 },
172 {
173 "conditional loop",
174 .insns = {
175 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
176 BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
177 BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
178 BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
179 BPF_EXIT_INSN(),
180 },
181 .errstr = "back-edge",
182 .result = REJECT,
183 },
184 {
185 "read uninitialized register",
186 .insns = {
187 BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
188 BPF_EXIT_INSN(),
189 },
190 .errstr = "R2 !read_ok",
191 .result = REJECT,
192 },
193 {
194 "read invalid register",
195 .insns = {
196 BPF_MOV64_REG(BPF_REG_0, -1),
197 BPF_EXIT_INSN(),
198 },
199 .errstr = "R15 is invalid",
200 .result = REJECT,
201 },
202 {
203 "program doesn't init R0 before exit",
204 .insns = {
205 BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1),
206 BPF_EXIT_INSN(),
207 },
208 .errstr = "R0 !read_ok",
209 .result = REJECT,
210 },
211 {
212 "stack out of bounds",
213 .insns = {
214 BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
215 BPF_EXIT_INSN(),
216 },
217 .errstr = "invalid stack",
218 .result = REJECT,
219 },
220 {
221 "invalid call insn1",
222 .insns = {
223 BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0),
224 BPF_EXIT_INSN(),
225 },
226 .errstr = "BPF_CALL uses reserved",
227 .result = REJECT,
228 },
229 {
230 "invalid call insn2",
231 .insns = {
232 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 1, 0),
233 BPF_EXIT_INSN(),
234 },
235 .errstr = "BPF_CALL uses reserved",
236 .result = REJECT,
237 },
238 {
239 "invalid function call",
240 .insns = {
241 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 1234567),
242 BPF_EXIT_INSN(),
243 },
244 .errstr = "invalid func 1234567",
245 .result = REJECT,
246 },
247 {
248 "uninitialized stack1",
249 .insns = {
250 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
251 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
252 BPF_LD_MAP_FD(BPF_REG_1, 0),
253 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_unspec),
254 BPF_EXIT_INSN(),
255 },
256 .fixup = {2},
257 .errstr = "invalid indirect read from stack",
258 .result = REJECT,
259 },
260 {
261 "uninitialized stack2",
262 .insns = {
263 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
264 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -8),
265 BPF_EXIT_INSN(),
266 },
267 .errstr = "invalid read from stack",
268 .result = REJECT,
269 },
270 {
271 "check valid spill/fill",
272 .insns = {
273 /* spill R1(ctx) into stack */
274 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
275
276 /* fill it back into R2 */
277 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
278
279 /* should be able to access R0 = *(R2 + 8) */
280 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8),
281 BPF_EXIT_INSN(),
282 },
283 .result = ACCEPT,
284 },
285 {
286 "check corrupted spill/fill",
287 .insns = {
288 /* spill R1(ctx) into stack */
289 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
290
291 /* mess up with R1 pointer on stack */
292 BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23),
293
294 /* fill back into R0 should fail */
295 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
296
297 BPF_EXIT_INSN(),
298 },
299 .errstr = "corrupted spill",
300 .result = REJECT,
301 },
302 {
303 "invalid src register in STX",
304 .insns = {
305 BPF_STX_MEM(BPF_B, BPF_REG_10, -1, -1),
306 BPF_EXIT_INSN(),
307 },
308 .errstr = "R15 is invalid",
309 .result = REJECT,
310 },
311 {
312 "invalid dst register in STX",
313 .insns = {
314 BPF_STX_MEM(BPF_B, 14, BPF_REG_10, -1),
315 BPF_EXIT_INSN(),
316 },
317 .errstr = "R14 is invalid",
318 .result = REJECT,
319 },
320 {
321 "invalid dst register in ST",
322 .insns = {
323 BPF_ST_MEM(BPF_B, 14, -1, -1),
324 BPF_EXIT_INSN(),
325 },
326 .errstr = "R14 is invalid",
327 .result = REJECT,
328 },
329 {
330 "invalid src register in LDX",
331 .insns = {
332 BPF_LDX_MEM(BPF_B, BPF_REG_0, 12, 0),
333 BPF_EXIT_INSN(),
334 },
335 .errstr = "R12 is invalid",
336 .result = REJECT,
337 },
338 {
339 "invalid dst register in LDX",
340 .insns = {
341 BPF_LDX_MEM(BPF_B, 11, BPF_REG_1, 0),
342 BPF_EXIT_INSN(),
343 },
344 .errstr = "R11 is invalid",
345 .result = REJECT,
346 },
347 {
348 "junk insn",
349 .insns = {
350 BPF_RAW_INSN(0, 0, 0, 0, 0),
351 BPF_EXIT_INSN(),
352 },
353 .errstr = "invalid BPF_LD_IMM",
354 .result = REJECT,
355 },
356 {
357 "junk insn2",
358 .insns = {
359 BPF_RAW_INSN(1, 0, 0, 0, 0),
360 BPF_EXIT_INSN(),
361 },
362 .errstr = "BPF_LDX uses reserved fields",
363 .result = REJECT,
364 },
365 {
366 "junk insn3",
367 .insns = {
368 BPF_RAW_INSN(-1, 0, 0, 0, 0),
369 BPF_EXIT_INSN(),
370 },
371 .errstr = "invalid BPF_ALU opcode f0",
372 .result = REJECT,
373 },
374 {
375 "junk insn4",
376 .insns = {
377 BPF_RAW_INSN(-1, -1, -1, -1, -1),
378 BPF_EXIT_INSN(),
379 },
380 .errstr = "invalid BPF_ALU opcode f0",
381 .result = REJECT,
382 },
383 {
384 "junk insn5",
385 .insns = {
386 BPF_RAW_INSN(0x7f, -1, -1, -1, -1),
387 BPF_EXIT_INSN(),
388 },
389 .errstr = "BPF_ALU uses reserved fields",
390 .result = REJECT,
391 },
392 {
393 "misaligned read from stack",
394 .insns = {
395 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
396 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4),
397 BPF_EXIT_INSN(),
398 },
399 .errstr = "misaligned access",
400 .result = REJECT,
401 },
402 {
403 "invalid map_fd for function call",
404 .insns = {
405 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
406 BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10),
407 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
408 BPF_LD_MAP_FD(BPF_REG_1, 0),
409 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_unspec),
410 BPF_EXIT_INSN(),
411 },
412 .errstr = "fd 0 is not pointing to valid bpf_map",
413 .result = REJECT,
414 },
415 {
416 "don't check return value before access",
417 .insns = {
418 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
419 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
420 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
421 BPF_LD_MAP_FD(BPF_REG_1, 0),
422 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_unspec),
423 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
424 BPF_EXIT_INSN(),
425 },
426 .fixup = {3},
427 .errstr = "R0 invalid mem access 'map_value_or_null'",
428 .result = REJECT,
429 },
430 {
431 "access memory with incorrect alignment",
432 .insns = {
433 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
434 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
435 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
436 BPF_LD_MAP_FD(BPF_REG_1, 0),
437 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_unspec),
438 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
439 BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
440 BPF_EXIT_INSN(),
441 },
442 .fixup = {3},
443 .errstr = "misaligned access",
444 .result = REJECT,
445 },
446 {
447 "sometimes access memory with incorrect alignment",
448 .insns = {
449 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
450 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
451 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
452 BPF_LD_MAP_FD(BPF_REG_1, 0),
453 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_unspec),
454 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
455 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
456 BPF_EXIT_INSN(),
457 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
458 BPF_EXIT_INSN(),
459 },
460 .fixup = {3},
461 .errstr = "R0 invalid mem access",
462 .result = REJECT,
463 },
464};
465
466static int probe_filter_length(struct bpf_insn *fp)
467{
468 int len = 0;
469
470 for (len = MAX_INSNS - 1; len > 0; --len)
471 if (fp[len].code != 0 || fp[len].imm != 0)
472 break;
473
474 return len + 1;
475}
476
477static int create_map(void)
478{
479 long long key, value = 0;
480 int map_fd;
481
482 map_fd = bpf_create_map(BPF_MAP_TYPE_UNSPEC, sizeof(key), sizeof(value), 1024);
483 if (map_fd < 0) {
484 printf("failed to create map '%s'\n", strerror(errno));
485 }
486
487 return map_fd;
488}
489
490static int test(void)
491{
492 int prog_fd, i;
493
494 for (i = 0; i < ARRAY_SIZE(tests); i++) {
495 struct bpf_insn *prog = tests[i].insns;
496 int prog_len = probe_filter_length(prog);
497 int *fixup = tests[i].fixup;
498 int map_fd = -1;
499
500 if (*fixup) {
501 map_fd = create_map();
502
503 do {
504 prog[*fixup].imm = map_fd;
505 fixup++;
506 } while (*fixup);
507 }
508 printf("#%d %s ", i, tests[i].descr);
509
510 prog_fd = bpf_prog_load(BPF_PROG_TYPE_UNSPEC, prog,
511 prog_len * sizeof(struct bpf_insn),
512 "GPL");
513
514 if (tests[i].result == ACCEPT) {
515 if (prog_fd < 0) {
516 printf("FAIL\nfailed to load prog '%s'\n",
517 strerror(errno));
518 printf("%s", bpf_log_buf);
519 goto fail;
520 }
521 } else {
522 if (prog_fd >= 0) {
523 printf("FAIL\nunexpected success to load\n");
524 printf("%s", bpf_log_buf);
525 goto fail;
526 }
527 if (strstr(bpf_log_buf, tests[i].errstr) == 0) {
528 printf("FAIL\nunexpected error message: %s",
529 bpf_log_buf);
530 goto fail;
531 }
532 }
533
534 printf("OK\n");
535fail:
536 if (map_fd >= 0)
537 close(map_fd);
538 close(prog_fd);
539
540 }
541
542 return 0;
543}
544
545int main(void)
546{
547 return test();
548}