aboutsummaryrefslogtreecommitdiffstats
path: root/include/uapi/linux
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-05-16 22:47:11 -0400
committerDavid S. Miller <davem@davemloft.net>2018-05-16 22:47:11 -0400
commitb9f672af148bf7a08a6031743156faffd58dbc7e (patch)
tree4e3a384636147f0fd31ec01cc267a51bdab7cbb5 /include/uapi/linux
parent8e725f7caafb8e820e05707fe9853023375438cf (diff)
parente23afe5e7cba89cd0744c5218eda1b3553455c17 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2018-05-17 The following pull-request contains BPF updates for your *net-next* tree. The main changes are: 1) Provide a new BPF helper for doing a FIB and neighbor lookup in the kernel tables from an XDP or tc BPF program. The helper provides a fast-path for forwarding packets. The API supports IPv4, IPv6 and MPLS protocols, but currently IPv4 and IPv6 are implemented in this initial work, from David (Ahern). 2) Just a tiny diff but huge feature enabled for nfp driver by extending the BPF offload beyond a pure host processing offload. Offloaded XDP programs are allowed to set the RX queue index and thus opening the door for defining a fully programmable RSS/n-tuple filter replacement. Once BPF decided on a queue already, the device data-path will skip the conventional RSS processing completely, from Jakub. 3) The original sockmap implementation was array based similar to devmap. However unlike devmap where an ifindex has a 1:1 mapping into the map there are use cases with sockets that need to be referenced using longer keys. Hence, sockhash map is added reusing as much of the sockmap code as possible, from John. 4) Introduce BTF ID. The ID is allocated through an IDR similar as with BPF maps and progs. It also makes BTF accessible to user space via BPF_BTF_GET_FD_BY_ID and adds exposure of the BTF data through BPF_OBJ_GET_INFO_BY_FD, from Martin. 5) Enable BPF stackmap with build_id also in NMI context. Due to the up_read() of current->mm->mmap_sem build_id cannot be parsed. This work defers the up_read() via a per-cpu irq_work so that at least limited support can be enabled, from Song. 6) Various BPF JIT follow-up cleanups and fixups after the LD_ABS/LD_IND JIT conversion as well as implementation of an optimized 32/64 bit immediate load in the arm64 JIT that allows to reduce the number of emitted instructions; in case of tested real-world programs they were shrinking by three percent, from Daniel. 
7) Add ifindex parameter to the libbpf loader in order to enable BPF offload support. Right now only iproute2 can load offloaded BPF and this will also enable libbpf for direct integration into other applications, from David (Beckett). 8) Convert the plain text documentation under Documentation/bpf/ into RST format since this is the appropriate standard the kernel is moving to for all documentation. Also add an overview README.rst, from Jesper. 9) Add __printf verification attribute to the bpf_verifier_vlog() helper. Though it uses va_list we can still allow gcc to check the format string, from Mathieu. 10) Fix a bash reference in the BPF selftest's Makefile. The '|& ...' is a bash 4.0+ feature which is not guaranteed to be available when calling out to shell, therefore use a more portable variant, from Joe. 11) Fix a 64 bit division in xdp_umem_reg() by using div_u64() instead of relying on the gcc built-in, from Björn. 12) Fix a sock hashmap kmalloc warning reported by syzbot when an overly large key size is used in hashmap then causing overflows in htab->elem_size. Reject bogus attr->key_size early in the sock_hash_alloc(), from Yonghong. 13) Ensure in BPF selftests when urandom_read is being linked that --build-id is always enabled so that test_stacktrace_build_id[_nmi] won't be failing, from Alexei. 14) Add bitsperlong.h as well as errno.h uapi headers into the tools header infrastructure which point to one of the arch specific uapi headers. This was needed in order to fix a build error on some systems for the BPF selftests, from Sirio. 15) Allow for short options to be used in the xdp_monitor BPF sample code. And also a bpf.h tools uapi header sync in order to fix a selftest build failure. Both from Prashant. 16) More formally clarify the meaning of ID in the direct packet access section of the BPF documentation, from Wang. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/uapi/linux')
-rw-r--r--include/uapi/linux/bpf.h142
1 files changed, 141 insertions, 1 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 93d5a4eeec2a..d94d333a8225 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -96,6 +96,7 @@ enum bpf_cmd {
96 BPF_PROG_QUERY, 96 BPF_PROG_QUERY,
97 BPF_RAW_TRACEPOINT_OPEN, 97 BPF_RAW_TRACEPOINT_OPEN,
98 BPF_BTF_LOAD, 98 BPF_BTF_LOAD,
99 BPF_BTF_GET_FD_BY_ID,
99}; 100};
100 101
101enum bpf_map_type { 102enum bpf_map_type {
@@ -117,6 +118,7 @@ enum bpf_map_type {
117 BPF_MAP_TYPE_SOCKMAP, 118 BPF_MAP_TYPE_SOCKMAP,
118 BPF_MAP_TYPE_CPUMAP, 119 BPF_MAP_TYPE_CPUMAP,
119 BPF_MAP_TYPE_XSKMAP, 120 BPF_MAP_TYPE_XSKMAP,
121 BPF_MAP_TYPE_SOCKHASH,
120}; 122};
121 123
122enum bpf_prog_type { 124enum bpf_prog_type {
@@ -344,6 +346,7 @@ union bpf_attr {
344 __u32 start_id; 346 __u32 start_id;
345 __u32 prog_id; 347 __u32 prog_id;
346 __u32 map_id; 348 __u32 map_id;
349 __u32 btf_id;
347 }; 350 };
348 __u32 next_id; 351 __u32 next_id;
349 __u32 open_flags; 352 __u32 open_flags;
@@ -1826,6 +1829,79 @@ union bpf_attr {
1826 * Return 1829 * Return
1827 * 0 on success, or a negative error in case of failure. 1830 * 0 on success, or a negative error in case of failure.
1828 * 1831 *
1832 * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
1833 * Description
1834 * Do FIB lookup in kernel tables using parameters in *params*.
1835 * If lookup is successful and result shows packet is to be
1836 * forwarded, the neighbor tables are searched for the nexthop.
1837 * If successful (i.e., FIB lookup shows forwarding and nexthop
1838 * is resolved), the nexthop address is returned in ipv4_dst,
1839 * ipv6_dst or mpls_out based on family, smac is set to mac
1840 * address of egress device, dmac is set to nexthop mac address,
1841 * rt_metric is set to metric from route.
1842 *
1843 * *plen* argument is the size of the passed in struct.
1844 * *flags* argument can be one or more BPF_FIB_LOOKUP_ flags:
1845 *
1846 * **BPF_FIB_LOOKUP_DIRECT** means do a direct table lookup vs
1847 * full lookup using FIB rules
1848 * **BPF_FIB_LOOKUP_OUTPUT** means do lookup from an egress
1849 * perspective (default is ingress)
1850 *
1851 * *ctx* is either **struct xdp_md** for XDP programs or
1852 * **struct sk_buff** for tc cls_act programs.
1853 *
1854 * Return
1855 * Egress device index on success, 0 if packet needs to continue
1856 * up the stack for further processing or a negative error in case
1857 * of failure.
1858 *
1859 * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
1860 * Description
1861 * Add an entry to, or update a sockhash *map* referencing sockets.
1862 * The *skops* is used as a new value for the entry associated to
1863 * *key*. *flags* is one of:
1864 *
1865 * **BPF_NOEXIST**
1866 * The entry for *key* must not exist in the map.
1867 * **BPF_EXIST**
1868 * The entry for *key* must already exist in the map.
1869 * **BPF_ANY**
1870 * No condition on the existence of the entry for *key*.
1871 *
1872 * If the *map* has eBPF programs (parser and verdict), those will
1873 * be inherited by the socket being added. If the socket is
1874 * already attached to eBPF programs, this results in an error.
1875 * Return
1876 * 0 on success, or a negative error in case of failure.
1877 *
1878 * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
1879 * Description
1880 * This helper is used in programs implementing policies at the
1881 * socket level. If the message *msg* is allowed to pass (i.e. if
1882 * the verdict eBPF program returns **SK_PASS**), redirect it to
1883 * the socket referenced by *map* (of type
1884 * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
1885 * egress interfaces can be used for redirection. The
1886 * **BPF_F_INGRESS** value in *flags* is used to make the
1887 * distinction (ingress path is selected if the flag is present,
1888 * egress path otherwise). This is the only flag supported for now.
1889 * Return
1890 * **SK_PASS** on success, or **SK_DROP** on error.
1891 *
1892 * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
1893 * Description
1894 * This helper is used in programs implementing policies at the
1895 * skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
1896 * if the verdict eBPF program returns **SK_PASS**), redirect it
1897 * to the socket referenced by *map* (of type
1898 * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
1899 * egress interfaces can be used for redirection. The
1900 * **BPF_F_INGRESS** value in *flags* is used to make the
1901 * distinction (ingress path is selected if the flag is present,
1902 * egress otherwise). This is the only flag supported for now.
1903 * Return
1904 * **SK_PASS** on success, or **SK_DROP** on error.
1829 */ 1905 */
1830#define __BPF_FUNC_MAPPER(FN) \ 1906#define __BPF_FUNC_MAPPER(FN) \
1831 FN(unspec), \ 1907 FN(unspec), \
@@ -1896,7 +1972,11 @@ union bpf_attr {
1896 FN(xdp_adjust_tail), \ 1972 FN(xdp_adjust_tail), \
1897 FN(skb_get_xfrm_state), \ 1973 FN(skb_get_xfrm_state), \
1898 FN(get_stack), \ 1974 FN(get_stack), \
1899 FN(skb_load_bytes_relative), 1975 FN(skb_load_bytes_relative), \
1976 FN(fib_lookup), \
1977 FN(sock_hash_update), \
1978 FN(msg_redirect_hash), \
1979 FN(sk_redirect_hash),
1900 1980
1901/* integer value in 'imm' field of BPF_CALL instruction selects which helper 1981/* integer value in 'imm' field of BPF_CALL instruction selects which helper
1902 * function eBPF program intends to call 1982 * function eBPF program intends to call
@@ -2130,6 +2210,15 @@ struct bpf_map_info {
2130 __u32 ifindex; 2210 __u32 ifindex;
2131 __u64 netns_dev; 2211 __u64 netns_dev;
2132 __u64 netns_ino; 2212 __u64 netns_ino;
2213 __u32 btf_id;
2214 __u32 btf_key_id;
2215 __u32 btf_value_id;
2216} __attribute__((aligned(8)));
2217
2218struct bpf_btf_info {
2219 __aligned_u64 btf;
2220 __u32 btf_size;
2221 __u32 id;
2133} __attribute__((aligned(8))); 2222} __attribute__((aligned(8)));
2134 2223
2135/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed 2224/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
@@ -2310,4 +2399,55 @@ struct bpf_raw_tracepoint_args {
2310 __u64 args[0]; 2399 __u64 args[0];
2311}; 2400};
2312 2401
2402/* DIRECT: Skip the FIB rules and go to FIB table associated with device
2403 * OUTPUT: Do lookup from egress perspective; default is ingress
2404 */
2405#define BPF_FIB_LOOKUP_DIRECT BIT(0)
2406#define BPF_FIB_LOOKUP_OUTPUT BIT(1)
2407
2408struct bpf_fib_lookup {
2409 /* input */
2410 __u8 family; /* network family, AF_INET, AF_INET6, AF_MPLS */
2411
2412 /* set if lookup is to consider L4 data - e.g., FIB rules */
2413 __u8 l4_protocol;
2414 __be16 sport;
2415 __be16 dport;
2416
2417 /* total length of packet from network header - used for MTU check */
2418 __u16 tot_len;
2419 __u32 ifindex; /* L3 device index for lookup */
2420
2421 union {
2422 /* inputs to lookup */
2423 __u8 tos; /* AF_INET */
2424 __be32 flowlabel; /* AF_INET6 */
2425
2426 /* output: metric of fib result */
2427 __u32 rt_metric;
2428 };
2429
2430 union {
2431 __be32 mpls_in;
2432 __be32 ipv4_src;
2433 __u32 ipv6_src[4]; /* in6_addr; network order */
2434 };
2435
2436 /* input to bpf_fib_lookup, *dst is destination address.
2437 * output: bpf_fib_lookup sets to gateway address
2438 */
2439 union {
2440 /* return for MPLS lookups */
2441 __be32 mpls_out[4]; /* support up to 4 labels */
2442 __be32 ipv4_dst;
2443 __u32 ipv6_dst[4]; /* in6_addr; network order */
2444 };
2445
2446 /* output */
2447 __be16 h_vlan_proto;
2448 __be16 h_vlan_TCI;
2449 __u8 smac[6]; /* ETH_ALEN */
2450 __u8 dmac[6]; /* ETH_ALEN */
2451};
2452
2313#endif /* _UAPI__LINUX_BPF_H__ */ 2453#endif /* _UAPI__LINUX_BPF_H__ */