author		Linus Torvalds <torvalds@linux-foundation.org>	2018-06-06 21:39:49 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-06-06 21:39:49 -0400
commit		1c8c5a9d38f607c0b6fd12c91cbe1a4418762a21 (patch)
tree		dcc97181d4d187252e0cc8fdf29d9b365fa3ffd0 /tools/include
parent		285767604576148fc1be7fcd112e4a90eb0d6ad2 (diff)
parent		7170e6045a6a8b33f4fa5753589dc77b16198e2d (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 1) Add Maglev hashing scheduler to IPVS, from Inju Song.
 2) Lots of new TC subsystem tests from Roman Mashak.
 3) Add TCP zero copy receive and fix delayed acks and autotuning with
    SO_RCVLOWAT, from Eric Dumazet.
 4) Add XDP_REDIRECT support to mlx5 driver, from Jesper Dangaard Brouer.
 5) Add ttl inherit support to vxlan, from Hangbin Liu.
 6) Properly separate ipv6 routes into their logically independent
    components. fib6_info for the routing table, and fib6_nh for sets of
    nexthops, which thus can be shared. From David Ahern.
 7) Add bpf_xdp_adjust_tail helper, which can be used to generate ICMP
    messages from XDP programs. From Nikita V. Shirokov.
 8) Lots of long overdue cleanups to the r8169 driver, from Heiner Kallweit.
 9) Add BTF ("BPF Type Format"), from Martin KaFai Lau.
10) Add traffic condition monitoring to iwlwifi, from Luca Coelho.
11) Plumb extack down into fib_rules, from Roopa Prabhu.
12) Add Flower classifier offload support to igb, from Vinicius Costa Gomes.
13) Add UDP GSO support, from Willem de Bruijn.
14) Add documentation for eBPF helpers, from Quentin Monnet.
15) Add TLS tx offload to mlx5, from Ilya Lesokhin.
16) Allow applications to be given the number of bytes available to read
    on a socket via a control message returned from recvmsg(), from
    Soheil Hassas Yeganeh.
17) Add x86_32 eBPF JIT compiler, from Wang YanQing.
18) Add AF_XDP sockets, with zerocopy support infrastructure as well.
    From Björn Töpel.
19) Remove indirect load support from all of the BPF JITs and handle
    these operations in the verifier by translating them into native BPF
    instead. From Daniel Borkmann.
20) Add GRO support to ipv6 gre tunnels, from Eran Ben Elisha.
21) Allow XDP programs to do lookups in the main kernel routing tables
    for forwarding. From David Ahern.
22) Allow drivers to store hardware state into an ELF section of kernel
    dump vmcore files, and use it in cxgb4. From Rahul Lakkireddy.
23) Various RACK and loss detection improvements in TCP, from Yuchung Cheng.
24) Add TCP SACK compression, from Eric Dumazet.
25) Add User Mode Helper support and basic bpfilter infrastructure, from
    Alexei Starovoitov.
26) Support ports and protocol values in RTM_GETROUTE, from Roopa Prabhu.
27) Support bulking in ->ndo_xdp_xmit() API, from Jesper Dangaard Brouer.
28) Add lots of forwarding selftests, from Petr Machata.
29) Add generic network device failover driver, from Sridhar Samudrala.

* ra.kernel.org:/pub/scm/linux/kernel/git/davem/net-next: (1959 commits)
  strparser: Add __strp_unpause and use it in ktls.
  rxrpc: Fix terminal retransmission connection ID to include the channel
  net: hns3: Optimize PF CMDQ interrupt switching process
  net: hns3: Fix for VF mailbox receiving unknown message
  net: hns3: Fix for VF mailbox cannot receiving PF response
  bnx2x: use the right constant
  Revert "net: sched: cls: Fix offloading when ingress dev is vxlan"
  net: dsa: b53: Fix for brcm tag issue in Cygnus SoC
  enic: fix UDP rss bits
  netdev-FAQ: clarify DaveM's position for stable backports
  rtnetlink: validate attributes in do_setlink()
  mlxsw: Add extack messages for port_{un, }split failures
  netdevsim: Add extack error message for devlink reload
  devlink: Add extack to reload and port_{un, }split operations
  net: metrics: add proper netlink validation
  ipmr: fix error path when ipmr_new_table fails
  ip6mr: only set ip6mr_table from setsockopt when ip6mr_new_table succeeds
  net: hns3: remove unused hclgevf_cfg_func_mta_filter
  netfilter: provide udp*_lib_lookup for nf_tproxy
  qed*: Utilize FW 8.37.2.0
  ...
Diffstat (limited to 'tools/include')
-rw-r--r--	tools/include/linux/filter.h		  10
-rw-r--r--	tools/include/uapi/asm/bitsperlong.h	  18
-rw-r--r--	tools/include/uapi/asm/errno.h		  18
-rw-r--r--	tools/include/uapi/linux/bpf.h		2220
-rw-r--r--	tools/include/uapi/linux/btf.h		 113
-rw-r--r--	tools/include/uapi/linux/erspan.h	  52
-rw-r--r--	tools/include/uapi/linux/lirc.h		 217
-rw-r--r--	tools/include/uapi/linux/seg6.h		  55
-rw-r--r--	tools/include/uapi/linux/seg6_local.h	  80
9 files changed, 2399 insertions(+), 384 deletions(-)
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index c5e512da8d8a..af55acf73e75 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -263,6 +263,16 @@
 #define BPF_LD_MAP_FD(DST, MAP_FD)				\
	BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
 
+/* Relative call */
+
+#define BPF_CALL_REL(TGT)					\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP | BPF_CALL,			\
+		.dst_reg = 0,					\
+		.src_reg = BPF_PSEUDO_CALL,			\
+		.off   = 0,					\
+		.imm   = TGT })
+
 /* Program exit */
 
 #define BPF_EXIT_INSN()					\
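BPF_CALL_REL() emits a BPF-to-BPF call whose immediate counts instructions relative to the one following the call. A minimal sketch of how a hand-assembled test program might use it (the surrounding program is illustrative, not part of the patch):

	/* Call a subprogram two slots ahead; the subprogram copies its
	 * argument into r0 and returns. The call sits at index 1, so an
	 * immediate of 1 targets index 1 + 1 + 1 = 3. */
	struct bpf_insn prog[] = {
		BPF_MOV64_IMM(BPF_REG_1, 42),
		BPF_CALL_REL(1),
		BPF_EXIT_INSN(),
		/* subprogram */
		BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
		BPF_EXIT_INSN(),
	};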
diff --git a/tools/include/uapi/asm/bitsperlong.h b/tools/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000000..8dd6aefdafa4
--- /dev/null
+++ b/tools/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/uapi/asm/bitsperlong.h"
+#elif defined(__aarch64__)
+#include "../../arch/arm64/include/uapi/asm/bitsperlong.h"
+#elif defined(__powerpc__)
+#include "../../arch/powerpc/include/uapi/asm/bitsperlong.h"
+#elif defined(__s390__)
+#include "../../arch/s390/include/uapi/asm/bitsperlong.h"
+#elif defined(__sparc__)
+#include "../../arch/sparc/include/uapi/asm/bitsperlong.h"
+#elif defined(__mips__)
+#include "../../arch/mips/include/uapi/asm/bitsperlong.h"
+#elif defined(__ia64__)
+#include "../../arch/ia64/include/uapi/asm/bitsperlong.h"
+#else
+#include <asm-generic/bitsperlong.h>
+#endif
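The wrapper lets tools sources pick up the target architecture's own word-size definition and fall back to asm-generic everywhere else. A quick host-side sanity check, assuming a tools build where this directory is on the include path:

	#include <stdio.h>
	#include <asm/bitsperlong.h>	/* resolves to the wrapper above */

	int main(void)
	{
		/* Expect 64 on x86_64/arm64 hosts, 32 on 32-bit targets. */
		printf("__BITS_PER_LONG = %d\n", __BITS_PER_LONG);
		return 0;
	}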
diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h
new file mode 100644
index 000000000000..ce3c5945a1c4
--- /dev/null
+++ b/tools/include/uapi/asm/errno.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/uapi/asm/errno.h"
+#elif defined(__powerpc__)
+#include "../../arch/powerpc/include/uapi/asm/errno.h"
+#elif defined(__sparc__)
+#include "../../arch/sparc/include/uapi/asm/errno.h"
+#elif defined(__alpha__)
+#include "../../arch/alpha/include/uapi/asm/errno.h"
+#elif defined(__mips__)
+#include "../../arch/mips/include/uapi/asm/errno.h"
+#elif defined(__ia64__)
+#include "../../arch/ia64/include/uapi/asm/errno.h"
+#elif defined(__xtensa__)
+#include "../../arch/xtensa/include/uapi/asm/errno.h"
+#else
+#include <asm-generic/errno.h>
+#endif
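Same pattern for errno: architectures with their own error-number tables (alpha, mips, sparc, ...) are routed to their uapi header, everything else gets the generic table. A hypothetical check of a constant that differs between those tables:

	#include <stdio.h>
	#include <asm/errno.h>	/* resolves to the wrapper above */

	int main(void)
	{
		/* EDEADLK is 35 in asm-generic but e.g. 45 on MIPS; the
		 * wrapper makes tools see the target's value. */
		printf("EDEADLK = %d\n", EDEADLK);
		return 0;
	}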
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 8c317737ba3f..e0b06784f227 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -95,6 +95,9 @@ enum bpf_cmd {
 	BPF_OBJ_GET_INFO_BY_FD,
 	BPF_PROG_QUERY,
 	BPF_RAW_TRACEPOINT_OPEN,
+	BPF_BTF_LOAD,
+	BPF_BTF_GET_FD_BY_ID,
+	BPF_TASK_FD_QUERY,
 };
 
 enum bpf_map_type {
@@ -115,6 +118,8 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_DEVMAP,
 	BPF_MAP_TYPE_SOCKMAP,
 	BPF_MAP_TYPE_CPUMAP,
+	BPF_MAP_TYPE_XSKMAP,
+	BPF_MAP_TYPE_SOCKHASH,
 };
 
 enum bpf_prog_type {
@@ -137,6 +142,8 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SK_MSG,
 	BPF_PROG_TYPE_RAW_TRACEPOINT,
 	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+	BPF_PROG_TYPE_LWT_SEG6LOCAL,
+	BPF_PROG_TYPE_LIRC_MODE2,
 };
 
 enum bpf_attach_type {
@@ -154,6 +161,9 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET6_CONNECT,
 	BPF_CGROUP_INET4_POST_BIND,
 	BPF_CGROUP_INET6_POST_BIND,
+	BPF_CGROUP_UDP4_SENDMSG,
+	BPF_CGROUP_UDP6_SENDMSG,
+	BPF_LIRC_MODE2,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -279,6 +289,9 @@ union bpf_attr {
 	 */
 	char	map_name[BPF_OBJ_NAME_LEN];
 	__u32	map_ifindex;	/* ifindex of netdev to create on */
+	__u32	btf_fd;		/* fd pointing to a BTF type data */
+	__u32	btf_key_type_id;	/* BTF type_id of the key */
+	__u32	btf_value_type_id;	/* BTF type_id of the value */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -339,6 +352,7 @@ union bpf_attr {
 		__u32		start_id;
 		__u32		prog_id;
 		__u32		map_id;
+		__u32		btf_id;
 	};
 	__u32		next_id;
 	__u32		open_flags;
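With the new btf_id field, the BPF_*_GET_FD_BY_ID family extends to BTF objects. A minimal sketch (error handling elided) of turning a known BTF object id into a file descriptor:

	union bpf_attr attr = {};
	int btf_fd;

	attr.btf_id = id;	/* id obtained e.g. from program info */
	btf_fd = syscall(__NR_bpf, BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
	if (btf_fd < 0)
		perror("BPF_BTF_GET_FD_BY_ID");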
@@ -363,398 +377,1704 @@ union bpf_attr {
 		__u64 name;
 		__u32 prog_fd;
 	} raw_tracepoint;
+
+	struct { /* anonymous struct for BPF_BTF_LOAD */
+		__aligned_u64	btf;
+		__aligned_u64	btf_log_buf;
+		__u32		btf_size;
+		__u32		btf_log_size;
+		__u32		btf_log_level;
+	};
+
+	struct {
+		__u32		pid;		/* input: pid */
+		__u32		fd;		/* input: fd */
+		__u32		flags;		/* input: flags */
+		__u32		buf_len;	/* input/output: buf len */
+		__aligned_u64	buf;		/* input/output:
+						 *   tp_name for tracepoint
+						 *   symbol for kprobe
+						 *   filename for uprobe
+						 */
+		__u32		prog_id;	/* output: prog_id */
+		__u32		fd_type;	/* output: BPF_FD_TYPE_* */
+		__u64		probe_offset;	/* output: probe_offset */
+		__u64		probe_addr;	/* output: probe_addr */
+	} task_fd_query;
 } __attribute__((aligned(8)));
 
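BPF_TASK_FD_QUERY answers "what is attached behind this fd?" for a given pid/fd pair. A sketch of querying it from user space, assuming pid and fd identify a perf event created earlier:

	char buf[256];
	union bpf_attr attr = {};

	attr.task_fd_query.pid	   = pid;
	attr.task_fd_query.fd	   = fd;
	attr.task_fd_query.buf	   = (__u64)(unsigned long)buf;
	attr.task_fd_query.buf_len = sizeof(buf);

	if (syscall(__NR_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr)) == 0)
		printf("prog %u fd_type %u name %s\n",
		       attr.task_fd_query.prog_id,
		       attr.task_fd_query.fd_type, buf);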
-/* BPF helper function descriptions:
- *
- * void *bpf_map_lookup_elem(&map, &key)
- *     Return: Map value or NULL
- *
- * int bpf_map_update_elem(&map, &key, &value, flags)
- *     Return: 0 on success or negative error
- *
- * int bpf_map_delete_elem(&map, &key)
- *     Return: 0 on success or negative error
- *
- * int bpf_probe_read(void *dst, int size, void *src)
- *     Return: 0 on success or negative error
+/* The description below is an attempt at providing documentation to eBPF
+ * developers about the multiple available eBPF helper functions. It can be
+ * parsed and used to produce a manual page. The workflow is the following,
+ * and requires the rst2man utility:
+ *
+ *     $ ./scripts/bpf_helpers_doc.py \
+ *             --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
+ *     $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
+ *     $ man /tmp/bpf-helpers.7
+ *
+ * Note that in order to produce this external documentation, some RST
+ * formatting is used in the descriptions to get "bold" and "italics" in
+ * manual pages. Also note that the few trailing white spaces are
+ * intentional, removing them would break paragraphs for rst2man.
+ *
+ * Start of BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+ * 	Description
+ * 		Perform a lookup in *map* for an entry associated to *key*.
+ * 	Return
+ * 		Map value associated to *key*, or **NULL** if no entry was
+ * 		found.
+ *
+ * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ * 	Description
+ * 		Add or update the value of the entry associated to *key* in
+ * 		*map* with *value*. *flags* is one of:
+ *
+ * 		**BPF_NOEXIST**
+ * 			The entry for *key* must not exist in the map.
+ * 		**BPF_EXIST**
+ * 			The entry for *key* must already exist in the map.
+ * 		**BPF_ANY**
+ * 			No condition on the existence of the entry for *key*.
+ *
+ * 		Flag value **BPF_NOEXIST** cannot be used for maps of types
+ * 		**BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all
+ * 		elements always exist), the helper would return an error.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ * 	Description
+ * 		Delete entry with *key* from *map*.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read(void *dst, u32 size, const void *src)
+ * 	Description
+ * 		For tracing programs, safely attempt to read *size* bytes from
+ * 		address *src* and store the data in *dst*.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
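Taken together, the three map helpers cover the usual read-modify-write pattern of an eBPF program. A sketch in restricted C, assuming a per-key packet counter map named counters declared elsewhere:

	__u32 key = 0;
	__u64 one = 1, *count;

	count = bpf_map_lookup_elem(&counters, &key);
	if (count)
		__sync_fetch_and_add(count, 1);	/* update in place */
	else
		bpf_map_update_elem(&counters, &key, &one, BPF_NOEXIST);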
- *
- * u64 bpf_ktime_get_ns(void)
- *     Return: current ktime
- *
- * int bpf_trace_printk(const char *fmt, int fmt_size, ...)
- *     Return: length of buffer written or negative error
- *
- * u32 bpf_prandom_u32(void)
- *     Return: random value
- *
- * u32 bpf_raw_smp_processor_id(void)
- *     Return: SMP processor ID
- *
- * int bpf_skb_store_bytes(skb, offset, from, len, flags)
- *     store bytes into packet
- *     @skb: pointer to skb
- *     @offset: offset within packet from skb->mac_header
- *     @from: pointer where to copy bytes from
- *     @len: number of bytes to store into packet
- *     @flags: bit 0 - if true, recompute skb->csum
- *             other bits - reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_l3_csum_replace(skb, offset, from, to, flags)
- *     recompute IP checksum
- *     @skb: pointer to skb
- *     @offset: offset within packet where IP checksum is located
- *     @from: old value of header field
- *     @to: new value of header field
- *     @flags: bits 0-3 - size of header field
- *             other bits - reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_l4_csum_replace(skb, offset, from, to, flags)
- *     recompute TCP/UDP checksum
- *     @skb: pointer to skb
- *     @offset: offset within packet where TCP/UDP checksum is located
- *     @from: old value of header field
- *     @to: new value of header field
- *     @flags: bits 0-3 - size of header field
- *             bit 4 - is pseudo header
- *             other bits - reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_tail_call(ctx, prog_array_map, index)
- *     jump into another BPF program
- *     @ctx: context pointer passed to next program
- *     @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
- *     @index: 32-bit index inside array that selects specific program to run
- *     Return: 0 on success or negative error
- *
- * int bpf_clone_redirect(skb, ifindex, flags)
- *     redirect to another netdev
- *     @skb: pointer to skb
- *     @ifindex: ifindex of the net device
- *     @flags: bit 0 - if set, redirect to ingress instead of egress
- *             other bits - reserved
- *     Return: 0 on success or negative error
+ *
+ * u64 bpf_ktime_get_ns(void)
+ * 	Description
+ * 		Return the time elapsed since system boot, in nanoseconds.
+ * 	Return
+ * 		Current *ktime*.
+ *
+ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ * 	Description
+ * 		This helper is a "printk()-like" facility for debugging. It
+ * 		prints a message defined by format *fmt* (of size *fmt_size*)
+ * 		to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * 		available. It can take up to three additional **u64**
+ * 		arguments (as an eBPF helper, the total number of arguments is
+ * 		limited to five).
+ *
+ * 		Each time the helper is called, it appends a line to the trace.
+ * 		The format of the trace is customizable, and the exact output
+ * 		one will get depends on the options set in
+ * 		*\/sys/kernel/debug/tracing/trace_options* (see also the
+ * 		*README* file under the same directory). However, it usually
+ * 		defaults to something like:
+ *
+ * 		::
+ *
+ * 			telnet-470   [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+ *
+ * 		In the above:
+ *
+ * 			* ``telnet`` is the name of the current task.
+ * 			* ``470`` is the PID of the current task.
+ * 			* ``001`` is the CPU number on which the task is
+ * 			  running.
+ * 			* In ``.N..``, each character refers to a set of
+ * 			  options (whether irqs are enabled, scheduling
+ * 			  options, whether hard/softirqs are running, level of
+ * 			  preempt_disabled respectively). **N** means that
+ * 			  **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
+ * 			  are set.
+ * 			* ``419421.045894`` is a timestamp.
+ * 			* ``0x00000001`` is a fake value used by BPF for the
+ * 			  instruction pointer register.
+ * 			* ``<formatted msg>`` is the message formatted with
+ * 			  *fmt*.
+ *
+ * 		The conversion specifiers supported by *fmt* are similar, but
+ * 		more limited than for printk(). They are **%d**, **%i**,
+ * 		**%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
+ * 		**%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
+ * 		of field, padding with zeroes, etc.) is available, and the
+ * 		helper will return **-EINVAL** (but print nothing) if it
+ * 		encounters an unknown specifier.
+ *
+ * 		Also, note that **bpf_trace_printk**\ () is slow, and should
+ * 		only be used for debugging purposes. For this reason, a notice
+ * 		block (spanning several lines) is printed to kernel logs and
+ * 		states that the helper should not be used "for production use"
+ * 		the first time this helper is used (or more precisely, when
+ * 		**trace_printk**\ () buffers are allocated). For passing values
+ * 		to user space, perf events should be preferred.
+ * 	Return
+ * 		The number of bytes written to the buffer, or a negative error
+ * 		in case of failure.
+ *
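In practice the helper is wrapped in a macro so that fmt_size stays in sync with the format string. A typical debugging sketch (the bpf_printk wrapper shown here is a common convention, not part of this header):

	#define bpf_printk(fmt, ...)				\
	({							\
		char ____fmt[] = fmt;				\
		bpf_trace_printk(____fmt, sizeof(____fmt),	\
				 ##__VA_ARGS__);		\
	})

	/* Up to three u64 arguments, no width/padding modifiers. */
	bpf_printk("dropped packet from %x, proto %d\n", saddr, proto);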
+ * u32 bpf_get_prandom_u32(void)
+ * 	Description
+ * 		Get a pseudo-random number.
+ *
+ * 		From a security point of view, this helper uses its own
+ * 		pseudo-random internal state, and cannot be used to infer the
+ * 		seed of other random functions in the kernel. However, it is
+ * 		essential to note that the generator used by the helper is not
+ * 		cryptographically secure.
+ * 	Return
+ * 		A random 32-bit unsigned value.
+ *
+ * u32 bpf_get_smp_processor_id(void)
+ * 	Description
+ * 		Get the SMP (symmetric multiprocessing) processor id. Note that
+ * 		all programs run with preemption disabled, which means that the
+ * 		SMP processor id is stable during all the execution of the
+ * 		program.
+ * 	Return
+ * 		The SMP id of the processor running the program.
+ *
+ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ * 	Description
+ * 		Store *len* bytes from address *from* into the packet
+ * 		associated to *skb*, at *offset*. *flags* are a combination of
+ * 		**BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ * 		checksum for the packet after storing the bytes) and
+ * 		**BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ * 		**->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ * 	Description
+ * 		Recompute the layer 3 (e.g. IP) checksum for the packet
+ * 		associated to *skb*. Computation is incremental, so the helper
+ * 		must know the former value of the header field that was
+ * 		modified (*from*), the new value of this field (*to*), and the
+ * 		number of bytes (2 or 4) for this field, stored in *size*.
+ * 		Alternatively, it is possible to store the difference between
+ * 		the previous and the new values of the header field in *to*, by
+ * 		setting *from* and *size* to 0. For both methods, *offset*
+ * 		indicates the location of the IP checksum within the packet.
+ *
+ * 		This helper works in combination with **bpf_csum_diff**\ (),
+ * 		which does not update the checksum in-place, but offers more
+ * 		flexibility and can handle sizes larger than 2 or 4 for the
+ * 		checksum to update.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ * 	Description
+ * 		Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ * 		packet associated to *skb*. Computation is incremental, so the
+ * 		helper must know the former value of the header field that was
+ * 		modified (*from*), the new value of this field (*to*), and the
+ * 		number of bytes (2 or 4) for this field, stored on the lowest
+ * 		four bits of *flags*. Alternatively, it is possible to store
+ * 		the difference between the previous and the new values of the
+ * 		header field in *to*, by setting *from* and the four lowest
+ * 		bits of *flags* to 0. For both methods, *offset* indicates the
+ * 		location of the IP checksum within the packet. In addition to
+ * 		the size of the field, *flags* can be added (bitwise OR) actual
+ * 		flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left
+ * 		untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
+ * 		for updates resulting in a null checksum the value is set to
+ * 		**CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ * 		the checksum is to be computed against a pseudo-header.
+ *
+ * 		This helper works in combination with **bpf_csum_diff**\ (),
+ * 		which does not update the checksum in-place, but offers more
+ * 		flexibility and can handle sizes larger than 2 or 4 for the
+ * 		checksum to update.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ * 	Description
+ * 		This special helper is used to trigger a "tail call", or in
+ * 		other words, to jump into another eBPF program. The same stack
+ * 		frame is used (but values on stack and in registers for the
+ * 		caller are not accessible to the callee). This mechanism allows
+ * 		for program chaining, either for raising the maximum number of
+ * 		available eBPF instructions, or to execute given programs in
+ * 		conditional blocks. For security reasons, there is an upper
+ * 		limit to the number of successive tail calls that can be
+ * 		performed.
+ *
+ * 		Upon call of this helper, the program attempts to jump into a
+ * 		program referenced at index *index* in *prog_array_map*, a
+ * 		special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ * 		*ctx*, a pointer to the context.
+ *
+ * 		If the call succeeds, the kernel immediately runs the first
+ * 		instruction of the new program. This is not a function call,
+ * 		and it never returns to the previous program. If the call
+ * 		fails, then the helper has no effect, and the caller continues
+ * 		to run its subsequent instructions. A call can fail if the
+ * 		destination program for the jump does not exist (i.e. *index*
+ * 		is superior to the number of entries in *prog_array_map*), or
+ * 		if the maximum number of tail calls has been reached for this
+ * 		chain of programs. This limit is defined in the kernel by the
+ * 		macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
+ * 		which is currently set to 32.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
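A tail call needs a program array populated from user space and a fixed index. A sketch of the eBPF side, assuming jmp_table is a BPF_MAP_TYPE_PROG_ARRAY whose slot 0 was filled with another program's fd:

	bpf_tail_call(ctx, &jmp_table, 0);

	/* Only reached if the tail call failed: empty slot, or the
	 * MAX_TAIL_CALL_CNT (32) limit was hit. Fall through gracefully. */
	return XDP_PASS;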
+ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ * 	Description
+ * 		Clone and redirect the packet associated to *skb* to another
+ * 		net device of index *ifindex*. Both ingress and egress
+ * 		interfaces can be used for redirection. The **BPF_F_INGRESS**
+ * 		value in *flags* is used to make the distinction (ingress path
+ * 		is selected if the flag is present, egress path otherwise).
+ * 		This is the only flag supported for now.
+ *
+ * 		In comparison with **bpf_redirect**\ () helper,
+ * 		**bpf_clone_redirect**\ () has the associated cost of
+ * 		duplicating the packet buffer, but this can be executed out of
+ * 		the eBPF program. Conversely, **bpf_redirect**\ () is more
+ * 		efficient, but it is handled through an action code where the
+ * 		redirection happens only after the eBPF program has returned.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
- *
- * u64 bpf_get_current_pid_tgid(void)
- *     Return: current->tgid << 32 | current->pid
- *
- * u64 bpf_get_current_uid_gid(void)
- *     Return: current_gid << 32 | current_uid
- *
- * int bpf_get_current_comm(char *buf, int size_of_buf)
- *     stores current->comm into buf
- *     Return: 0 on success or negative error
- *
- * u32 bpf_get_cgroup_classid(skb)
- *     retrieve a proc's classid
- *     @skb: pointer to skb
- *     Return: classid if != 0
- *
- * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_vlan_pop(skb)
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_get_tunnel_key(skb, key, size, flags)
- * int bpf_skb_set_tunnel_key(skb, key, size, flags)
- *     retrieve or populate tunnel metadata
- *     @skb: pointer to skb
- *     @key: pointer to 'struct bpf_tunnel_key'
- *     @size: size of 'struct bpf_tunnel_key'
- *     @flags: room for future extensions
- *     Return: 0 on success or negative error
- *
- * u64 bpf_perf_event_read(map, flags)
- *     read perf event counter value
- *     @map: pointer to perf_event_array map
- *     @flags: index of event in the map or bitmask flags
- *     Return: value of perf event counter read or error code
- *
- * int bpf_redirect(ifindex, flags)
- *     redirect to another netdev
- *     @ifindex: ifindex of the net device
- *     @flags:
- *     cls_bpf:
- *         bit 0 - if set, redirect to ingress instead of egress
- *         other bits - reserved
- *     xdp_bpf:
- *         all bits - reserved
- *     Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
- *             xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
- * int bpf_redirect_map(map, key, flags)
- *     redirect to endpoint in map
- *     @map: pointer to dev map
- *     @key: index in map to lookup
- *     @flags: --
- *     Return: XDP_REDIRECT on success or XDP_ABORT on error
- *
- * u32 bpf_get_route_realm(skb)
- *     retrieve a dst's tclassid
- *     @skb: pointer to skb
- *     Return: realm if != 0
- *
- * int bpf_perf_event_output(ctx, map, flags, data, size)
- *     output perf raw sample
- *     @ctx: struct pt_regs*
- *     @map: pointer to perf_event_array map
- *     @flags: index of event in the map or bitmask flags
- *     @data: data on stack to be output as raw data
- *     @size: size of data
- *     Return: 0 on success or negative error
- *
- * int bpf_get_stackid(ctx, map, flags)
- *     walk user or kernel stack and return id
- *     @ctx: struct pt_regs*
- *     @map: pointer to stack_trace map
- *     @flags: bits 0-7 - numer of stack frames to skip
- *             bit 8 - collect user stack instead of kernel
- *             bit 9 - compare stacks by hash only
- *             bit 10 - if two different stacks hash into the same stackid
- *                      discard old
- *             other bits - reserved
- *     Return: >= 0 stackid on success or negative error
- *
- * s64 bpf_csum_diff(from, from_size, to, to_size, seed)
- *     calculate csum diff
- *     @from: raw from buffer
- *     @from_size: length of from buffer
- *     @to: raw to buffer
- *     @to_size: length of to buffer
- *     @seed: optional seed
- *     Return: csum result or negative error code
- *
- * int bpf_skb_get_tunnel_opt(skb, opt, size)
- *     retrieve tunnel options metadata
- *     @skb: pointer to skb
- *     @opt: pointer to raw tunnel option data
- *     @size: size of @opt
- *     Return: option size
- *
- * int bpf_skb_set_tunnel_opt(skb, opt, size)
- *     populate tunnel options metadata
- *     @skb: pointer to skb
- *     @opt: pointer to raw tunnel option data
- *     @size: size of @opt
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_change_proto(skb, proto, flags)
- *     Change protocol of the skb. Currently supported is v4 -> v6,
- *     v6 -> v4 transitions. The helper will also resize the skb. eBPF
- *     program is expected to fill the new headers via skb_store_bytes
- *     and lX_csum_replace.
- *     @skb: pointer to skb
- *     @proto: new skb->protocol type
- *     @flags: reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_change_type(skb, type)
- *     Change packet type of skb.
- *     @skb: pointer to skb
- *     @type: new skb->pkt_type type
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_under_cgroup(skb, map, index)
- *     Check cgroup2 membership of skb
- *     @skb: pointer to skb
- *     @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- *     @index: index of the cgroup in the bpf_map
- *     Return:
- *       == 0 skb failed the cgroup2 descendant test
- *       == 1 skb succeeded the cgroup2 descendant test
- *        < 0 error
- *
- * u32 bpf_get_hash_recalc(skb)
- *     Retrieve and possibly recalculate skb->hash.
- *     @skb: pointer to skb
- *     Return: hash
+ *
+ * u64 bpf_get_current_pid_tgid(void)
+ * 	Return
+ * 		A 64-bit integer containing the current tgid and pid, and
+ * 		created as such:
+ * 		*current_task*\ **->tgid << 32 \|**
+ * 		*current_task*\ **->pid**.
+ *
+ * u64 bpf_get_current_uid_gid(void)
+ * 	Return
+ * 		A 64-bit integer containing the current GID and UID, and
+ * 		created as such: *current_gid* **<< 32 \|** *current_uid*.
+ *
+ * int bpf_get_current_comm(char *buf, u32 size_of_buf)
+ * 	Description
+ * 		Copy the **comm** attribute of the current task into *buf* of
+ * 		*size_of_buf*. The **comm** attribute contains the name of
+ * 		the executable (excluding the path) for the current task. The
+ * 		*size_of_buf* must be strictly positive. On success, the
+ * 		helper makes sure that the *buf* is NUL-terminated. On failure,
+ * 		it is filled with zeroes.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
+ * 	Description
+ * 		Retrieve the classid for the current task, i.e. for the net_cls
+ * 		cgroup to which *skb* belongs.
+ *
+ * 		This helper can be used on TC egress path, but not on ingress.
+ *
+ * 		The net_cls cgroup provides an interface to tag network packets
+ * 		based on a user-provided identifier for all traffic coming from
+ * 		the tasks belonging to the related cgroup. See also the related
+ * 		kernel documentation, available from the Linux sources in file
+ * 		*Documentation/cgroup-v1/net_cls.txt*.
+ *
+ * 		The Linux kernel has two versions for cgroups: there are
+ * 		cgroups v1 and cgroups v2. Both are available to users, who can
+ * 		use a mixture of them, but note that the net_cls cgroup is for
+ * 		cgroup v1 only. This makes it incompatible with BPF programs
+ * 		run on cgroups, which is a cgroup-v2-only feature (a socket can
+ * 		only hold data for one version of cgroups at a time).
+ *
+ * 		This helper is only available if the kernel was compiled with
+ * 		the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ * 		"**y**" or to "**m**".
+ * 	Return
+ * 		The classid, or 0 for the default unconfigured classid.
+ *
+ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ * 	Description
+ * 		Push a *vlan_tci* (VLAN tag control information) of protocol
+ * 		*vlan_proto* to the packet associated to *skb*, then update
+ * 		the checksum. Note that if *vlan_proto* is different from
+ * 		**ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ * 		be **ETH_P_8021Q**.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ * 	Description
+ * 		Pop a VLAN header from the packet associated to *skb*.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * 	Description
+ * 		Get tunnel metadata. This helper takes a pointer *key* to an
+ * 		empty **struct bpf_tunnel_key** of **size**, that will be
+ * 		filled with tunnel metadata for the packet associated to *skb*.
+ * 		The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ * 		indicates that the tunnel is based on IPv6 protocol instead of
+ * 		IPv4.
+ *
+ * 		The **struct bpf_tunnel_key** is an object that generalizes the
+ * 		principal parameters used by various tunneling protocols into a
+ * 		single struct. This way, it can be used to easily make a
+ * 		decision based on the contents of the encapsulation header,
+ * 		"summarized" in this struct. In particular, it holds the IP
+ * 		address of the remote end (IPv4 or IPv6, depending on the case)
+ * 		in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ * 		this struct exposes the *key*\ **->tunnel_id**, which is
+ * 		generally mapped to a VNI (Virtual Network Identifier), making
+ * 		it programmable together with the **bpf_skb_set_tunnel_key**\
+ * 		() helper.
+ *
+ * 		Let's imagine that the following code is part of a program
+ * 		attached to the TC ingress interface, on one end of a GRE
+ * 		tunnel, and is supposed to filter out all messages coming from
+ * 		remote ends with IPv4 address other than 10.0.0.1:
+ *
+ * 		::
+ *
+ * 			int ret;
+ * 			struct bpf_tunnel_key key = {};
+ *			
+ * 			ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ * 			if (ret < 0)
+ * 				return TC_ACT_SHOT;	// drop packet
+ *			
+ * 			if (key.remote_ipv4 != 0x0a000001)
+ * 				return TC_ACT_SHOT;	// drop packet
+ *			
+ * 			return TC_ACT_OK;		// accept packet
+ *
+ * 		This interface can also be used with all encapsulation devices
+ * 		that can operate in "collect metadata" mode: instead of having
+ * 		one network device per specific configuration, the "collect
+ * 		metadata" mode only requires a single device where the
+ * 		configuration can be extracted from this helper.
+ *
+ * 		This can be used together with various tunnels such as VXLan,
+ * 		Geneve, GRE or IP in IP (IPIP).
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * 	Description
+ * 		Populate tunnel metadata for packet associated to *skb*. The
+ * 		tunnel metadata is set to the contents of *key*, of *size*. The
+ * 		*flags* can be set to a combination of the following values:
+ *
+ * 		**BPF_F_TUNINFO_IPV6**
+ * 			Indicate that the tunnel is based on IPv6 protocol
+ * 			instead of IPv4.
+ * 		**BPF_F_ZERO_CSUM_TX**
+ * 			For IPv4 packets, add a flag to tunnel metadata
+ * 			indicating that checksum computation should be skipped
+ * 			and checksum set to zeroes.
+ * 		**BPF_F_DONT_FRAGMENT**
+ * 			Add a flag to tunnel metadata indicating that the
+ * 			packet should not be fragmented.
+ * 		**BPF_F_SEQ_NUMBER**
+ * 			Add a flag to tunnel metadata indicating that a
+ * 			sequence number should be added to tunnel header before
+ * 			sending the packet. This flag was added for GRE
+ * 			encapsulation, but might be used with other protocols
+ * 			as well in the future.
+ *
+ * 		Here is a typical usage on the transmit path:
+ *
+ * 		::
+ *
+ * 			struct bpf_tunnel_key key;
+ * 			     populate key ...
+ * 			bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ * 			bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ * 		See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ * 		helper for additional information.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
+ * 	Description
+ * 		Read the value of a perf event counter. This helper relies on a
+ * 		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ * 		the perf event counter is selected when *map* is updated with
+ * 		perf event file descriptors. The *map* is an array whose size
+ * 		is the number of available CPUs, and each cell contains a value
+ * 		relative to one CPU. The value to retrieve is indicated by
+ * 		*flags*, that contains the index of the CPU to look up, masked
+ * 		with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * 		**BPF_F_CURRENT_CPU** to indicate that the value for the
+ * 		current CPU should be retrieved.
+ *
+ * 		Note that before Linux 4.13, only hardware perf event can be
+ * 		retrieved.
+ *
+ * 		Also, be aware that the newer helper
+ * 		**bpf_perf_event_read_value**\ () is recommended over
+ * 		**bpf_perf_event_read**\ () in general. The latter has some ABI
+ * 		quirks where error and counter value are used as a return code
+ * 		(which is wrong to do since ranges may overlap). This issue is
+ * 		fixed with **bpf_perf_event_read_value**\ (), which at the same
+ * 		time provides more features over the **bpf_perf_event_read**\
+ * 		() interface. Please refer to the description of
+ * 		**bpf_perf_event_read_value**\ () for details.
+ * 	Return
+ * 		The value of the perf event counter read from the map, or a
+ * 		negative error code in case of failure.
+ *
+ * int bpf_redirect(u32 ifindex, u64 flags)
+ * 	Description
+ * 		Redirect the packet to another net device of index *ifindex*.
+ * 		This helper is somewhat similar to **bpf_clone_redirect**\
+ * 		(), except that the packet is not cloned, which provides
+ * 		increased performance.
+ *
+ * 		Except for XDP, both ingress and egress interfaces can be used
+ * 		for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ * 		to make the distinction (ingress path is selected if the flag
+ * 		is present, egress path otherwise). Currently, XDP only
+ * 		supports redirection to the egress interface, and accepts no
+ * 		flag at all.
+ *
+ * 		The same effect can be attained with the more generic
+ * 		**bpf_redirect_map**\ (), which requires specific maps to be
+ * 		used but offers better performance.
+ * 	Return
+ * 		For XDP, the helper returns **XDP_REDIRECT** on success or
+ * 		**XDP_ABORTED** on error. For other program types, the values
+ * 		are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ * 		error.
+ *
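For XDP the return value of bpf_redirect() is itself the program's verdict. A sketch of an XDP program that bounces every packet to another interface, assuming ifindex 3 is valid and the usual SEC() convention from the BPF samples (no flags are accepted for XDP):

	SEC("xdp")
	int xdp_redirect_prog(struct xdp_md *ctx)
	{
		/* XDP_REDIRECT on success, XDP_ABORTED on error. */
		return bpf_redirect(3, 0);
	}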
+ * u32 bpf_get_route_realm(struct sk_buff *skb)
+ * 	Description
+ * 		Retrieve the realm of the route, that is to say the
+ * 		**tclassid** field of the destination for the *skb*. The
+ * 		identifier retrieved is a user-provided tag, similar to the
+ * 		one used with the net_cls cgroup (see description for
+ * 		**bpf_get_cgroup_classid**\ () helper), but here this tag is
+ * 		held by a route (a destination entry), not by a task.
+ *
+ * 		Retrieving this identifier works with the clsact TC egress hook
+ * 		(see also **tc-bpf(8)**), or alternatively on conventional
+ * 		classful egress qdiscs, but not on TC ingress path. In case of
+ * 		clsact TC egress hook, this has the advantage that, internally,
+ * 		the destination entry has not been dropped yet in the transmit
+ * 		path. Therefore, the destination entry does not need to be
+ * 		artificially held via **netif_keep_dst**\ () for a classful
+ * 		qdisc until the *skb* is freed.
+ *
+ * 		This helper is available only if the kernel was compiled with
+ * 		**CONFIG_IP_ROUTE_CLASSID** configuration option.
+ * 	Return
+ * 		The realm of the route for the packet associated to *skb*, or 0
+ * 		if none was found.
+ *
+ * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * 	Description
+ * 		Write raw *data* blob into a special BPF perf event held by
+ * 		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * 		event must have the following attributes: **PERF_SAMPLE_RAW**
+ * 		as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * 		**PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * 		The *flags* are used to indicate the index in *map* for which
+ * 		the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * 		Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * 		to indicate that the index of the current CPU core should be
+ * 		used.
+ *
+ * 		The value to write, of *size*, is passed through eBPF stack and
+ * 		pointed by *data*.
+ *
+ * 		The context of the program *ctx* also needs to be passed to the
+ * 		helper.
+ *
+ * 		On user space, a program willing to read the values needs to
+ * 		call **perf_event_open**\ () on the perf event (either for
+ * 		one or for all CPUs) and to store the file descriptor into the
+ * 		*map*. This must be done before the eBPF program can send data
+ * 		into it. An example is available in file
+ * 		*samples/bpf/trace_output_user.c* in the Linux kernel source
+ * 		tree (the eBPF program counterpart is in
+ * 		*samples/bpf/trace_output_kern.c*).
+ *
+ * 		**bpf_perf_event_output**\ () achieves better performance
+ * 		than **bpf_trace_printk**\ () for sharing data with user
+ * 		space, and is much better suited for streaming data from eBPF
+ * 		programs.
+ *
+ * 		Note that this helper is not restricted to tracing use cases
+ * 		and can be used with programs attached to TC or XDP as well,
+ * 		where it allows for passing data to user space listeners. Data
+ * 		can be:
+ *
+ * 		* Only custom structs,
+ * 		* Only the packet payload, or
+ * 		* A combination of both.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
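The eBPF side of the perf buffer pattern then looks like this sketch, assuming events is a BPF_MAP_TYPE_PERF_EVENT_ARRAY populated from user space and struct event_t a custom struct built on the stack:

	struct event_t ev = {
		.pid = bpf_get_current_pid_tgid() >> 32,
	};

	/* Submit to the perf ring of the current CPU. */
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
			      &ev, sizeof(ev));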
+ * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+ * 	Description
+ * 		This helper was provided as an easy way to load data from a
+ * 		packet. It can be used to load *len* bytes from *offset* from
+ * 		the packet associated to *skb*, into the buffer pointed by
+ * 		*to*.
+ *
+ * 		Since Linux 4.7, usage of this helper has mostly been replaced
+ * 		by "direct packet access", enabling packet data to be
+ * 		manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ * 		pointing respectively to the first byte of packet data and to
+ * 		the byte after the last byte of packet data. However, it
+ * 		remains useful if one wishes to read large quantities of data
+ * 		at once from a packet into the eBPF stack.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags)
+ * 	Description
+ * 		Walk a user or a kernel stack and return its id. To achieve
+ * 		this, the helper needs *ctx*, which is a pointer to the context
+ * 		on which the tracing program is executed, and a pointer to a
+ * 		*map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ * 		The last argument, *flags*, holds the number of stack frames to
+ * 		skip (from 0 to 255), masked with
+ * 		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * 		a combination of the following flags:
+ *
+ * 		**BPF_F_USER_STACK**
+ * 			Collect a user space stack instead of a kernel stack.
+ * 		**BPF_F_FAST_STACK_CMP**
+ * 			Compare stacks by hash only.
+ * 		**BPF_F_REUSE_STACKID**
+ * 			If two different stacks hash into the same *stackid*,
+ * 			discard the old one.
+ *
+ * 		The stack id retrieved is a 32 bit long integer handle which
+ * 		can be further combined with other data (including other stack
+ * 		ids) and used as a key into maps. This can be useful for
+ * 		generating a variety of graphs (such as flame graphs or off-cpu
+ * 		graphs).
+ *
+ * 		For walking a stack, this helper is an improvement over
+ * 		**bpf_probe_read**\ (), which can be used with unrolled loops
+ * 		but is not efficient and consumes a lot of eBPF instructions.
+ * 		Instead, **bpf_get_stackid**\ () can collect up to
+ * 		**PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that
+ * 		this limit can be controlled with the **sysctl** program, and
+ * 		that it should be manually increased in order to profile long
+ * 		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * 		::
+ *
+ * 			# sysctl kernel.perf_event_max_stack=<new value>
+ * 	Return
+ * 		The positive or null stack id on success, or a negative error
+ * 		in case of failure.
+ *
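A profiler typically uses the stack id directly as a map key. A sketch, assuming stack_traces is a BPF_MAP_TYPE_STACK_TRACE map and counts an ordinary hash map keyed by stack id:

	int stackid = bpf_get_stackid(ctx, &stack_traces,
				      BPF_F_USER_STACK | BPF_F_FAST_STACK_CMP);
	if (stackid >= 0) {
		/* Count how often this user stack was observed. */
		__u64 *val = bpf_map_lookup_elem(&counts, &stackid);
		if (val)
			__sync_fetch_and_add(val, 1);
	}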
+ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
+ * 	Description
+ * 		Compute a checksum difference, from the raw buffer pointed by
+ * 		*from*, of length *from_size* (that must be a multiple of 4),
+ * 		towards the raw buffer pointed by *to*, of size *to_size*
+ * 		(same remark). An optional *seed* can be added to the value
+ * 		(this can be cascaded, the seed may come from a previous call
+ * 		to the helper).
+ *
+ * 		This is flexible enough to be used in several ways:
+ *
+ * 		* With *from_size* == 0, *to_size* > 0 and *seed* set to
+ * 		  checksum, it can be used when pushing new data.
+ * 		* With *from_size* > 0, *to_size* == 0 and *seed* set to
+ * 		  checksum, it can be used when removing data from a packet.
+ * 		* With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ * 		  can be used to compute a diff. Note that *from_size* and
+ * 		  *to_size* do not need to be equal.
+ *
+ * 		This helper can be used in combination with
+ * 		**bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ * 		which one can feed in the difference computed with
+ * 		**bpf_csum_diff**\ ().
+ * 	Return
+ * 		The checksum result, or a negative error code in case of
+ * 		failure.
+ *
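For the simple 4-byte case the replace helpers can fold the change on their own; bpf_csum_diff() is for larger spans. A sketch of rewriting an IPv4 destination address, where ip_off and tcp_off are assumed offsets of the IP and TCP headers:

	__be32 old_ip = iph->daddr, new_ip = htonl(0x0a000002);

	/* Fold the 4-byte address change into both checksums; daddr is
	 * part of the TCP pseudo-header, hence BPF_F_PSEUDO_HDR. */
	bpf_l3_csum_replace(skb, ip_off + offsetof(struct iphdr, check),
			    old_ip, new_ip, sizeof(new_ip));
	bpf_l4_csum_replace(skb, tcp_off + offsetof(struct tcphdr, check),
			    old_ip, new_ip, BPF_F_PSEUDO_HDR | sizeof(new_ip));
	bpf_skb_store_bytes(skb, ip_off + offsetof(struct iphdr, daddr),
			    &new_ip, sizeof(new_ip), 0);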
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * 	Description
+ * 		Retrieve tunnel options metadata for the packet associated to
+ * 		*skb*, and store the raw tunnel option data to the buffer *opt*
+ * 		of *size*.
+ *
+ * 		This helper can be used with encapsulation devices that can
+ * 		operate in "collect metadata" mode (please refer to the related
+ * 		note in the description of **bpf_skb_get_tunnel_key**\ () for
+ * 		more details). A particular example where this can be used is
+ * 		in combination with the Geneve encapsulation protocol, where it
+ * 		allows for pushing (with **bpf_skb_set_tunnel_opt**\ () helper)
+ * 		and retrieving arbitrary TLVs (Type-Length-Value headers) from
+ * 		the eBPF program. This allows for full customization of these
+ * 		headers.
+ * 	Return
+ * 		The size of the option data retrieved.
+ *
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * 	Description
+ * 		Set tunnel options metadata for the packet associated to *skb*
+ * 		to the option data contained in the raw buffer *opt* of *size*.
+ *
+ * 		See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ * 		helper for additional information.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ * 	Description
+ * 		Change the protocol of the *skb* to *proto*. Currently
+ * 		supported are transitions from IPv4 to IPv6, and from IPv6 to
+ * 		IPv4. The helper takes care of the groundwork for the
+ * 		transition, including resizing the socket buffer. The eBPF
+ * 		program is expected to fill the new headers, if any, via
+ * 		**skb_store_bytes**\ () and to recompute the checksums with
+ * 		**bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ * 		(). The main case for this helper is to perform NAT64
+ * 		operations out of an eBPF program.
+ *
+ * 		Internally, the GSO type is marked as dodgy so that headers are
+ * 		checked and segments are recalculated by the GSO/GRO engine.
+ * 		The size for GSO target is adapted as well.
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ * 	Description
+ * 		Change the packet type for the packet associated to *skb*. This
+ * 		comes down to setting *skb*\ **->pkt_type** to *type*, except
+ * 		the eBPF program does not have write access to *skb*\
+ * 		**->pkt_type** besides this helper. Using a helper here allows
+ * 		for graceful handling of errors.
+ *
+ * 		The major use case is to change incoming *skb*s to
+ * 		**PACKET_HOST** in a programmatic way instead of having to
+ * 		recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
+ * 		example.
+ *
+ * 		Note that *type* only allows certain values. At this time, they
+ * 		are:
+ *
+ * 		**PACKET_HOST**
+ * 			Packet is for us.
+ * 		**PACKET_BROADCAST**
+ * 			Send packet to all.
+ * 		**PACKET_MULTICAST**
+ * 			Send packet to group.
+ * 		**PACKET_OTHERHOST**
+ * 			Send packet to someone else.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ * 	Description
+ * 		Check whether *skb* is a descendant of the cgroup2 held by
+ * 		*map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * 	Return
+ * 		The return value depends on the result of the test, and can be:
+ *
+ * 		* 0, if the *skb* failed the cgroup2 descendant test.
+ * 		* 1, if the *skb* succeeded the cgroup2 descendant test.
+ * 		* A negative error code, if an error occurred.
+ *
+ * u32 bpf_get_hash_recalc(struct sk_buff *skb)
+ * 	Description
+ * 		Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ * 		not set, in particular if the hash was cleared due to mangling,
+ * 		recompute this hash. Later accesses to the hash can be done
+ * 		directly with *skb*\ **->hash**.
+ *
+ * 		Calling **bpf_set_hash_invalid**\ (), changing a packet
+ * 		prototype with **bpf_skb_change_proto**\ (), or calling
+ * 		**bpf_skb_store_bytes**\ () with the
+ * 		**BPF_F_INVALIDATE_HASH** are actions susceptible to clear
+ * 		the hash and to trigger a new computation for the next call to
+ * 		**bpf_get_hash_recalc**\ ().
+ * 	Return
+ * 		The 32-bit hash.
573 * 1154 *
574 * u64 bpf_get_current_task(void) 1155 * u64 bpf_get_current_task(void)
575 * Returns current task_struct 1156 * Return
576 * Return: current 1157 * A pointer to the current task struct.
577 * 1158 *
578 * int bpf_probe_write_user(void *dst, void *src, int len) 1159 * int bpf_probe_write_user(void *dst, const void *src, u32 len)
579 * safely attempt to write to a location 1160 * Description
580 * @dst: destination address in userspace 1161 * Attempt in a safe way to write *len* bytes from the buffer
581 * @src: source address on stack 1162 * *src* to *dst* in memory. It only works for threads that are in
582 * @len: number of bytes to copy 1163 * user context, and *dst* must be a valid user space address.
583 * Return: 0 on success or negative error 1164 *
584 * 1165 * This helper should not be used to implement any kind of
585 * int bpf_current_task_under_cgroup(map, index) 1166 * security mechanism because of TOC-TOU attacks, but rather to
586 * Check cgroup2 membership of current task 1167 * debug, divert, and manipulate execution of semi-cooperative
587 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type 1168 * processes.
588 * @index: index of the cgroup in the bpf_map 1169 *
589 * Return: 1170 * Keep in mind that this feature is meant for experiments, and it
590 * == 0 current failed the cgroup2 descendant test 1171 * has a risk of crashing the system and running programs.
591 * == 1 current succeeded the cgroup2 descendant test 1172 * Therefore, when an eBPF program using this helper is attached,
592 * < 0 error 1173 * a warning including PID and process name is printed to kernel
593 * 1174 * logs.
594 * int bpf_skb_change_tail(skb, len, flags) 1175 * Return
595 * The helper will resize the skb to the given new size, to be used f.e. 1176 * 0 on success, or a negative error in case of failure.
596 * with control messages. 1177 *
597 * @skb: pointer to skb 1178 * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
598 * @len: new skb length 1179 * Description
599 * @flags: reserved 1180 * Check whether the probe is being run is the context of a given
600 * Return: 0 on success or negative error 1181 * subset of the cgroup2 hierarchy. The cgroup2 to test is held by
601 * 1182 * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
602 * int bpf_skb_pull_data(skb, len) 1183 * Return
603 * The helper will pull in non-linear data in case the skb is non-linear 1184 * The return value depends on the result of the test, and can be:
604 * and not all of len are part of the linear section. Only needed for 1185 *
605 * read/write with direct packet access. 1186 * * 0, if the *skb* task belongs to the cgroup2.
606 * @skb: pointer to skb 1187 * * 1, if the *skb* task does not belong to the cgroup2.
607 * @len: len to make read/writeable 1188 * * A negative error code, if an error occurred.
608 * Return: 0 on success or negative error 1189 *
 * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
 *	Description
 *		Resize (trim or grow) the packet associated to *skb* to the
 *		new *len*. The *flags* are reserved for future usage, and must
 *		be left at zero.
 *
 *		The basic idea is that the helper performs the needed work to
 *		change the size of the packet, then the eBPF program rewrites
 *		the rest via helpers like **bpf_skb_store_bytes**\ (),
 *		**bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ ()
 *		and others. This helper is a slow path utility intended for
 *		replies with control messages. Because it is targeted for the
 *		slow path, the helper itself can afford to be slow: it
 *		implicitly linearizes, unclones and drops offloads from the
 *		*skb*.
 *
 *		A call to this helper may change the underlying packet buffer.
 *		Therefore, at load time, all checks on pointers previously
 *		done by the verifier are invalidated and must be performed
 *		again, if the helper is used in combination with direct packet
 *		access.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
 *	Description
 *		Pull in non-linear data in case the *skb* is non-linear and not
 *		all of *len* are part of the linear section. Make *len* bytes
 *		from *skb* readable and writable. If a zero value is passed for
 *		*len*, then the whole length of the *skb* is pulled.
 *
 *		This helper is only needed for reading and writing with direct
 *		packet access.
 *
 *		For direct packet access, testing that offsets to access
 *		are within packet boundaries (test on *skb*\ **->data_end**)
 *		can fail if offsets are invalid, or if the requested
 *		data is in non-linear parts of the *skb*. On failure the
 *		program can just bail out, or in the case of a non-linear
 *		buffer, use a helper to make the data available. The
 *		**bpf_skb_load_bytes**\ () helper is a first solution to access
 *		the data. Another one consists in using **bpf_skb_pull_data**\ ()
 *		to pull in the non-linear parts once, then to retest and
 *		eventually access the data.
 *
 *		At the same time, this also makes sure the *skb* is uncloned,
 *		which is a necessary condition for direct write. As this needs
 *		to be an invariant for the write part only, the verifier
 *		detects writes and adds a prologue that calls
 *		**bpf_skb_pull_data()** to effectively unclone the *skb* from
 *		the very beginning in case it is indeed cloned.
 *
 *		A call to this helper may change the underlying packet buffer.
 *		Therefore, at load time, all checks on pointers previously
 *		done by the verifier are invalidated and must be performed
 *		again, if the helper is used in combination with direct packet
 *		access.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
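 *		A minimal sketch of the pull-then-retest pattern described
 *		above (program name and the 64-byte window are hypothetical):
 *
 *		::
 *
 *			SEC("classifier")
 *			int cls_parse(struct __sk_buff *skb)
 *			{
 *				// Ensure the first 64 bytes are in the
 *				// linear area, readable and writable.
 *				if (bpf_skb_pull_data(skb, 64))
 *					return TC_ACT_OK;
 *
 *				// Pointers moved: re-derive and retest them.
 *				void *data = (void *)(long)skb->data;
 *				void *data_end = (void *)(long)skb->data_end;
 *
 *				if (data + 64 > data_end)
 *					return TC_ACT_OK;
 *				// ... direct packet access on those bytes ...
 *				return TC_ACT_OK;
 *			}
 *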
 * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
 *	Description
 *		Add the checksum *csum* into *skb*\ **->csum** in case the
 *		driver has supplied a checksum for the entire packet into that
 *		field. Return an error otherwise. This helper is intended to be
 *		used in combination with **bpf_csum_diff**\ (), in particular
 *		when the checksum needs to be updated after data has been
 *		written into the packet through direct packet access.
 *	Return
 *		The checksum on success, or a negative error code in case of
 *		failure.
 *
 * void bpf_set_hash_invalid(struct sk_buff *skb)
 *	Description
 *		Invalidate the current *skb*\ **->hash**. It can be used after
 *		mangling on headers through direct packet access, in order to
 *		indicate that the hash is outdated and to trigger a
 *		recalculation the next time the kernel tries to access this
 *		hash or when the **bpf_get_hash_recalc**\ () helper is called.
 *
 * int bpf_get_numa_node_id(void)
 *	Description
 *		Return the id of the current NUMA node. The primary use case
 *		for this helper is the selection of sockets for the local NUMA
 *		node, when the program is attached to sockets using the
 *		**SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
 *		but the helper is also available to other eBPF program types,
 *		similarly to **bpf_get_smp_processor_id**\ ().
 *	Return
 *		The id of the current NUMA node.
 *
 * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
 *	Description
 *		Grow the headroom of the packet associated to *skb* and adjust
 *		the offset of the MAC header accordingly, adding *len* bytes
 *		of space. It automatically extends and reallocates memory as
 *		required.
 *
 *		This helper can be used on a layer 3 *skb* to push a MAC header
 *		for redirection into a layer 2 device.
 *
 *		All values for *flags* are reserved for future usage, and must
 *		be left at zero.
 *
 *		A call to this helper may change the underlying packet buffer.
 *		Therefore, at load time, all checks on pointers previously
 *		done by the verifier are invalidated and must be performed
 *		again, if the helper is used in combination with direct packet
 *		access.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
 *	Description
 *		Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
 *		it is possible to use a negative value for *delta*. This helper
 *		can be used to prepare the packet for pushing or popping
 *		headers.
 *
 *		A call to this helper may change the underlying packet buffer.
 *		Therefore, at load time, all checks on pointers previously
 *		done by the verifier are invalidated and must be performed
 *		again, if the helper is used in combination with direct packet
 *		access.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
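 *		A minimal sketch (hypothetical program) showing how headroom
 *		is reserved and pointers are re-checked after the call:
 *
 *		::
 *
 *			SEC("xdp")
 *			int xdp_push_hdr(struct xdp_md *ctx)
 *			{
 *				// A negative delta grows headroom: here,
 *				// make room for 4 extra bytes in front.
 *				if (bpf_xdp_adjust_head(ctx, -4))
 *					return XDP_ABORTED;
 *
 *				void *data = (void *)(long)ctx->data;
 *				void *data_end = (void *)(long)ctx->data_end;
 *
 *				if (data + 4 > data_end)
 *					return XDP_ABORTED;
 *				// ... write the new 4 header bytes here ...
 *				return XDP_PASS;
 *			}
 *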
 * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
 *	Description
 *		Copy a NUL terminated string from an unsafe address
 *		*unsafe_ptr* to *dst*. The *size* should include the
 *		terminating NUL byte. In case the string length is smaller than
 *		*size*, the target is not padded with further NUL bytes. If the
 *		string length is larger than *size*, just *size*-1 bytes are
 *		copied and the last byte is set to NUL.
 *
 *		On success, the length of the copied string is returned. This
 *		makes this helper useful in tracing programs for reading
 *		strings, and more importantly to get its length at runtime. See
 *		the following snippet:
 *
 *		::
 *
 *			SEC("kprobe/sys_open")
 *			void bpf_sys_open(struct pt_regs *ctx)
 *			{
 *				char buf[PATHLEN]; // PATHLEN is defined to 256
 *				int res = bpf_probe_read_str(buf, sizeof(buf),
 *				                             ctx->di);
 *
 *				// Consume buf, for example push it to
 *				// userspace via bpf_perf_event_output(); we
 *				// can use res (the string length) as event
 *				// size, after checking its boundaries.
 *			}
 *
 *		In comparison, using the **bpf_probe_read**\ () helper here
 *		instead to read the string would require estimating the length
 *		at compile time, and would often result in copying more memory
 *		than necessary.
 *
 *		Another useful use case is when parsing individual process
 *		arguments or individual environment variables while navigating
 *		*current*\ **->mm->arg_start** and *current*\
 *		**->mm->env_start**: using this helper and the return value,
 *		one can quickly iterate at the right offset of the memory area.
 *	Return
 *		On success, the strictly positive length of the string,
 *		including the trailing NUL character. On error, a negative
 *		value.
 *
 * u64 bpf_get_socket_cookie(struct sk_buff *skb)
 *	Description
 *		If the **struct sk_buff** pointed by *skb* has a known socket,
 *		retrieve the cookie (generated by the kernel) of this socket.
 *		If no cookie has been set yet, generate a new cookie. Once
 *		generated, the socket cookie remains stable for the life of the
 *		socket. This helper can be useful for monitoring per socket
 *		networking traffic statistics as it provides a unique socket
 *		identifier per namespace.
 *	Return
 *		An 8-byte long non-decreasing number on success, or 0 if the
 *		socket field is missing inside *skb*.
 *
 * u32 bpf_get_socket_uid(struct sk_buff *skb)
 *	Return
 *		The owner UID of the socket associated to *skb*. If the socket
 *		is **NULL**, or if it is not a full socket (i.e. if it is a
 *		time-wait or a request socket instead), **overflowuid** value
 *		is returned (note that **overflowuid** might also be the actual
 *		UID value for the socket).
 *
 * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
 *	Description
 *		Set the full hash for *skb* (set the field *skb*\ **->hash**)
 *		to value *hash*.
 *	Return
 *		0
 *
 * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
 *	Description
 *		Emulate a call to **setsockopt()** on the socket associated to
 *		*bpf_socket*, which must be a full socket. The *level* at
 *		which the option resides and the name *optname* of the option
 *		must be specified, see **setsockopt(2)** for more information.
 *		The option value of length *optlen* is pointed by *optval*.
 *
 *		This helper actually implements a subset of **setsockopt()**.
 *		It supports the following *level*\ s:
 *
 *		* **SOL_SOCKET**, which supports the following *optname*\ s:
 *		  **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
 *		  **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
 *		* **IPPROTO_TCP**, which supports the following *optname*\ s:
 *		  **TCP_CONGESTION**, **TCP_BPF_IW**,
 *		  **TCP_BPF_SNDCWND_CLAMP**.
 *		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
 *		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
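 *		A minimal sketch, switching established connections to a given
 *		congestion control algorithm (program name hypothetical):
 *
 *		::
 *
 *			SEC("sockops")
 *			int set_cc(struct bpf_sock_ops *skops)
 *			{
 *				char cc[] = "cubic";
 *
 *				if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB)
 *					bpf_setsockopt(skops, IPPROTO_TCP,
 *						       TCP_CONGESTION, cc,
 *						       sizeof(cc));
 *				return 1;
 *			}
 *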
 * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags)
 *	Description
 *		Grow or shrink the room for data in the packet associated to
 *		*skb* by *len_diff*, and according to the selected *mode*.
 *
 *		There is a single supported mode at this time:
 *
 *		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
 *		  (room space is added or removed below the layer 3 header).
 *
 *		All values for *flags* are reserved for future usage, and must
 *		be left at zero.
 *
 *		A call to this helper may change the underlying packet buffer.
 *		Therefore, at load time, all checks on pointers previously
 *		done by the verifier are invalidated and must be performed
 *		again, if the helper is used in combination with direct packet
 *		access.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
 *	Description
 *		Redirect the packet to the endpoint referenced by *map* at
 *		index *key*. Depending on its type, this *map* can contain
 *		references to net devices (for forwarding packets through other
 *		ports), or to CPUs (for redirecting XDP frames to another CPU;
 *		but this is only implemented for native XDP (with driver
 *		support) as of this writing).
 *
 *		All values for *flags* are reserved for future usage, and must
 *		be left at zero.
 *
 *		When used to redirect packets to net devices, this helper
 *		offers a significant performance increase over
 *		**bpf_redirect**\ (). This is due to various implementation
 *		details of the underlying mechanisms, one of which is the fact
 *		that **bpf_redirect_map**\ () tries to send packets as a
 *		"bulk" to the device.
 *	Return
 *		**XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
 *
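 *		A minimal XDP sketch (map and program names hypothetical),
 *		forwarding every frame to the device stored at index 0 of a
 *		devmap:
 *
 *		::
 *
 *			struct bpf_map_def SEC("maps") tx_port = {
 *				.type = BPF_MAP_TYPE_DEVMAP,
 *				.key_size = sizeof(int),
 *				.value_size = sizeof(int),
 *				.max_entries = 64,
 *			};
 *
 *			SEC("xdp")
 *			int xdp_redirect_prog(struct xdp_md *ctx)
 *			{
 *				return bpf_redirect_map(&tx_port, 0, 0);
 *			}
 *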
 * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
 *	Description
 *		Redirect the packet to the socket referenced by *map* (of type
 *		**BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
 *		egress interfaces can be used for redirection. The
 *		**BPF_F_INGRESS** value in *flags* is used to make the
 *		distinction (ingress path is selected if the flag is present,
 *		egress path otherwise). This is the only flag supported for now.
 *	Return
 *		**SK_PASS** on success, or **SK_DROP** on error.
 *
 * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
 *	Description
 *		Add an entry to, or update a *map* referencing sockets. The
 *		*skops* is used as a new value for the entry associated to
 *		*key*. *flags* is one of:
 *
 *		**BPF_NOEXIST**
 *			The entry for *key* must not exist in the map.
 *		**BPF_EXIST**
 *			The entry for *key* must already exist in the map.
 *		**BPF_ANY**
 *			No condition on the existence of the entry for *key*.
 *
 *		If the *map* has eBPF programs (parser and verdict), those will
 *		be inherited by the socket being added. If the socket is
 *		already attached to eBPF programs, this results in an error.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
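 *		A minimal sockops sketch (map and program names hypothetical)
 *		that records newly established sockets into a sockmap:
 *
 *		::
 *
 *			struct bpf_map_def SEC("maps") sock_map = {
 *				.type = BPF_MAP_TYPE_SOCKMAP,
 *				.key_size = sizeof(int),
 *				.value_size = sizeof(int),
 *				.max_entries = 64,
 *			};
 *
 *			SEC("sockops")
 *			int sockmap_add(struct bpf_sock_ops *skops)
 *			{
 *				__u32 key = 0;
 *
 *				if (skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
 *					bpf_sock_map_update(skops, &sock_map,
 *							    &key, BPF_ANY);
 *				return 1;
 *			}
 *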
 * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
 *	Description
 *		Adjust the address pointed by *xdp_md*\ **->data_meta** by
 *		*delta* (which can be positive or negative). Note that this
 *		operation modifies the address stored in *xdp_md*\ **->data**,
 *		so the latter must be loaded only after the helper has been
 *		called.
 *
 *		The use of *xdp_md*\ **->data_meta** is optional and programs
 *		are not required to use it. The rationale is that when the
 *		packet is processed with XDP (e.g. as DoS filter), it is
 *		possible to push further meta data along with it before passing
 *		to the stack, and to give the guarantee that an ingress eBPF
 *		program attached as a TC classifier on the same device can pick
 *		this up for further post-processing. Since TC works with socket
 *		buffers, it remains possible to set from XDP the **mark** or
 *		**priority** fields, or other fields of the socket buffer.
 *		Having this scratch space generic and programmable allows for
 *		more flexibility as the user is free to store whatever meta
 *		data they need.
 *
 *		A call to this helper may change the underlying packet buffer.
 *		Therefore, at load time, all checks on pointers previously
 *		done by the verifier are invalidated and must be performed
 *		again, if the helper is used in combination with direct packet
 *		access.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
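 *		A minimal sketch (hypothetical program) storing one 32-bit
 *		word of meta data for a TC classifier to consume later:
 *
 *		::
 *
 *			SEC("xdp")
 *			int xdp_set_meta(struct xdp_md *ctx)
 *			{
 *				__u32 *meta;
 *				void *data;
 *
 *				// A negative delta grows the meta data area.
 *				if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)))
 *					return XDP_ABORTED;
 *
 *				meta = (void *)(long)ctx->data_meta;
 *				data = (void *)(long)ctx->data;
 *				if ((void *)(meta + 1) > data)
 *					return XDP_ABORTED;
 *
 *				*meta = 0x42;	// scratch value for TC
 *				return XDP_PASS;
 *			}
 *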
 * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
 *	Description
 *		Read the value of a perf event counter, and store it into *buf*
 *		of size *buf_size*. This helper relies on a *map* of type
 *		**BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
 *		counter is selected when *map* is updated with perf event file
 *		descriptors. The *map* is an array whose size is the number of
 *		available CPUs, and each cell contains a value relative to one
 *		CPU. The value to retrieve is indicated by *flags*, that
 *		contains the index of the CPU to look up, masked with
 *		**BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
 *		**BPF_F_CURRENT_CPU** to indicate that the value for the
 *		current CPU should be retrieved.
 *
 *		This helper behaves in a way close to
 *		**bpf_perf_event_read**\ (), save that instead of
 *		just returning the value observed, it fills the *buf*
 *		structure. This allows for additional data to be retrieved: in
 *		particular, the enabled and running times (in *buf*\
 *		**->enabled** and *buf*\ **->running**, respectively) are
 *		copied. In general, **bpf_perf_event_read_value**\ () is
 *		recommended over **bpf_perf_event_read**\ (), which has some
 *		ABI issues and provides fewer functionalities.
 *
 *		These values are interesting, because hardware PMU (Performance
 *		Monitoring Unit) counters are limited resources. When there are
 *		more PMU based perf events opened than available counters,
 *		the kernel will multiplex these events so each event gets a
 *		certain percentage (but not all) of the PMU time. When
 *		multiplexing happens, the number of samples or the counter
 *		value will not reflect what would be observed without
 *		multiplexing. This makes comparison between different runs
 *		difficult. Typically, the counter value should be normalized
 *		before comparing to other experiments. The usual normalization
 *		is done as follows.
 *
 *		::
 *
 *			normalized_counter = counter * t_enabled / t_running
 *
 *		Where t_enabled is the time enabled for the event and
 *		t_running is the time running for the event since the last
 *		normalization. The enabled and running times are accumulated
 *		since the perf event open. To obtain a scaling factor between
 *		two invocations of an eBPF program, users can use the CPU id
 *		as the key (which is typical for the perf array usage model)
 *		to remember the previous value and do the calculation inside
 *		the eBPF program.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
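 *		A minimal sketch (map and program names hypothetical) reading
 *		the counter for the current CPU:
 *
 *		::
 *
 *			struct bpf_map_def SEC("maps") counters = {
 *				.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 *				.key_size = sizeof(int),
 *				.value_size = sizeof(__u32),
 *				.max_entries = 64,
 *			};
 *
 *			SEC("kprobe/sys_write")
 *			int count_writes(struct pt_regs *ctx)
 *			{
 *				struct bpf_perf_event_value v = {};
 *
 *				if (bpf_perf_event_read_value(&counters,
 *							      BPF_F_CURRENT_CPU,
 *							      &v, sizeof(v)))
 *					return 0;
 *				// normalized = v.counter * v.enabled / v.running
 *				return 0;
 *			}
 *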
 * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
 *	Description
 *		For an eBPF program attached to a perf event, retrieve the
 *		value of the event counter associated to *ctx* and store it in
 *		the structure pointed by *buf* and of size *buf_size*. Enabled
 *		and running times are also stored in the structure (see
 *		description of helper **bpf_perf_event_read_value**\ () for
 *		more details).
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
 *	Description
 *		Emulate a call to **getsockopt()** on the socket associated to
 *		*bpf_socket*, which must be a full socket. The *level* at
 *		which the option resides and the name *optname* of the option
 *		must be specified, see **getsockopt(2)** for more information.
 *		The retrieved value is stored in the structure pointed by
 *		*optval* and of length *optlen*.
 *
 *		This helper actually implements a subset of **getsockopt()**.
 *		It supports the following *level*\ s:
 *
 *		* **IPPROTO_TCP**, which supports *optname*
 *		  **TCP_CONGESTION**.
 *		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
 *		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_override_return(struct pt_regs *regs, u64 rc)
 *	Description
 *		Used for error injection, this helper uses kprobes to override
 *		the return value of the probed function, and to set it to *rc*.
 *		The first argument is the context *regs* on which the kprobe
 *		works.
 *
 *		This helper works by setting the PC (program counter)
 *		to an override function which is run in place of the original
 *		probed function. This means the probed function is not run at
 *		all. The replacement function just returns with the required
 *		value.
 *
 *		This helper has security implications, and thus is subject to
 *		restrictions. It is only available if the kernel was compiled
 *		with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
 *		option, and in this case it only works on functions tagged with
 *		**ALLOW_ERROR_INJECTION** in the kernel code.
 *
 *		Also, the helper is only available for the architectures having
 *		the **CONFIG_FUNCTION_ERROR_INJECTION** option. As of this
 *		writing, the x86 architecture is the only one to support this
 *		feature.
 *	Return
 *		0
 *
 * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
 *	Description
 *		Attempt to set the value of the **bpf_sock_ops_cb_flags** field
 *		for the full TCP socket associated to *bpf_sock* to *argval*.
 *
 *		The primary use of this field is to determine if there should
 *		be calls to eBPF programs of type
 *		**BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
 *		code. A program of the same type can change its value, per
 *		connection and as necessary, when the connection is
 *		established. This field is directly accessible for reading, but
 *		this helper must be used for updates in order to return an
 *		error if an eBPF program tries to set a callback that is not
 *		supported in the current kernel.
 *
 *		The supported callback values that *argval* can combine are:
 *
 *		* **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
 *		* **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
 *		* **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
 *
 *		Here are some examples of where one could call such eBPF
 *		program:
 *
 *		* When RTO fires.
 *		* When a packet is retransmitted.
 *		* When the connection terminates.
 *		* When a packet is sent.
 *		* When a packet is received.
 *	Return
 *		Code **-EINVAL** if the socket is not a full TCP socket;
 *		otherwise, a positive number containing the bits that could not
 *		be set is returned (which comes down to 0 if all bits were set
 *		as required).
 *
 * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
 *	Description
 *		This helper is used in programs implementing policies at the
 *		socket level. If the message *msg* is allowed to pass (i.e. if
 *		the verdict eBPF program returns **SK_PASS**), redirect it to
 *		the socket referenced by *map* (of type
 *		**BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
 *		egress interfaces can be used for redirection. The
 *		**BPF_F_INGRESS** value in *flags* is used to make the
 *		distinction (ingress path is selected if the flag is present,
 *		egress path otherwise). This is the only flag supported for now.
 *	Return
 *		**SK_PASS** on success, or **SK_DROP** on error.
 *
 * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
 *	Description
 *		For socket policies, apply the verdict of the eBPF program to
 *		the next *bytes* (number of bytes) of message *msg*.
 *
 *		For example, this helper can be used in the following cases:
 *
 *		* A single **sendmsg**\ () or **sendfile**\ () system call
 *		  contains multiple logical messages that the eBPF program is
 *		  supposed to read and for which it should apply a verdict.
 *		* An eBPF program only cares about reading the first *bytes*
 *		  of a *msg*. If the message has a large payload, then setting
 *		  up and calling the eBPF program repeatedly for all bytes,
 *		  even though the verdict is already known, would create
 *		  unnecessary overhead.
 *
 *		When called from within an eBPF program, the helper sets a
 *		counter internal to the BPF infrastructure, that is used to
 *		apply the last verdict to the next *bytes*. If *bytes* is
 *		smaller than the current data being processed from a
 *		**sendmsg**\ () or **sendfile**\ () system call, the first
 *		*bytes* will be sent and the eBPF program will be re-run with
 *		the pointer for start of data pointing to byte number *bytes*
 *		**+ 1**. If *bytes* is larger than the current data being
 *		processed, then the eBPF verdict will be applied to multiple
 *		**sendmsg**\ () or **sendfile**\ () calls until *bytes* are
 *		consumed.
 *
 *		Note that if a socket closes with the internal counter holding
 *		a non-zero value, this is not a problem because data is not
 *		being buffered for *bytes* and is sent as it is received.
 *	Return
 *		0
 *
 * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
 *	Description
 *		For socket policies, prevent the execution of the verdict eBPF
 *		program for message *msg* until *bytes* (byte number) have been
 *		accumulated.
 *
 *		This can be used when one needs a specific number of bytes
 *		before a verdict can be assigned, even if the data spans
 *		multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
 *		case would be a user calling **sendmsg**\ () repeatedly with
 *		1-byte long message segments. Obviously, this is bad for
 *		performance, but it is still valid. If the eBPF program needs
 *		*bytes* bytes to validate a header, this helper can be used to
 *		prevent the eBPF program from being called again until *bytes*
 *		have been accumulated.
 *	Return
 *		0
 *
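 *		A minimal verdict-program sketch (the sizes are hypothetical)
 *		using this helper together with **bpf_msg_apply_bytes**\ ():
 *
 *		::
 *
 *			SEC("sk_msg")
 *			int msg_verdict(struct sk_msg_md *msg)
 *			{
 *				// Wait for a full 8-byte header before
 *				// running the verdict logic again ...
 *				bpf_msg_cork_bytes(msg, 8);
 *				// ... and let this verdict cover the next
 *				// 1024 bytes without re-invocation.
 *				bpf_msg_apply_bytes(msg, 1024);
 *				return SK_PASS;
 *			}
 *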
 * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
 *	Description
 *		For socket policies, pull in non-linear data from user space
 *		for *msg* and set pointers *msg*\ **->data** and *msg*\
 *		**->data_end** to *start* and *end* bytes offsets into *msg*,
 *		respectively.
 *
 *		If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
 *		*msg* it can only parse data that the (**data**, **data_end**)
 *		pointers have already consumed. For **sendmsg**\ () hooks this
 *		is likely the first scatterlist element. But for calls relying
 *		on the **sendpage** handler (e.g. **sendfile**\ ()) this will
 *		be the range (**0**, **0**) because the data is shared with
 *		user space and by default the objective is to avoid allowing
 *		user space to modify data while (or after) the eBPF verdict is
 *		being decided. This helper can be used to pull in data and to
 *		set the start and end pointers to given values. Data will be
 *		copied if necessary (i.e. if data was not linear and if start
 *		and end pointers do not point to the same chunk).
 *
 *		A call to this helper may change the underlying packet buffer.
 *		Therefore, at load time, all checks on pointers previously
 *		done by the verifier are invalidated and must be performed
 *		again, if the helper is used in combination with direct packet
 *		access.
 *
 *		All values for *flags* are reserved for future usage, and must
 *		be left at zero.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
 *	Description
 *		Bind the socket associated to *ctx* to the address pointed by
 *		*addr*, of length *addr_len*. This allows for making outgoing
 *		connections from the desired IP address, which can be useful
 *		for example when all processes inside a cgroup should use one
 *		single IP address on a host that has multiple IP addresses
 *		configured.
 *
 *		This helper works for IPv4 and IPv6, TCP and UDP sockets. The
 *		domain (*addr*\ **->sa_family**) must be **AF_INET** (or
 *		**AF_INET6**). Looking for a free port to bind to can be
 *		expensive, therefore binding to a port is not permitted by the
 *		helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
 *		must be set to zero.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
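 *		A minimal sketch (hypothetical address and program name,
 *		assuming the selftests' byte-order helpers for bpf_htonl),
 *		forcing a source IPv4 address for outgoing connections:
 *
 *		::
 *
 *			SEC("cgroup/connect4")
 *			int force_src_ip(struct bpf_sock_addr *ctx)
 *			{
 *				struct sockaddr_in sa = {
 *					.sin_family = AF_INET,
 *					// 192.168.0.1; sin_port stays 0, as
 *					// binding to a port is not allowed.
 *					.sin_addr.s_addr = bpf_htonl(0xc0a80001),
 *				};
 *
 *				if (bpf_bind(ctx, (struct sockaddr *)&sa,
 *					     sizeof(sa)))
 *					return 0;	// reject on failure
 *				return 1;
 *			}
 *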
 * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
 *	Description
 *		Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
 *		only possible to shrink the packet as of this writing,
 *		therefore *delta* must be a negative integer.
 *
 *		A call to this helper may change the underlying packet buffer.
 *		Therefore, at load time, all checks on pointers previously
 *		done by the verifier are invalidated and must be performed
 *		again, if the helper is used in combination with direct packet
 *		access.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
 *	Description
 *		Retrieve the XFRM state (IP transform framework, see also
 *		**ip-xfrm(8)**) at *index* in the XFRM "security path" of
 *		*skb*.
 *
 *		The retrieved value is stored in the **struct bpf_xfrm_state**
 *		pointed by *xfrm_state* and of length *size*.
 *
 *		All values for *flags* are reserved for future usage, and must
 *		be left at zero.
 *
 *		This helper is available only if the kernel was compiled with
 *		the **CONFIG_XFRM** configuration option.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
 *	Description
 *		Return a user or a kernel stack in the BPF-program-provided
 *		buffer. To achieve this, the helper needs *regs*, a pointer
 *		to the context on which the tracing program is executed.
 *		To store the stacktrace, the BPF program provides *buf* with
 *		a nonnegative *size*.
 *
 *		The last argument, *flags*, holds the number of stack frames to
 *		skip (from 0 to 255), masked with
 *		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
 *		the following flags:
 *
 *		**BPF_F_USER_STACK**
 *			Collect a user space stack instead of a kernel stack.
 *		**BPF_F_USER_BUILD_ID**
 *			Collect buildid+offset instead of ips for user stack,
 *			only valid if **BPF_F_USER_STACK** is also specified.
 *
 *		**bpf_get_stack**\ () can collect up to
 *		**PERF_MAX_STACK_DEPTH** kernel and user frames, provided the
 *		buffer *size* is large enough. Note that this limit can be
 *		controlled with the **sysctl** program, and that it should be
 *		manually increased in order to profile long user stacks (such
 *		as stacks for Java programs). To do so, use:
 *
 *		::
 *
 *			# sysctl kernel.perf_event_max_stack=<new value>
 *	Return
 *		A non-negative value equal to or less than *size* on success,
 *		or a negative error in case of failure.
 *
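 *		A minimal tracing sketch (program name and probed function are
 *		hypothetical) collecting a user stack into a local buffer:
 *
 *		::
 *
 *			SEC("kprobe/urandom_read")
 *			int trace_stack(struct pt_regs *ctx)
 *			{
 *				__u64 stack[32];
 *				int len;
 *
 *				len = bpf_get_stack(ctx, stack, sizeof(stack),
 *						    BPF_F_USER_STACK);
 *				if (len < 0)
 *					return 0;
 *				// stack[] now holds len bytes of user
 *				// instruction pointers.
 *				return 0;
 *			}
 *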
 * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
 *	Description
 *		This helper is similar to **bpf_skb_load_bytes**\ () in that
 *		it provides an easy way to load *len* bytes from *offset*
 *		from the packet associated to *skb*, into the buffer pointed
 *		by *to*. The difference to **bpf_skb_load_bytes**\ () is that
 *		a fifth argument *start_header* exists in order to select a
 *		base offset to start from. *start_header* can be one of:
 *
 *		**BPF_HDR_START_MAC**
 *			Base offset to load data from is *skb*'s mac header.
 *		**BPF_HDR_START_NET**
 *			Base offset to load data from is *skb*'s network header.
 *
 *		In general, "direct packet access" is the preferred method to
 *		access packet data, however, this helper is particularly useful
 *		in socket filters where *skb*\ **->data** does not always point
 *		to the start of the mac header and where "direct packet access"
 *		is not available.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
 *	Description
 *		Do FIB lookup in kernel tables using parameters in *params*.
 *		If lookup is successful and result shows packet is to be
 *		forwarded, the neighbor tables are searched for the nexthop.
 *		If successful (i.e., FIB lookup shows forwarding and nexthop
 *		is resolved), the nexthop address is returned in *ipv4_dst*
 *		or *ipv6_dst* based on family, *smac* is set to the MAC
 *		address of the egress device, *dmac* is set to the nexthop
 *		MAC address, and *rt_metric* is set to the metric from the
 *		route (IPv4/IPv6 only).
 *
 *		*plen* argument is the size of the passed in struct.
 *		*flags* argument can be a combination of one or more of the
 *		following values:
 *
 *		**BPF_FIB_LOOKUP_DIRECT**
 *			Do a direct table lookup vs full lookup using FIB
 *			rules.
 *		**BPF_FIB_LOOKUP_OUTPUT**
 *			Perform lookup from an egress perspective (default is
 *			ingress).
 *
 *		*ctx* is either **struct xdp_md** for XDP programs or
 *		**struct sk_buff** for tc cls_act programs.
 *	Return
 *		Egress device index on success, 0 if packet needs to continue
 *		up the stack for further processing or a negative error in case
 *		of failure.
 *
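 *		A minimal XDP forwarding sketch (hypothetical program; the
 *		packet parsing that fills *params* is elided):
 *
 *		::
 *
 *			SEC("xdp")
 *			int xdp_fwd(struct xdp_md *ctx)
 *			{
 *				struct bpf_fib_lookup params = {};
 *				int rc;
 *
 *				// ... fill params.family, addresses and
 *				// tot_len from the packet headers ...
 *				params.ifindex = ctx->ingress_ifindex;
 *
 *				rc = bpf_fib_lookup(ctx, &params,
 *						    sizeof(params), 0);
 *				if (rc <= 0)
 *					return XDP_PASS;  // let the stack decide
 *
 *				// Rewrite MACs from params.smac/params.dmac,
 *				// then transmit through the egress device.
 *				return bpf_redirect(rc, 0);
 *			}
 *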
 * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
 *	Description
 *		Add an entry to, or update a sockhash *map* referencing sockets.
 *		The *skops* is used as a new value for the entry associated to
 *		*key*. *flags* is one of:
 *
 *		**BPF_NOEXIST**
 *			The entry for *key* must not exist in the map.
 *		**BPF_EXIST**
 *			The entry for *key* must already exist in the map.
 *		**BPF_ANY**
 *			No condition on the existence of the entry for *key*.
 *
 *		If the *map* has eBPF programs (parser and verdict), those will
 *		be inherited by the socket being added. If the socket is
 *		already attached to eBPF programs, this results in an error.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
 *	Description
 *		This helper is used in programs implementing policies at the
 *		socket level. If the message *msg* is allowed to pass (i.e. if
 *		the verdict eBPF program returns **SK_PASS**), redirect it to
 *		the socket referenced by *map* (of type
 *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
 *		egress interfaces can be used for redirection. The
 *		**BPF_F_INGRESS** value in *flags* is used to make the
 *		distinction (ingress path is selected if the flag is present,
 *		egress path otherwise). This is the only flag supported for now.
 *	Return
 *		**SK_PASS** on success, or **SK_DROP** on error.
 *
 * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
 *	Description
 *		This helper is used in programs implementing policies at the
 *		socket level. If the sk_buff *skb* is allowed to pass (i.e.
 *		if the verdict eBPF program returns **SK_PASS**), redirect it
 *		to the socket referenced by *map* (of type
 *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
 *		egress interfaces can be used for redirection. The
 *		**BPF_F_INGRESS** value in *flags* is used to make the
 *		distinction (ingress path is selected if the flag is present,
 *		egress otherwise). This is the only flag supported for now.
 *	Return
 *		**SK_PASS** on success, or **SK_DROP** on error.
 *
 * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
 *	Description
 *		Encapsulate the packet associated to *skb* within a Layer 3
 *		protocol header. This header is provided in the buffer at
 *		address *hdr*, with *len* its size in bytes. *type* indicates
 *		the protocol of the header and can be one of:
 *
 *		**BPF_LWT_ENCAP_SEG6**
 *			IPv6 encapsulation with Segment Routing Header
 *			(**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
 *			the IPv6 header is computed by the kernel.
 *		**BPF_LWT_ENCAP_SEG6_INLINE**
 *			Only works if *skb* contains an IPv6 packet. Insert a
 *			Segment Routing Header (**struct ipv6_sr_hdr**) inside
 *			the IPv6 header.
 *
 *		A call to this helper may change the underlying packet buffer.
 *		Therefore, at load time, all checks on pointers previously
 *		done by the verifier are invalidated and must be performed
 *		again, if the helper is used in combination with direct packet
 *		access.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
 *	Description
 *		Store *len* bytes from address *from* into the packet
 *		associated to *skb*, at *offset*. Only the flags, tag and TLVs
 *		inside the outermost IPv6 Segment Routing Header can be
 *		modified through this helper.
 *
 *		A call to this helper may change the underlying packet buffer.
 *		Therefore, at load time, all checks on pointers previously
 *		done by the verifier are invalidated and must be performed
 *		again, if the helper is used in combination with direct packet
 *		access.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
 *	Description
 *		Adjust the size allocated to TLVs in the outermost IPv6
 *		Segment Routing Header contained in the packet associated to
 *		*skb*, at position *offset*, by *delta* bytes. Only offsets
 *		after the segments are accepted. *delta* can be positive
 *		(growing) as well as negative (shrinking).
 *
 *		A call to this helper may change the underlying packet buffer.
 *		Therefore, at load time, all checks on pointers previously
 *		done by the verifier are invalidated and must be performed
 *		again, if the helper is used in combination with direct packet
 *		access.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
 *	Description
 *		Apply an IPv6 Segment Routing action of type *action* to the
 *		packet associated to *skb*. Each action takes a parameter
 *		contained at address *param*, and of length *param_len* bytes.
 *		*action* can be one of:
 *
 *		**SEG6_LOCAL_ACTION_END_X**
 *			End.X action: Endpoint with Layer-3 cross-connect.
 *			Type of *param*: **struct in6_addr**.
 *		**SEG6_LOCAL_ACTION_END_T**
 *			End.T action: Endpoint with specific IPv6 table lookup.
 *			Type of *param*: **int**.
 *		**SEG6_LOCAL_ACTION_END_B6**
 *			End.B6 action: Endpoint bound to an SRv6 policy.
 *			Type of *param*: **struct ipv6_sr_hdr**.
 *		**SEG6_LOCAL_ACTION_END_B6_ENCAP**
 *			End.B6.Encap action: Endpoint bound to an SRv6
 *			encapsulation policy.
 *			Type of *param*: **struct ipv6_sr_hdr**.
 *
 *		A call to this helper may change the underlying packet buffer.
 *		Therefore, at load time, all checks on pointers previously
 *		done by the verifier are invalidated and must be performed
 *		again, if the helper is used in combination with direct packet
 *		access.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
 *	Description
 *		This helper is used in programs implementing IR decoding, to
 *		report a successfully decoded key press with *scancode* and
 *		*toggle* value in the given *protocol*. The scancode will be
 *		translated to a keycode using the rc keymap, and reported as
 *		an input key down event. After a period a key up event is
 *		generated. This period can be extended by calling either
 *		**bpf_rc_keydown**\ () again with the same values, or calling
 *		**bpf_rc_repeat**\ ().
 *
 *		Some protocols include a toggle bit, in case the button was
 *		released and pressed again between consecutive scancodes.
 *
 *		The *ctx* should point to the lirc sample as passed into
 *		the program.
 *
 *		The *protocol* is the decoded protocol number (see
 *		**enum rc_proto** for some predefined values).
 *
 *		This helper is only available if the kernel was compiled with
 *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
 *		"**y**".
 *	Return
 *		0
 *
 * int bpf_rc_repeat(void *ctx)
 *	Description
 *		This helper is used in programs implementing IR decoding, to
 *		report a successfully decoded repeat key message. This delays
 *		the generation of a key up event for the previously generated
 *		key down event.
 *
 *		Some IR protocols like NEC have a special IR message for
 *		repeating the last button, for when a button is held down.
 *
 *		The *ctx* should point to the lirc sample as passed into
 *		the program.
 *
 *		This helper is only available if the kernel was compiled with
 *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
 *		"**y**".
 *	Return
 *		0
 *
 * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
 *	Description
 *		Return the cgroup v2 id of the socket associated with the *skb*.
 *		This is roughly similar to the **bpf_get_cgroup_classid**\ ()
 *		helper for cgroup v1 by providing a tag or identifier that
 *		can be matched on or used for map lookups e.g. to implement
 *		policy. The cgroup v2 id of a given path in the hierarchy is
 *		exposed in user space through the f_handle API in order to get
 *		to the same 64-bit id.
 *
 *		This helper can be used on TC egress path, but not on ingress,
 *		and is available only if the kernel was compiled with the
 *		**CONFIG_SOCK_CGROUP_DATA** configuration option.
 *	Return
 *		The id is returned or 0 in case the id could not be retrieved.
 *
 * u64 bpf_get_current_cgroup_id(void)
 *	Return
 *		A 64-bit integer containing the current cgroup id based
 *		on the cgroup within which the current task is running.
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -821,7 +2141,23 @@ union bpf_attr {
	FN(msg_apply_bytes),		\
	FN(msg_cork_bytes),		\
	FN(msg_pull_data),		\
	FN(bind),			\
	FN(xdp_adjust_tail),		\
	FN(skb_get_xfrm_state),		\
	FN(get_stack),			\
	FN(skb_load_bytes_relative),	\
	FN(fib_lookup),			\
	FN(sock_hash_update),		\
	FN(msg_redirect_hash),		\
	FN(sk_redirect_hash),		\
	FN(lwt_push_encap),		\
	FN(lwt_seg6_store_bytes),	\
	FN(lwt_seg6_adjust_srh),	\
	FN(lwt_seg6_action),		\
	FN(rc_repeat),			\
	FN(rc_keydown),			\
	FN(skb_cgroup_id),		\
	FN(get_current_cgroup_id),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
 */
@@ -855,11 +2191,14 @@ enum bpf_func_id {
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
#define BPF_F_TUNINFO_IPV6		(1ULL << 0)

/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
#define BPF_F_SKIP_FIELD_MASK		0xffULL
#define BPF_F_USER_STACK		(1ULL << 8)
/* flags used by BPF_FUNC_get_stackid only. */
#define BPF_F_FAST_STACK_CMP		(1ULL << 9)
#define BPF_F_REUSE_STACKID		(1ULL << 10)
/* flags used by BPF_FUNC_get_stack only. */
#define BPF_F_USER_BUILD_ID		(1ULL << 11)

/* BPF_FUNC_skb_set_tunnel_key flags. */
#define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
@@ -879,6 +2218,18 @@ enum bpf_adj_room_mode {
	BPF_ADJ_ROOM_NET,
};

/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
enum bpf_hdr_start_off {
	BPF_HDR_START_MAC,
	BPF_HDR_START_NET,
};

/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
enum bpf_lwt_encap_mode {
	BPF_LWT_ENCAP_SEG6,
	BPF_LWT_ENCAP_SEG6_INLINE
};

/* user accessible mirror of in-kernel sk_buff.
 * new fields can only be added to the end of this structure
 */
@@ -923,10 +2274,24 @@ struct bpf_tunnel_key {
	};
	__u8 tunnel_tos;
	__u8 tunnel_ttl;
	__u16 tunnel_ext;	/* Padding, future use. */
	__u32 tunnel_label;
};

/* user accessible mirror of in-kernel xfrm_state.
 * new fields can only be added to the end of this structure
 */
struct bpf_xfrm_state {
	__u32 reqid;
	__u32 spi;	/* Stored in network byte order */
	__u16 family;
	__u16 ext;	/* Padding, future use. */
	union {
		__u32 remote_ipv4;	/* Stored in network byte order */
		__u32 remote_ipv6[4];	/* Stored in network byte order */
	};
};

/* Generic BPF return codes which all BPF program types may support.
 * The values are binary compatible with their TC_ACT_* counter-part to
 * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
@@ -999,6 +2364,14 @@ enum sk_action {
struct sk_msg_md {
	void *data;
	void *data_end;

	__u32 family;
	__u32 remote_ip4;	/* Stored in network byte order */
	__u32 local_ip4;	/* Stored in network byte order */
	__u32 remote_ip6[4];	/* Stored in network byte order */
	__u32 local_ip6[4];	/* Stored in network byte order */
	__u32 remote_port;	/* Stored in network byte order */
	__u32 local_port;	/* Stored in host byte order */
};

#define BPF_TAG_SIZE 8
@@ -1017,9 +2390,13 @@ struct bpf_prog_info {
	__aligned_u64 map_ids;
	char name[BPF_OBJ_NAME_LEN];
	__u32 ifindex;
	__u32 gpl_compatible:1;
	__u64 netns_dev;
	__u64 netns_ino;
	__u32 nr_jited_ksyms;
	__u32 nr_jited_func_lens;
	__aligned_u64 jited_ksyms;
	__aligned_u64 jited_func_lens;
} __attribute__((aligned(8)));

struct bpf_map_info {
@@ -1034,6 +2411,15 @@ struct bpf_map_info {
	__u32 :32;
	__u64 netns_dev;
	__u64 netns_ino;
	__u32 btf_id;
	__u32 btf_key_type_id;
	__u32 btf_value_type_id;
} __attribute__((aligned(8)));

struct bpf_btf_info {
	__aligned_u64 btf;
	__u32 btf_size;
	__u32 id;
} __attribute__((aligned(8)));

/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
@@ -1054,6 +2440,12 @@ struct bpf_sock_addr {
	__u32 family;		/* Allows 4-byte read, but no write */
	__u32 type;		/* Allows 4-byte read, but no write */
	__u32 protocol;		/* Allows 4-byte read, but no write */
	__u32 msg_src_ip4;	/* Allows 1,2,4-byte read and 4-byte write.
				 * Stored in network byte order.
				 */
	__u32 msg_src_ip6[4];	/* Allows 1,2,4-byte read and 4-byte write.
				 * Stored in network byte order.
				 */
};

/* User bpf_sock_ops struct to access socket values and specify request ops
@@ -1214,4 +2606,64 @@ struct bpf_raw_tracepoint_args {
	__u64 args[0];
};

/* DIRECT:  Skip the FIB rules and go to FIB table associated with device
 * OUTPUT:  Do lookup from egress perspective; default is ingress
 */
#define BPF_FIB_LOOKUP_DIRECT  BIT(0)
#define BPF_FIB_LOOKUP_OUTPUT  BIT(1)

struct bpf_fib_lookup {
	/* input: network family for lookup (AF_INET, AF_INET6)
	 * output: network family of egress nexthop
	 */
	__u8	family;

	/* set if lookup is to consider L4 data - e.g., FIB rules */
	__u8	l4_protocol;
	__be16	sport;
	__be16	dport;

	/* total length of packet from network header - used for MTU check */
	__u16	tot_len;
	__u32	ifindex;  /* L3 device index for lookup */

	union {
		/* inputs to lookup */
		__u8	tos;		/* AF_INET */
		__be32	flowlabel;	/* AF_INET6 */

		/* output: metric of fib result (IPv4/IPv6 only) */
		__u32	rt_metric;
	};

	union {
		__be32		ipv4_src;
		__u32		ipv6_src[4];  /* in6_addr; network order */
	};

	/* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in
	 * network header. output: bpf_fib_lookup sets to gateway address
	 * if FIB lookup returns gateway route
	 */
	union {
		__be32		ipv4_dst;
		__u32		ipv6_dst[4];  /* in6_addr; network order */
	};

	/* output */
	__be16	h_vlan_proto;
	__be16	h_vlan_TCI;
	__u8	smac[6];     /* ETH_ALEN */
	__u8	dmac[6];     /* ETH_ALEN */
};

enum bpf_task_fd_type {
	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
	BPF_FD_TYPE_TRACEPOINT,		/* tp name */
	BPF_FD_TYPE_KPROBE,		/* (symbol + offset) or addr */
	BPF_FD_TYPE_KRETPROBE,		/* (symbol + offset) or addr */
	BPF_FD_TYPE_UPROBE,		/* filename + offset */
	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
};

#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
new file mode 100644
index 000000000000..0b5ddbe135a4
--- /dev/null
+++ b/tools/include/uapi/linux/btf.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2018 Facebook */
+#ifndef _UAPI__LINUX_BTF_H__
+#define _UAPI__LINUX_BTF_H__
+
+#include <linux/types.h>
+
+#define BTF_MAGIC	0xeB9F
+#define BTF_VERSION	1
+
+struct btf_header {
+	__u16	magic;
+	__u8	version;
+	__u8	flags;
+	__u32	hdr_len;
+
+	/* All offsets are in bytes relative to the end of this header */
+	__u32	type_off;	/* offset of type section */
+	__u32	type_len;	/* length of type section */
+	__u32	str_off;	/* offset of string section */
+	__u32	str_len;	/* length of string section */
+};
+
+/* Max # of type identifiers */
+#define BTF_MAX_TYPE	0x0000ffff
+/* Max offset into the string section */
+#define BTF_MAX_NAME_OFFSET	0x0000ffff
+/* Max # of struct/union/enum members or func args */
+#define BTF_MAX_VLEN	0xffff
+
+struct btf_type {
+	__u32	name_off;
+	/* "info" bits arrangement
+	 * bits  0-15: vlen (e.g. # of struct's members)
+	 * bits 16-23: unused
+	 * bits 24-27: kind (e.g. int, ptr, array...etc)
+	 * bits 28-31: unused
+	 */
+	__u32	info;
+	/* "size" is used by INT, ENUM, STRUCT and UNION.
+	 * "size" tells the size of the type it is describing.
+	 *
+	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+	 * "type" is a type_id referring to another type.
+	 */
+	union {
+		__u32	size;
+		__u32	type;
+	};
+};
+
+#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x0f)
+#define BTF_INFO_VLEN(info)	((info) & 0xffff)
+
+#define BTF_KIND_UNKN		0	/* Unknown	*/
+#define BTF_KIND_INT		1	/* Integer	*/
+#define BTF_KIND_PTR		2	/* Pointer	*/
+#define BTF_KIND_ARRAY		3	/* Array	*/
+#define BTF_KIND_STRUCT		4	/* Struct	*/
+#define BTF_KIND_UNION		5	/* Union	*/
+#define BTF_KIND_ENUM		6	/* Enumeration	*/
+#define BTF_KIND_FWD		7	/* Forward	*/
+#define BTF_KIND_TYPEDEF	8	/* Typedef	*/
+#define BTF_KIND_VOLATILE	9	/* Volatile	*/
+#define BTF_KIND_CONST		10	/* Const	*/
+#define BTF_KIND_RESTRICT	11	/* Restrict	*/
+#define BTF_KIND_MAX		11
+#define NR_BTF_KINDS		12
+
+/* For some specific BTF_KIND, "struct btf_type" is immediately
+ * followed by extra data.
+ */
+
+/* BTF_KIND_INT is followed by a u32 and the following
+ * is the 32 bits arrangement:
+ */
+#define BTF_INT_ENCODING(VAL)	(((VAL) & 0x0f000000) >> 24)
+#define BTF_INT_OFFSET(VAL)	(((VAL) & 0x00ff0000) >> 16)
+#define BTF_INT_BITS(VAL)	((VAL) & 0x0000ffff)
+
+/* Attributes stored in the BTF_INT_ENCODING */
+#define BTF_INT_SIGNED	(1 << 0)
+#define BTF_INT_CHAR	(1 << 1)
+#define BTF_INT_BOOL	(1 << 2)
+
+/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
+ * The exact number of btf_enum is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_enum {
+	__u32	name_off;
+	__s32	val;
+};
+
+/* BTF_KIND_ARRAY is followed by one "struct btf_array" */
+struct btf_array {
+	__u32	type;
+	__u32	index_type;
+	__u32	nelems;
+};
+
+/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed
+ * by multiple "struct btf_member". The exact number
+ * of btf_member is stored in the vlen (of the info in
+ * "struct btf_type").
+ */
+struct btf_member {
+	__u32	name_off;
+	__u32	type;
+	__u32	offset;	/* offset in bits */
+};
+
+#endif /* _UAPI__LINUX_BTF_H__ */
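
As a reading aid for the bit layout above, this is a small userspace sketch (not part of the patch) that decodes a btf_type's info word and, for BTF_KIND_INT, the encoding __u32 that immediately follows the record; the btf_kind_str name table is a local assumption.

	#include <stdio.h>
	#include <linux/btf.h>	/* the header added above */

	static const char * const btf_kind_str[NR_BTF_KINDS] = {
		"unknown", "int", "ptr", "array", "struct", "union",
		"enum", "fwd", "typedef", "volatile", "const", "restrict",
	};

	/* Print one type record; kind-specific data follows the btf_type. */
	static void print_btf_type(const struct btf_type *t)
	{
		__u32 kind = BTF_INFO_KIND(t->info);
		__u32 vlen = BTF_INFO_VLEN(t->info);

		printf("%s: vlen=%u\n",
		       kind <= BTF_KIND_MAX ? btf_kind_str[kind] : "?", vlen);

		if (kind == BTF_KIND_INT) {
			const __u32 enc = *(const __u32 *)(t + 1);

			printf("  bits=%u offset=%u signed=%u\n",
			       BTF_INT_BITS(enc), BTF_INT_OFFSET(enc),
			       BTF_INT_ENCODING(enc) & BTF_INT_SIGNED);
		}
	}

For STRUCT/UNION and ENUM the same vlen tells how many btf_member or btf_enum records follow the btf_type.
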
diff --git a/tools/include/uapi/linux/erspan.h b/tools/include/uapi/linux/erspan.h
new file mode 100644
index 000000000000..841573019ae1
--- /dev/null
+++ b/tools/include/uapi/linux/erspan.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * ERSPAN Tunnel Metadata
+ *
+ * Copyright (c) 2018 VMware
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * Userspace API for metadata mode ERSPAN tunnel
+ */
+#ifndef _UAPI_ERSPAN_H
+#define _UAPI_ERSPAN_H
+
+#include <linux/types.h>	/* For __beXX in userspace */
+#include <asm/byteorder.h>
+
+/* ERSPAN version 2 metadata header */
+struct erspan_md2 {
+	__be32 timestamp;
+	__be16 sgt;	/* security group tag */
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u8	hwid_upper:2,
+		ft:5,
+		p:1;
+	__u8	o:1,
+		gra:2,
+		dir:1,
+		hwid:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u8	p:1,
+		ft:5,
+		hwid_upper:2;
+	__u8	hwid:4,
+		dir:1,
+		gra:2,
+		o:1;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+};
+
+struct erspan_metadata {
+	int version;
+	union {
+		__be32 index;		/* Version 1 (type II) */
+		struct erspan_md2 md2;	/* Version 2 (type III) */
+	} u;
+};
+
+#endif /* _UAPI_ERSPAN_H */
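
A short userspace sketch (not part of the patch, with made-up values) of how the version field selects which union member of struct erspan_metadata is meaningful:

	#include <string.h>
	#include <arpa/inet.h>
	#include <linux/erspan.h>

	static void fill_erspan_md(struct erspan_metadata *md, int version)
	{
		memset(md, 0, sizeof(*md));
		md->version = version;

		if (version == 1) {
			/* version 1 (type II): only a session index */
			md->u.index = htonl(10);
		} else {
			/* version 2 (type III): direction, hw id, etc. */
			md->u.md2.dir = 1;	/* egress */
			md->u.md2.hwid = 0x4;	/* low 4 bits; hwid_upper has the rest */
		}
	}
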
diff --git a/tools/include/uapi/linux/lirc.h b/tools/include/uapi/linux/lirc.h
new file mode 100644
index 000000000000..f189931042a7
--- /dev/null
+++ b/tools/include/uapi/linux/lirc.h
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * lirc.h - linux infrared remote control header file
+ * last modified 2010/07/13 by Jarod Wilson
+ */
+
+#ifndef _LINUX_LIRC_H
+#define _LINUX_LIRC_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define PULSE_BIT	0x01000000
+#define PULSE_MASK	0x00FFFFFF
+
+#define LIRC_MODE2_SPACE	0x00000000
+#define LIRC_MODE2_PULSE	0x01000000
+#define LIRC_MODE2_FREQUENCY	0x02000000
+#define LIRC_MODE2_TIMEOUT	0x03000000
+
+#define LIRC_VALUE_MASK		0x00FFFFFF
+#define LIRC_MODE2_MASK		0xFF000000
+
+#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE)
+#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE)
+#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY)
+#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT)
+
+#define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK)
+#define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK)
+
+#define LIRC_IS_SPACE(val) (LIRC_MODE2(val) == LIRC_MODE2_SPACE)
+#define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE)
+#define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY)
+#define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT)
+
+/* used heavily by lirc userspace */
+#define lirc_t int
+
+/*** lirc compatible hardware features ***/
+
+#define LIRC_MODE2SEND(x) (x)
+#define LIRC_SEND2MODE(x) (x)
+#define LIRC_MODE2REC(x) ((x) << 16)
+#define LIRC_REC2MODE(x) ((x) >> 16)
+
+#define LIRC_MODE_RAW		0x00000001
+#define LIRC_MODE_PULSE		0x00000002
+#define LIRC_MODE_MODE2		0x00000004
+#define LIRC_MODE_SCANCODE	0x00000008
+#define LIRC_MODE_LIRCCODE	0x00000010
+
+
+#define LIRC_CAN_SEND_RAW	LIRC_MODE2SEND(LIRC_MODE_RAW)
+#define LIRC_CAN_SEND_PULSE	LIRC_MODE2SEND(LIRC_MODE_PULSE)
+#define LIRC_CAN_SEND_MODE2	LIRC_MODE2SEND(LIRC_MODE_MODE2)
+#define LIRC_CAN_SEND_LIRCCODE	LIRC_MODE2SEND(LIRC_MODE_LIRCCODE)
+
+#define LIRC_CAN_SEND_MASK	0x0000003f
+
+#define LIRC_CAN_SET_SEND_CARRIER	0x00000100
+#define LIRC_CAN_SET_SEND_DUTY_CYCLE	0x00000200
+#define LIRC_CAN_SET_TRANSMITTER_MASK	0x00000400
+
+#define LIRC_CAN_REC_RAW	LIRC_MODE2REC(LIRC_MODE_RAW)
+#define LIRC_CAN_REC_PULSE	LIRC_MODE2REC(LIRC_MODE_PULSE)
+#define LIRC_CAN_REC_MODE2	LIRC_MODE2REC(LIRC_MODE_MODE2)
+#define LIRC_CAN_REC_SCANCODE	LIRC_MODE2REC(LIRC_MODE_SCANCODE)
+#define LIRC_CAN_REC_LIRCCODE	LIRC_MODE2REC(LIRC_MODE_LIRCCODE)
+
+#define LIRC_CAN_REC_MASK	LIRC_MODE2REC(LIRC_CAN_SEND_MASK)
+
+#define LIRC_CAN_SET_REC_CARRIER	(LIRC_CAN_SET_SEND_CARRIER << 16)
+#define LIRC_CAN_SET_REC_DUTY_CYCLE	(LIRC_CAN_SET_SEND_DUTY_CYCLE << 16)
+
+#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE	0x40000000
+#define LIRC_CAN_SET_REC_CARRIER_RANGE		0x80000000
+#define LIRC_CAN_GET_REC_RESOLUTION		0x20000000
+#define LIRC_CAN_SET_REC_TIMEOUT		0x10000000
+#define LIRC_CAN_SET_REC_FILTER			0x08000000
+
+#define LIRC_CAN_MEASURE_CARRIER		0x02000000
+#define LIRC_CAN_USE_WIDEBAND_RECEIVER		0x04000000
+
+#define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK)
+#define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK)
+
+#define LIRC_CAN_NOTIFY_DECODE			0x01000000
+
+/*** IOCTL commands for lirc driver ***/
+
+#define LIRC_GET_FEATURES		_IOR('i', 0x00000000, __u32)
+
+#define LIRC_GET_SEND_MODE		_IOR('i', 0x00000001, __u32)
+#define LIRC_GET_REC_MODE		_IOR('i', 0x00000002, __u32)
+#define LIRC_GET_REC_RESOLUTION	_IOR('i', 0x00000007, __u32)
+
+#define LIRC_GET_MIN_TIMEOUT		_IOR('i', 0x00000008, __u32)
+#define LIRC_GET_MAX_TIMEOUT		_IOR('i', 0x00000009, __u32)
+
+/* code length in bits, currently only for LIRC_MODE_LIRCCODE */
+#define LIRC_GET_LENGTH		_IOR('i', 0x0000000f, __u32)
+
+#define LIRC_SET_SEND_MODE		_IOW('i', 0x00000011, __u32)
+#define LIRC_SET_REC_MODE		_IOW('i', 0x00000012, __u32)
+/* Note: these can reset the corresponding pulse width */
+#define LIRC_SET_SEND_CARRIER		_IOW('i', 0x00000013, __u32)
+#define LIRC_SET_REC_CARRIER		_IOW('i', 0x00000014, __u32)
+#define LIRC_SET_SEND_DUTY_CYCLE	_IOW('i', 0x00000015, __u32)
+#define LIRC_SET_TRANSMITTER_MASK	_IOW('i', 0x00000017, __u32)
+
+/*
+ * When a timeout != 0 is set, the driver will send a
+ * LIRC_MODE2_TIMEOUT data packet; otherwise LIRC_MODE2_TIMEOUT is
+ * never sent. Timeout is disabled by default.
+ */
+#define LIRC_SET_REC_TIMEOUT		_IOW('i', 0x00000018, __u32)
+
+/* 1 enables, 0 disables timeout reports in MODE2 */
+#define LIRC_SET_REC_TIMEOUT_REPORTS	_IOW('i', 0x00000019, __u32)
+
+/*
+ * If enabled, the driver will send LIRC_MODE2_FREQUENCY packets
+ * from the next key press on.
+ */
+#define LIRC_SET_MEASURE_CARRIER_MODE	_IOW('i', 0x0000001d, __u32)
+
+/*
+ * To set a range, use LIRC_SET_REC_CARRIER_RANGE with the lower
+ * bound first, then LIRC_SET_REC_CARRIER with the upper bound.
+ */
+#define LIRC_SET_REC_CARRIER_RANGE	_IOW('i', 0x0000001f, __u32)
+
+#define LIRC_SET_WIDEBAND_RECEIVER	_IOW('i', 0x00000023, __u32)
+
+/*
+ * struct lirc_scancode - decoded scancode with protocol for use with
+ * LIRC_MODE_SCANCODE
+ *
+ * @timestamp: Timestamp in nanoseconds using CLOCK_MONOTONIC when IR
+ *	was decoded.
+ * @flags: should be 0 for transmit. When receiving scancodes,
+ *	LIRC_SCANCODE_FLAG_TOGGLE or LIRC_SCANCODE_FLAG_REPEAT can be set
+ *	depending on the protocol.
+ * @rc_proto: see enum rc_proto
+ * @keycode: the translated keycode. Set to 0 for transmit.
+ * @scancode: the scancode received or to be sent
+ */
+struct lirc_scancode {
+	__u64	timestamp;
+	__u16	flags;
+	__u16	rc_proto;
+	__u32	keycode;
+	__u64	scancode;
+};
+
+/* Set if the toggle bit of rc-5 or rc-6 is enabled */
+#define LIRC_SCANCODE_FLAG_TOGGLE	1
+/* Set if this is a nec or sanyo repeat */
+#define LIRC_SCANCODE_FLAG_REPEAT	2
+
+/**
+ * enum rc_proto - the Remote Controller protocol
+ *
+ * @RC_PROTO_UNKNOWN: Protocol not known
+ * @RC_PROTO_OTHER: Protocol known but proprietary
+ * @RC_PROTO_RC5: Philips RC5 protocol
+ * @RC_PROTO_RC5X_20: Philips RC5x 20 bit protocol
+ * @RC_PROTO_RC5_SZ: StreamZap variant of RC5
+ * @RC_PROTO_JVC: JVC protocol
+ * @RC_PROTO_SONY12: Sony 12 bit protocol
+ * @RC_PROTO_SONY15: Sony 15 bit protocol
+ * @RC_PROTO_SONY20: Sony 20 bit protocol
+ * @RC_PROTO_NEC: NEC protocol
+ * @RC_PROTO_NECX: Extended NEC protocol
+ * @RC_PROTO_NEC32: NEC 32 bit protocol
+ * @RC_PROTO_SANYO: Sanyo protocol
+ * @RC_PROTO_MCIR2_KBD: RC6-ish MCE keyboard
+ * @RC_PROTO_MCIR2_MSE: RC6-ish MCE mouse
+ * @RC_PROTO_RC6_0: Philips RC6-0-16 protocol
+ * @RC_PROTO_RC6_6A_20: Philips RC6-6A-20 protocol
+ * @RC_PROTO_RC6_6A_24: Philips RC6-6A-24 protocol
+ * @RC_PROTO_RC6_6A_32: Philips RC6-6A-32 protocol
+ * @RC_PROTO_RC6_MCE: MCE (Philips RC6-6A-32 subtype) protocol
+ * @RC_PROTO_SHARP: Sharp protocol
+ * @RC_PROTO_XMP: XMP protocol
+ * @RC_PROTO_CEC: CEC protocol
+ * @RC_PROTO_IMON: iMon Pad protocol
+ */
+enum rc_proto {
+	RC_PROTO_UNKNOWN	= 0,
+	RC_PROTO_OTHER		= 1,
+	RC_PROTO_RC5		= 2,
+	RC_PROTO_RC5X_20	= 3,
+	RC_PROTO_RC5_SZ		= 4,
+	RC_PROTO_JVC		= 5,
+	RC_PROTO_SONY12		= 6,
+	RC_PROTO_SONY15		= 7,
+	RC_PROTO_SONY20		= 8,
+	RC_PROTO_NEC		= 9,
+	RC_PROTO_NECX		= 10,
+	RC_PROTO_NEC32		= 11,
+	RC_PROTO_SANYO		= 12,
+	RC_PROTO_MCIR2_KBD	= 13,
+	RC_PROTO_MCIR2_MSE	= 14,
+	RC_PROTO_RC6_0		= 15,
+	RC_PROTO_RC6_6A_20	= 16,
+	RC_PROTO_RC6_6A_24	= 17,
+	RC_PROTO_RC6_6A_32	= 18,
+	RC_PROTO_RC6_MCE	= 19,
+	RC_PROTO_SHARP		= 20,
+	RC_PROTO_XMP		= 21,
+	RC_PROTO_CEC		= 22,
+	RC_PROTO_IMON		= 23,
+};
+
+#endif
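
To show how the mode2 macros and ioctls above fit together, here is a minimal receiver sketch (not part of the patch); the /dev/lirc0 path and the terse error handling are assumptions, while the macros and ioctls are the ones defined above.

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/lirc.h>

	int main(void)
	{
		__u32 features, mode = LIRC_MODE_MODE2, sample;
		int fd = open("/dev/lirc0", O_RDONLY);	/* device path is an assumption */

		if (fd < 0)
			return 1;
		if (ioctl(fd, LIRC_GET_FEATURES, &features) < 0 ||
		    !(features & LIRC_CAN_REC_MODE2) ||
		    ioctl(fd, LIRC_SET_REC_MODE, &mode) < 0)
			return 1;

		/* each sample packs the mode in the high byte and a value
		 * (e.g. a duration in microseconds) in the low 24 bits */
		while (read(fd, &sample, sizeof(sample)) == sizeof(sample)) {
			if (LIRC_IS_PULSE(sample))
				printf("pulse %u\n", LIRC_VALUE(sample));
			else if (LIRC_IS_SPACE(sample))
				printf("space %u\n", LIRC_VALUE(sample));
			else if (LIRC_IS_TIMEOUT(sample))
				printf("timeout\n");
		}
		close(fd);
		return 0;
	}
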
diff --git a/tools/include/uapi/linux/seg6.h b/tools/include/uapi/linux/seg6.h
new file mode 100644
index 000000000000..286e8d6a8e98
--- /dev/null
+++ b/tools/include/uapi/linux/seg6.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * SR-IPv6 implementation
+ *
+ * Author:
+ * David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SEG6_H
+#define _UAPI_LINUX_SEG6_H
+
+#include <linux/types.h>
+#include <linux/in6.h>	/* For struct in6_addr. */
+
+/*
+ * SRH
+ */
+struct ipv6_sr_hdr {
+	__u8	nexthdr;
+	__u8	hdrlen;
+	__u8	type;
+	__u8	segments_left;
+	__u8	first_segment;	/* Represents the last_entry field of SRH */
+	__u8	flags;
+	__u16	tag;
+
+	struct in6_addr segments[0];
+};
+
+#define SR6_FLAG1_PROTECTED	(1 << 6)
+#define SR6_FLAG1_OAM		(1 << 5)
+#define SR6_FLAG1_ALERT		(1 << 4)
+#define SR6_FLAG1_HMAC		(1 << 3)
+
+#define SR6_TLV_INGRESS		1
+#define SR6_TLV_EGRESS		2
+#define SR6_TLV_OPAQUE		3
+#define SR6_TLV_PADDING		4
+#define SR6_TLV_HMAC		5
+
+#define sr_has_hmac(srh) ((srh)->flags & SR6_FLAG1_HMAC)
+
+struct sr6_tlv {
+	__u8 type;
+	__u8 len;
+	__u8 data[0];
+};
+
+#endif
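
A sketch (not part of the patch) of building an SRH from these definitions. The routing type value 4 and the hdrlen arithmetic (8-octet units, not counting the first 8 octets) come from the SRH specification rather than this header, so treat them as assumptions to verify.

	#include <stdlib.h>
	#include <string.h>
	#include <linux/seg6.h>

	/* Build an SRH carrying n segments, e.g. for use with the
	 * IPV6_RTHDR socket option or a lightweight tunnel route. */
	static struct ipv6_sr_hdr *build_srh(const struct in6_addr *segs, int n)
	{
		size_t len = sizeof(struct ipv6_sr_hdr) + n * sizeof(struct in6_addr);
		struct ipv6_sr_hdr *srh;

		if (n < 1)
			return NULL;
		srh = calloc(1, len);
		if (!srh)
			return NULL;

		srh->type = 4;			/* routing type for segment routing */
		srh->hdrlen = (len - 8) / 8;	/* 8-octet units, first 8 excluded */
		srh->segments_left = n - 1;
		srh->first_segment = n - 1;	/* aka last_entry */
		memcpy(srh->segments, segs, n * sizeof(*segs));
		return srh;
	}
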
diff --git a/tools/include/uapi/linux/seg6_local.h b/tools/include/uapi/linux/seg6_local.h
new file mode 100644
index 000000000000..edc138bdc56d
--- /dev/null
+++ b/tools/include/uapi/linux/seg6_local.h
@@ -0,0 +1,80 @@
+/*
+ * SR-IPv6 implementation
+ *
+ * Author:
+ * David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SEG6_LOCAL_H
+#define _UAPI_LINUX_SEG6_LOCAL_H
+
+#include <linux/seg6.h>
+
+enum {
+	SEG6_LOCAL_UNSPEC,
+	SEG6_LOCAL_ACTION,
+	SEG6_LOCAL_SRH,
+	SEG6_LOCAL_TABLE,
+	SEG6_LOCAL_NH4,
+	SEG6_LOCAL_NH6,
+	SEG6_LOCAL_IIF,
+	SEG6_LOCAL_OIF,
+	SEG6_LOCAL_BPF,
+	__SEG6_LOCAL_MAX,
+};
+#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
+
+enum {
+	SEG6_LOCAL_ACTION_UNSPEC	= 0,
+	/* node segment */
+	SEG6_LOCAL_ACTION_END		= 1,
+	/* adjacency segment (IPv6 cross-connect) */
+	SEG6_LOCAL_ACTION_END_X		= 2,
+	/* lookup of next seg NH in table */
+	SEG6_LOCAL_ACTION_END_T		= 3,
+	/* decap and L2 cross-connect */
+	SEG6_LOCAL_ACTION_END_DX2	= 4,
+	/* decap and IPv6 cross-connect */
+	SEG6_LOCAL_ACTION_END_DX6	= 5,
+	/* decap and IPv4 cross-connect */
+	SEG6_LOCAL_ACTION_END_DX4	= 6,
+	/* decap and lookup of DA in v6 table */
+	SEG6_LOCAL_ACTION_END_DT6	= 7,
+	/* decap and lookup of DA in v4 table */
+	SEG6_LOCAL_ACTION_END_DT4	= 8,
+	/* binding segment with insertion */
+	SEG6_LOCAL_ACTION_END_B6	= 9,
+	/* binding segment with encapsulation */
+	SEG6_LOCAL_ACTION_END_B6_ENCAP	= 10,
+	/* binding segment with MPLS encap */
+	SEG6_LOCAL_ACTION_END_BM	= 11,
+	/* lookup last seg in table */
+	SEG6_LOCAL_ACTION_END_S		= 12,
+	/* forward to SR-unaware VNF with static proxy */
+	SEG6_LOCAL_ACTION_END_AS	= 13,
+	/* forward to SR-unaware VNF with masquerading */
+	SEG6_LOCAL_ACTION_END_AM	= 14,
+	/* custom BPF action */
+	SEG6_LOCAL_ACTION_END_BPF	= 15,
+
+	__SEG6_LOCAL_ACTION_MAX,
+};
+
+#define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1)
+
+enum {
+	SEG6_LOCAL_BPF_PROG_UNSPEC,
+	SEG6_LOCAL_BPF_PROG,
+	SEG6_LOCAL_BPF_PROG_NAME,
+	__SEG6_LOCAL_BPF_PROG_MAX,
+};
+
+#define SEG6_LOCAL_BPF_PROG_MAX (__SEG6_LOCAL_BPF_PROG_MAX - 1)
+
+#endif
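
For decoding the SEG6_LOCAL_ACTION netlink attribute when dumping seg6local routes, a small name table is enough; this sketch is not part of the patch, and the name spellings (iproute2-style) are a local assumption.

	#include <linux/seg6_local.h>

	static const char *seg6_action_name(int action)
	{
		static const char * const names[] = {
			[SEG6_LOCAL_ACTION_END]		 = "End",
			[SEG6_LOCAL_ACTION_END_X]	 = "End.X",
			[SEG6_LOCAL_ACTION_END_T]	 = "End.T",
			[SEG6_LOCAL_ACTION_END_DX2]	 = "End.DX2",
			[SEG6_LOCAL_ACTION_END_DX6]	 = "End.DX6",
			[SEG6_LOCAL_ACTION_END_DX4]	 = "End.DX4",
			[SEG6_LOCAL_ACTION_END_DT6]	 = "End.DT6",
			[SEG6_LOCAL_ACTION_END_DT4]	 = "End.DT4",
			[SEG6_LOCAL_ACTION_END_B6]	 = "End.B6",
			[SEG6_LOCAL_ACTION_END_B6_ENCAP] = "End.B6.Encaps",
			[SEG6_LOCAL_ACTION_END_BM]	 = "End.BM",
			[SEG6_LOCAL_ACTION_END_S]	 = "End.S",
			[SEG6_LOCAL_ACTION_END_AS]	 = "End.AS",
			[SEG6_LOCAL_ACTION_END_AM]	 = "End.AM",
			[SEG6_LOCAL_ACTION_END_BPF]	 = "End.BPF",
		};

		if (action <= 0 || action > SEG6_LOCAL_ACTION_MAX || !names[action])
			return "unspec";
		return names[action];
	}
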