Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/bpf-cgroup.h   | 13
-rw-r--r--  include/linux/bpf.h          | 78
-rw-r--r--  include/linux/bpf_verifier.h | 16
-rw-r--r--  include/linux/cgroup.h       | 18
-rw-r--r--  include/linux/filter.h       | 18
5 files changed, 126 insertions, 17 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index cb3c6b3b89c8..b631ee75762d 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -6,6 +6,7 @@
 #include <linux/errno.h>
 #include <linux/jump_label.h>
 #include <linux/percpu.h>
+#include <linux/percpu-refcount.h>
 #include <linux/rbtree.h>
 #include <uapi/linux/bpf.h>
 
@@ -71,11 +72,17 @@ struct cgroup_bpf {
 	u32 flags[MAX_BPF_ATTACH_TYPE];
 
 	/* temp storage for effective prog array used by prog_attach/detach */
-	struct bpf_prog_array __rcu *inactive;
+	struct bpf_prog_array *inactive;
+
+	/* reference counter used to detach bpf programs after cgroup removal */
+	struct percpu_ref refcnt;
+
+	/* cgroup_bpf is released using a work queue */
+	struct work_struct release_work;
 };
 
-void cgroup_bpf_put(struct cgroup *cgrp);
 int cgroup_bpf_inherit(struct cgroup *cgrp);
+void cgroup_bpf_offline(struct cgroup *cgrp);
 
 int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 			enum bpf_attach_type type, u32 flags);
@@ -283,8 +290,8 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
 
 struct bpf_prog;
 struct cgroup_bpf {};
-static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
+static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
 
 static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
 					  enum bpf_prog_type ptype,
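
[Editor's note, not part of the patch] With this change cgroup teardown no longer calls cgroup_bpf_put() directly; instead cgroup_bpf_offline() kills the new percpu_ref, and release_work frees the cgroup's program arrays once the last reference is dropped. Below is a minimal sketch of how the refcnt/release_work pair could be wired up; cgroup_bpf_release() and cgroup_bpf_release_fn() are illustrative names, not necessarily the helpers this patch adds in kernel/bpf/cgroup.c.

/* Sketch: work handler run after the last reference to cgrp->bpf is gone. */
static void cgroup_bpf_release(struct work_struct *work)
{
	/* would walk and free the cgroup's effective prog arrays here */
}

/* Sketch: percpu_ref release callback; defers the real work to a workqueue. */
static void cgroup_bpf_release_fn(struct percpu_ref *ref)
{
	struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);

	INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
	queue_work(system_wq, &cgrp->bpf.release_work);
}

/* Sketch: cgroup_bpf_inherit() would start the refcount roughly like this. */
static int example_cgroup_bpf_init_ref(struct cgroup *cgrp)
{
	return percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn,
			       0, GFP_KERNEL);
}
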
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4fb3aa2dc975..e5a309e6a400 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -66,6 +66,11 @@ struct bpf_map_ops {
 			     u64 imm, u32 *off);
 };
 
+struct bpf_map_memory {
+	u32 pages;
+	struct user_struct *user;
+};
+
 struct bpf_map {
 	/* The first two cachelines with read-mostly members of which some
 	 * are also accessed in fast-path (e.g. ops, max_entries).
@@ -86,7 +91,7 @@ struct bpf_map {
 	u32 btf_key_type_id;
 	u32 btf_value_type_id;
 	struct btf *btf;
-	u32 pages;
+	struct bpf_map_memory memory;
 	bool unpriv_array;
 	bool frozen; /* write-once */
 	/* 48 bytes hole */
@@ -94,8 +99,7 @@ struct bpf_map {
 	/* The 3rd and 4th cacheline with misc members to avoid false sharing
 	 * particularly with refcounting.
 	 */
-	struct user_struct *user ____cacheline_aligned;
-	atomic_t refcnt;
+	atomic_t refcnt ____cacheline_aligned;
 	atomic_t usercnt;
 	struct work_struct work;
 	char name[BPF_OBJ_NAME_LEN];
@@ -370,6 +374,7 @@ struct bpf_prog_aux {
 	u32 id;
 	u32 func_cnt; /* used by non-func prog as the number of func progs */
 	u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
+	bool verifier_zext; /* Zero extensions have been inserted by verifier. */
 	bool offload_requested;
 	struct bpf_prog **func;
 	void *jit_data; /* JIT specific data. arch dependent */
@@ -513,17 +518,17 @@ struct bpf_prog_array {
 };
 
 struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
-void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);
-int bpf_prog_array_length(struct bpf_prog_array __rcu *progs);
-int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
+void bpf_prog_array_free(struct bpf_prog_array *progs);
+int bpf_prog_array_length(struct bpf_prog_array *progs);
+int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs,
 				__u32 __user *prog_ids, u32 cnt);
 
-void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
+void bpf_prog_array_delete_safe(struct bpf_prog_array *progs,
 				struct bpf_prog *old_prog);
-int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
+int bpf_prog_array_copy_info(struct bpf_prog_array *array,
 			     u32 *prog_ids, u32 request_cnt,
 			     u32 *prog_cnt);
-int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
+int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 			struct bpf_prog *exclude_prog,
 			struct bpf_prog *include_prog,
 			struct bpf_prog_array **new_array);
@@ -551,6 +556,56 @@ _out: \
 		_ret;			\
 	 })
 
+/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
+ * so BPF programs can request cwr for TCP packets.
+ *
+ * Current cgroup skb programs can only return 0 or 1 (0 to drop the
+ * packet). This macro changes the behavior so the low order bit
+ * indicates whether the packet should be dropped (0) or not (1)
+ * and the next bit is a congestion notification bit. This could be
+ * used by TCP to call tcp_enter_cwr().
+ *
+ * Hence, new allowed return values of CGROUP EGRESS BPF programs are:
+ *   0: drop packet
+ *   1: keep packet
+ *   2: drop packet and cn
+ *   3: keep packet and cn
+ *
+ * This macro then converts it to one of the NET_XMIT or an error
+ * code that is then interpreted as drop packet (and no cn):
+ *   0: NET_XMIT_SUCCESS  skb should be transmitted
+ *   1: NET_XMIT_DROP     skb should be dropped and cn
+ *   2: NET_XMIT_CN       skb should be transmitted and cn
+ *   3: -EPERM            skb should be dropped
+ */
+#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func)	\
+	({						\
+		struct bpf_prog_array_item *_item;	\
+		struct bpf_prog *_prog;			\
+		struct bpf_prog_array *_array;		\
+		u32 ret;				\
+		u32 _ret = 1;				\
+		u32 _cn = 0;				\
+		preempt_disable();			\
+		rcu_read_lock();			\
+		_array = rcu_dereference(array);	\
+		_item = &_array->items[0];		\
+		while ((_prog = READ_ONCE(_item->prog))) {		\
+			bpf_cgroup_storage_set(_item->cgroup_storage);	\
+			ret = func(_prog, ctx);		\
+			_ret &= (ret & 1);		\
+			_cn |= (ret & 2);		\
+			_item++;			\
+		}					\
+		rcu_read_unlock();			\
+		preempt_enable();			\
+		if (_ret)				\
+			_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);	\
+		else					\
+			_ret = (_cn ? NET_XMIT_DROP : -EPERM);		\
+		_ret;					\
+	})
+
 #define BPF_PROG_RUN_ARRAY(array, ctx, func)		\
 	__BPF_PROG_RUN_ARRAY(array, ctx, func, false)
 
@@ -595,9 +650,12 @@ struct bpf_map *__bpf_map_get(struct fd f);
 struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
-int bpf_map_precharge_memlock(u32 pages);
 int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
 void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
+int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size);
+void bpf_map_charge_finish(struct bpf_map_memory *mem);
+void bpf_map_charge_move(struct bpf_map_memory *dst,
+			 struct bpf_map_memory *src);
 void *bpf_map_area_alloc(size_t size, int numa_node);
 void bpf_map_area_free(void *base);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
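
[Editor's note, not part of the patch] bpf_map_precharge_memlock() is dropped in favour of the three-step charge API declared above: charge the caller's RLIMIT_MEMLOCK into a temporary struct bpf_map_memory before allocating, move the charge into the map on success, and return it with bpf_map_charge_finish() on failure. A hedged sketch of that pattern follows; struct example_map and example_map_alloc() are invented for illustration.

struct example_map {
	struct bpf_map map;
	/* map-specific payload would follow */
};

static struct bpf_map *example_map_alloc(union bpf_attr *attr)
{
	struct bpf_map_memory mem;
	struct example_map *emap;
	size_t size = (size_t)attr->max_entries * attr->value_size;
	int err;

	/* charge the memlock limit up front, before any allocation */
	err = bpf_map_charge_init(&mem, sizeof(*emap) + size);
	if (err)
		return ERR_PTR(err);

	emap = bpf_map_area_alloc(sizeof(*emap) + size, NUMA_NO_NODE);
	if (!emap) {
		/* allocation failed: give the charge back */
		bpf_map_charge_finish(&mem);
		return ERR_PTR(-ENOMEM);
	}

	/* success: transfer the charge into the map's bpf_map_memory */
	bpf_map_charge_move(&emap->map.memory, &mem);
	return &emap->map;
}
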
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 1305ccbd8fe6..704ed7971472 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -36,9 +36,11 @@
  */
 enum bpf_reg_liveness {
 	REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
-	REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */
-	REG_LIVE_WRITTEN, /* reg was written first, screening off later reads */
-	REG_LIVE_DONE = 4, /* liveness won't be updating this register anymore */
+	REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */
+	REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */
+	REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64,
+	REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */
+	REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
 };
 
 struct bpf_reg_state {
@@ -131,6 +133,11 @@ struct bpf_reg_state {
 	 * pointing to bpf_func_state.
 	 */
 	u32 frameno;
+	/* Tracks subreg definition. The stored value is the insn_idx of the
+	 * writing insn. This is safe because subreg_def is used before any insn
+	 * patching which only happens after main verification finished.
+	 */
+	s32 subreg_def;
 	enum bpf_reg_liveness live;
 };
 
@@ -187,6 +194,7 @@ struct bpf_func_state {
 struct bpf_verifier_state {
 	/* call stack tracking */
 	struct bpf_func_state *frame[MAX_CALL_FRAMES];
+	u32 insn_idx;
 	u32 curframe;
 	u32 active_spin_lock;
 	bool speculative;
@@ -232,7 +240,9 @@ struct bpf_insn_aux_data {
 	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
 	int sanitize_stack_off; /* stack slot to be cleared */
 	bool seen; /* this insn was processed by the verifier */
+	bool zext_dst; /* this insn zero extends dst reg */
 	u8 alu_state; /* used in combination with alu_limit */
+	bool prune_point;
 	unsigned int orig_idx; /* original instruction index */
 };
 
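
[Editor's note, not part of the patch] Splitting REG_LIVE_READ into REG_LIVE_READ32 and REG_LIVE_READ64 turns the liveness enum into a bit mask, so the verifier can record whether only the low 32 bits or the full 64-bit value of a register was observed. A tiny illustrative check (helper names are invented):

/* Sketch: with flag-valued liveness, reads are tested with a mask. */
static bool example_reg_value_observed(enum bpf_reg_liveness live)
{
	/* either read flavour makes the register's initial value matter */
	return (live & REG_LIVE_READ) != 0;
}

static bool example_reg_needs_full_64bit(enum bpf_reg_liveness live)
{
	/* only a 64-bit read forces the upper half to be preserved */
	return (live & REG_LIVE_READ64) != 0;
}
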
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c0077adeea83..49e8facf7c4a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -924,4 +924,22 @@ static inline bool cgroup_task_frozen(struct task_struct *task)
 
 #endif /* !CONFIG_CGROUPS */
 
+#ifdef CONFIG_CGROUP_BPF
+static inline void cgroup_bpf_get(struct cgroup *cgrp)
+{
+	percpu_ref_get(&cgrp->bpf.refcnt);
+}
+
+static inline void cgroup_bpf_put(struct cgroup *cgrp)
+{
+	percpu_ref_put(&cgrp->bpf.refcnt);
+}
+
+#else /* CONFIG_CGROUP_BPF */
+
+static inline void cgroup_bpf_get(struct cgroup *cgrp) {}
+static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
+
+#endif /* CONFIG_CGROUP_BPF */
+
 #endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7148bab96943..43b45d6db36d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -160,6 +160,20 @@ struct ctl_table_header;
 		.off   = 0,					\
 		.imm   = IMM })
 
+/* Special form of mov32, used for doing explicit zero extension on dst. */
+#define BPF_ZEXT_REG(DST)					\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU | BPF_MOV | BPF_X,		\
+		.dst_reg = DST,					\
+		.src_reg = DST,					\
+		.off   = 0,					\
+		.imm   = 1 })
+
+static inline bool insn_is_zext(const struct bpf_insn *insn)
+{
+	return insn->code == (BPF_ALU | BPF_MOV | BPF_X) && insn->imm == 1;
+}
+
 /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
 #define BPF_LD_IMM64(DST, IMM)					\
 	BPF_LD_IMM64_RAW(DST, 0, IMM)
@@ -512,7 +526,8 @@ struct bpf_prog {
 				blinded:1, /* Was blinded */
 				is_func:1, /* program is a bpf function */
 				kprobe_override:1, /* Do we override a kprobe? */
-				has_callchain_buf:1; /* callchain buffer allocated? */
+				has_callchain_buf:1, /* callchain buffer allocated? */
+				enforce_expected_attach_type:1; /* Enforce expected_attach_type checking at attach time */
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	enum bpf_attach_type	expected_attach_type; /* For some prog types */
 	u32			len;		/* Number of filter blocks */
@@ -811,6 +826,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
 void bpf_jit_compile(struct bpf_prog *prog);
+bool bpf_jit_needs_zext(void);
 bool bpf_helper_changes_pkt_data(void *func);
 
 static inline bool bpf_dump_raw_ok(void)
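
[Editor's note, not part of the patch] The filter.h additions back the verifier's explicit zero-extension support: BPF_ZEXT_REG(reg) encodes a mov32 from a register to itself with imm set to 1 so insn_is_zext() can recognise it later, and bpf_jit_needs_zext() lets an arch JIT that does not implicitly zero-extend 32-bit results ask the verifier to emit such instructions. A rough sketch of how a backend might consume them (example_emit() is illustrative only):

/* Sketch: a JIT that wants verifier-inserted zero extensions opts in. */
bool bpf_jit_needs_zext(void)
{
	return true;	/* this backend does not zero-extend 32-bit ops itself */
}

/* Sketch: the special mov32 is handled separately while emitting code. */
static void example_emit(const struct bpf_insn *insn)
{
	if (insn_is_zext(insn)) {
		/* BPF_ZEXT_REG: just clear the upper 32 bits of dst_reg */
		return;
	}
	/* handle ordinary instructions here */
}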