Diffstat (limited to 'include/linux')
 -rw-r--r--   include/linux/bpf-cgroup.h   |  13
 -rw-r--r--   include/linux/bpf.h          |  78
 -rw-r--r--   include/linux/bpf_verifier.h |  16
 -rw-r--r--   include/linux/cgroup.h       |  18
 -rw-r--r--   include/linux/filter.h       |  18
 5 files changed, 126 insertions, 17 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index cb3c6b3b89c8..b631ee75762d 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -6,6 +6,7 @@
 #include <linux/errno.h>
 #include <linux/jump_label.h>
 #include <linux/percpu.h>
+#include <linux/percpu-refcount.h>
 #include <linux/rbtree.h>
 #include <uapi/linux/bpf.h>
 
@@ -71,11 +72,17 @@ struct cgroup_bpf {
 	u32 flags[MAX_BPF_ATTACH_TYPE];
 
 	/* temp storage for effective prog array used by prog_attach/detach */
-	struct bpf_prog_array __rcu *inactive;
+	struct bpf_prog_array *inactive;
+
+	/* reference counter used to detach bpf programs after cgroup removal */
+	struct percpu_ref refcnt;
+
+	/* cgroup_bpf is released using a work queue */
+	struct work_struct release_work;
 };
 
-void cgroup_bpf_put(struct cgroup *cgrp);
 int cgroup_bpf_inherit(struct cgroup *cgrp);
+void cgroup_bpf_offline(struct cgroup *cgrp);
 
 int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 			enum bpf_attach_type type, u32 flags);
@@ -283,8 +290,8 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
 
 struct bpf_prog;
 struct cgroup_bpf {};
-static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
+static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
 
 static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
 					 enum bpf_prog_type ptype,
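Taken together with the cgroup_bpf_get()/cgroup_bpf_put() helpers added to cgroup.h later in this diff, the new refcnt and release_work fields decouple the lifetime of attached BPF programs from the cgroup itself: cgroup removal only calls cgroup_bpf_offline() to kill the percpu reference, and the actual detaching happens from a work item once the last reference is dropped. A minimal sketch of how the fields are presumably wired up in kernel/bpf/cgroup.c (not part of this diff; cgroup_bpf_release() and cgroup_bpf_release_fn() are illustrative names):

/* Sketch only: intended lifetime of the new cgroup_bpf fields. */
static void cgroup_bpf_release(struct work_struct *work)
{
	struct cgroup *cgrp = container_of(work, struct cgroup,
					   bpf.release_work);

	/* detach the remaining programs and free the effective arrays here */

	percpu_ref_exit(&cgrp->bpf.refcnt);
}

static void cgroup_bpf_release_fn(struct percpu_ref *ref)
{
	struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);

	/* the release callback must not block, so defer the real cleanup */
	INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
	queue_work(system_wq, &cgrp->bpf.release_work);
}

int cgroup_bpf_inherit(struct cgroup *cgrp)
{
	int ret;

	ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
			      GFP_KERNEL);
	if (ret)
		return ret;

	/* ... set up the effective prog arrays as before ... */
	return 0;
}

void cgroup_bpf_offline(struct cgroup *cgrp)
{
	/* called when the cgroup is removed: drop the base reference */
	percpu_ref_kill(&cgrp->bpf.refcnt);
}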
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4fb3aa2dc975..e5a309e6a400 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -66,6 +66,11 @@ struct bpf_map_ops {
 			     u64 imm, u32 *off);
 };
 
+struct bpf_map_memory {
+	u32 pages;
+	struct user_struct *user;
+};
+
 struct bpf_map {
 	/* The first two cachelines with read-mostly members of which some
 	 * are also accessed in fast-path (e.g. ops, max_entries).
@@ -86,7 +91,7 @@ struct bpf_map {
 	u32 btf_key_type_id;
 	u32 btf_value_type_id;
 	struct btf *btf;
-	u32 pages;
+	struct bpf_map_memory memory;
 	bool unpriv_array;
 	bool frozen; /* write-once */
 	/* 48 bytes hole */
@@ -94,8 +99,7 @@ struct bpf_map {
 	/* The 3rd and 4th cacheline with misc members to avoid false sharing
 	 * particularly with refcounting.
 	 */
-	struct user_struct *user ____cacheline_aligned;
-	atomic_t refcnt;
+	atomic_t refcnt ____cacheline_aligned;
 	atomic_t usercnt;
 	struct work_struct work;
 	char name[BPF_OBJ_NAME_LEN];
@@ -370,6 +374,7 @@ struct bpf_prog_aux {
 	u32 id;
 	u32 func_cnt; /* used by non-func prog as the number of func progs */
 	u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
+	bool verifier_zext; /* Zero extensions has been inserted by verifier. */
 	bool offload_requested;
 	struct bpf_prog **func;
 	void *jit_data; /* JIT specific data. arch dependent */
@@ -513,17 +518,17 @@ struct bpf_prog_array {
 };
 
 struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
-void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);
-int bpf_prog_array_length(struct bpf_prog_array __rcu *progs);
-int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
+void bpf_prog_array_free(struct bpf_prog_array *progs);
+int bpf_prog_array_length(struct bpf_prog_array *progs);
+int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs,
 				__u32 __user *prog_ids, u32 cnt);
 
-void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
+void bpf_prog_array_delete_safe(struct bpf_prog_array *progs,
 				struct bpf_prog *old_prog);
-int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
+int bpf_prog_array_copy_info(struct bpf_prog_array *array,
 			     u32 *prog_ids, u32 request_cnt,
 			     u32 *prog_cnt);
-int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
+int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 			struct bpf_prog *exclude_prog,
 			struct bpf_prog *include_prog,
 			struct bpf_prog_array **new_array);
@@ -551,6 +556,56 @@ _out: \
 	_ret; \
 })
 
+/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
+ * so BPF programs can request cwr for TCP packets.
+ *
+ * Current cgroup skb programs can only return 0 or 1 (0 to drop the
+ * packet. This macro changes the behavior so the low order bit
+ * indicates whether the packet should be dropped (0) or not (1)
+ * and the next bit is a congestion notification bit. This could be
+ * used by TCP to call tcp_enter_cwr()
+ *
+ * Hence, new allowed return values of CGROUP EGRESS BPF programs are:
+ *   0: drop packet
+ *   1: keep packet
+ *   2: drop packet and cn
+ *   3: keep packet and cn
+ *
+ * This macro then converts it to one of the NET_XMIT or an error
+ * code that is then interpreted as drop packet (and no cn):
+ *   0: NET_XMIT_SUCCESS  skb should be transmitted
+ *   1: NET_XMIT_DROP     skb should be dropped and cn
+ *   2: NET_XMIT_CN       skb should be transmitted and cn
+ *   3: -EPERM            skb should be dropped
+ */
+#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \
+	({ \
+		struct bpf_prog_array_item *_item; \
+		struct bpf_prog *_prog; \
+		struct bpf_prog_array *_array; \
+		u32 ret; \
+		u32 _ret = 1; \
+		u32 _cn = 0; \
+		preempt_disable(); \
+		rcu_read_lock(); \
+		_array = rcu_dereference(array); \
+		_item = &_array->items[0]; \
+		while ((_prog = READ_ONCE(_item->prog))) { \
+			bpf_cgroup_storage_set(_item->cgroup_storage); \
+			ret = func(_prog, ctx); \
+			_ret &= (ret & 1); \
+			_cn |= (ret & 2); \
+			_item++; \
+		} \
+		rcu_read_unlock(); \
+		preempt_enable(); \
+		if (_ret) \
+			_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
+		else \
+			_ret = (_cn ? NET_XMIT_DROP : -EPERM); \
+		_ret; \
+	})
+
 #define BPF_PROG_RUN_ARRAY(array, ctx, func) \
 	__BPF_PROG_RUN_ARRAY(array, ctx, func, false)
 
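The return-value folding done by BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY above is easier to follow with a concrete case: the macro ANDs the low bit of every program's return value (keep/drop) and ORs bit 1 (congestion notification), then maps the pair onto a NET_XMIT code or -EPERM. A hedged, kernel-context illustration of that folding for two attached programs (NET_XMIT_* come from linux/netdevice.h; the helper below is purely illustrative and not part of the patch):

/* prog A returns 3 (keep + cn), prog B returns 1 (keep):
 *   keep = 1 & (3 & 1) & (1 & 1) = 1
 *   cn   = 0 | (3 & 2) | (1 & 2) = 2
 *   -> NET_XMIT_CN: transmit, but give TCP a chance to call tcp_enter_cwr().
 * If prog B returned 0 (drop) instead, keep would be 0 and the result would
 * be NET_XMIT_DROP (cn set) or -EPERM (cn clear).
 */
static inline int fold_cgroup_egress_ret(u32 ret_a, u32 ret_b)
{
	u32 keep = (ret_a & 1) & (ret_b & 1);
	u32 cn   = (ret_a & 2) | (ret_b & 2);

	if (keep)
		return cn ? NET_XMIT_CN : NET_XMIT_SUCCESS;
	return cn ? NET_XMIT_DROP : -EPERM;
}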
@@ -595,9 +650,12 @@ struct bpf_map *__bpf_map_get(struct fd f);
 struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
-int bpf_map_precharge_memlock(u32 pages);
 int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
 void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
+int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size);
+void bpf_map_charge_finish(struct bpf_map_memory *mem);
+void bpf_map_charge_move(struct bpf_map_memory *dst,
+			 struct bpf_map_memory *src);
 void *bpf_map_area_alloc(size_t size, int numa_node);
 void bpf_map_area_free(void *base);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
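Replacing bpf_map_precharge_memlock() with the bpf_map_charge_init()/bpf_map_charge_finish()/bpf_map_charge_move() trio, and folding the user/pages pair into struct bpf_map_memory, suggests a charge-first calling convention for map creation: charge the memlock rlimit into a local bpf_map_memory, move the charge into the map once allocation succeeds, and give it back on failure or when the map is destroyed. A sketch of what a map_alloc callback might look like under this API (struct example_map and the cost calculation are assumptions for illustration, not taken from this diff):

/* Sketch of the expected calling convention for the new charging API. */
static struct bpf_map *example_map_alloc(union bpf_attr *attr)
{
	struct bpf_map_memory mem;
	struct example_map *emap;	/* hypothetical map type */
	u64 cost = sizeof(*emap) + (u64)attr->max_entries * attr->value_size;
	int err;

	/* charge the full size against RLIMIT_MEMLOCK before allocating;
	 * real code would also validate that cost does not overflow */
	err = bpf_map_charge_init(&mem, cost);
	if (err)
		return ERR_PTR(err);

	emap = bpf_map_area_alloc(cost, NUMA_NO_NODE);
	if (!emap) {
		/* allocation failed: give the charge back */
		bpf_map_charge_finish(&mem);
		return ERR_PTR(-ENOMEM);
	}

	/* success: the charge now belongs to the map and is returned via
	 * bpf_map_charge_finish(&map->memory) when the map is freed */
	bpf_map_charge_move(&emap->map.memory, &mem);
	bpf_map_init_from_attr(&emap->map, attr);
	return &emap->map;
}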
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 1305ccbd8fe6..704ed7971472 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -36,9 +36,11 @@
  */
 enum bpf_reg_liveness {
 	REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
-	REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */
-	REG_LIVE_WRITTEN, /* reg was written first, screening off later reads */
-	REG_LIVE_DONE = 4, /* liveness won't be updating this register anymore */
+	REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */
+	REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */
+	REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64,
+	REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */
+	REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
 };
 
 struct bpf_reg_state {
@@ -131,6 +133,11 @@ struct bpf_reg_state {
 	 * pointing to bpf_func_state.
 	 */
 	u32 frameno;
+	/* Tracks subreg definition. The stored value is the insn_idx of the
+	 * writing insn. This is safe because subreg_def is used before any insn
+	 * patching which only happens after main verification finished.
+	 */
+	s32 subreg_def;
 	enum bpf_reg_liveness live;
 };
 
@@ -187,6 +194,7 @@ struct bpf_func_state {
 struct bpf_verifier_state {
 	/* call stack tracking */
 	struct bpf_func_state *frame[MAX_CALL_FRAMES];
+	u32 insn_idx;
 	u32 curframe;
 	u32 active_spin_lock;
 	bool speculative;
@@ -232,7 +240,9 @@ struct bpf_insn_aux_data {
 	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
 	int sanitize_stack_off; /* stack slot to be cleared */
 	bool seen; /* this insn was processed by the verifier */
+	bool zext_dst; /* this insn zero extends dst reg */
 	u8 alu_state; /* used in combination with alu_limit */
+	bool prune_point;
 	unsigned int orig_idx; /* original instruction index */
 };
 
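The REG_LIVE_READ32/READ64 split, the new subreg_def field and the zext_dst flag give the verifier what it needs to decide where a 32-bit write must be followed by an explicit zero extension: a sub-register write records its defining instruction in subreg_def, and if liveness later propagates a full 64-bit read (REG_LIVE_READ64) back to that definition, the defining instruction is marked zext_dst so an extension can be patched in after it. A hedged illustration at the instruction level (register and constant choices are arbitrary; the comments describe the presumed verifier bookkeeping, which is implemented outside this diff):

static const struct bpf_insn zext_example[] = {
	/* insn 0: 32-bit write; defines only the low 32 bits of R1 and
	 * records R1's defining instruction in subreg_def */
	BPF_MOV32_IMM(BPF_REG_1, -1),
	/* insn 1: 64-bit write of the return value */
	BPF_MOV64_IMM(BPF_REG_0, 0),
	/* insn 2: a BPF_JMP (not BPF_JMP32) comparison reads all 64 bits of
	 * R1, so REG_LIVE_READ64 propagates back to insn 0, zext_dst is set
	 * there, and a BPF_ZEXT_REG(BPF_REG_1) can be patched in right after
	 * insn 0 when the JIT asks for it via bpf_jit_needs_zext() */
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, -1, 0),
	BPF_EXIT_INSN(),
};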
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c0077adeea83..49e8facf7c4a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -924,4 +924,22 @@ static inline bool cgroup_task_frozen(struct task_struct *task)
 
 #endif /* !CONFIG_CGROUPS */
 
+#ifdef CONFIG_CGROUP_BPF
+static inline void cgroup_bpf_get(struct cgroup *cgrp)
+{
+	percpu_ref_get(&cgrp->bpf.refcnt);
+}
+
+static inline void cgroup_bpf_put(struct cgroup *cgrp)
+{
+	percpu_ref_put(&cgrp->bpf.refcnt);
+}
+
+#else /* CONFIG_CGROUP_BPF */
+
+static inline void cgroup_bpf_get(struct cgroup *cgrp) {}
+static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
+
+#endif /* CONFIG_CGROUP_BPF */
+
 #endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7148bab96943..43b45d6db36d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -160,6 +160,20 @@ struct ctl_table_header;
 		.off   = 0, \
 		.imm   = IMM })
 
+/* Special form of mov32, used for doing explicit zero extension on dst. */
+#define BPF_ZEXT_REG(DST) \
+	((struct bpf_insn) { \
+		.code  = BPF_ALU | BPF_MOV | BPF_X, \
+		.dst_reg = DST, \
+		.src_reg = DST, \
+		.off   = 0, \
+		.imm   = 1 })
+
+static inline bool insn_is_zext(const struct bpf_insn *insn)
+{
+	return insn->code == (BPF_ALU | BPF_MOV | BPF_X) && insn->imm == 1;
+}
+
 /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
 #define BPF_LD_IMM64(DST, IMM) \
 	BPF_LD_IMM64_RAW(DST, 0, IMM)
@@ -512,7 +526,8 @@ struct bpf_prog {
 				blinded:1, /* Was blinded */
 				is_func:1, /* program is a bpf function */
 				kprobe_override:1, /* Do we override a kprobe? */
-				has_callchain_buf:1; /* callchain buffer allocated? */
+				has_callchain_buf:1, /* callchain buffer allocated? */
+				enforce_expected_attach_type:1; /* Enforce expected_attach_type checking at attach time */
 	enum bpf_prog_type type; /* Type of BPF program */
 	enum bpf_attach_type expected_attach_type; /* For some prog types */
 	u32 len; /* Number of filter blocks */
@@ -811,6 +826,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
 void bpf_jit_compile(struct bpf_prog *prog);
+bool bpf_jit_needs_zext(void);
 bool bpf_helper_changes_pkt_data(void *func);
 
 static inline bool bpf_dump_raw_ok(void)
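BPF_ZEXT_REG and insn_is_zext() rely on a small encoding trick: a verifier-inserted zero extension is a 32-bit mov of a register onto itself with imm set to 1, while an ordinary BPF_MOV32_REG leaves imm at 0, so later passes can tell the two apart. bpf_jit_needs_zext() is the per-architecture opt-in for having the verifier insert these instructions; the default presumably returns false, so JITs that already zero-extend every 32-bit result on their own are left unchanged. A short sketch of the encoding (the demo function is illustrative only):

/* A patched zero extension and a plain mov32 share the same opcode and
 * registers and differ only in the imm field checked by insn_is_zext(). */
static bool __maybe_unused zext_encoding_demo(void)
{
	struct bpf_insn zext = BPF_ZEXT_REG(BPF_REG_3);
	struct bpf_insn mov  = BPF_MOV32_REG(BPF_REG_3, BPF_REG_3);

	return insn_is_zext(&zext) && !insn_is_zext(&mov); /* evaluates to true */
}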