author		David S. Miller <davem@davemloft.net>	2018-03-31 23:33:04 -0400
committer	David S. Miller <davem@davemloft.net>	2018-03-31 23:33:04 -0400
commit		d4069fe6fc91d496e4d1fe38b1a8b71aeb181c50 (patch)
tree		2ca8148a4b4bc20965e98e03abd417c57bc0777e
parent		70ae7222c61d4f19c844c8fe75f053f8976b9552 (diff)
parent		7828f20e3779e4e85e55371e0e43f5006a15fb41 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:

====================
pull-request: bpf-next 2018-03-31

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Add a raw BPF tracepoint API in order to have a BPF program type that
   can access kernel internal arguments of the tracepoints in their raw
   form, similar to kprobes based BPF programs. This infrastructure also
   adds a new BPF_RAW_TRACEPOINT_OPEN command to the BPF syscall which
   returns an anon-inode backed fd for the tracepoint object, allowing
   automatic detach of the BPF program resp. unregistering of the
   tracepoint probe on fd release, from Alexei.

2) Add new BPF cgroup hooks at bind() and connect() entry in order to
   allow BPF programs to reject, inspect or modify user space passed
   struct sockaddr, as well as a hook at post bind time once the port
   has been allocated. They are used in FB's container management engine
   for implementing policy, replacing the fragile LD_PRELOAD wrapper
   intercepting bind() and connect() calls that only works in limited
   scenarios like glibc based apps but not for other runtimes in
   containerized applications, from Andrey.

3) BPF_F_INGRESS flag support has been added to sockmap programs for
   their redirect helper call, bringing it in line with cls_bpf based
   programs. Support is added for both variants of sockmap programs,
   meaning tx ULP hooks as well as recv skb hooks, from John.

4) Various improvements on the BPF side for the nfp driver; among other
   things this work adds BPF map update and delete helper call support
   from the datapath, JITing of 32 and 64 bit XADD instructions, as well
   as offload support for the bpf_get_prandom_u32() call. An initial
   implementation of an nfp packet cache has also been tackled, which
   optimizes memory access (see the merge commit for further details),
   from Jakub and Jiong.

5) Removal of the struct bpf_verifier_env argument from the
   print_bpf_insn() API has been done in order to prepare for using
   print_bpf_insn() soon out of the perf tool directly. This makes the
   print_bpf_insn() API more generic and pushes the env into private
   data. bpftool is adjusted as well for the print_bpf_insn() argument
   removal, from Jiri.

6) A couple of cleanups and prep work for the upcoming BTF (BPF Type
   Format). The latter will reuse the current BPF verifier log as well,
   thus bpf_verifier_log() is further generalized, from Martin.

7) For the bpf_getsockopt() and bpf_setsockopt() helpers, IPv4 IP_TOS
   read and write support has been added, in similar fashion to the
   existing IPv6 IPV6_TCLASS socket option support, from Nikita.

8) Fixes in recent sockmap scatterlist API usage, which did not use
   sg_init_table() for initialization, thus triggering a BUG_ON() in the
   scatterlist API when CONFIG_DEBUG_SG was enabled. This adds and uses
   a small helper sg_init_marker() to properly handle the affected
   cases, from Prashant.

9) Let the BPF core follow the IDR code convention and therefore use the
   idr_preload() and idr_preload_end() helpers, which also helps
   idr_alloc_cyclic() under GFP_ATOMIC to better succeed under memory
   pressure, from Shaohua.

10) Last but not least, a spelling fix in an error message for the BPF
    cookie UID helper under BPF sample code, from Colin.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
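As a quick orientation for point 1), here is a minimal user-space sketch of the new raw tracepoint API, using the bpf_raw_tracepoint_open() wrapper this series adds to tools/lib/bpf. The surrounding function, include path, and error handling are illustrative assumptions, not taken from the series; prog_fd is assumed to be an already loaded BPF_PROG_TYPE_RAW_TRACEPOINT program:

    /* Hypothetical sketch: attach an already-loaded raw tracepoint program
     * to the sched_switch tracepoint. bpf_raw_tracepoint_open() wraps the
     * new BPF_RAW_TRACEPOINT_OPEN bpf(2) command and returns an anon-inode
     * backed fd; closing that fd detaches the program automatically.
     */
    #include <stdio.h>
    #include "bpf/bpf.h"

    int attach_raw_tp(int prog_fd)
    {
    	int tp_fd;

    	tp_fd = bpf_raw_tracepoint_open("sched_switch", prog_fd);
    	if (tp_fd < 0) {
    		perror("bpf_raw_tracepoint_open");
    		return -1;
    	}
    	/* Keep tp_fd open for as long as the probe should stay attached. */
    	return tp_fd;
    }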
-rw-r--r--  drivers/infiniband/hw/hfi1/file_ops.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_ctxts.h | 12
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/cmsg.c | 12
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/fw.h | 1
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/jit.c | 462
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/main.c | 18
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/main.h | 85
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/offload.c | 45
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 217
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_asm.c | 2
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_asm.h | 7
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/dvm/main.c | 7
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/iwl-devtrace-iwlwifi.h | 39
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c | 1
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 7
-rw-r--r--  drivers/net/wireless/mediatek/mt7601u/trace.h | 6
-rw-r--r--  include/asm-generic/vmlinux.lds.h | 10
-rw-r--r--  include/linux/bpf-cgroup.h | 68
-rw-r--r--  include/linux/bpf.h | 5
-rw-r--r--  include/linux/bpf_types.h | 2
-rw-r--r--  include/linux/bpf_verifier.h | 13
-rw-r--r--  include/linux/filter.h | 15
-rw-r--r--  include/linux/kernel.h | 7
-rw-r--r--  include/linux/scatterlist.h | 18
-rw-r--r--  include/linux/trace_events.h | 42
-rw-r--r--  include/linux/tracepoint-defs.h | 6
-rw-r--r--  include/net/addrconf.h | 7
-rw-r--r--  include/net/inet_common.h | 2
-rw-r--r--  include/net/ipv6.h | 2
-rw-r--r--  include/net/sock.h | 4
-rw-r--r--  include/net/udp.h | 1
-rw-r--r--  include/trace/bpf_probe.h | 92
-rw-r--r--  include/trace/define_trace.h | 1
-rw-r--r--  include/trace/events/f2fs.h | 2
-rw-r--r--  include/uapi/linux/bpf.h | 62
-rw-r--r--  kernel/bpf/cgroup.c | 39
-rw-r--r--  kernel/bpf/disasm.c | 52
-rw-r--r--  kernel/bpf/disasm.h | 5
-rw-r--r--  kernel/bpf/sockmap.c | 303
-rw-r--r--  kernel/bpf/syscall.c | 184
-rw-r--r--  kernel/bpf/verifier.c | 64
-rw-r--r--  kernel/trace/bpf_trace.c | 206
-rw-r--r--  lib/scatterlist.c | 9
-rw-r--r--  net/core/filter.c | 481
-rw-r--r--  net/ipv4/af_inet.c | 71
-rw-r--r--  net/ipv4/tcp.c | 10
-rw-r--r--  net/ipv4/tcp_ipv4.c | 16
-rw-r--r--  net/ipv4/udp.c | 14
-rw-r--r--  net/ipv6/af_inet6.c | 66
-rw-r--r--  net/ipv6/tcp_ipv6.c | 16
-rw-r--r--  net/ipv6/udp.c | 20
-rw-r--r--  net/mac802154/trace.h | 8
-rw-r--r--  net/wireless/trace.h | 2
-rw-r--r--  samples/bpf/Makefile | 1
-rw-r--r--  samples/bpf/bpf_load.c | 14
-rw-r--r--  samples/bpf/cookie_uid_helper_example.c | 2
-rw-r--r--  samples/bpf/test_overhead_raw_tp_kern.c | 17
-rw-r--r--  samples/bpf/test_overhead_user.c | 12
-rw-r--r--  samples/sockmap/sockmap_kern.c | 62
-rwxr-xr-x  samples/sockmap/sockmap_test.sh | 40
-rw-r--r--  samples/sockmap/sockmap_user.c | 58
-rw-r--r--  security/apparmor/include/path.h | 7
-rw-r--r--  sound/firewire/amdtp-stream-trace.h | 2
-rw-r--r--  tools/bpf/bpftool/xlated_dumper.c | 12
-rw-r--r--  tools/include/uapi/linux/bpf.h | 62
-rw-r--r--  tools/lib/bpf/bpf.c | 55
-rw-r--r--  tools/lib/bpf/bpf.h | 18
-rw-r--r--  tools/lib/bpf/libbpf.c | 113
-rw-r--r--  tools/lib/bpf/libbpf.h | 8
-rw-r--r--  tools/testing/selftests/bpf/Makefile | 10
-rw-r--r--  tools/testing/selftests/bpf/bpf_helpers.h | 2
-rw-r--r--  tools/testing/selftests/bpf/connect4_prog.c | 45
-rw-r--r--  tools/testing/selftests/bpf/connect6_prog.c | 61
-rw-r--r--  tools/testing/selftests/bpf/test_progs.c | 91
-rw-r--r--  tools/testing/selftests/bpf/test_sock.c | 479
-rw-r--r--  tools/testing/selftests/bpf/test_sock_addr.c | 588
-rwxr-xr-x  tools/testing/selftests/bpf/test_sock_addr.sh | 57
77 files changed, 4265 insertions, 399 deletions
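And for point 2), a rough sketch of what a cgroup connect4 hook can look like, modeled loosely on the tools/testing/selftests/bpf/connect4_prog.c selftest added below; the rewrite address and port here are made-up illustrative values, not taken from the series:

    /* Sketch of a BPF_PROG_TYPE_CGROUP_SOCK_ADDR program attached at
     * BPF_CGROUP_INET4_CONNECT: rewrite every IPv4 connect() within the
     * cgroup to 127.0.0.1:4040 via the user-space visible sockaddr.
     */
    #include <linux/bpf.h>
    #include "bpf_helpers.h"
    #include "bpf_endian.h"

    SEC("cgroup/connect4")
    int connect_v4_prog(struct bpf_sock_addr *ctx)
    {
    	ctx->user_ip4 = bpf_htonl(0x7f000001);	/* 127.0.0.1 */
    	ctx->user_port = bpf_htons(4040);

    	return 1;	/* non-zero verdict lets the connect() proceed */
    }

    char _license[] SEC("license") = "GPL";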
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 41fafebe3b0d..da4aa1a95b11 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1153,7 +1153,7 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
 	cinfo.sdma_ring_size = fd->cq->nentries;
 	cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;
 
-	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
+	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, &cinfo);
 	if (copy_to_user((void __user *)arg, &cinfo, len))
 		return -EFAULT;
 
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index 4eb4cc798035..e00c8a7d559c 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -106,7 +106,7 @@ TRACE_EVENT(hfi1_uctxtdata,
 TRACE_EVENT(hfi1_ctxt_info,
 	    TP_PROTO(struct hfi1_devdata *dd, unsigned int ctxt,
 		     unsigned int subctxt,
-		     struct hfi1_ctxt_info cinfo),
+		     struct hfi1_ctxt_info *cinfo),
 	    TP_ARGS(dd, ctxt, subctxt, cinfo),
 	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
 			     __field(unsigned int, ctxt)
@@ -120,11 +120,11 @@ TRACE_EVENT(hfi1_ctxt_info,
 	    TP_fast_assign(DD_DEV_ASSIGN(dd);
 			   __entry->ctxt = ctxt;
 			   __entry->subctxt = subctxt;
-			   __entry->egrtids = cinfo.egrtids;
-			   __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
-			   __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
-			   __entry->sdma_ring_size = cinfo.sdma_ring_size;
-			   __entry->rcvegr_size = cinfo.rcvegr_size;
+			   __entry->egrtids = cinfo->egrtids;
+			   __entry->rcvhdrq_cnt = cinfo->rcvhdrq_cnt;
+			   __entry->rcvhdrq_size = cinfo->rcvhdrq_entsize;
+			   __entry->sdma_ring_size = cinfo->sdma_ring_size;
+			   __entry->rcvegr_size = cinfo->rcvegr_size;
 			   ),
 	    TP_printk("[%s] ctxt %u:%u " CINFO_FMT,
 		      __get_str(dev),
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
index 80d3aa0fc9d3..7e298148ca26 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
@@ -218,17 +218,17 @@ nfp_bpf_cmsg_communicate(struct nfp_app_bpf *bpf, struct sk_buff *skb,
 		return skb;
 
 	hdr = (struct cmsg_hdr *)skb->data;
-	/* 0 reply_size means caller will do the validation */
-	if (reply_size && skb->len != reply_size) {
-		cmsg_warn(bpf, "cmsg drop - wrong size %d != %d!\n",
-			  skb->len, reply_size);
-		goto err_free;
-	}
 	if (hdr->type != __CMSG_REPLY(type)) {
 		cmsg_warn(bpf, "cmsg drop - wrong type 0x%02x != 0x%02lx!\n",
 			  hdr->type, __CMSG_REPLY(type));
 		goto err_free;
 	}
+	/* 0 reply_size means caller will do the validation */
+	if (reply_size && skb->len != reply_size) {
+		cmsg_warn(bpf, "cmsg drop - type 0x%02x wrong size %d != %d!\n",
+			  type, skb->len, reply_size);
+		goto err_free;
+	}
 
 	return skb;
 err_free:
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
index cfcc7bcb2c67..39639ac28b01 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
@@ -41,6 +41,7 @@ enum bpf_cap_tlv_type {
 	NFP_BPF_CAP_TYPE_FUNC		= 1,
 	NFP_BPF_CAP_TYPE_ADJUST_HEAD	= 2,
 	NFP_BPF_CAP_TYPE_MAPS		= 3,
+	NFP_BPF_CAP_TYPE_RANDOM		= 4,
 };
 
 struct nfp_bpf_cap_tlv_func {
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 56451edf01c2..4b631e26f199 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -103,23 +103,18 @@ nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
 /* --- Emitters --- */
 static void
 __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
-	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync, bool indir)
+	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx,
+	   bool indir)
 {
-	enum cmd_ctx_swap ctx;
 	u64 insn;
 
-	if (sync)
-		ctx = CMD_CTX_SWAP;
-	else
-		ctx = CMD_CTX_NO_SWAP;
-
 	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
 		FIELD_PREP(OP_CMD_CTX, ctx) |
 		FIELD_PREP(OP_CMD_B_SRC, breg) |
 		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
 		FIELD_PREP(OP_CMD_XFER, xfer) |
 		FIELD_PREP(OP_CMD_CNT, size) |
-		FIELD_PREP(OP_CMD_SIG, sync) |
+		FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) |
 		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
 		FIELD_PREP(OP_CMD_INDIR, indir) |
 		FIELD_PREP(OP_CMD_MODE, mode);
@@ -129,7 +124,7 @@ __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
 
 static void
 emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
-	     swreg lreg, swreg rreg, u8 size, bool sync, bool indir)
+	     swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir)
 {
 	struct nfp_insn_re_regs reg;
 	int err;
@@ -150,22 +145,22 @@ emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
 		return;
 	}
 
-	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync,
+	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx,
 		   indir);
 }
 
 static void
 emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
-	 swreg lreg, swreg rreg, u8 size, bool sync)
+	 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
 {
-	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, false);
+	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false);
 }
 
 static void
 emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
-	       swreg lreg, swreg rreg, u8 size, bool sync)
+	       swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
 {
-	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, true);
+	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true);
 }
 
 static void
@@ -410,7 +405,7 @@ __emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
 		FIELD_PREP(OP_LCSR_A_SRC, areg) |
 		FIELD_PREP(OP_LCSR_B_SRC, breg) |
 		FIELD_PREP(OP_LCSR_WRITE, wr) |
-		FIELD_PREP(OP_LCSR_ADDR, addr) |
+		FIELD_PREP(OP_LCSR_ADDR, addr / 4) |
 		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
 		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);
 
@@ -438,10 +433,16 @@ static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
 		return;
 	}
 
-	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4,
+	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr,
 		    false, reg.src_lmextn);
 }
 
+/* CSR value is read in following immed[gpr, 0] */
+static void __emit_csr_rd(struct nfp_prog *nfp_prog, u16 addr)
+{
+	__emit_lcsr(nfp_prog, 0, 0, false, addr, false, false);
+}
+
 static void emit_nop(struct nfp_prog *nfp_prog)
 {
 	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
@@ -553,6 +554,19 @@ wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
 	emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
 }
 
+/* wrp_reg_or_subpart() - load @field_len bytes from low end of @src, or the
+ * result to @dst from offset, there is no change on the other bits of @dst.
+ */
+static void
+wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src,
+		   u8 field_len, u8 offset)
+{
+	enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE;
+	u8 mask = ((1 << field_len) - 1) << offset;
+
+	emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8);
+}
+
 static void
 addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
 	      swreg *rega, swreg *regb)
@@ -597,7 +611,7 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	/* Memory read from source addr into transfer-in registers. */
 	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
 		     src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
-		     src_base, off, xfer_num - 1, true, len > 32);
+		     src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32);
 
 	/* Move from transfer-in to transfer-out. */
 	for (i = 0; i < xfer_num; i++)
@@ -609,39 +623,39 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 		/* Use single direct_ref write8. */
 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
 			 reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
-			 true);
+			 CMD_CTX_SWAP);
 	} else if (len <= 32 && IS_ALIGNED(len, 4)) {
 		/* Use single direct_ref write32. */
 		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
 			 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
-			 true);
+			 CMD_CTX_SWAP);
 	} else if (len <= 32) {
 		/* Use single indirect_ref write8. */
 		wrp_immed(nfp_prog, reg_none(),
 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
 			       reg_a(meta->paired_st->dst_reg * 2), off,
-			       len - 1, true);
+			       len - 1, CMD_CTX_SWAP);
 	} else if (IS_ALIGNED(len, 4)) {
 		/* Use single indirect_ref write32. */
 		wrp_immed(nfp_prog, reg_none(),
 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
 			       reg_a(meta->paired_st->dst_reg * 2), off,
-			       xfer_num - 1, true);
+			       xfer_num - 1, CMD_CTX_SWAP);
 	} else if (len <= 40) {
 		/* Use one direct_ref write32 to write the first 32-bytes, then
 		 * another direct_ref write8 to write the remaining bytes.
 		 */
 		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
 			 reg_a(meta->paired_st->dst_reg * 2), off, 7,
-			 true);
+			 CMD_CTX_SWAP);
 
 		off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
 				      imm_b(nfp_prog));
 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
 			 reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
-			 true);
+			 CMD_CTX_SWAP);
 	} else {
 		/* Use one indirect_ref write32 to write 4-bytes aligned length,
 		 * then another direct_ref write8 to write the remaining bytes.
@@ -652,12 +666,12 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
 			       reg_a(meta->paired_st->dst_reg * 2), off,
-			       xfer_num - 2, true);
+			       xfer_num - 2, CMD_CTX_SWAP);
 		new_off = meta->paired_st->off + (xfer_num - 1) * 4;
 		off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
 			 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
-			 (len & 0x3) - 1, true);
+			 (len & 0x3) - 1, CMD_CTX_SWAP);
 	}
 
 	/* TODO: The following extra load is to make sure data flow be identical
@@ -718,7 +732,7 @@ data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
 	shift = size < 4 ? 4 - size : 0;
 
 	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
-		 pptr_reg(nfp_prog), offset, sz - 1, true);
+		 pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP);
 
 	i = 0;
 	if (shift)
@@ -748,7 +762,7 @@ data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
 	mask = size < 4 ? GENMASK(size - 1, 0) : 0;
 
 	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
-		 lreg, rreg, sz / 4 - 1, true);
+		 lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP);
 
 	i = 0;
 	if (mask)
@@ -828,7 +842,7 @@ data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
 		wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));
 
 	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
-		 reg_a(dst_gpr), offset, size - 1, true);
+		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);
 
 	return 0;
 }
@@ -842,7 +856,7 @@ data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
 	wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);
 
 	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
-		 reg_a(dst_gpr), offset, size - 1, true);
+		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);
 
 	return 0;
 }
@@ -1339,7 +1353,7 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 }
 
 static int
-map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	struct bpf_offloaded_map *offmap;
 	struct nfp_bpf_map *nfp_map;
@@ -1353,19 +1367,21 @@ map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 
 	/* We only have to reload LM0 if the key is not at start of stack */
 	lm_off = nfp_prog->stack_depth;
-	lm_off += meta->arg2.var_off.value + meta->arg2.off;
-	load_lm_ptr = meta->arg2_var_off || lm_off;
+	lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off;
+	load_lm_ptr = meta->arg2.var_off || lm_off;
 
 	/* Set LM0 to start of key */
 	if (load_lm_ptr)
 		emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0);
+	if (meta->func_id == BPF_FUNC_map_update_elem)
+		emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);
 
 	/* Load map ID into a register, it should actually fit as an immediate
 	 * but in case it doesn't deal with it here, not in the delay slots.
 	 */
 	tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog));
 
-	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + BPF_FUNC_map_lookup_elem,
+	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
 		     2, RELO_BR_HELPER);
 	ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
 
@@ -1388,6 +1404,18 @@ map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }
 
+static int
+nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	__emit_csr_rd(nfp_prog, NFP_CSR_PSEUDO_RND_NUM);
+	/* CSR value is read in following immed[gpr, 0] */
+	emit_immed(nfp_prog, reg_both(0), 0,
+		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
+	emit_immed(nfp_prog, reg_both(1), 0,
+		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
+	return 0;
+}
+
 /* --- Callbacks --- */
 static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
@@ -1838,6 +1866,128 @@ mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 		       tmp_reg, meta->insn.dst_reg * 2, size);
 }
 
+static void
+mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog,
+			   struct nfp_insn_meta *meta)
+{
+	s16 range_start = meta->pkt_cache.range_start;
+	s16 range_end = meta->pkt_cache.range_end;
+	swreg src_base, off;
+	u8 xfer_num, len;
+	bool indir;
+
+	off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog));
+	src_base = reg_a(meta->insn.src_reg * 2);
+	len = range_end - range_start;
+	xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH;
+
+	indir = len > 8 * REG_WIDTH;
+	/* Setup PREV_ALU for indirect mode. */
+	if (indir)
+		wrp_immed(nfp_prog, reg_none(),
+			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
+
+	/* Cache memory into transfer-in registers. */
+	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
+		     off, xfer_num - 1, CMD_CTX_SWAP, indir);
+}
+
+static int
+mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog,
+				     struct nfp_insn_meta *meta,
+				     unsigned int size)
+{
+	s16 range_start = meta->pkt_cache.range_start;
+	s16 insn_off = meta->insn.off - range_start;
+	swreg dst_lo, dst_hi, src_lo, src_mid;
+	u8 dst_gpr = meta->insn.dst_reg * 2;
+	u8 len_lo = size, len_mid = 0;
+	u8 idx = insn_off / REG_WIDTH;
+	u8 off = insn_off % REG_WIDTH;
+
+	dst_hi = reg_both(dst_gpr + 1);
+	dst_lo = reg_both(dst_gpr);
+	src_lo = reg_xfer(idx);
+
+	/* The read length could involve as many as three registers. */
+	if (size > REG_WIDTH - off) {
+		/* Calculate the part in the second register. */
+		len_lo = REG_WIDTH - off;
+		len_mid = size - len_lo;
+
+		/* Calculate the part in the third register. */
+		if (size > 2 * REG_WIDTH - off)
+			len_mid = REG_WIDTH;
+	}
+
+	wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);
+
+	if (!len_mid) {
+		wrp_immed(nfp_prog, dst_hi, 0);
+		return 0;
+	}
+
+	src_mid = reg_xfer(idx + 1);
+
+	if (size <= REG_WIDTH) {
+		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
+		wrp_immed(nfp_prog, dst_hi, 0);
+	} else {
+		swreg src_hi = reg_xfer(idx + 2);
+
+		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid,
+				   REG_WIDTH - len_lo, len_lo);
+		wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo,
+				REG_WIDTH - len_lo);
+		wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo,
+				   len_lo);
+	}
+
+	return 0;
+}
+
+static int
+mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog,
+				   struct nfp_insn_meta *meta,
+				   unsigned int size)
+{
+	swreg dst_lo, dst_hi, src_lo;
+	u8 dst_gpr, idx;
+
+	idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH;
+	dst_gpr = meta->insn.dst_reg * 2;
+	dst_hi = reg_both(dst_gpr + 1);
+	dst_lo = reg_both(dst_gpr);
+	src_lo = reg_xfer(idx);
+
+	if (size < REG_WIDTH) {
+		wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
+		wrp_immed(nfp_prog, dst_hi, 0);
+	} else if (size == REG_WIDTH) {
+		wrp_mov(nfp_prog, dst_lo, src_lo);
+		wrp_immed(nfp_prog, dst_hi, 0);
+	} else {
+		swreg src_hi = reg_xfer(idx + 1);
+
+		wrp_mov(nfp_prog, dst_lo, src_lo);
+		wrp_mov(nfp_prog, dst_hi, src_hi);
+	}
+
+	return 0;
+}
+
+static int
+mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog,
+			   struct nfp_insn_meta *meta, unsigned int size)
+{
+	u8 off = meta->insn.off - meta->pkt_cache.range_start;
+
+	if (IS_ALIGNED(off, REG_WIDTH))
+		return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size);
+
+	return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size);
+}
+
 static int
 mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 	unsigned int size)
@@ -1852,8 +2002,16 @@ mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 		return mem_ldx_skb(nfp_prog, meta, size);
 	}
 
-	if (meta->ptr.type == PTR_TO_PACKET)
-		return mem_ldx_data(nfp_prog, meta, size);
+	if (meta->ptr.type == PTR_TO_PACKET) {
+		if (meta->pkt_cache.range_end) {
+			if (meta->pkt_cache.do_init)
+				mem_ldx_data_init_pktcache(nfp_prog, meta);
+
+			return mem_ldx_data_from_pktcache(nfp_prog, meta, size);
+		} else {
+			return mem_ldx_data(nfp_prog, meta, size);
+		}
+	}
 
 	if (meta->ptr.type == PTR_TO_STACK)
 		return mem_ldx_stack(nfp_prog, meta, size,
@@ -1982,6 +2140,111 @@ static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return mem_stx(nfp_prog, meta, 8);
 }
 
+static int
+mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
+{
+	u8 dst_gpr = meta->insn.dst_reg * 2;
+	u8 src_gpr = meta->insn.src_reg * 2;
+	unsigned int full_add, out;
+	swreg addra, addrb, off;
+
+	off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
+
+	/* We can fit 16 bits into command immediate, if we know the immediate
+	 * is guaranteed to either always or never fit into 16 bit we only
+	 * generate code to handle that particular case, otherwise generate
+	 * code for both.
+	 */
+	out = nfp_prog_current_offset(nfp_prog);
+	full_add = nfp_prog_current_offset(nfp_prog);
+
+	if (meta->insn.off) {
+		out += 2;
+		full_add += 2;
+	}
+	if (meta->xadd_maybe_16bit) {
+		out += 3;
+		full_add += 3;
+	}
+	if (meta->xadd_over_16bit)
+		out += 2 + is64;
+	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
+		out += 5;
+		full_add += 5;
+	}
+
+	/* Generate the branch for choosing add_imm vs add */
+	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
+		swreg max_imm = imm_a(nfp_prog);
+
+		wrp_immed(nfp_prog, max_imm, 0xffff);
+		emit_alu(nfp_prog, reg_none(),
+			 max_imm, ALU_OP_SUB, reg_b(src_gpr));
+		emit_alu(nfp_prog, reg_none(),
+			 reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1));
+		emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0);
+		/* defer for add */
+	}
+
+	/* If insn has an offset add to the address */
+	if (!meta->insn.off) {
+		addra = reg_a(dst_gpr);
+		addrb = reg_b(dst_gpr + 1);
+	} else {
+		emit_alu(nfp_prog, imma_a(nfp_prog),
+			 reg_a(dst_gpr), ALU_OP_ADD, off);
+		emit_alu(nfp_prog, imma_b(nfp_prog),
+			 reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0));
+		addra = imma_a(nfp_prog);
+		addrb = imma_b(nfp_prog);
+	}
+
+	/* Generate the add_imm if 16 bits are possible */
+	if (meta->xadd_maybe_16bit) {
+		swreg prev_alu = imm_a(nfp_prog);
+
+		wrp_immed(nfp_prog, prev_alu,
+			  FIELD_PREP(CMD_OVE_DATA, 2) |
+			  CMD_OVE_LEN |
+			  FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
+		wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
+		emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
+			       addra, addrb, 0, CMD_CTX_NO_SWAP);
+
+		if (meta->xadd_over_16bit)
+			emit_br(nfp_prog, BR_UNC, out, 0);
+	}
+
+	if (!nfp_prog_confirm_current_offset(nfp_prog, full_add))
+		return -EINVAL;
+
+	/* Generate the add if 16 bits are not guaranteed */
+	if (meta->xadd_over_16bit) {
+		emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0,
+			 addra, addrb, is64 << 2,
+			 is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1);
+
+		wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr));
+		if (is64)
+			wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1));
+	}
+
+	if (!nfp_prog_confirm_current_offset(nfp_prog, out))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return mem_xadd(nfp_prog, meta, false);
+}
+
+static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return mem_xadd(nfp_prog, meta, true);
+}
+
 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
@@ -2183,7 +2446,11 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	case BPF_FUNC_xdp_adjust_head:
 		return adjust_head(nfp_prog, meta);
 	case BPF_FUNC_map_lookup_elem:
-		return map_lookup_stack(nfp_prog, meta);
+	case BPF_FUNC_map_update_elem:
+	case BPF_FUNC_map_delete_elem:
+		return map_call_stack_common(nfp_prog, meta);
+	case BPF_FUNC_get_prandom_u32:
+		return nfp_get_prandom_u32(nfp_prog, meta);
 	default:
 		WARN_ONCE(1, "verifier allowed unsupported function\n");
 		return -EOPNOTSUPP;
@@ -2243,6 +2510,8 @@ static const instr_cb_t instr_cb[256] = {
 	[BPF_STX | BPF_MEM | BPF_H] =	mem_stx2,
 	[BPF_STX | BPF_MEM | BPF_W] =	mem_stx4,
 	[BPF_STX | BPF_MEM | BPF_DW] =	mem_stx8,
+	[BPF_STX | BPF_XADD | BPF_W] =	mem_xadd4,
+	[BPF_STX | BPF_XADD | BPF_DW] =	mem_xadd8,
 	[BPF_ST | BPF_MEM | BPF_B] =	mem_st1,
 	[BPF_ST | BPF_MEM | BPF_H] =	mem_st2,
 	[BPF_ST | BPF_MEM | BPF_W] =	mem_st4,
@@ -2821,6 +3090,120 @@ static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
 	}
 }
 
+static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta, *range_node = NULL;
+	s16 range_start = 0, range_end = 0;
+	bool cache_avail = false;
+	struct bpf_insn *insn;
+	s32 range_ptr_off = 0;
+	u32 range_ptr_id = 0;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		if (meta->flags & FLAG_INSN_IS_JUMP_DST)
+			cache_avail = false;
+
+		if (meta->skip)
+			continue;
+
+		insn = &meta->insn;
+
+		if (is_mbpf_store_pkt(meta) ||
+		    insn->code == (BPF_JMP | BPF_CALL) ||
+		    is_mbpf_classic_store_pkt(meta) ||
+		    is_mbpf_classic_load(meta)) {
+			cache_avail = false;
+			continue;
+		}
+
+		if (!is_mbpf_load(meta))
+			continue;
+
+		if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) {
+			cache_avail = false;
+			continue;
+		}
+
+		if (!cache_avail) {
+			cache_avail = true;
+			if (range_node)
+				goto end_current_then_start_new;
+			goto start_new;
+		}
+
+		/* Check ID to make sure two reads share the same
+		 * variable offset against PTR_TO_PACKET, and check OFF
+		 * to make sure they also share the same constant
+		 * offset.
+		 *
+		 * OFFs don't really need to be the same, because they
+		 * are the constant offsets against PTR_TO_PACKET, so
+		 * for different OFFs, we could canonicalize them to
+		 * offsets against original packet pointer. We don't
+		 * support this.
+		 */
+		if (meta->ptr.id == range_ptr_id &&
+		    meta->ptr.off == range_ptr_off) {
+			s16 new_start = range_start;
+			s16 end, off = insn->off;
+			s16 new_end = range_end;
+			bool changed = false;
+
+			if (off < range_start) {
+				new_start = off;
+				changed = true;
+			}
+
+			end = off + BPF_LDST_BYTES(insn);
+			if (end > range_end) {
+				new_end = end;
+				changed = true;
+			}
+
+			if (!changed)
+				continue;
+
+			if (new_end - new_start <= 64) {
+				/* Install new range. */
+				range_start = new_start;
+				range_end = new_end;
+				continue;
+			}
+		}
+
+end_current_then_start_new:
+		range_node->pkt_cache.range_start = range_start;
+		range_node->pkt_cache.range_end = range_end;
+start_new:
+		range_node = meta;
+		range_node->pkt_cache.do_init = true;
+		range_ptr_id = range_node->ptr.id;
+		range_ptr_off = range_node->ptr.off;
+		range_start = insn->off;
+		range_end = insn->off + BPF_LDST_BYTES(insn);
+	}
+
+	if (range_node) {
+		range_node->pkt_cache.range_start = range_start;
+		range_node->pkt_cache.range_end = range_end;
+	}
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		if (meta->skip)
+			continue;
+
+		if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) {
+			if (meta->pkt_cache.do_init) {
+				range_start = meta->pkt_cache.range_start;
+				range_end = meta->pkt_cache.range_end;
+			} else {
+				meta->pkt_cache.range_start = range_start;
+				meta->pkt_cache.range_end = range_end;
+			}
+		}
+	}
+}
+
 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
 {
 	nfp_bpf_opt_reg_init(nfp_prog);
@@ -2828,6 +3211,7 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
 	nfp_bpf_opt_ld_mask(nfp_prog);
 	nfp_bpf_opt_ld_shift(nfp_prog);
 	nfp_bpf_opt_ldst_gather(nfp_prog);
+	nfp_bpf_opt_pkt_cache(nfp_prog);
 
 	return 0;
 }
@@ -2952,6 +3336,12 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
 	case BPF_FUNC_map_lookup_elem:
 		val = nfp_prog->bpf->helpers.map_lookup;
 		break;
+	case BPF_FUNC_map_update_elem:
+		val = nfp_prog->bpf->helpers.map_update;
+		break;
+	case BPF_FUNC_map_delete_elem:
+		val = nfp_prog->bpf->helpers.map_delete;
+		break;
 	default:
 		pr_err("relocation of unknown helper %d\n",
 		       val);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 752c45763ed9..1dc424685f4e 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -284,6 +284,12 @@ nfp_bpf_parse_cap_func(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
 	case BPF_FUNC_map_lookup_elem:
 		bpf->helpers.map_lookup = readl(&cap->func_addr);
 		break;
+	case BPF_FUNC_map_update_elem:
+		bpf->helpers.map_update = readl(&cap->func_addr);
+		break;
+	case BPF_FUNC_map_delete_elem:
+		bpf->helpers.map_delete = readl(&cap->func_addr);
+		break;
 	}
 
 	return 0;
@@ -309,6 +315,14 @@ nfp_bpf_parse_cap_maps(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
 	return 0;
 }
 
+static int
+nfp_bpf_parse_cap_random(struct nfp_app_bpf *bpf, void __iomem *value,
+			 u32 length)
+{
+	bpf->pseudo_random = true;
+	return 0;
+}
+
 static int nfp_bpf_parse_capabilities(struct nfp_app *app)
 {
 	struct nfp_cpp *cpp = app->pf->cpp;
@@ -347,6 +361,10 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
 		if (nfp_bpf_parse_cap_maps(app->priv, value, length))
 			goto err_release_free;
 		break;
+	case NFP_BPF_CAP_TYPE_RANDOM:
+		if (nfp_bpf_parse_cap_random(app->priv, value, length))
+			goto err_release_free;
+		break;
 	default:
 		nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
 		break;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 054df3dc0698..4981c8944ca3 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -72,6 +72,7 @@ enum nfp_relo_type {
 #define BR_OFF_RELO		15000
 
 enum static_regs {
+	STATIC_REG_IMMA		= 20, /* Bank AB */
 	STATIC_REG_IMM		= 21, /* Bank AB */
 	STATIC_REG_STACK	= 22, /* Bank A */
 	STATIC_REG_PKT_LEN	= 22, /* Bank B */
@@ -91,6 +92,8 @@ enum pkt_vec {
 #define pptr_reg(np)	pv_ctm_ptr(np)
 #define imm_a(np)	reg_a(STATIC_REG_IMM)
 #define imm_b(np)	reg_b(STATIC_REG_IMM)
+#define imma_a(np)	reg_a(STATIC_REG_IMMA)
+#define imma_b(np)	reg_b(STATIC_REG_IMMA)
 #define imm_both(np)	reg_both(STATIC_REG_IMM)
 
 #define NFP_BPF_ABI_FLAGS	reg_imm(0)
@@ -128,6 +131,10 @@ enum pkt_vec {
  *
  * @helpers:		helper addressess for various calls
  * @helpers.map_lookup:	map lookup helper address
+ * @helpers.map_update:	map update helper address
+ * @helpers.map_delete:	map delete helper address
+ *
+ * @pseudo_random:	FW initialized the pseudo-random machinery (CSRs)
  */
 struct nfp_app_bpf {
 	struct nfp_app *app;
@@ -162,7 +169,18 @@ struct nfp_app_bpf {
 
 	struct {
 		u32 map_lookup;
+		u32 map_update;
+		u32 map_delete;
 	} helpers;
+
+	bool pseudo_random;
+};
+
+enum nfp_bpf_map_use {
+	NFP_MAP_UNUSED = 0,
+	NFP_MAP_USE_READ,
+	NFP_MAP_USE_WRITE,
+	NFP_MAP_USE_ATOMIC_CNT,
 };
 
 /**
@@ -171,12 +189,14 @@ struct nfp_app_bpf {
  * @bpf:	back pointer to bpf app private structure
  * @tid:	table id identifying map on datapath
  * @l:		link on the nfp_app_bpf->map_list list
+ * @use_map:	map of how the value is used (in 4B chunks)
  */
 struct nfp_bpf_map {
 	struct bpf_offloaded_map *offmap;
 	struct nfp_app_bpf *bpf;
 	u32 tid;
 	struct list_head l;
+	enum nfp_bpf_map_use use_map[];
 };
 
 struct nfp_prog;
@@ -190,6 +210,16 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
 #define nfp_meta_next(meta)	list_next_entry(meta, l)
 #define nfp_meta_prev(meta)	list_prev_entry(meta, l)
 
+/**
+ * struct nfp_bpf_reg_state - register state for calls
+ * @reg: BPF register state from latest path
+ * @var_off: for stack arg - changes stack offset on different paths
+ */
+struct nfp_bpf_reg_state {
+	struct bpf_reg_state reg;
+	bool var_off;
+};
+
 #define FLAG_INSN_IS_JUMP_DST	BIT(0)
 
 /**
@@ -199,11 +229,16 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
  * @ldst_gather_len: memcpy length gathered from load/store sequence
  * @paired_st: the paired store insn at the head of the sequence
  * @ptr_not_const: pointer is not always constant
+ * @pkt_cache: packet data cache information
+ * @pkt_cache.range_start: start offset for associated packet data cache
+ * @pkt_cache.range_end: end offset for associated packet data cache
+ * @pkt_cache.do_init: this read needs to initialize packet data cache
+ * @xadd_over_16bit: 16bit immediate is not guaranteed
+ * @xadd_maybe_16bit: 16bit immediate is possible
  * @jmp_dst: destination info for jump instructions
  * @func_id: function id for call instructions
  * @arg1: arg1 for call instructions
  * @arg2: arg2 for call instructions
- * @arg2_var_off: arg2 changes stack offset on different paths
  * @off: index of first generated machine instruction (in nfp_prog.prog)
  * @n: eBPF instruction number
  * @flags: eBPF instruction extra optimization flags
@@ -214,18 +249,27 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
 struct nfp_insn_meta {
 	struct bpf_insn insn;
 	union {
+		/* pointer ops (ld/st/xadd) */
 		struct {
 			struct bpf_reg_state ptr;
 			struct bpf_insn *paired_st;
 			s16 ldst_gather_len;
 			bool ptr_not_const;
+			struct {
+				s16 range_start;
+				s16 range_end;
+				bool do_init;
+			} pkt_cache;
+			bool xadd_over_16bit;
+			bool xadd_maybe_16bit;
 		};
+		/* jump */
 		struct nfp_insn_meta *jmp_dst;
+		/* function calls */
 		struct {
 			u32 func_id;
 			struct bpf_reg_state arg1;
-			struct bpf_reg_state arg2;
-			bool arg2_var_off;
+			struct nfp_bpf_reg_state arg2;
 		};
 	};
 	unsigned int off;
@@ -269,6 +313,41 @@ static inline bool is_mbpf_store(const struct nfp_insn_meta *meta)
 	return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM);
 }
 
+static inline bool is_mbpf_load_pkt(const struct nfp_insn_meta *meta)
+{
+	return is_mbpf_load(meta) && meta->ptr.type == PTR_TO_PACKET;
+}
+
+static inline bool is_mbpf_store_pkt(const struct nfp_insn_meta *meta)
+{
+	return is_mbpf_store(meta) && meta->ptr.type == PTR_TO_PACKET;
+}
+
+static inline bool is_mbpf_classic_load(const struct nfp_insn_meta *meta)
+{
+	u8 code = meta->insn.code;
+
+	return BPF_CLASS(code) == BPF_LD &&
+	       (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND);
+}
+
+static inline bool is_mbpf_classic_store(const struct nfp_insn_meta *meta)
+{
+	u8 code = meta->insn.code;
+
+	return BPF_CLASS(code) == BPF_ST && BPF_MODE(code) == BPF_MEM;
+}
+
+static inline bool is_mbpf_classic_store_pkt(const struct nfp_insn_meta *meta)
+{
+	return is_mbpf_classic_store(meta) && meta->ptr.type == PTR_TO_PACKET;
+}
+
+static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
+{
+	return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD);
+}
+
 /**
  * struct nfp_prog - nfp BPF program
  * @bpf: backpointer to the bpf app priv structure
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 0a7732385469..42d98792bd25 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -164,6 +164,41 @@ static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
 	return 0;
 }
 
+/* Atomic engine requires values to be in big endian, we need to byte swap
+ * the value words used with xadd.
+ */
+static void nfp_map_bpf_byte_swap(struct nfp_bpf_map *nfp_map, void *value)
+{
+	u32 *word = value;
+	unsigned int i;
+
+	for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++)
+		if (nfp_map->use_map[i] == NFP_MAP_USE_ATOMIC_CNT)
+			word[i] = (__force u32)cpu_to_be32(word[i]);
+}
+
+static int
+nfp_bpf_map_lookup_entry(struct bpf_offloaded_map *offmap,
+			 void *key, void *value)
+{
+	int err;
+
+	err = nfp_bpf_ctrl_lookup_entry(offmap, key, value);
+	if (err)
+		return err;
+
+	nfp_map_bpf_byte_swap(offmap->dev_priv, value);
+	return 0;
+}
+
+static int
+nfp_bpf_map_update_entry(struct bpf_offloaded_map *offmap,
+			 void *key, void *value, u64 flags)
+{
+	nfp_map_bpf_byte_swap(offmap->dev_priv, value);
+	return nfp_bpf_ctrl_update_entry(offmap, key, value, flags);
+}
+
 static int
 nfp_bpf_map_get_next_key(struct bpf_offloaded_map *offmap,
 			 void *key, void *next_key)
@@ -183,8 +218,8 @@ nfp_bpf_map_delete_elem(struct bpf_offloaded_map *offmap, void *key)
 
 static const struct bpf_map_dev_ops nfp_bpf_map_ops = {
 	.map_get_next_key	= nfp_bpf_map_get_next_key,
-	.map_lookup_elem	= nfp_bpf_ctrl_lookup_entry,
-	.map_update_elem	= nfp_bpf_ctrl_update_entry,
+	.map_lookup_elem	= nfp_bpf_map_lookup_entry,
+	.map_update_elem	= nfp_bpf_map_update_entry,
 	.map_delete_elem	= nfp_bpf_map_delete_elem,
 };
 
@@ -192,6 +227,7 @@ static int
 nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
 {
 	struct nfp_bpf_map *nfp_map;
+	unsigned int use_map_size;
 	long long int res;
 
 	if (!bpf->maps.types)
@@ -226,7 +262,10 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
 		return -ENOMEM;
 	}
 
-	nfp_map = kzalloc(sizeof(*nfp_map), GFP_USER);
+	use_map_size = DIV_ROUND_UP(offmap->map.value_size, 4) *
+		       FIELD_SIZEOF(struct nfp_bpf_map, use_map[0]);
+
+	nfp_map = kzalloc(sizeof(*nfp_map) + use_map_size, GFP_USER);
 	if (!nfp_map)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 479f602887e9..06ad53ce4ad9 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -97,7 +97,7 @@ nfp_record_adjust_head(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
 	if (nfp_prog->adjust_head_location != meta->n)
 		goto exit_set_location;
 
-	if (meta->arg2.var_off.value != imm)
+	if (meta->arg2.reg.var_off.value != imm)
 		goto exit_set_location;
 	}
 
@@ -107,14 +107,69 @@ exit_set_location:
 }
 
 static int
+nfp_bpf_stack_arg_ok(const char *fname, struct bpf_verifier_env *env,
+		     const struct bpf_reg_state *reg,
+		     struct nfp_bpf_reg_state *old_arg)
+{
+	s64 off, old_off;
+
+	if (reg->type != PTR_TO_STACK) {
+		pr_vlog(env, "%s: unsupported ptr type %d\n",
+			fname, reg->type);
+		return false;
+	}
+	if (!tnum_is_const(reg->var_off)) {
+		pr_vlog(env, "%s: variable pointer\n", fname);
+		return false;
+	}
+
+	off = reg->var_off.value + reg->off;
+	if (-off % 4) {
+		pr_vlog(env, "%s: unaligned stack pointer %lld\n", fname, -off);
+		return false;
+	}
+
+	/* Rest of the checks is only if we re-parse the same insn */
+	if (!old_arg)
+		return true;
+
+	old_off = old_arg->reg.var_off.value + old_arg->reg.off;
+	old_arg->var_off |= off != old_off;
+
+	return true;
+}
+
+static bool
+nfp_bpf_map_call_ok(const char *fname, struct bpf_verifier_env *env,
+		    struct nfp_insn_meta *meta,
+		    u32 helper_tgt, const struct bpf_reg_state *reg1)
+{
+	if (!helper_tgt) {
+		pr_vlog(env, "%s: not supported by FW\n", fname);
+		return false;
+	}
+
+	/* Rest of the checks is only if we re-parse the same insn */
+	if (!meta->func_id)
+		return true;
+
+	if (meta->arg1.map_ptr != reg1->map_ptr) {
+		pr_vlog(env, "%s: called for different map\n", fname);
+		return false;
+	}
+
+	return true;
+}
+
+static int
 nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
 		   struct nfp_insn_meta *meta)
 {
 	const struct bpf_reg_state *reg1 = cur_regs(env) + BPF_REG_1;
 	const struct bpf_reg_state *reg2 = cur_regs(env) + BPF_REG_2;
+	const struct bpf_reg_state *reg3 = cur_regs(env) + BPF_REG_3;
 	struct nfp_app_bpf *bpf = nfp_prog->bpf;
 	u32 func_id = meta->insn.imm;
-	s64 off, old_off;
 
 	switch (func_id) {
 	case BPF_FUNC_xdp_adjust_head:
@@ -131,41 +186,36 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
131 break; 186 break;
132 187
133 case BPF_FUNC_map_lookup_elem: 188 case BPF_FUNC_map_lookup_elem:
134 if (!bpf->helpers.map_lookup) { 189 if (!nfp_bpf_map_call_ok("map_lookup", env, meta,
135 pr_vlog(env, "map_lookup: not supported by FW\n"); 190 bpf->helpers.map_lookup, reg1) ||
191 !nfp_bpf_stack_arg_ok("map_lookup", env, reg2,
192 meta->func_id ? &meta->arg2 : NULL))
136 return -EOPNOTSUPP; 193 return -EOPNOTSUPP;
137 } 194 break;
138 if (reg2->type != PTR_TO_STACK) { 195
139 pr_vlog(env, 196 case BPF_FUNC_map_update_elem:
140 "map_lookup: unsupported key ptr type %d\n", 197 if (!nfp_bpf_map_call_ok("map_update", env, meta,
141 reg2->type); 198 bpf->helpers.map_update, reg1) ||
142 return -EOPNOTSUPP; 199 !nfp_bpf_stack_arg_ok("map_update", env, reg2,
143 } 200 meta->func_id ? &meta->arg2 : NULL) ||
144 if (!tnum_is_const(reg2->var_off)) { 201 !nfp_bpf_stack_arg_ok("map_update", env, reg3, NULL))
145 pr_vlog(env, "map_lookup: variable key pointer\n");
146 return -EOPNOTSUPP; 202 return -EOPNOTSUPP;
147 } 203 break;
148 204
149 off = reg2->var_off.value + reg2->off; 205 case BPF_FUNC_map_delete_elem:
150 if (-off % 4) { 206 if (!nfp_bpf_map_call_ok("map_delete", env, meta,
151 pr_vlog(env, 207 bpf->helpers.map_delete, reg1) ||
152 "map_lookup: unaligned stack pointer %lld\n", 208 !nfp_bpf_stack_arg_ok("map_delete", env, reg2,
153 -off); 209 meta->func_id ? &meta->arg2 : NULL))
154 return -EOPNOTSUPP; 210 return -EOPNOTSUPP;
155 } 211 break;
156 212
157 /* Rest of the checks is only if we re-parse the same insn */ 213 case BPF_FUNC_get_prandom_u32:
158 if (!meta->func_id) 214 if (bpf->pseudo_random)
159 break; 215 break;
216 pr_vlog(env, "bpf_get_prandom_u32(): FW doesn't support random number generation\n");
217 return -EOPNOTSUPP;
160 218
161 old_off = meta->arg2.var_off.value + meta->arg2.off;
162 meta->arg2_var_off |= off != old_off;
163
164 if (meta->arg1.map_ptr != reg1->map_ptr) {
165 pr_vlog(env, "map_lookup: called for different map\n");
166 return -EOPNOTSUPP;
167 }
168 break;
169 default: 219 default:
170 pr_vlog(env, "unsupported function id: %d\n", func_id); 220 pr_vlog(env, "unsupported function id: %d\n", func_id);
171 return -EOPNOTSUPP; 221 return -EOPNOTSUPP;
@@ -173,7 +223,7 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
173 223
174 meta->func_id = func_id; 224 meta->func_id = func_id;
175 meta->arg1 = *reg1; 225 meta->arg1 = *reg1;
176 meta->arg2 = *reg2; 226 meta->arg2.reg = *reg2;
177 227
178 return 0; 228 return 0;
179} 229}
@@ -242,6 +292,72 @@ nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog,
242 return -EINVAL; 292 return -EINVAL;
243} 293}
244 294
295static const char *nfp_bpf_map_use_name(enum nfp_bpf_map_use use)
296{
297 static const char * const names[] = {
298 [NFP_MAP_UNUSED] = "unused",
299 [NFP_MAP_USE_READ] = "read",
300 [NFP_MAP_USE_WRITE] = "write",
301 [NFP_MAP_USE_ATOMIC_CNT] = "atomic",
302 };
303
304 if (use >= ARRAY_SIZE(names) || !names[use])
305 return "unknown";
306 return names[use];
307}
308
309static int
310nfp_bpf_map_mark_used_one(struct bpf_verifier_env *env,
311 struct nfp_bpf_map *nfp_map,
312 unsigned int off, enum nfp_bpf_map_use use)
313{
314 if (nfp_map->use_map[off / 4] != NFP_MAP_UNUSED &&
315 nfp_map->use_map[off / 4] != use) {
316 pr_vlog(env, "map value use type conflict %s vs %s off: %u\n",
317 nfp_bpf_map_use_name(nfp_map->use_map[off / 4]),
318 nfp_bpf_map_use_name(use), off);
319 return -EOPNOTSUPP;
320 }
321
322 nfp_map->use_map[off / 4] = use;
323
324 return 0;
325}
326
327static int
328nfp_bpf_map_mark_used(struct bpf_verifier_env *env, struct nfp_insn_meta *meta,
329 const struct bpf_reg_state *reg,
330 enum nfp_bpf_map_use use)
331{
332 struct bpf_offloaded_map *offmap;
333 struct nfp_bpf_map *nfp_map;
334 unsigned int size, off;
335 int i, err;
336
337 if (!tnum_is_const(reg->var_off)) {
338 pr_vlog(env, "map value offset is variable\n");
339 return -EOPNOTSUPP;
340 }
341
342 off = reg->var_off.value + meta->insn.off + reg->off;
343 size = BPF_LDST_BYTES(&meta->insn);
344 offmap = map_to_offmap(reg->map_ptr);
345 nfp_map = offmap->dev_priv;
346
347 if (off + size > offmap->map.value_size) {
348 pr_vlog(env, "map value access out-of-bounds\n");
349 return -EINVAL;
350 }
351
352 for (i = 0; i < size; i += 4 - (off + i) % 4) {
353 err = nfp_bpf_map_mark_used_one(env, nfp_map, off + i, use);
354 if (err)
355 return err;
356 }
357
358 return 0;
359}
360
245static int 361static int
246nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 362nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
247 struct bpf_verifier_env *env, u8 reg_no) 363 struct bpf_verifier_env *env, u8 reg_no)
@@ -264,10 +380,22 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
264 } 380 }
265 381
266 if (reg->type == PTR_TO_MAP_VALUE) { 382 if (reg->type == PTR_TO_MAP_VALUE) {
383 if (is_mbpf_load(meta)) {
384 err = nfp_bpf_map_mark_used(env, meta, reg,
385 NFP_MAP_USE_READ);
386 if (err)
387 return err;
388 }
267 if (is_mbpf_store(meta)) { 389 if (is_mbpf_store(meta)) {
268 pr_vlog(env, "map writes not supported\n"); 390 pr_vlog(env, "map writes not supported\n");
269 return -EOPNOTSUPP; 391 return -EOPNOTSUPP;
270 } 392 }
393 if (is_mbpf_xadd(meta)) {
394 err = nfp_bpf_map_mark_used(env, meta, reg,
395 NFP_MAP_USE_ATOMIC_CNT);
396 if (err)
397 return err;
398 }
271 } 399 }
272 400
273 if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) { 401 if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) {
@@ -282,6 +410,31 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
282} 410}
283 411
284static int 412static int
413nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
414 struct bpf_verifier_env *env)
415{
416 const struct bpf_reg_state *sreg = cur_regs(env) + meta->insn.src_reg;
417 const struct bpf_reg_state *dreg = cur_regs(env) + meta->insn.dst_reg;
418
419 if (dreg->type != PTR_TO_MAP_VALUE) {
420 pr_vlog(env, "atomic add not to a map value pointer: %d\n",
421 dreg->type);
422 return -EOPNOTSUPP;
423 }
424 if (sreg->type != SCALAR_VALUE) {
425 pr_vlog(env, "atomic add not of a scalar: %d\n", sreg->type);
426 return -EOPNOTSUPP;
427 }
428
429 meta->xadd_over_16bit |=
430 sreg->var_off.value > 0xffff || sreg->var_off.mask > 0xffff;
431 meta->xadd_maybe_16bit |=
432 (sreg->var_off.value & ~sreg->var_off.mask) <= 0xffff;
433
434 return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg);
435}
436
437static int
285nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) 438nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
286{ 439{
287 struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv; 440 struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv;
@@ -313,6 +466,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
313 if (is_mbpf_store(meta)) 466 if (is_mbpf_store(meta))
314 return nfp_bpf_check_ptr(nfp_prog, meta, env, 467 return nfp_bpf_check_ptr(nfp_prog, meta, env,
315 meta->insn.dst_reg); 468 meta->insn.dst_reg);
469 if (is_mbpf_xadd(meta))
470 return nfp_bpf_check_xadd(nfp_prog, meta, env);
316 471
317 return 0; 472 return 0;
318} 473}
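The stride used by nfp_bpf_map_mark_used() above, i += 4 - (off + i) % 4, first advances to the next 4-byte boundary and then steps one word at a time, so an unaligned access marks every word it touches exactly once. A standalone, runnable sketch of the arithmetic:

#include <stdio.h>

static void mark_words(unsigned int off, unsigned int size)
{
        unsigned int i;

        /* same stride as nfp_bpf_map_mark_used(): align up, then step by 4 */
        for (i = 0; i < size; i += 4 - (off + i) % 4)
                printf("marking word %u\n", (off + i) / 4);
}

int main(void)
{
        mark_words(6, 4);       /* a 4-byte access at offset 6 spans words 1 and 2 */
        return 0;
}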
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
index 1e597600c693..cc6ace2be8a9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
@@ -48,6 +48,8 @@ const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
48 [CMD_TGT_READ32_SWAP] = { 0x02, 0x5c }, 48 [CMD_TGT_READ32_SWAP] = { 0x02, 0x5c },
49 [CMD_TGT_READ_LE] = { 0x01, 0x40 }, 49 [CMD_TGT_READ_LE] = { 0x01, 0x40 },
50 [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, 50 [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 },
51 [CMD_TGT_ADD] = { 0x00, 0x47 },
52 [CMD_TGT_ADD_IMM] = { 0x02, 0x47 },
51}; 53};
52 54
53static bool unreg_is_imm(u16 reg) 55static bool unreg_is_imm(u16 reg)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index 5f9291db98e0..5f2b2f24f4fa 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -39,6 +39,7 @@
39#include <linux/types.h> 39#include <linux/types.h>
40 40
41#define REG_NONE 0 41#define REG_NONE 0
42#define REG_WIDTH 4
42 43
43#define RE_REG_NO_DST 0x020 44#define RE_REG_NO_DST 0x020
44#define RE_REG_IMM 0x020 45#define RE_REG_IMM 0x020
@@ -237,6 +238,8 @@ enum cmd_tgt_map {
237 CMD_TGT_READ32_SWAP, 238 CMD_TGT_READ32_SWAP,
238 CMD_TGT_READ_LE, 239 CMD_TGT_READ_LE,
239 CMD_TGT_READ_SWAP_LE, 240 CMD_TGT_READ_SWAP_LE,
241 CMD_TGT_ADD,
242 CMD_TGT_ADD_IMM,
240 __CMD_TGT_MAP_SIZE, 243 __CMD_TGT_MAP_SIZE,
241}; 244};
242 245
@@ -250,9 +253,12 @@ enum cmd_mode {
250 253
251enum cmd_ctx_swap { 254enum cmd_ctx_swap {
252 CMD_CTX_SWAP = 0, 255 CMD_CTX_SWAP = 0,
256 CMD_CTX_SWAP_DEFER1 = 1,
257 CMD_CTX_SWAP_DEFER2 = 2,
253 CMD_CTX_NO_SWAP = 3, 258 CMD_CTX_NO_SWAP = 3,
254}; 259};
255 260
261#define CMD_OVE_DATA GENMASK(5, 3)
256#define CMD_OVE_LEN BIT(7) 262#define CMD_OVE_LEN BIT(7)
257#define CMD_OV_LEN GENMASK(12, 8) 263#define CMD_OV_LEN GENMASK(12, 8)
258 264
@@ -278,6 +284,7 @@ enum lcsr_wr_src {
278#define NFP_CSR_ACT_LM_ADDR1 0x6c 284#define NFP_CSR_ACT_LM_ADDR1 0x6c
279#define NFP_CSR_ACT_LM_ADDR2 0x94 285#define NFP_CSR_ACT_LM_ADDR2 0x94
280#define NFP_CSR_ACT_LM_ADDR3 0x9c 286#define NFP_CSR_ACT_LM_ADDR3 0x9c
287#define NFP_CSR_PSEUDO_RND_NUM 0x148
281 288
282/* Software register representation, independent of operand type */ 289/* Software register representation, independent of operand type */
283#define NN_REG_TYPE GENMASK(31, 24) 290#define NN_REG_TYPE GENMASK(31, 24)
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/main.c b/drivers/net/wireless/intel/iwlwifi/dvm/main.c
index d11d72615de2..e68254e12764 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/main.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/main.c
@@ -1651,12 +1651,7 @@ static void iwl_dump_nic_error_log(struct iwl_priv *priv)
1651 priv->status, table.valid); 1651 priv->status, table.valid);
1652 } 1652 }
1653 1653
1654 trace_iwlwifi_dev_ucode_error(trans->dev, table.error_id, table.tsf_low, 1654 trace_iwlwifi_dev_ucode_error(trans->dev, &table, 0, table.brd_ver);
1655 table.data1, table.data2, table.line,
1656 table.blink2, table.ilink1, table.ilink2,
1657 table.bcon_time, table.gp1, table.gp2,
1658 table.gp3, table.ucode_ver, table.hw_ver,
1659 0, table.brd_ver);
1660 IWL_ERR(priv, "0x%08X | %-28s\n", table.error_id, 1655 IWL_ERR(priv, "0x%08X | %-28s\n", table.error_id,
1661 desc_lookup(table.error_id)); 1656 desc_lookup(table.error_id));
1662 IWL_ERR(priv, "0x%08X | uPc\n", table.pc); 1657 IWL_ERR(priv, "0x%08X | uPc\n", table.pc);
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-iwlwifi.h b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-iwlwifi.h
index 9518a82f44c2..27e3e4e96aa2 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-iwlwifi.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-iwlwifi.h
@@ -126,14 +126,11 @@ TRACE_EVENT(iwlwifi_dev_tx,
126 __entry->framelen, __entry->skbaddr) 126 __entry->framelen, __entry->skbaddr)
127); 127);
128 128
129struct iwl_error_event_table;
129TRACE_EVENT(iwlwifi_dev_ucode_error, 130TRACE_EVENT(iwlwifi_dev_ucode_error,
130 TP_PROTO(const struct device *dev, u32 desc, u32 tsf_low, 131 TP_PROTO(const struct device *dev, const struct iwl_error_event_table *table,
131 u32 data1, u32 data2, u32 line, u32 blink2, u32 ilink1, 132 u32 hw_ver, u32 brd_ver),
132 u32 ilink2, u32 bcon_time, u32 gp1, u32 gp2, u32 rev_type, 133 TP_ARGS(dev, table, hw_ver, brd_ver),
133 u32 major, u32 minor, u32 hw_ver, u32 brd_ver),
134 TP_ARGS(dev, desc, tsf_low, data1, data2, line,
135 blink2, ilink1, ilink2, bcon_time, gp1, gp2,
136 rev_type, major, minor, hw_ver, brd_ver),
137 TP_STRUCT__entry( 134 TP_STRUCT__entry(
138 DEV_ENTRY 135 DEV_ENTRY
139 __field(u32, desc) 136 __field(u32, desc)
@@ -155,20 +152,20 @@ TRACE_EVENT(iwlwifi_dev_ucode_error,
155 ), 152 ),
156 TP_fast_assign( 153 TP_fast_assign(
157 DEV_ASSIGN; 154 DEV_ASSIGN;
158 __entry->desc = desc; 155 __entry->desc = table->error_id;
159 __entry->tsf_low = tsf_low; 156 __entry->tsf_low = table->tsf_low;
160 __entry->data1 = data1; 157 __entry->data1 = table->data1;
161 __entry->data2 = data2; 158 __entry->data2 = table->data2;
162 __entry->line = line; 159 __entry->line = table->line;
163 __entry->blink2 = blink2; 160 __entry->blink2 = table->blink2;
164 __entry->ilink1 = ilink1; 161 __entry->ilink1 = table->ilink1;
165 __entry->ilink2 = ilink2; 162 __entry->ilink2 = table->ilink2;
166 __entry->bcon_time = bcon_time; 163 __entry->bcon_time = table->bcon_time;
167 __entry->gp1 = gp1; 164 __entry->gp1 = table->gp1;
168 __entry->gp2 = gp2; 165 __entry->gp2 = table->gp2;
169 __entry->rev_type = rev_type; 166 __entry->rev_type = table->gp3;
170 __entry->major = major; 167 __entry->major = table->ucode_ver;
171 __entry->minor = minor; 168 __entry->minor = table->hw_ver;
172 __entry->hw_ver = hw_ver; 169 __entry->hw_ver = hw_ver;
173 __entry->brd_ver = brd_ver; 170 __entry->brd_ver = brd_ver;
174 ), 171 ),
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c
index 50510fb6ab8c..6aa719865a58 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c
@@ -30,6 +30,7 @@
30#ifndef __CHECKER__ 30#ifndef __CHECKER__
31#include "iwl-trans.h" 31#include "iwl-trans.h"
32 32
33#include "dvm/commands.h"
33#define CREATE_TRACE_POINTS 34#define CREATE_TRACE_POINTS
34#include "iwl-devtrace.h" 35#include "iwl-devtrace.h"
35 36
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index bebcfb44c8c2..d99d9ea78e4c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -549,12 +549,7 @@ static void iwl_mvm_dump_lmac_error_log(struct iwl_mvm *mvm, u32 base)
549 549
550 IWL_ERR(mvm, "Loaded firmware version: %s\n", mvm->fw->fw_version); 550 IWL_ERR(mvm, "Loaded firmware version: %s\n", mvm->fw->fw_version);
551 551
552 trace_iwlwifi_dev_ucode_error(trans->dev, table.error_id, table.tsf_low, 552 trace_iwlwifi_dev_ucode_error(trans->dev, &table, table.hw_ver, table.brd_ver);
553 table.data1, table.data2, table.data3,
554 table.blink2, table.ilink1,
555 table.ilink2, table.bcon_time, table.gp1,
556 table.gp2, table.fw_rev_type, table.major,
557 table.minor, table.hw_ver, table.brd_ver);
558 IWL_ERR(mvm, "0x%08X | %-28s\n", table.error_id, 553 IWL_ERR(mvm, "0x%08X | %-28s\n", table.error_id,
559 desc_lookup(table.error_id)); 554 desc_lookup(table.error_id));
560 IWL_ERR(mvm, "0x%08X | trm_hw_status0\n", table.trm_hw_status0); 555 IWL_ERR(mvm, "0x%08X | trm_hw_status0\n", table.trm_hw_status0);
diff --git a/drivers/net/wireless/mediatek/mt7601u/trace.h b/drivers/net/wireless/mediatek/mt7601u/trace.h
index 289897300ef0..82c8898b9076 100644
--- a/drivers/net/wireless/mediatek/mt7601u/trace.h
+++ b/drivers/net/wireless/mediatek/mt7601u/trace.h
@@ -34,7 +34,7 @@
34#define REG_PR_FMT "%04x=%08x" 34#define REG_PR_FMT "%04x=%08x"
35#define REG_PR_ARG __entry->reg, __entry->val 35#define REG_PR_ARG __entry->reg, __entry->val
36 36
37DECLARE_EVENT_CLASS(dev_reg_evt, 37DECLARE_EVENT_CLASS(dev_reg_evtu,
38 TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val), 38 TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val),
39 TP_ARGS(dev, reg, val), 39 TP_ARGS(dev, reg, val),
40 TP_STRUCT__entry( 40 TP_STRUCT__entry(
@@ -51,12 +51,12 @@ DECLARE_EVENT_CLASS(dev_reg_evt,
51 ) 51 )
52); 52);
53 53
54DEFINE_EVENT(dev_reg_evt, reg_read, 54DEFINE_EVENT(dev_reg_evtu, reg_read,
55 TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val), 55 TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val),
56 TP_ARGS(dev, reg, val) 56 TP_ARGS(dev, reg, val)
57); 57);
58 58
59DEFINE_EVENT(dev_reg_evt, reg_write, 59DEFINE_EVENT(dev_reg_evtu, reg_write,
60 TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val), 60 TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val),
61 TP_ARGS(dev, reg, val) 61 TP_ARGS(dev, reg, val)
62); 62);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 1ab0e520d6fc..8add3493a202 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -178,6 +178,15 @@
178#define TRACE_SYSCALLS() 178#define TRACE_SYSCALLS()
179#endif 179#endif
180 180
181#ifdef CONFIG_BPF_EVENTS
182#define BPF_RAW_TP() STRUCT_ALIGN(); \
183 VMLINUX_SYMBOL(__start__bpf_raw_tp) = .; \
184 KEEP(*(__bpf_raw_tp_map)) \
185 VMLINUX_SYMBOL(__stop__bpf_raw_tp) = .;
186#else
187#define BPF_RAW_TP()
188#endif
189
181#ifdef CONFIG_SERIAL_EARLYCON 190#ifdef CONFIG_SERIAL_EARLYCON
182#define EARLYCON_TABLE() STRUCT_ALIGN(); \ 191#define EARLYCON_TABLE() STRUCT_ALIGN(); \
183 VMLINUX_SYMBOL(__earlycon_table) = .; \ 192 VMLINUX_SYMBOL(__earlycon_table) = .; \
@@ -249,6 +258,7 @@
249 LIKELY_PROFILE() \ 258 LIKELY_PROFILE() \
250 BRANCH_PROFILE() \ 259 BRANCH_PROFILE() \
251 TRACE_PRINTKS() \ 260 TRACE_PRINTKS() \
261 BPF_RAW_TP() \
252 TRACEPOINT_STR() 262 TRACEPOINT_STR()
253 263
254/* 264/*
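BPF_RAW_TP() gathers every struct bpf_raw_event_map emitted into the __bpf_raw_tp_map section between a pair of start/stop symbols. A sketch of how such a section is typically consumed, modeled on what a lookup like bpf_find_raw_tracepoint() would do (assumed shape, not a verbatim copy of the implementation):

#include <linux/string.h>
#include <linux/tracepoint-defs.h>

extern struct bpf_raw_event_map __start__bpf_raw_tp[];
extern struct bpf_raw_event_map __stop__bpf_raw_tp[];

static struct bpf_raw_event_map *find_raw_tp(const char *name)
{
        struct bpf_raw_event_map *btp = __start__bpf_raw_tp;

        /* linear scan over everything KEEP(*(__bpf_raw_tp_map)) gathered */
        for (; btp < __stop__bpf_raw_tp; btp++)
                if (!strcmp(btp->tp->name, name))
                        return btp;
        return NULL;
}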
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 8a4566691c8f..30d15e64b993 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -6,6 +6,7 @@
6#include <uapi/linux/bpf.h> 6#include <uapi/linux/bpf.h>
7 7
8struct sock; 8struct sock;
9struct sockaddr;
9struct cgroup; 10struct cgroup;
10struct sk_buff; 11struct sk_buff;
11struct bpf_sock_ops_kern; 12struct bpf_sock_ops_kern;
@@ -63,6 +64,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
63int __cgroup_bpf_run_filter_sk(struct sock *sk, 64int __cgroup_bpf_run_filter_sk(struct sock *sk,
64 enum bpf_attach_type type); 65 enum bpf_attach_type type);
65 66
67int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
68 struct sockaddr *uaddr,
69 enum bpf_attach_type type);
70
66int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, 71int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
67 struct bpf_sock_ops_kern *sock_ops, 72 struct bpf_sock_ops_kern *sock_ops,
68 enum bpf_attach_type type); 73 enum bpf_attach_type type);
@@ -93,16 +98,64 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
93 __ret; \ 98 __ret; \
94}) 99})
95 100
96#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \ 101#define BPF_CGROUP_RUN_SK_PROG(sk, type) \
97({ \ 102({ \
98 int __ret = 0; \ 103 int __ret = 0; \
99 if (cgroup_bpf_enabled) { \ 104 if (cgroup_bpf_enabled) { \
100 __ret = __cgroup_bpf_run_filter_sk(sk, \ 105 __ret = __cgroup_bpf_run_filter_sk(sk, type); \
101 BPF_CGROUP_INET_SOCK_CREATE); \ 106 } \
107 __ret; \
108})
109
110#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
111 BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)
112
113#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \
114 BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)
115
116#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \
117 BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND)
118
119#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \
120({ \
121 int __ret = 0; \
122 if (cgroup_bpf_enabled) \
123 __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
124 __ret; \
125})
126
127#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type) \
128({ \
129 int __ret = 0; \
130 if (cgroup_bpf_enabled) { \
131 lock_sock(sk); \
132 __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
133 release_sock(sk); \
102 } \ 134 } \
103 __ret; \ 135 __ret; \
104}) 136})
105 137
138#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) \
139 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND)
140
141#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) \
142 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND)
143
144#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \
145 sk->sk_prot->pre_connect)
146
147#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
148 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
149
150#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \
151 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
152
153#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
154 BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
155
156#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
157 BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
158
106#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \ 159#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
107({ \ 160({ \
108 int __ret = 0; \ 161 int __ret = 0; \
@@ -132,9 +185,18 @@ struct cgroup_bpf {};
132static inline void cgroup_bpf_put(struct cgroup *cgrp) {} 185static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
133static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } 186static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
134 187
188#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
135#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) 189#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
136#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) 190#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
137#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) 191#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
192#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
193#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
194#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
195#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
196#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
197#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
198#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
199#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
138#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) 200#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
139#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) 201#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
140 202
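A sketch of how the bind-time hook above is meant to sit in a call path, assuming the macro semantics (a non-zero return aborts the operation before the address is used); the surrounding function and finish_bind() are hypothetical, for illustration only:

static int example_inet4_bind(struct sock *sk, struct sockaddr *uaddr,
                              int addr_len)
{
        int err;

        /* attached programs may reject or rewrite uaddr before it is used */
        err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr);
        if (err)
                return err;

        return finish_bind(sk, uaddr, addr_len);        /* hypothetical */
}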
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 819229c80eca..95a7abd0ee92 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -208,12 +208,15 @@ struct bpf_prog_ops {
208 208
209struct bpf_verifier_ops { 209struct bpf_verifier_ops {
210 /* return eBPF function prototype for verification */ 210 /* return eBPF function prototype for verification */
211 const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); 211 const struct bpf_func_proto *
212 (*get_func_proto)(enum bpf_func_id func_id,
213 const struct bpf_prog *prog);
212 214
213 /* return true if 'size' wide access at offset 'off' within bpf_context 215 /* return true if 'size' wide access at offset 'off' within bpf_context
214 * with 'type' (read or write) is allowed 216 * with 'type' (read or write) is allowed
215 */ 217 */
216 bool (*is_valid_access)(int off, int size, enum bpf_access_type type, 218 bool (*is_valid_access)(int off, int size, enum bpf_access_type type,
219 const struct bpf_prog *prog,
217 struct bpf_insn_access_aux *info); 220 struct bpf_insn_access_aux *info);
218 int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, 221 int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
219 const struct bpf_prog *prog); 222 const struct bpf_prog *prog);
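With the program now passed to get_func_proto(), an implementation can gate helper availability on the attach type recorded at load time. A hedged sketch; bpf_bind_proto and the exact gating policy are assumptions for illustration:

static const struct bpf_func_proto *
example_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
        switch (func_id) {
        case BPF_FUNC_bind:
                /* only expose bpf_bind() to connect-time programs */
                switch (prog->expected_attach_type) {
                case BPF_CGROUP_INET4_CONNECT:
                case BPF_CGROUP_INET6_CONNECT:
                        return &bpf_bind_proto;         /* assumed symbol */
                default:
                        return NULL;
                }
        default:
                return NULL;    /* common helpers handled elsewhere */
        }
}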
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 5e2e8a49fb21..2b28fcf6f6ae 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -8,6 +8,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act)
8BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp) 8BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
9BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb) 9BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
10BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock) 10BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
11BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
11BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout) 12BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout)
12BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout) 13BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
13BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit) 14BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
@@ -19,6 +20,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
19BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) 20BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
20BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) 21BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
21BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) 22BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
23BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
22#endif 24#endif
23#ifdef CONFIG_CGROUP_BPF 25#ifdef CONFIG_CGROUP_BPF
24BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) 26BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 6b66cd1aa0b9..7e61c395fddf 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -153,7 +153,7 @@ struct bpf_insn_aux_data {
153 153
154#define BPF_VERIFIER_TMP_LOG_SIZE 1024 154#define BPF_VERIFIER_TMP_LOG_SIZE 1024
155 155
156struct bpf_verifer_log { 156struct bpf_verifier_log {
157 u32 level; 157 u32 level;
158 char kbuf[BPF_VERIFIER_TMP_LOG_SIZE]; 158 char kbuf[BPF_VERIFIER_TMP_LOG_SIZE];
159 char __user *ubuf; 159 char __user *ubuf;
@@ -161,11 +161,16 @@ struct bpf_verifer_log {
161 u32 len_total; 161 u32 len_total;
162}; 162};
163 163
164static inline bool bpf_verifier_log_full(const struct bpf_verifer_log *log) 164static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
165{ 165{
166 return log->len_used >= log->len_total - 1; 166 return log->len_used >= log->len_total - 1;
167} 167}
168 168
169static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
170{
171 return log->level && log->ubuf && !bpf_verifier_log_full(log);
172}
173
169#define BPF_MAX_SUBPROGS 256 174#define BPF_MAX_SUBPROGS 256
170 175
171/* single container for all structs 176/* single container for all structs
@@ -185,13 +190,15 @@ struct bpf_verifier_env {
185 bool allow_ptr_leaks; 190 bool allow_ptr_leaks;
186 bool seen_direct_write; 191 bool seen_direct_write;
187 struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ 192 struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
188 struct bpf_verifer_log log; 193 struct bpf_verifier_log log;
189 u32 subprog_starts[BPF_MAX_SUBPROGS]; 194 u32 subprog_starts[BPF_MAX_SUBPROGS];
190 /* computes the stack depth of each bpf function */ 195 /* computes the stack depth of each bpf function */
191 u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1]; 196 u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1];
192 u32 subprog_cnt; 197 u32 subprog_cnt;
193}; 198};
194 199
200void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
201 va_list args);
195__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, 202__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
196 const char *fmt, ...); 203 const char *fmt, ...);
197 204
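bpf_verifier_log_needed() is meant to guard the relatively expensive formatting path: check once, format only when someone can actually see the output. A sketch of the expected call pattern, close to what a log writer built on bpf_verifier_vlog() might look like (kernel context assumed):

__printf(2, 3)
static void example_log(struct bpf_verifier_env *env, const char *fmt, ...)
{
        va_list args;

        if (!bpf_verifier_log_needed(&env->log))
                return;         /* no level, no buffer, or buffer full */

        va_start(args, fmt);
        bpf_verifier_vlog(&env->log, fmt, args);
        va_end(args);
}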
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 109d05ccea9a..fc4e8f91b03d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -372,7 +372,7 @@ struct xdp_rxq_info;
372 372
373#define BPF_LDST_BYTES(insn) \ 373#define BPF_LDST_BYTES(insn) \
374 ({ \ 374 ({ \
375 const int __size = bpf_size_to_bytes(BPF_SIZE(insn->code)); \ 375 const int __size = bpf_size_to_bytes(BPF_SIZE((insn)->code)); \
376 WARN_ON(__size < 0); \ 376 WARN_ON(__size < 0); \
377 __size; \ 377 __size; \
378 }) 378 })
@@ -469,6 +469,7 @@ struct bpf_prog {
469 is_func:1, /* program is a bpf function */ 469 is_func:1, /* program is a bpf function */
470 kprobe_override:1; /* Do we override a kprobe? */ 470 kprobe_override:1; /* Do we override a kprobe? */
471 enum bpf_prog_type type; /* Type of BPF program */ 471 enum bpf_prog_type type; /* Type of BPF program */
472 enum bpf_attach_type expected_attach_type; /* For some prog types */
472 u32 len; /* Number of filter blocks */ 473 u32 len; /* Number of filter blocks */
473 u32 jited_len; /* Size of jited insns in bytes */ 474 u32 jited_len; /* Size of jited insns in bytes */
474 u8 tag[BPF_TAG_SIZE]; 475 u8 tag[BPF_TAG_SIZE];
@@ -521,6 +522,8 @@ struct sk_msg_buff {
521 __u32 key; 522 __u32 key;
522 __u32 flags; 523 __u32 flags;
523 struct bpf_map *map; 524 struct bpf_map *map;
525 struct sk_buff *skb;
526 struct list_head list;
524}; 527};
525 528
526/* Compute the linear packet data range [data, data_end) which 529/* Compute the linear packet data range [data, data_end) which
@@ -1018,6 +1021,16 @@ static inline int bpf_tell_extensions(void)
1018 return SKF_AD_MAX; 1021 return SKF_AD_MAX;
1019} 1022}
1020 1023
1024struct bpf_sock_addr_kern {
1025 struct sock *sk;
1026 struct sockaddr *uaddr;
1027 /* Temporary "register" to make indirect stores to nested structures
1028 * defined above. We need three registers to make such a store, but
1029 * only two (src and dst) are available at convert_ctx_access time
1030 */
1031 u64 tmp_reg;
1032};
1033
1021struct bpf_sock_ops_kern { 1034struct bpf_sock_ops_kern {
1022 struct sock *sk; 1035 struct sock *sk;
1023 u32 op; 1036 u32 op;
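The BPF_LDST_BYTES() change earlier in this file is a macro-hygiene fix: the parameter is substituted textually, so without parentheses an argument containing an operator can parse incorrectly. A runnable illustration of the failure mode:

#include <stdio.h>

struct insn { int code; };

#define CODE_BAD(insn)          (insn->code)    /* unhygienic */
#define CODE_GOOD(insn)         ((insn)->code)  /* parenthesized */

int main(void)
{
        struct insn prog[2] = { { 1 }, { 2 } };

        /* CODE_BAD(prog + 1) expands to (prog + 1->code) and fails to
         * compile, because -> binds tighter than +
         */
        printf("%d\n", CODE_GOOD(prog + 1));    /* prints 2 */
        return 0;
}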
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3fd291503576..293fa0677fba 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -919,6 +919,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
919#define swap(a, b) \ 919#define swap(a, b) \
920 do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) 920 do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
921 921
922/* This counts to 12. Any more, it will return 13th argument. */
923#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
924#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
925
926#define __CONCAT(a, b) a ## b
927#define CONCATENATE(a, b) __CONCAT(a, b)
928
922/** 929/**
923 * container_of - cast a member of a structure out to the containing structure 930 * container_of - cast a member of a structure out to the containing structure
924 * @ptr: the pointer to the member. 931 * @ptr: the pointer to the member.
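COUNT_ARGS() and CONCATENATE() combine into a compile-time arity dispatch, which is how the raw-tracepoint glue later in this series picks a bpf_trace_runN() flavor. A self-contained, runnable illustration (GNU C variadic macros, as in the kernel):

#include <stdio.h>

#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
#define __CONCAT(a, b) a ## b
#define CONCATENATE(a, b) __CONCAT(a, b)

static void run2(int a, int b) { printf("run2: %d %d\n", a, b); }
static void run3(int a, int b, int c) { printf("run3: %d %d %d\n", a, b, c); }

/* dispatch to runN based on how many arguments were passed */
#define RUN(args...) CONCATENATE(run, COUNT_ARGS(args))(args)

int main(void)
{
        RUN(1, 2);      /* expands to run2(1, 2) */
        RUN(1, 2, 3);   /* expands to run3(1, 2, 3) */
        return 0;
}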
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 22b2131bcdcd..aa5d4eb725f5 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -248,6 +248,24 @@ static inline void *sg_virt(struct scatterlist *sg)
248 return page_address(sg_page(sg)) + sg->offset; 248 return page_address(sg_page(sg)) + sg->offset;
249} 249}
250 250
251/**
252 * sg_init_marker - Initialize markers in sg table
253 * @sgl: The SG table
254 * @nents: Number of entries in table
255 *
256 **/
257static inline void sg_init_marker(struct scatterlist *sgl,
258 unsigned int nents)
259{
260#ifdef CONFIG_DEBUG_SG
261 unsigned int i;
262
263 for (i = 0; i < nents; i++)
264 sgl[i].sg_magic = SG_MAGIC;
265#endif
266 sg_mark_end(&sgl[nents - 1]);
267}
268
251int sg_nents(struct scatterlist *sg); 269int sg_nents(struct scatterlist *sg);
252int sg_nents_for_len(struct scatterlist *sg, u64 len); 270int sg_nents_for_len(struct scatterlist *sg, u64 len);
253struct scatterlist *sg_next(struct scatterlist *); 271struct scatterlist *sg_next(struct scatterlist *);
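sg_init_marker() factors the marker step out of full table initialization: sg_init_table() behaves like a memset of the entries followed by exactly this helper, so callers that are about to overwrite every entry anyway (as the sockmap fix in this series does) can skip the memset. A sketch of that relationship, assuming the usual scatterlist semantics:

static inline void example_sg_init_table(struct scatterlist *sgl,
                                         unsigned int nents)
{
        /* zero the entries... */
        memset(sgl, 0, sizeof(*sgl) * nents);
        /* ...then set SG_MAGIC (debug) and the end marker, as above */
        sg_init_marker(sgl, nents);
}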
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 8a1442c4e513..b0357cd198b0 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -468,6 +468,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
468int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog); 468int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
469void perf_event_detach_bpf_prog(struct perf_event *event); 469void perf_event_detach_bpf_prog(struct perf_event *event);
470int perf_event_query_prog_array(struct perf_event *event, void __user *info); 470int perf_event_query_prog_array(struct perf_event *event, void __user *info);
471int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
472int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
473struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
471#else 474#else
472static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) 475static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
473{ 476{
@@ -487,6 +490,18 @@ perf_event_query_prog_array(struct perf_event *event, void __user *info)
487{ 490{
488 return -EOPNOTSUPP; 491 return -EOPNOTSUPP;
489} 492}
493static inline int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *p)
494{
495 return -EOPNOTSUPP;
496}
497static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *p)
498{
499 return -EOPNOTSUPP;
500}
501static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
502{
503 return NULL;
504}
490#endif 505#endif
491 506
492enum { 507enum {
@@ -546,6 +561,33 @@ extern void ftrace_profile_free_filter(struct perf_event *event);
546void perf_trace_buf_update(void *record, u16 type); 561void perf_trace_buf_update(void *record, u16 type);
547void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp); 562void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);
548 563
564void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
565void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
566void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
567 u64 arg3);
568void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
569 u64 arg3, u64 arg4);
570void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
571 u64 arg3, u64 arg4, u64 arg5);
572void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
573 u64 arg3, u64 arg4, u64 arg5, u64 arg6);
574void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
575 u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7);
576void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
577 u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
578 u64 arg8);
579void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
580 u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
581 u64 arg8, u64 arg9);
582void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
583 u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
584 u64 arg8, u64 arg9, u64 arg10);
585void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
586 u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
587 u64 arg8, u64 arg9, u64 arg10, u64 arg11);
588void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
589 u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
590 u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12);
549void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, 591void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
550 struct trace_event_call *call, u64 count, 592 struct trace_event_call *call, u64 count,
551 struct pt_regs *regs, struct hlist_head *head, 593 struct pt_regs *regs, struct hlist_head *head,
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index 64ed7064f1fa..22c5a46e9693 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -35,4 +35,10 @@ struct tracepoint {
35 struct tracepoint_func __rcu *funcs; 35 struct tracepoint_func __rcu *funcs;
36}; 36};
37 37
38struct bpf_raw_event_map {
39 struct tracepoint *tp;
40 void *bpf_func;
41 u32 num_args;
42} __aligned(32);
43
38#endif 44#endif
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 132e5b95167a..378d601258be 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -231,6 +231,13 @@ struct ipv6_stub {
231}; 231};
232extern const struct ipv6_stub *ipv6_stub __read_mostly; 232extern const struct ipv6_stub *ipv6_stub __read_mostly;
233 233
234/* A stub used by bpf helpers. Similarly ugly to ipv6_stub */
235struct ipv6_bpf_stub {
236 int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
237 bool force_bind_address_no_port, bool with_lock);
238};
239extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
240
234/* 241/*
235 * identify MLD packets for MLD filter exceptions 242 * identify MLD packets for MLD filter exceptions
236 */ 243 */
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 500f81375200..384b90c62c0b 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -32,6 +32,8 @@ int inet_shutdown(struct socket *sock, int how);
32int inet_listen(struct socket *sock, int backlog); 32int inet_listen(struct socket *sock, int backlog);
33void inet_sock_destruct(struct sock *sk); 33void inet_sock_destruct(struct sock *sk);
34int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); 34int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
35int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
36 bool force_bind_address_no_port, bool with_lock);
35int inet_getname(struct socket *sock, struct sockaddr *uaddr, 37int inet_getname(struct socket *sock, struct sockaddr *uaddr,
36 int peer); 38 int peer);
37int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 39int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 37455e840347..9b6e7f51b1d4 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1043,6 +1043,8 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info);
1043void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); 1043void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
1044 1044
1045int inet6_release(struct socket *sock); 1045int inet6_release(struct socket *sock);
1046int __inet6_bind(struct sock *sock, struct sockaddr *uaddr, int addr_len,
1047 bool force_bind_address_no_port, bool with_lock);
1046int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); 1048int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
1047int inet6_getname(struct socket *sock, struct sockaddr *uaddr, 1049int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
1048 int peer); 1050 int peer);
diff --git a/include/net/sock.h b/include/net/sock.h
index 709311132d4c..49bd2c1796b0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1026,6 +1026,9 @@ static inline void sk_prot_clear_nulls(struct sock *sk, int size)
1026struct proto { 1026struct proto {
1027 void (*close)(struct sock *sk, 1027 void (*close)(struct sock *sk,
1028 long timeout); 1028 long timeout);
1029 int (*pre_connect)(struct sock *sk,
1030 struct sockaddr *uaddr,
1031 int addr_len);
1029 int (*connect)(struct sock *sk, 1032 int (*connect)(struct sock *sk,
1030 struct sockaddr *uaddr, 1033 struct sockaddr *uaddr,
1031 int addr_len); 1034 int addr_len);
@@ -1085,6 +1088,7 @@ struct proto {
1085#endif 1088#endif
1086 1089
1087 bool (*stream_memory_free)(const struct sock *sk); 1090 bool (*stream_memory_free)(const struct sock *sk);
1091 bool (*stream_memory_read)(const struct sock *sk);
1088 /* Memory pressure */ 1092 /* Memory pressure */
1089 void (*enter_memory_pressure)(struct sock *sk); 1093 void (*enter_memory_pressure)(struct sock *sk);
1090 void (*leave_memory_pressure)(struct sock *sk); 1094 void (*leave_memory_pressure)(struct sock *sk);
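A sketch of the intended call order for the new pre_connect member, assuming the BPF_CGROUP_PRE_CONNECT_ENABLED() semantics from earlier in this series: the hook runs before the protocol's connect, giving a BPF program a chance to inspect or rewrite the address. Illustrative only, not a verbatim call site:

static int example_connect(struct socket *sock, struct sockaddr *uaddr,
                           int addr_len, int flags)
{
        struct sock *sk = sock->sk;
        int err;

        if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
                err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
                if (err)
                        return err;
        }

        return sk->sk_prot->connect(sk, uaddr, addr_len);
}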
diff --git a/include/net/udp.h b/include/net/udp.h
index 850a8e581cce..0676b272f6ac 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -273,6 +273,7 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
273int udp_rcv(struct sk_buff *skb); 273int udp_rcv(struct sk_buff *skb);
274int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); 274int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
275int udp_init_sock(struct sock *sk); 275int udp_init_sock(struct sock *sk);
276int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
276int __udp_disconnect(struct sock *sk, int flags); 277int __udp_disconnect(struct sock *sk, int flags);
277int udp_disconnect(struct sock *sk, int flags); 278int udp_disconnect(struct sock *sk, int flags);
278__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait); 279__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait);
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
new file mode 100644
index 000000000000..505dae0bed80
--- /dev/null
+++ b/include/trace/bpf_probe.h
@@ -0,0 +1,92 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2
3#undef TRACE_SYSTEM_VAR
4
5#ifdef CONFIG_BPF_EVENTS
6
7#undef __entry
8#define __entry entry
9
10#undef __get_dynamic_array
11#define __get_dynamic_array(field) \
12 ((void *)__entry + (__entry->__data_loc_##field & 0xffff))
13
14#undef __get_dynamic_array_len
15#define __get_dynamic_array_len(field) \
16 ((__entry->__data_loc_##field >> 16) & 0xffff)
17
18#undef __get_str
19#define __get_str(field) ((char *)__get_dynamic_array(field))
20
21#undef __get_bitmask
22#define __get_bitmask(field) (char *)__get_dynamic_array(field)
23
24#undef __perf_count
25#define __perf_count(c) (c)
26
27#undef __perf_task
28#define __perf_task(t) (t)
29
30/* cast any integer, pointer, or small struct to u64 */
31#define UINTTYPE(size) \
32 __typeof__(__builtin_choose_expr(size == 1, (u8)1, \
33 __builtin_choose_expr(size == 2, (u16)2, \
34 __builtin_choose_expr(size == 4, (u32)3, \
35 __builtin_choose_expr(size == 8, (u64)4, \
36 (void)5)))))
37#define __CAST_TO_U64(x) ({ \
38 typeof(x) __src = (x); \
39 UINTTYPE(sizeof(x)) __dst; \
40 memcpy(&__dst, &__src, sizeof(__dst)); \
41 (u64)__dst; })
42
43#define __CAST1(a,...) __CAST_TO_U64(a)
44#define __CAST2(a,...) __CAST_TO_U64(a), __CAST1(__VA_ARGS__)
45#define __CAST3(a,...) __CAST_TO_U64(a), __CAST2(__VA_ARGS__)
46#define __CAST4(a,...) __CAST_TO_U64(a), __CAST3(__VA_ARGS__)
47#define __CAST5(a,...) __CAST_TO_U64(a), __CAST4(__VA_ARGS__)
48#define __CAST6(a,...) __CAST_TO_U64(a), __CAST5(__VA_ARGS__)
49#define __CAST7(a,...) __CAST_TO_U64(a), __CAST6(__VA_ARGS__)
50#define __CAST8(a,...) __CAST_TO_U64(a), __CAST7(__VA_ARGS__)
51#define __CAST9(a,...) __CAST_TO_U64(a), __CAST8(__VA_ARGS__)
52#define __CAST10(a,...) __CAST_TO_U64(a), __CAST9(__VA_ARGS__)
53#define __CAST11(a,...) __CAST_TO_U64(a), __CAST10(__VA_ARGS__)
54#define __CAST12(a,...) __CAST_TO_U64(a), __CAST11(__VA_ARGS__)
55/* tracepoints with more than 12 arguments will hit build error */
56#define CAST_TO_U64(...) CONCATENATE(__CAST, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__)
57
58#undef DECLARE_EVENT_CLASS
59#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
60static notrace void \
61__bpf_trace_##call(void *__data, proto) \
62{ \
63 struct bpf_prog *prog = __data; \
64 CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(prog, CAST_TO_U64(args)); \
65}
66
67/*
68 * This part is compiled out, it is only here as a build time check
69 * to make sure that if the tracepoint handling changes, the
70 * bpf probe will fail to compile unless it too is updated.
71 */
72#undef DEFINE_EVENT
73#define DEFINE_EVENT(template, call, proto, args) \
74static inline void bpf_test_probe_##call(void) \
75{ \
76 check_trace_callback_type_##call(__bpf_trace_##template); \
77} \
78static struct bpf_raw_event_map __used \
79 __attribute__((section("__bpf_raw_tp_map"))) \
80__bpf_trace_tp_map_##call = { \
81 .tp = &__tracepoint_##call, \
82 .bpf_func = (void *)__bpf_trace_##template, \
83 .num_args = COUNT_ARGS(args), \
84};
85
86
87#undef DEFINE_EVENT_PRINT
88#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
89 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
90
91#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
92#endif /* CONFIG_BPF_EVENTS */
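Hand-expanding the DECLARE_EVENT_CLASS override above for a hypothetical two-argument tracepoint shows what the machinery boils down to (illustrative names; the real expansion is driven by the tracepoint's proto/args):

/* proto: struct sk_buff *skb, int len */
static notrace void
__bpf_trace_example(void *__data, struct sk_buff *skb, int len)
{
        struct bpf_prog *prog = __data;

        /* COUNT_ARGS(skb, len) == 2, CONCATENATE pastes bpf_trace_run2,
         * and each argument is memcpy-cast to u64 by __CAST_TO_U64()
         */
        bpf_trace_run2(prog, __CAST_TO_U64(skb), __CAST_TO_U64(len));
}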
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index d9e3d4aa3f6e..cb30c5532144 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -95,6 +95,7 @@
95#ifdef TRACEPOINTS_ENABLED 95#ifdef TRACEPOINTS_ENABLED
96#include <trace/trace_events.h> 96#include <trace/trace_events.h>
97#include <trace/perf.h> 97#include <trace/perf.h>
98#include <trace/bpf_probe.h>
98#endif 99#endif
99 100
100#undef TRACE_EVENT 101#undef TRACE_EVENT
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 06c87f9f720c..795698925d20 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -491,7 +491,7 @@ DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_node,
491 491
492TRACE_EVENT(f2fs_truncate_partial_nodes, 492TRACE_EVENT(f2fs_truncate_partial_nodes,
493 493
494 TP_PROTO(struct inode *inode, nid_t nid[], int depth, int err), 494 TP_PROTO(struct inode *inode, nid_t *nid, int depth, int err),
495 495
496 TP_ARGS(inode, nid, depth, err), 496 TP_ARGS(inode, nid, depth, err),
497 497
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 18b7c510c511..c5ec89732a8d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -94,6 +94,7 @@ enum bpf_cmd {
94 BPF_MAP_GET_FD_BY_ID, 94 BPF_MAP_GET_FD_BY_ID,
95 BPF_OBJ_GET_INFO_BY_FD, 95 BPF_OBJ_GET_INFO_BY_FD,
96 BPF_PROG_QUERY, 96 BPF_PROG_QUERY,
97 BPF_RAW_TRACEPOINT_OPEN,
97}; 98};
98 99
99enum bpf_map_type { 100enum bpf_map_type {
@@ -134,6 +135,8 @@ enum bpf_prog_type {
134 BPF_PROG_TYPE_SK_SKB, 135 BPF_PROG_TYPE_SK_SKB,
135 BPF_PROG_TYPE_CGROUP_DEVICE, 136 BPF_PROG_TYPE_CGROUP_DEVICE,
136 BPF_PROG_TYPE_SK_MSG, 137 BPF_PROG_TYPE_SK_MSG,
138 BPF_PROG_TYPE_RAW_TRACEPOINT,
139 BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
137}; 140};
138 141
139enum bpf_attach_type { 142enum bpf_attach_type {
@@ -145,6 +148,12 @@ enum bpf_attach_type {
145 BPF_SK_SKB_STREAM_VERDICT, 148 BPF_SK_SKB_STREAM_VERDICT,
146 BPF_CGROUP_DEVICE, 149 BPF_CGROUP_DEVICE,
147 BPF_SK_MSG_VERDICT, 150 BPF_SK_MSG_VERDICT,
151 BPF_CGROUP_INET4_BIND,
152 BPF_CGROUP_INET6_BIND,
153 BPF_CGROUP_INET4_CONNECT,
154 BPF_CGROUP_INET6_CONNECT,
155 BPF_CGROUP_INET4_POST_BIND,
156 BPF_CGROUP_INET6_POST_BIND,
148 __MAX_BPF_ATTACH_TYPE 157 __MAX_BPF_ATTACH_TYPE
149}; 158};
150 159
@@ -294,6 +303,11 @@ union bpf_attr {
294 __u32 prog_flags; 303 __u32 prog_flags;
295 char prog_name[BPF_OBJ_NAME_LEN]; 304 char prog_name[BPF_OBJ_NAME_LEN];
296 __u32 prog_ifindex; /* ifindex of netdev to prep for */ 305 __u32 prog_ifindex; /* ifindex of netdev to prep for */
306 /* For some prog types expected attach type must be known at
307 * load time to verify attach type specific parts of prog
308 * (context accesses, allowed helpers, etc).
309 */
310 __u32 expected_attach_type;
297 }; 311 };
298 312
299 struct { /* anonymous struct used by BPF_OBJ_* commands */ 313 struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -344,6 +358,11 @@ union bpf_attr {
344 __aligned_u64 prog_ids; 358 __aligned_u64 prog_ids;
345 __u32 prog_cnt; 359 __u32 prog_cnt;
346 } query; 360 } query;
361
362 struct {
363 __u64 name;
364 __u32 prog_fd;
365 } raw_tracepoint;
347} __attribute__((aligned(8))); 366} __attribute__((aligned(8)));
348 367
349/* BPF helper function descriptions: 368/* BPF helper function descriptions:
@@ -729,6 +748,13 @@ union bpf_attr {
729 * @flags: reserved for future use 748 * @flags: reserved for future use
730 * Return: SK_PASS 749 * Return: SK_PASS
731 * 750 *
751 * int bpf_bind(ctx, addr, addr_len)
752 * Bind socket to address. Only binding to IP is supported; no port can be
753 * set in addr.
754 * @ctx: pointer to context of type bpf_sock_addr
755 * @addr: pointer to struct sockaddr to bind socket to
756 * @addr_len: length of sockaddr structure
757 * Return: 0 on success or negative error code
732 */ 758 */
733#define __BPF_FUNC_MAPPER(FN) \ 759#define __BPF_FUNC_MAPPER(FN) \
734 FN(unspec), \ 760 FN(unspec), \
@@ -794,7 +820,8 @@ union bpf_attr {
794 FN(msg_redirect_map), \ 820 FN(msg_redirect_map), \
795 FN(msg_apply_bytes), \ 821 FN(msg_apply_bytes), \
796 FN(msg_cork_bytes), \ 822 FN(msg_cork_bytes), \
797 FN(msg_pull_data), 823 FN(msg_pull_data), \
824 FN(bind),
798 825
799/* integer value in 'imm' field of BPF_CALL instruction selects which helper 826/* integer value in 'imm' field of BPF_CALL instruction selects which helper
800 * function eBPF program intends to call 827 * function eBPF program intends to call
@@ -923,6 +950,15 @@ struct bpf_sock {
923 __u32 protocol; 950 __u32 protocol;
924 __u32 mark; 951 __u32 mark;
925 __u32 priority; 952 __u32 priority;
953 __u32 src_ip4; /* Allows 1,2,4-byte read.
954 * Stored in network byte order.
955 */
956 __u32 src_ip6[4]; /* Allows 1,2,4-byte read.
957 * Stored in network byte order.
958 */
959 __u32 src_port; /* Allows 4-byte read.
960 * Stored in host byte order
961 */
926}; 962};
927 963
928#define XDP_PACKET_HEADROOM 256 964#define XDP_PACKET_HEADROOM 256
@@ -998,6 +1034,26 @@ struct bpf_map_info {
998 __u64 netns_ino; 1034 __u64 netns_ino;
999} __attribute__((aligned(8))); 1035} __attribute__((aligned(8)));
1000 1036
1037/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
1038 * by user and intended to be used by socket (e.g. to bind to, depends on
1039 * attach attach type).
1040 */
1041struct bpf_sock_addr {
1042 __u32 user_family; /* Allows 4-byte read, but no write. */
1043 __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write.
1044 * Stored in network byte order.
1045 */
1046 __u32 user_ip6[4]; /* Allows 1,2,4-byte read and 4-byte write.
1047 * Stored in network byte order.
1048 */
1049 __u32 user_port; /* Allows 4-byte read and write.
1050 * Stored in network byte order
1051 */
1052 __u32 family; /* Allows 4-byte read, but no write */
1053 __u32 type; /* Allows 4-byte read, but no write */
1054 __u32 protocol; /* Allows 4-byte read, but no write */
1055};
1056
1001/* User bpf_sock_ops struct to access socket values and specify request ops 1057/* User bpf_sock_ops struct to access socket values and specify request ops
1002 * and their replies. 1058 * and their replies.
1003 * Some of this fields are in network (bigendian) byte order and may need 1059 * Some of this fields are in network (bigendian) byte order and may need
@@ -1152,4 +1208,8 @@ struct bpf_cgroup_dev_ctx {
1152 __u32 minor; 1208 __u32 minor;
1153}; 1209};
1154 1210
1211struct bpf_raw_tracepoint_args {
1212 __u64 args[0];
1213};
1214
1155#endif /* _UAPI__LINUX_BPF_H__ */ 1215#endif /* _UAPI__LINUX_BPF_H__ */
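From userspace, the new command and the raw_tracepoint member of union bpf_attr are used together roughly like this. A sketch, assuming prog_fd comes from a prior BPF_PROG_LOAD of a BPF_PROG_TYPE_RAW_TRACEPOINT program; closing the returned anon-inode fd detaches the program:

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int raw_tracepoint_open(const char *name, int prog_fd)
{
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.raw_tracepoint.name = (__u64)(unsigned long)name;
        attr.raw_tracepoint.prog_fd = prog_fd;

        return syscall(__NR_bpf, BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
}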
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index c1c0b60d3f2f..43171a0bb02b 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -495,6 +495,42 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
495EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); 495EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
496 496
497/** 497/**
498 * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
499 * provided by user sockaddr
500 * @sk: sock struct that will use sockaddr
501 * @uaddr: sockaddr struct provided by user
502 * @type: The type of program to be executed
503 *
504 * socket is expected to be of type INET or INET6.
505 *
506 * This function will return %-EPERM if an attached program is found and
507 * its returned value != 1 during execution. In all other cases, 0 is returned.
508 */
509int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
510 struct sockaddr *uaddr,
511 enum bpf_attach_type type)
512{
513 struct bpf_sock_addr_kern ctx = {
514 .sk = sk,
515 .uaddr = uaddr,
516 };
517 struct cgroup *cgrp;
518 int ret;
519
520 /* Check socket family since not all sockets represent network
521 * endpoint (e.g. AF_UNIX).
522 */
523 if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
524 return 0;
525
526 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
527 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
528
529 return ret == 1 ? 0 : -EPERM;
530}
531EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
532
533/**
498 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock 534 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
499 * @sk: socket to get cgroup from 535 * @sk: socket to get cgroup from
500 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains 536 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
@@ -545,7 +581,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
545EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission); 581EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
546 582
547static const struct bpf_func_proto * 583static const struct bpf_func_proto *
548cgroup_dev_func_proto(enum bpf_func_id func_id) 584cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
549{ 585{
550 switch (func_id) { 586 switch (func_id) {
551 case BPF_FUNC_map_lookup_elem: 587 case BPF_FUNC_map_lookup_elem:
@@ -566,6 +602,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id)
566 602
567static bool cgroup_dev_is_valid_access(int off, int size, 603static bool cgroup_dev_is_valid_access(int off, int size,
568 enum bpf_access_type type, 604 enum bpf_access_type type,
605 const struct bpf_prog *prog,
569 struct bpf_insn_access_aux *info) 606 struct bpf_insn_access_aux *info)
570{ 607{
571 const int size_default = sizeof(__u32); 608 const int size_default = sizeof(__u32);
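On the BPF side, the return-value convention enforced by __cgroup_bpf_run_filter_sock_addr() above (1 allows, anything else becomes -EPERM) looks like this in a minimal program. A sketch: the section name follows a common libbpf convention (an assumption, not mandated by the kernel ABI) and the byte swap assumes a little-endian host:

#include <linux/bpf.h>

#define bpf_htons(x)    __builtin_bswap16(x)    /* little-endian host assumed */

__attribute__((section("cgroup/bind4"), used))
int deny_smtp_bind(struct bpf_sock_addr *ctx)
{
        /* user_port is stored in network byte order */
        if (ctx->user_port == bpf_htons(25))
                return 0;       /* rejected: bind() fails with -EPERM */
        return 1;               /* allowed */
}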
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 8740406df2cd..d6b76377cb6e 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -113,16 +113,16 @@ static const char *const bpf_jmp_string[16] = {
113}; 113};
114 114
115static void print_bpf_end_insn(bpf_insn_print_t verbose, 115static void print_bpf_end_insn(bpf_insn_print_t verbose,
116 struct bpf_verifier_env *env, 116 void *private_data,
117 const struct bpf_insn *insn) 117 const struct bpf_insn *insn)
118{ 118{
119 verbose(env, "(%02x) r%d = %s%d r%d\n", insn->code, insn->dst_reg, 119 verbose(private_data, "(%02x) r%d = %s%d r%d\n",
120 insn->code, insn->dst_reg,
120 BPF_SRC(insn->code) == BPF_TO_BE ? "be" : "le", 121 BPF_SRC(insn->code) == BPF_TO_BE ? "be" : "le",
121 insn->imm, insn->dst_reg); 122 insn->imm, insn->dst_reg);
122} 123}
123 124
124void print_bpf_insn(const struct bpf_insn_cbs *cbs, 125void print_bpf_insn(const struct bpf_insn_cbs *cbs,
125 struct bpf_verifier_env *env,
126 const struct bpf_insn *insn, 126 const struct bpf_insn *insn,
127 bool allow_ptr_leaks) 127 bool allow_ptr_leaks)
128{ 128{
@@ -132,23 +132,23 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
132 if (class == BPF_ALU || class == BPF_ALU64) { 132 if (class == BPF_ALU || class == BPF_ALU64) {
133 if (BPF_OP(insn->code) == BPF_END) { 133 if (BPF_OP(insn->code) == BPF_END) {
134 if (class == BPF_ALU64) 134 if (class == BPF_ALU64)
135 verbose(env, "BUG_alu64_%02x\n", insn->code); 135 verbose(cbs->private_data, "BUG_alu64_%02x\n", insn->code);
136 else 136 else
137 print_bpf_end_insn(verbose, env, insn); 137 print_bpf_end_insn(verbose, cbs->private_data, insn);
138 } else if (BPF_OP(insn->code) == BPF_NEG) { 138 } else if (BPF_OP(insn->code) == BPF_NEG) {
139 verbose(env, "(%02x) r%d = %s-r%d\n", 139 verbose(cbs->private_data, "(%02x) r%d = %s-r%d\n",
140 insn->code, insn->dst_reg, 140 insn->code, insn->dst_reg,
141 class == BPF_ALU ? "(u32) " : "", 141 class == BPF_ALU ? "(u32) " : "",
142 insn->dst_reg); 142 insn->dst_reg);
143 } else if (BPF_SRC(insn->code) == BPF_X) { 143 } else if (BPF_SRC(insn->code) == BPF_X) {
144 verbose(env, "(%02x) %sr%d %s %sr%d\n", 144 verbose(cbs->private_data, "(%02x) %sr%d %s %sr%d\n",
145 insn->code, class == BPF_ALU ? "(u32) " : "", 145 insn->code, class == BPF_ALU ? "(u32) " : "",
146 insn->dst_reg, 146 insn->dst_reg,
147 bpf_alu_string[BPF_OP(insn->code) >> 4], 147 bpf_alu_string[BPF_OP(insn->code) >> 4],
148 class == BPF_ALU ? "(u32) " : "", 148 class == BPF_ALU ? "(u32) " : "",
149 insn->src_reg); 149 insn->src_reg);
150 } else { 150 } else {
151 verbose(env, "(%02x) %sr%d %s %s%d\n", 151 verbose(cbs->private_data, "(%02x) %sr%d %s %s%d\n",
152 insn->code, class == BPF_ALU ? "(u32) " : "", 152 insn->code, class == BPF_ALU ? "(u32) " : "",
153 insn->dst_reg, 153 insn->dst_reg,
154 bpf_alu_string[BPF_OP(insn->code) >> 4], 154 bpf_alu_string[BPF_OP(insn->code) >> 4],
@@ -157,46 +157,46 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
157 } 157 }
158 } else if (class == BPF_STX) { 158 } else if (class == BPF_STX) {
159 if (BPF_MODE(insn->code) == BPF_MEM) 159 if (BPF_MODE(insn->code) == BPF_MEM)
160 verbose(env, "(%02x) *(%s *)(r%d %+d) = r%d\n", 160 verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = r%d\n",
161 insn->code, 161 insn->code,
162 bpf_ldst_string[BPF_SIZE(insn->code) >> 3], 162 bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
163 insn->dst_reg, 163 insn->dst_reg,
164 insn->off, insn->src_reg); 164 insn->off, insn->src_reg);
165 else if (BPF_MODE(insn->code) == BPF_XADD) 165 else if (BPF_MODE(insn->code) == BPF_XADD)
166 verbose(env, "(%02x) lock *(%s *)(r%d %+d) += r%d\n", 166 verbose(cbs->private_data, "(%02x) lock *(%s *)(r%d %+d) += r%d\n",
167 insn->code, 167 insn->code,
168 bpf_ldst_string[BPF_SIZE(insn->code) >> 3], 168 bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
169 insn->dst_reg, insn->off, 169 insn->dst_reg, insn->off,
170 insn->src_reg); 170 insn->src_reg);
171 else 171 else
172 verbose(env, "BUG_%02x\n", insn->code); 172 verbose(cbs->private_data, "BUG_%02x\n", insn->code);
173 } else if (class == BPF_ST) { 173 } else if (class == BPF_ST) {
174 if (BPF_MODE(insn->code) != BPF_MEM) { 174 if (BPF_MODE(insn->code) != BPF_MEM) {
175 verbose(env, "BUG_st_%02x\n", insn->code); 175 verbose(cbs->private_data, "BUG_st_%02x\n", insn->code);
176 return; 176 return;
177 } 177 }
178 verbose(env, "(%02x) *(%s *)(r%d %+d) = %d\n", 178 verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
179 insn->code, 179 insn->code,
180 bpf_ldst_string[BPF_SIZE(insn->code) >> 3], 180 bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
181 insn->dst_reg, 181 insn->dst_reg,
182 insn->off, insn->imm); 182 insn->off, insn->imm);
183 } else if (class == BPF_LDX) { 183 } else if (class == BPF_LDX) {
184 if (BPF_MODE(insn->code) != BPF_MEM) { 184 if (BPF_MODE(insn->code) != BPF_MEM) {
185 verbose(env, "BUG_ldx_%02x\n", insn->code); 185 verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code);
186 return; 186 return;
187 } 187 }
188 verbose(env, "(%02x) r%d = *(%s *)(r%d %+d)\n", 188 verbose(cbs->private_data, "(%02x) r%d = *(%s *)(r%d %+d)\n",
189 insn->code, insn->dst_reg, 189 insn->code, insn->dst_reg,
190 bpf_ldst_string[BPF_SIZE(insn->code) >> 3], 190 bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
191 insn->src_reg, insn->off); 191 insn->src_reg, insn->off);
192 } else if (class == BPF_LD) { 192 } else if (class == BPF_LD) {
193 if (BPF_MODE(insn->code) == BPF_ABS) { 193 if (BPF_MODE(insn->code) == BPF_ABS) {
194 verbose(env, "(%02x) r0 = *(%s *)skb[%d]\n", 194 verbose(cbs->private_data, "(%02x) r0 = *(%s *)skb[%d]\n",
195 insn->code, 195 insn->code,
196 bpf_ldst_string[BPF_SIZE(insn->code) >> 3], 196 bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
197 insn->imm); 197 insn->imm);
198 } else if (BPF_MODE(insn->code) == BPF_IND) { 198 } else if (BPF_MODE(insn->code) == BPF_IND) {
199 verbose(env, "(%02x) r0 = *(%s *)skb[r%d + %d]\n", 199 verbose(cbs->private_data, "(%02x) r0 = *(%s *)skb[r%d + %d]\n",
200 insn->code, 200 insn->code,
201 bpf_ldst_string[BPF_SIZE(insn->code) >> 3], 201 bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
202 insn->src_reg, insn->imm); 202 insn->src_reg, insn->imm);
@@ -212,12 +212,12 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
212 if (map_ptr && !allow_ptr_leaks) 212 if (map_ptr && !allow_ptr_leaks)
213 imm = 0; 213 imm = 0;
214 214
215 verbose(env, "(%02x) r%d = %s\n", 215 verbose(cbs->private_data, "(%02x) r%d = %s\n",
216 insn->code, insn->dst_reg, 216 insn->code, insn->dst_reg,
217 __func_imm_name(cbs, insn, imm, 217 __func_imm_name(cbs, insn, imm,
218 tmp, sizeof(tmp))); 218 tmp, sizeof(tmp)));
219 } else { 219 } else {
220 verbose(env, "BUG_ld_%02x\n", insn->code); 220 verbose(cbs->private_data, "BUG_ld_%02x\n", insn->code);
221 return; 221 return;
222 } 222 }
223 } else if (class == BPF_JMP) { 223 } else if (class == BPF_JMP) {
@@ -227,35 +227,35 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
227 char tmp[64]; 227 char tmp[64];
228 228
229 if (insn->src_reg == BPF_PSEUDO_CALL) { 229 if (insn->src_reg == BPF_PSEUDO_CALL) {
230 verbose(env, "(%02x) call pc%s\n", 230 verbose(cbs->private_data, "(%02x) call pc%s\n",
231 insn->code, 231 insn->code,
232 __func_get_name(cbs, insn, 232 __func_get_name(cbs, insn,
233 tmp, sizeof(tmp))); 233 tmp, sizeof(tmp)));
234 } else { 234 } else {
235 strcpy(tmp, "unknown"); 235 strcpy(tmp, "unknown");
236 verbose(env, "(%02x) call %s#%d\n", insn->code, 236 verbose(cbs->private_data, "(%02x) call %s#%d\n", insn->code,
237 __func_get_name(cbs, insn, 237 __func_get_name(cbs, insn,
238 tmp, sizeof(tmp)), 238 tmp, sizeof(tmp)),
239 insn->imm); 239 insn->imm);
240 } 240 }
241 } else if (insn->code == (BPF_JMP | BPF_JA)) { 241 } else if (insn->code == (BPF_JMP | BPF_JA)) {
242 verbose(env, "(%02x) goto pc%+d\n", 242 verbose(cbs->private_data, "(%02x) goto pc%+d\n",
243 insn->code, insn->off); 243 insn->code, insn->off);
244 } else if (insn->code == (BPF_JMP | BPF_EXIT)) { 244 } else if (insn->code == (BPF_JMP | BPF_EXIT)) {
245 verbose(env, "(%02x) exit\n", insn->code); 245 verbose(cbs->private_data, "(%02x) exit\n", insn->code);
246 } else if (BPF_SRC(insn->code) == BPF_X) { 246 } else if (BPF_SRC(insn->code) == BPF_X) {
247 verbose(env, "(%02x) if r%d %s r%d goto pc%+d\n", 247 verbose(cbs->private_data, "(%02x) if r%d %s r%d goto pc%+d\n",
248 insn->code, insn->dst_reg, 248 insn->code, insn->dst_reg,
249 bpf_jmp_string[BPF_OP(insn->code) >> 4], 249 bpf_jmp_string[BPF_OP(insn->code) >> 4],
250 insn->src_reg, insn->off); 250 insn->src_reg, insn->off);
251 } else { 251 } else {
252 verbose(env, "(%02x) if r%d %s 0x%x goto pc%+d\n", 252 verbose(cbs->private_data, "(%02x) if r%d %s 0x%x goto pc%+d\n",
253 insn->code, insn->dst_reg, 253 insn->code, insn->dst_reg,
254 bpf_jmp_string[BPF_OP(insn->code) >> 4], 254 bpf_jmp_string[BPF_OP(insn->code) >> 4],
255 insn->imm, insn->off); 255 insn->imm, insn->off);
256 } 256 }
257 } else { 257 } else {
258 verbose(env, "(%02x) %s\n", 258 verbose(cbs->private_data, "(%02x) %s\n",
259 insn->code, bpf_class_string[class]); 259 insn->code, bpf_class_string[class]);
260 } 260 }
261} 261}
diff --git a/kernel/bpf/disasm.h b/kernel/bpf/disasm.h
index 266fe8ee542b..e1324a834a24 100644
--- a/kernel/bpf/disasm.h
+++ b/kernel/bpf/disasm.h
@@ -22,14 +22,12 @@
22#include <string.h> 22#include <string.h>
23#endif 23#endif
24 24
25struct bpf_verifier_env;
26
27extern const char *const bpf_alu_string[16]; 25extern const char *const bpf_alu_string[16];
28extern const char *const bpf_class_string[8]; 26extern const char *const bpf_class_string[8];
29 27
30const char *func_id_name(int id); 28const char *func_id_name(int id);
31 29
32typedef __printf(2, 3) void (*bpf_insn_print_t)(struct bpf_verifier_env *env, 30typedef __printf(2, 3) void (*bpf_insn_print_t)(void *private_data,
33 const char *, ...); 31 const char *, ...);
34typedef const char *(*bpf_insn_revmap_call_t)(void *private_data, 32typedef const char *(*bpf_insn_revmap_call_t)(void *private_data,
35 const struct bpf_insn *insn); 33 const struct bpf_insn *insn);
@@ -45,7 +43,6 @@ struct bpf_insn_cbs {
45}; 43};
46 44
47void print_bpf_insn(const struct bpf_insn_cbs *cbs, 45void print_bpf_insn(const struct bpf_insn_cbs *cbs,
48 struct bpf_verifier_env *env,
49 const struct bpf_insn *insn, 46 const struct bpf_insn *insn,
50 bool allow_ptr_leaks); 47 bool allow_ptr_leaks);
51#endif 48#endif
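
[Annotation] With struct bpf_verifier_env dropped from the callback signature, print_bpf_insn() no longer depends on verifier internals: callers route output through the opaque cbs->private_data cookie instead. A minimal sketch of what an out-of-kernel user could now look like (the FILE-based sink and callback name are illustrative, not part of this patch):

    #include <stdarg.h>
    #include <stdio.h>

    /* Matches the new bpf_insn_print_t: first argument is an opaque cookie. */
    static void print_to_file(void *private_data, const char *fmt, ...)
    {
            va_list args;

            va_start(args, fmt);
            vfprintf((FILE *)private_data, fmt, args);
            va_end(args);
    }

    /* ... given a struct bpf_insn *insn to dump ... */
    const struct bpf_insn_cbs cbs = {
            .cb_print     = print_to_file,
            .private_data = stderr,
    };

    print_bpf_insn(&cbs, insn, true);
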
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 69c5bccabd22..d2bda5aa25d7 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -41,6 +41,8 @@
41#include <linux/mm.h> 41#include <linux/mm.h>
42#include <net/strparser.h> 42#include <net/strparser.h>
43#include <net/tcp.h> 43#include <net/tcp.h>
44#include <linux/ptr_ring.h>
45#include <net/inet_common.h>
44 46
45#define SOCK_CREATE_FLAG_MASK \ 47#define SOCK_CREATE_FLAG_MASK \
46 (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) 48 (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
@@ -82,6 +84,7 @@ struct smap_psock {
82 int sg_size; 84 int sg_size;
83 int eval; 85 int eval;
84 struct sk_msg_buff *cork; 86 struct sk_msg_buff *cork;
87 struct list_head ingress;
85 88
86 struct strparser strp; 89 struct strparser strp;
87 struct bpf_prog *bpf_tx_msg; 90 struct bpf_prog *bpf_tx_msg;
@@ -103,6 +106,8 @@ struct smap_psock {
103}; 106};
104 107
105static void smap_release_sock(struct smap_psock *psock, struct sock *sock); 108static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
109static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
110 int nonblock, int flags, int *addr_len);
106static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); 111static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
107static int bpf_tcp_sendpage(struct sock *sk, struct page *page, 112static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
108 int offset, size_t size, int flags); 113 int offset, size_t size, int flags);
@@ -112,6 +117,21 @@ static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
112 return rcu_dereference_sk_user_data(sk); 117 return rcu_dereference_sk_user_data(sk);
113} 118}
114 119
120static bool bpf_tcp_stream_read(const struct sock *sk)
121{
122 struct smap_psock *psock;
123 bool empty = true;
124
125 rcu_read_lock();
126 psock = smap_psock_sk(sk);
127 if (unlikely(!psock))
128 goto out;
129 empty = list_empty(&psock->ingress);
130out:
131 rcu_read_unlock();
132 return !empty;
133}
134
115static struct proto tcp_bpf_proto; 135static struct proto tcp_bpf_proto;
116static int bpf_tcp_init(struct sock *sk) 136static int bpf_tcp_init(struct sock *sk)
117{ 137{
@@ -135,6 +155,8 @@ static int bpf_tcp_init(struct sock *sk)
135 if (psock->bpf_tx_msg) { 155 if (psock->bpf_tx_msg) {
136 tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg; 156 tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
137 tcp_bpf_proto.sendpage = bpf_tcp_sendpage; 157 tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
158 tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
159 tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
138 } 160 }
139 161
140 sk->sk_prot = &tcp_bpf_proto; 162 sk->sk_prot = &tcp_bpf_proto;
@@ -170,6 +192,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
170{ 192{
171 void (*close_fun)(struct sock *sk, long timeout); 193 void (*close_fun)(struct sock *sk, long timeout);
172 struct smap_psock_map_entry *e, *tmp; 194 struct smap_psock_map_entry *e, *tmp;
195 struct sk_msg_buff *md, *mtmp;
173 struct smap_psock *psock; 196 struct smap_psock *psock;
174 struct sock *osk; 197 struct sock *osk;
175 198
@@ -188,6 +211,12 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
188 close_fun = psock->save_close; 211 close_fun = psock->save_close;
189 212
190 write_lock_bh(&sk->sk_callback_lock); 213 write_lock_bh(&sk->sk_callback_lock);
214 list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
215 list_del(&md->list);
216 free_start_sg(psock->sock, md);
217 kfree(md);
218 }
219
191 list_for_each_entry_safe(e, tmp, &psock->maps, list) { 220 list_for_each_entry_safe(e, tmp, &psock->maps, list) {
192 osk = cmpxchg(e->entry, sk, NULL); 221 osk = cmpxchg(e->entry, sk, NULL);
193 if (osk == sk) { 222 if (osk == sk) {
@@ -312,7 +341,7 @@ retry:
312 md->sg_start++; 341 md->sg_start++;
313 if (md->sg_start == MAX_SKB_FRAGS) 342 if (md->sg_start == MAX_SKB_FRAGS)
314 md->sg_start = 0; 343 md->sg_start = 0;
315 memset(sg, 0, sizeof(*sg)); 344 sg_init_table(sg, 1);
316 345
317 if (md->sg_start == md->sg_end) 346 if (md->sg_start == md->sg_end)
318 break; 347 break;
@@ -468,6 +497,72 @@ verdict:
468 return _rc; 497 return _rc;
469} 498}
470 499
500static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
501 struct smap_psock *psock,
502 struct sk_msg_buff *md, int flags)
503{
504 bool apply = apply_bytes;
505 size_t size, copied = 0;
506 struct sk_msg_buff *r;
507 int err = 0, i;
508
509 r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_KERNEL);
510 if (unlikely(!r))
511 return -ENOMEM;
512
513 lock_sock(sk);
514 r->sg_start = md->sg_start;
515 i = md->sg_start;
516
517 do {
518 r->sg_data[i] = md->sg_data[i];
519
520 size = (apply && apply_bytes < md->sg_data[i].length) ?
521 apply_bytes : md->sg_data[i].length;
522
523 if (!sk_wmem_schedule(sk, size)) {
524 if (!copied)
525 err = -ENOMEM;
526 break;
527 }
528
529 sk_mem_charge(sk, size);
530 r->sg_data[i].length = size;
531 md->sg_data[i].length -= size;
532 md->sg_data[i].offset += size;
533 copied += size;
534
535 if (md->sg_data[i].length) {
536 get_page(sg_page(&r->sg_data[i]));
537 r->sg_end = (i + 1) == MAX_SKB_FRAGS ? 0 : i + 1;
538 } else {
539 i++;
540 if (i == MAX_SKB_FRAGS)
541 i = 0;
542 r->sg_end = i;
543 }
544
545 if (apply) {
546 apply_bytes -= size;
547 if (!apply_bytes)
548 break;
549 }
550 } while (i != md->sg_end);
551
552 md->sg_start = i;
553
554 if (!err) {
555 list_add_tail(&r->list, &psock->ingress);
556 sk->sk_data_ready(sk);
557 } else {
558 free_start_sg(sk, r);
559 kfree(r);
560 }
561
562 release_sock(sk);
563 return err;
564}
565
471static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send, 566static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
472 struct sk_msg_buff *md, 567 struct sk_msg_buff *md,
473 int flags) 568 int flags)
@@ -475,6 +570,7 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
475 struct smap_psock *psock; 570 struct smap_psock *psock;
476 struct scatterlist *sg; 571 struct scatterlist *sg;
477 int i, err, free = 0; 572 int i, err, free = 0;
573 bool ingress = !!(md->flags & BPF_F_INGRESS);
478 574
479 sg = md->sg_data; 575 sg = md->sg_data;
480 576
@@ -487,9 +583,14 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
487 goto out_rcu; 583 goto out_rcu;
488 584
489 rcu_read_unlock(); 585 rcu_read_unlock();
490 lock_sock(sk); 586
491 err = bpf_tcp_push(sk, send, md, flags, false); 587 if (ingress) {
492 release_sock(sk); 588 err = bpf_tcp_ingress(sk, send, psock, md, flags);
589 } else {
590 lock_sock(sk);
591 err = bpf_tcp_push(sk, send, md, flags, false);
592 release_sock(sk);
593 }
493 smap_release_sock(psock, sk); 594 smap_release_sock(psock, sk);
494 if (unlikely(err)) 595 if (unlikely(err))
495 goto out; 596 goto out;
@@ -623,6 +724,92 @@ out_err:
623 return err; 724 return err;
624} 725}
625 726
727static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
728 int nonblock, int flags, int *addr_len)
729{
730 struct iov_iter *iter = &msg->msg_iter;
731 struct smap_psock *psock;
732 int copied = 0;
733
734 if (unlikely(flags & MSG_ERRQUEUE))
735 return inet_recv_error(sk, msg, len, addr_len);
736
737 rcu_read_lock();
738 psock = smap_psock_sk(sk);
739 if (unlikely(!psock))
740 goto out;
741
742 if (unlikely(!refcount_inc_not_zero(&psock->refcnt)))
743 goto out;
744 rcu_read_unlock();
745
746 if (!skb_queue_empty(&sk->sk_receive_queue))
747 return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
748
749 lock_sock(sk);
750 while (copied != len) {
751 struct scatterlist *sg;
752 struct sk_msg_buff *md;
753 int i;
754
755 md = list_first_entry_or_null(&psock->ingress,
756 struct sk_msg_buff, list);
757 if (unlikely(!md))
758 break;
759 i = md->sg_start;
760 do {
761 struct page *page;
762 int n, copy;
763
764 sg = &md->sg_data[i];
765 copy = sg->length;
766 page = sg_page(sg);
767
768 if (copied + copy > len)
769 copy = len - copied;
770
771 n = copy_page_to_iter(page, sg->offset, copy, iter);
772 if (n != copy) {
773 md->sg_start = i;
774 release_sock(sk);
775 smap_release_sock(psock, sk);
776 return -EFAULT;
777 }
778
779 copied += copy;
780 sg->offset += copy;
781 sg->length -= copy;
782 sk_mem_uncharge(sk, copy);
783
784 if (!sg->length) {
785 i++;
786 if (i == MAX_SKB_FRAGS)
787 i = 0;
788 if (!md->skb)
789 put_page(page);
790 }
791 if (copied == len)
792 break;
793 } while (i != md->sg_end);
794 md->sg_start = i;
795
796 if (!sg->length && md->sg_start == md->sg_end) {
797 list_del(&md->list);
798 if (md->skb)
799 consume_skb(md->skb);
800 kfree(md);
801 }
802 }
803
804 release_sock(sk);
805 smap_release_sock(psock, sk);
806 return copied;
807out:
808 rcu_read_unlock();
809 return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
810}
811
812
626static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) 813static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
627{ 814{
628 int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS; 815 int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
@@ -656,7 +843,7 @@ static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
656 } 843 }
657 844
658 sg = md.sg_data; 845 sg = md.sg_data;
659 sg_init_table(sg, MAX_SKB_FRAGS); 846 sg_init_marker(sg, MAX_SKB_FRAGS);
660 rcu_read_unlock(); 847 rcu_read_unlock();
661 848
662 lock_sock(sk); 849 lock_sock(sk);
@@ -763,10 +950,14 @@ static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
763 950
764 lock_sock(sk); 951 lock_sock(sk);
765 952
766 if (psock->cork_bytes) 953 if (psock->cork_bytes) {
767 m = psock->cork; 954 m = psock->cork;
768 else 955 sg = &m->sg_data[m->sg_end];
956 } else {
769 m = &md; 957 m = &md;
958 sg = m->sg_data;
959 sg_init_marker(sg, MAX_SKB_FRAGS);
960 }
770 961
771 /* Catch case where ring is full and sendpage is stalled. */ 962 /* Catch case where ring is full and sendpage is stalled. */
772 if (unlikely(m->sg_end == m->sg_start && 963 if (unlikely(m->sg_end == m->sg_start &&
@@ -774,7 +965,6 @@ static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
774 goto out_err; 965 goto out_err;
775 966
776 psock->sg_size += size; 967 psock->sg_size += size;
777 sg = &m->sg_data[m->sg_end];
778 sg_set_page(sg, page, size, offset); 968 sg_set_page(sg, page, size, offset);
779 get_page(page); 969 get_page(page);
780 m->sg_copy[m->sg_end] = true; 970 m->sg_copy[m->sg_end] = true;
@@ -861,27 +1051,72 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
861 __SK_DROP; 1051 __SK_DROP;
862} 1052}
863 1053
1054static int smap_do_ingress(struct smap_psock *psock, struct sk_buff *skb)
1055{
1056 struct sock *sk = psock->sock;
1057 int copied = 0, num_sg;
1058 struct sk_msg_buff *r;
1059
1060 r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_ATOMIC);
1061 if (unlikely(!r))
1062 return -EAGAIN;
1063
1064 if (!sk_rmem_schedule(sk, skb, skb->len)) {
1065 kfree(r);
1066 return -EAGAIN;
1067 }
1068
1069 sg_init_table(r->sg_data, MAX_SKB_FRAGS);
1070 num_sg = skb_to_sgvec(skb, r->sg_data, 0, skb->len);
1071 if (unlikely(num_sg < 0)) {
1072 kfree(r);
1073 return num_sg;
1074 }
1075 sk_mem_charge(sk, skb->len);
1076 copied = skb->len;
1077 r->sg_start = 0;
1078 r->sg_end = num_sg == MAX_SKB_FRAGS ? 0 : num_sg;
1079 r->skb = skb;
1080 list_add_tail(&r->list, &psock->ingress);
1081 sk->sk_data_ready(sk);
1082 return copied;
1083}
1084
864static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) 1085static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
865{ 1086{
1087 struct smap_psock *peer;
866 struct sock *sk; 1088 struct sock *sk;
1089 __u32 in;
867 int rc; 1090 int rc;
868 1091
869 rc = smap_verdict_func(psock, skb); 1092 rc = smap_verdict_func(psock, skb);
870 switch (rc) { 1093 switch (rc) {
871 case __SK_REDIRECT: 1094 case __SK_REDIRECT:
872 sk = do_sk_redirect_map(skb); 1095 sk = do_sk_redirect_map(skb);
873 if (likely(sk)) { 1096 if (!sk) {
874 struct smap_psock *peer = smap_psock_sk(sk); 1097 kfree_skb(skb);
875 1098 break;
876 if (likely(peer && 1099 }
877 test_bit(SMAP_TX_RUNNING, &peer->state) && 1100
878 !sock_flag(sk, SOCK_DEAD) && 1101 peer = smap_psock_sk(sk);
879 sock_writeable(sk))) { 1102 in = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
880 skb_set_owner_w(skb, sk); 1103
881 skb_queue_tail(&peer->rxqueue, skb); 1104 if (unlikely(!peer || sock_flag(sk, SOCK_DEAD) ||
882 schedule_work(&peer->tx_work); 1105 !test_bit(SMAP_TX_RUNNING, &peer->state))) {
883 break; 1106 kfree_skb(skb);
884 } 1107 break;
1108 }
1109
1110 if (!in && sock_writeable(sk)) {
1111 skb_set_owner_w(skb, sk);
1112 skb_queue_tail(&peer->rxqueue, skb);
1113 schedule_work(&peer->tx_work);
1114 break;
1115 } else if (in &&
1116 atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
1117 skb_queue_tail(&peer->rxqueue, skb);
1118 schedule_work(&peer->tx_work);
1119 break;
885 } 1120 }
886 /* Fall through and free skb otherwise */ 1121 /* Fall through and free skb otherwise */
887 case __SK_DROP: 1122 case __SK_DROP:
@@ -943,15 +1178,23 @@ static void smap_tx_work(struct work_struct *w)
943 } 1178 }
944 1179
945 while ((skb = skb_dequeue(&psock->rxqueue))) { 1180 while ((skb = skb_dequeue(&psock->rxqueue))) {
1181 __u32 flags;
1182
946 rem = skb->len; 1183 rem = skb->len;
947 off = 0; 1184 off = 0;
948start: 1185start:
1186 flags = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
949 do { 1187 do {
950 if (likely(psock->sock->sk_socket)) 1188 if (likely(psock->sock->sk_socket)) {
951 n = skb_send_sock_locked(psock->sock, 1189 if (flags)
952 skb, off, rem); 1190 n = smap_do_ingress(psock, skb);
953 else 1191 else
1192 n = skb_send_sock_locked(psock->sock,
1193 skb, off, rem);
1194 } else {
954 n = -EINVAL; 1195 n = -EINVAL;
1196 }
1197
955 if (n <= 0) { 1198 if (n <= 0) {
956 if (n == -EAGAIN) { 1199 if (n == -EAGAIN) {
957 /* Retry when space is available */ 1200 /* Retry when space is available */
@@ -969,7 +1212,9 @@ start:
969 rem -= n; 1212 rem -= n;
970 off += n; 1213 off += n;
971 } while (rem); 1214 } while (rem);
972 kfree_skb(skb); 1215
1216 if (!flags)
1217 kfree_skb(skb);
973 } 1218 }
974out: 1219out:
975 release_sock(psock->sock); 1220 release_sock(psock->sock);
@@ -1107,6 +1352,7 @@ static void sock_map_remove_complete(struct bpf_stab *stab)
1107static void smap_gc_work(struct work_struct *w) 1352static void smap_gc_work(struct work_struct *w)
1108{ 1353{
1109 struct smap_psock_map_entry *e, *tmp; 1354 struct smap_psock_map_entry *e, *tmp;
1355 struct sk_msg_buff *md, *mtmp;
1110 struct smap_psock *psock; 1356 struct smap_psock *psock;
1111 1357
1112 psock = container_of(w, struct smap_psock, gc_work); 1358 psock = container_of(w, struct smap_psock, gc_work);
@@ -1131,6 +1377,12 @@ static void smap_gc_work(struct work_struct *w)
1131 kfree(psock->cork); 1377 kfree(psock->cork);
1132 } 1378 }
1133 1379
1380 list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
1381 list_del(&md->list);
1382 free_start_sg(psock->sock, md);
1383 kfree(md);
1384 }
1385
1134 list_for_each_entry_safe(e, tmp, &psock->maps, list) { 1386 list_for_each_entry_safe(e, tmp, &psock->maps, list) {
1135 list_del(&e->list); 1387 list_del(&e->list);
1136 kfree(e); 1388 kfree(e);
@@ -1160,6 +1412,7 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
1160 INIT_WORK(&psock->tx_work, smap_tx_work); 1412 INIT_WORK(&psock->tx_work, smap_tx_work);
1161 INIT_WORK(&psock->gc_work, smap_gc_work); 1413 INIT_WORK(&psock->gc_work, smap_gc_work);
1162 INIT_LIST_HEAD(&psock->maps); 1414 INIT_LIST_HEAD(&psock->maps);
1415 INIT_LIST_HEAD(&psock->ingress);
1163 refcount_set(&psock->refcnt, 1); 1416 refcount_set(&psock->refcnt, 1);
1164 1417
1165 rcu_assign_sk_user_data(sock, psock); 1418 rcu_assign_sk_user_data(sock, psock);
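
[Annotation] To show what the new flag buys on the BPF side, here is a minimal sk_skb verdict program that redirects traffic into the ingress queue of the socket stored at map slot 0. This is a sketch using the 2018-era sample conventions (bpf_helpers.h, bpf_map_def, and the section names are assumptions, not taken from this patch):

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    struct bpf_map_def SEC("maps") sock_map = {
            .type        = BPF_MAP_TYPE_SOCKMAP,
            .key_size    = sizeof(int),
            .value_size  = sizeof(int),
            .max_entries = 2,
    };

    SEC("sk_skb/stream_verdict")
    int prog_verdict(struct __sk_buff *skb)
    {
            /* Queue the skb on the receive path of the target socket
             * rather than its transmit path; without BPF_F_INGRESS the
             * helper behaves exactly as before.
             */
            return bpf_sk_redirect_map(skb, &sock_map, 0, BPF_F_INGRESS);
    }

    char _license[] SEC("license") = "GPL";

The redirected data is then served to the application by the new bpf_tcp_recvmsg()/bpf_tcp_stream_read() path added above.
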
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index dd172ee16716..0244973ee544 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -203,11 +203,13 @@ static int bpf_map_alloc_id(struct bpf_map *map)
203{ 203{
204 int id; 204 int id;
205 205
206 idr_preload(GFP_KERNEL);
206 spin_lock_bh(&map_idr_lock); 207 spin_lock_bh(&map_idr_lock);
207 id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC); 208 id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
208 if (id > 0) 209 if (id > 0)
209 map->id = id; 210 map->id = id;
210 spin_unlock_bh(&map_idr_lock); 211 spin_unlock_bh(&map_idr_lock);
212 idr_preload_end();
211 213
212 if (WARN_ON_ONCE(!id)) 214 if (WARN_ON_ONCE(!id))
213 return -ENOSPC; 215 return -ENOSPC;
@@ -940,11 +942,13 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog)
940{ 942{
941 int id; 943 int id;
942 944
945 idr_preload(GFP_KERNEL);
943 spin_lock_bh(&prog_idr_lock); 946 spin_lock_bh(&prog_idr_lock);
944 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC); 947 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
945 if (id > 0) 948 if (id > 0)
946 prog->aux->id = id; 949 prog->aux->id = id;
947 spin_unlock_bh(&prog_idr_lock); 950 spin_unlock_bh(&prog_idr_lock);
951 idr_preload_end();
948 952
949 /* id is in [1, INT_MAX) */ 953 /* id is in [1, INT_MAX) */
950 if (WARN_ON_ONCE(!id)) 954 if (WARN_ON_ONCE(!id))
@@ -1167,8 +1171,75 @@ struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
1167} 1171}
1168EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); 1172EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
1169 1173
1174/* Initially all BPF programs could be loaded w/o specifying
1175 * expected_attach_type. Later for some of them specifying expected_attach_type
1176 * at load time became required so that program could be validated properly.
1177 * Programs of types that are allowed to be loaded both w/ and w/o (for
1178 * backward compatibility) expected_attach_type, should have the default attach
1179 * type assigned to expected_attach_type for the latter case, so that it can be
1180 * validated later at attach time.
1181 *
1182 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if
1183 * prog type requires it but has some attach types that have to be backward
1184 * compatible.
1185 */
1186static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
1187{
1188 switch (attr->prog_type) {
1189 case BPF_PROG_TYPE_CGROUP_SOCK:
1190 /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't
1191 * exist so checking for non-zero is the way to go here.
1192 */
1193 if (!attr->expected_attach_type)
1194 attr->expected_attach_type =
1195 BPF_CGROUP_INET_SOCK_CREATE;
1196 break;
1197 }
1198}
1199
1200static int
1201bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
1202 enum bpf_attach_type expected_attach_type)
1203{
1204 switch (prog_type) {
1205 case BPF_PROG_TYPE_CGROUP_SOCK:
1206 switch (expected_attach_type) {
1207 case BPF_CGROUP_INET_SOCK_CREATE:
1208 case BPF_CGROUP_INET4_POST_BIND:
1209 case BPF_CGROUP_INET6_POST_BIND:
1210 return 0;
1211 default:
1212 return -EINVAL;
1213 }
1214 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
1215 switch (expected_attach_type) {
1216 case BPF_CGROUP_INET4_BIND:
1217 case BPF_CGROUP_INET6_BIND:
1218 case BPF_CGROUP_INET4_CONNECT:
1219 case BPF_CGROUP_INET6_CONNECT:
1220 return 0;
1221 default:
1222 return -EINVAL;
1223 }
1224 default:
1225 return 0;
1226 }
1227}
1228
1229static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
1230 enum bpf_attach_type attach_type)
1231{
1232 switch (prog->type) {
1233 case BPF_PROG_TYPE_CGROUP_SOCK:
1234 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
1235 return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
1236 default:
1237 return 0;
1238 }
1239}
1240
1170/* last field in 'union bpf_attr' used by this command */ 1241/* last field in 'union bpf_attr' used by this command */
1171#define BPF_PROG_LOAD_LAST_FIELD prog_ifindex 1242#define BPF_PROG_LOAD_LAST_FIELD expected_attach_type
1172 1243
1173static int bpf_prog_load(union bpf_attr *attr) 1244static int bpf_prog_load(union bpf_attr *attr)
1174{ 1245{
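
[Annotation] From userspace, the new last field means affected program types now declare their attach point at load time. A sketch of loading a BPF_PROG_TYPE_CGROUP_SOCK_ADDR program for the connect4 hook, where ptr_to_u64() is the usual samples-style cast helper and error handling is elided:

    union bpf_attr attr = {};

    attr.prog_type            = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
    attr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
    attr.insns                = ptr_to_u64(insns);
    attr.insn_cnt             = insn_cnt;
    attr.license              = ptr_to_u64("GPL");

    prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));

Per bpf_prog_load_check_attach_type() above, passing an attach type outside the allowed set for the program type fails the load with -EINVAL.
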
@@ -1205,11 +1276,17 @@ static int bpf_prog_load(union bpf_attr *attr)
1205 !capable(CAP_SYS_ADMIN)) 1276 !capable(CAP_SYS_ADMIN))
1206 return -EPERM; 1277 return -EPERM;
1207 1278
1279 bpf_prog_load_fixup_attach_type(attr);
1280 if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type))
1281 return -EINVAL;
1282
1208 /* plain bpf_prog allocation */ 1283 /* plain bpf_prog allocation */
1209 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 1284 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
1210 if (!prog) 1285 if (!prog)
1211 return -ENOMEM; 1286 return -ENOMEM;
1212 1287
1288 prog->expected_attach_type = attr->expected_attach_type;
1289
1213 prog->aux->offload_requested = !!attr->prog_ifindex; 1290 prog->aux->offload_requested = !!attr->prog_ifindex;
1214 1291
1215 err = security_bpf_prog_alloc(prog->aux); 1292 err = security_bpf_prog_alloc(prog->aux);
@@ -1311,6 +1388,81 @@ static int bpf_obj_get(const union bpf_attr *attr)
1311 attr->file_flags); 1388 attr->file_flags);
1312} 1389}
1313 1390
1391struct bpf_raw_tracepoint {
1392 struct bpf_raw_event_map *btp;
1393 struct bpf_prog *prog;
1394};
1395
1396static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
1397{
1398 struct bpf_raw_tracepoint *raw_tp = filp->private_data;
1399
1400 if (raw_tp->prog) {
1401 bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
1402 bpf_prog_put(raw_tp->prog);
1403 }
1404 kfree(raw_tp);
1405 return 0;
1406}
1407
1408static const struct file_operations bpf_raw_tp_fops = {
1409 .release = bpf_raw_tracepoint_release,
1410 .read = bpf_dummy_read,
1411 .write = bpf_dummy_write,
1412};
1413
1414#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
1415
1416static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
1417{
1418 struct bpf_raw_tracepoint *raw_tp;
1419 struct bpf_raw_event_map *btp;
1420 struct bpf_prog *prog;
1421 char tp_name[128];
1422 int tp_fd, err;
1423
1424 if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name),
1425 sizeof(tp_name) - 1) < 0)
1426 return -EFAULT;
1427 tp_name[sizeof(tp_name) - 1] = 0;
1428
1429 btp = bpf_find_raw_tracepoint(tp_name);
1430 if (!btp)
1431 return -ENOENT;
1432
1433 raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
1434 if (!raw_tp)
1435 return -ENOMEM;
1436 raw_tp->btp = btp;
1437
1438 prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
1439 BPF_PROG_TYPE_RAW_TRACEPOINT);
1440 if (IS_ERR(prog)) {
1441 err = PTR_ERR(prog);
1442 goto out_free_tp;
1443 }
1444
1445 err = bpf_probe_register(raw_tp->btp, prog);
1446 if (err)
1447 goto out_put_prog;
1448
1449 raw_tp->prog = prog;
1450 tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
1451 O_CLOEXEC);
1452 if (tp_fd < 0) {
1453 bpf_probe_unregister(raw_tp->btp, prog);
1454 err = tp_fd;
1455 goto out_put_prog;
1456 }
1457 return tp_fd;
1458
1459out_put_prog:
1460 bpf_prog_put(prog);
1461out_free_tp:
1462 kfree(raw_tp);
1463 return err;
1464}
1465
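
[Annotation] Userspace drives the new command through the raw_tracepoint fields of union bpf_attr. A sketch, assuming prog_fd refers to an already loaded BPF_PROG_TYPE_RAW_TRACEPOINT program and ptr_to_u64() as above:

    union bpf_attr attr = {};

    attr.raw_tracepoint.name    = ptr_to_u64("sched_switch");
    attr.raw_tracepoint.prog_fd = prog_fd;

    tp_fd = syscall(__NR_bpf, BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));

    /* The returned anon-inode fd pins the attachment: closing tp_fd
     * detaches the program and unregisters the tracepoint probe via
     * bpf_raw_tracepoint_release() above.
     */
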
1314#ifdef CONFIG_CGROUP_BPF 1466#ifdef CONFIG_CGROUP_BPF
1315 1467
1316#define BPF_PROG_ATTACH_LAST_FIELD attach_flags 1468#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
@@ -1374,8 +1526,16 @@ static int bpf_prog_attach(const union bpf_attr *attr)
1374 ptype = BPF_PROG_TYPE_CGROUP_SKB; 1526 ptype = BPF_PROG_TYPE_CGROUP_SKB;
1375 break; 1527 break;
1376 case BPF_CGROUP_INET_SOCK_CREATE: 1528 case BPF_CGROUP_INET_SOCK_CREATE:
1529 case BPF_CGROUP_INET4_POST_BIND:
1530 case BPF_CGROUP_INET6_POST_BIND:
1377 ptype = BPF_PROG_TYPE_CGROUP_SOCK; 1531 ptype = BPF_PROG_TYPE_CGROUP_SOCK;
1378 break; 1532 break;
1533 case BPF_CGROUP_INET4_BIND:
1534 case BPF_CGROUP_INET6_BIND:
1535 case BPF_CGROUP_INET4_CONNECT:
1536 case BPF_CGROUP_INET6_CONNECT:
1537 ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
1538 break;
1379 case BPF_CGROUP_SOCK_OPS: 1539 case BPF_CGROUP_SOCK_OPS:
1380 ptype = BPF_PROG_TYPE_SOCK_OPS; 1540 ptype = BPF_PROG_TYPE_SOCK_OPS;
1381 break; 1541 break;
@@ -1395,6 +1555,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
1395 if (IS_ERR(prog)) 1555 if (IS_ERR(prog))
1396 return PTR_ERR(prog); 1556 return PTR_ERR(prog);
1397 1557
1558 if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
1559 bpf_prog_put(prog);
1560 return -EINVAL;
1561 }
1562
1398 cgrp = cgroup_get_from_fd(attr->target_fd); 1563 cgrp = cgroup_get_from_fd(attr->target_fd);
1399 if (IS_ERR(cgrp)) { 1564 if (IS_ERR(cgrp)) {
1400 bpf_prog_put(prog); 1565 bpf_prog_put(prog);
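
[Annotation] Attaching at one of the new hooks is the same BPF_PROG_ATTACH flow as before, only with the new attach_type values; note that bpf_prog_attach_check_attach_type() now requires the attach type to match the expected_attach_type recorded at load time. Sketch (cgroup_fd and prog_fd assumed to be valid descriptors):

    union bpf_attr attr = {};

    attr.target_fd     = cgroup_fd;
    attr.attach_bpf_fd = prog_fd;
    attr.attach_type   = BPF_CGROUP_INET4_CONNECT;

    err = syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
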
@@ -1431,8 +1596,16 @@ static int bpf_prog_detach(const union bpf_attr *attr)
1431 ptype = BPF_PROG_TYPE_CGROUP_SKB; 1596 ptype = BPF_PROG_TYPE_CGROUP_SKB;
1432 break; 1597 break;
1433 case BPF_CGROUP_INET_SOCK_CREATE: 1598 case BPF_CGROUP_INET_SOCK_CREATE:
1599 case BPF_CGROUP_INET4_POST_BIND:
1600 case BPF_CGROUP_INET6_POST_BIND:
1434 ptype = BPF_PROG_TYPE_CGROUP_SOCK; 1601 ptype = BPF_PROG_TYPE_CGROUP_SOCK;
1435 break; 1602 break;
1603 case BPF_CGROUP_INET4_BIND:
1604 case BPF_CGROUP_INET6_BIND:
1605 case BPF_CGROUP_INET4_CONNECT:
1606 case BPF_CGROUP_INET6_CONNECT:
1607 ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
1608 break;
1436 case BPF_CGROUP_SOCK_OPS: 1609 case BPF_CGROUP_SOCK_OPS:
1437 ptype = BPF_PROG_TYPE_SOCK_OPS; 1610 ptype = BPF_PROG_TYPE_SOCK_OPS;
1438 break; 1611 break;
@@ -1482,6 +1655,12 @@ static int bpf_prog_query(const union bpf_attr *attr,
1482 case BPF_CGROUP_INET_INGRESS: 1655 case BPF_CGROUP_INET_INGRESS:
1483 case BPF_CGROUP_INET_EGRESS: 1656 case BPF_CGROUP_INET_EGRESS:
1484 case BPF_CGROUP_INET_SOCK_CREATE: 1657 case BPF_CGROUP_INET_SOCK_CREATE:
1658 case BPF_CGROUP_INET4_BIND:
1659 case BPF_CGROUP_INET6_BIND:
1660 case BPF_CGROUP_INET4_POST_BIND:
1661 case BPF_CGROUP_INET6_POST_BIND:
1662 case BPF_CGROUP_INET4_CONNECT:
1663 case BPF_CGROUP_INET6_CONNECT:
1485 case BPF_CGROUP_SOCK_OPS: 1664 case BPF_CGROUP_SOCK_OPS:
1486 case BPF_CGROUP_DEVICE: 1665 case BPF_CGROUP_DEVICE:
1487 break; 1666 break;
@@ -1921,6 +2100,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
1921 case BPF_OBJ_GET_INFO_BY_FD: 2100 case BPF_OBJ_GET_INFO_BY_FD:
1922 err = bpf_obj_get_info_by_fd(&attr, uattr); 2101 err = bpf_obj_get_info_by_fd(&attr, uattr);
1923 break; 2102 break;
2103 case BPF_RAW_TRACEPOINT_OPEN:
2104 err = bpf_raw_tracepoint_open(&attr);
2105 break;
1924 default: 2106 default:
1925 err = -EINVAL; 2107 err = -EINVAL;
1926 break; 2108 break;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e9f7c20691c1..5dd1dcb902bf 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -168,23 +168,12 @@ struct bpf_call_arg_meta {
168 168
169static DEFINE_MUTEX(bpf_verifier_lock); 169static DEFINE_MUTEX(bpf_verifier_lock);
170 170
171/* log_level controls verbosity level of eBPF verifier. 171void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
172 * bpf_verifier_log_write() is used to dump the verification trace to the log, 172 va_list args)
173 * so the user can figure out what's wrong with the program
174 */
175__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
176 const char *fmt, ...)
177{ 173{
178 struct bpf_verifer_log *log = &env->log;
179 unsigned int n; 174 unsigned int n;
180 va_list args;
181
182 if (!log->level || !log->ubuf || bpf_verifier_log_full(log))
183 return;
184 175
185 va_start(args, fmt);
186 n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args); 176 n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
187 va_end(args);
188 177
189 WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, 178 WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
190 "verifier log line truncated - local buffer too short\n"); 179 "verifier log line truncated - local buffer too short\n");
@@ -197,14 +186,37 @@ __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
197 else 186 else
198 log->ubuf = NULL; 187 log->ubuf = NULL;
199} 188}
200EXPORT_SYMBOL_GPL(bpf_verifier_log_write); 189
201/* Historically bpf_verifier_log_write was called verbose, but the name was too 190/* log_level controls verbosity level of eBPF verifier.
202 * generic for symbol export. The function was renamed, but not the calls in 191 * bpf_verifier_log_write() is used to dump the verification trace to the log,
203 * the verifier to avoid complicating backports. Hence the alias below. 192 * so the user can figure out what's wrong with the program
204 */ 193 */
205static __printf(2, 3) void verbose(struct bpf_verifier_env *env, 194__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
206 const char *fmt, ...) 195 const char *fmt, ...)
207 __attribute__((alias("bpf_verifier_log_write"))); 196{
197 va_list args;
198
199 if (!bpf_verifier_log_needed(&env->log))
200 return;
201
202 va_start(args, fmt);
203 bpf_verifier_vlog(&env->log, fmt, args);
204 va_end(args);
205}
206EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
207
208__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
209{
210 struct bpf_verifier_env *env = private_data;
211 va_list args;
212
213 if (!bpf_verifier_log_needed(&env->log))
214 return;
215
216 va_start(args, fmt);
217 bpf_verifier_vlog(&env->log, fmt, args);
218 va_end(args);
219}
208 220
209static bool type_is_pkt_pointer(enum bpf_reg_type type) 221static bool type_is_pkt_pointer(enum bpf_reg_type type)
210{ 222{
@@ -1311,7 +1323,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
1311 }; 1323 };
1312 1324
1313 if (env->ops->is_valid_access && 1325 if (env->ops->is_valid_access &&
1314 env->ops->is_valid_access(off, size, t, &info)) { 1326 env->ops->is_valid_access(off, size, t, env->prog, &info)) {
1315 /* A non zero info.ctx_field_size indicates that this field is a 1327 /* A non zero info.ctx_field_size indicates that this field is a
1316 * candidate for later verifier transformation to load the whole 1328 * candidate for later verifier transformation to load the whole
1317 * field and then apply a mask when accessed with a narrower 1329 * field and then apply a mask when accessed with a narrower
@@ -2337,7 +2349,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
2337 } 2349 }
2338 2350
2339 if (env->ops->get_func_proto) 2351 if (env->ops->get_func_proto)
2340 fn = env->ops->get_func_proto(func_id); 2352 fn = env->ops->get_func_proto(func_id, env->prog);
2341 if (!fn) { 2353 if (!fn) {
2342 verbose(env, "unknown func %s#%d\n", func_id_name(func_id), 2354 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
2343 func_id); 2355 func_id);
@@ -3875,6 +3887,7 @@ static int check_return_code(struct bpf_verifier_env *env)
3875 switch (env->prog->type) { 3887 switch (env->prog->type) {
3876 case BPF_PROG_TYPE_CGROUP_SKB: 3888 case BPF_PROG_TYPE_CGROUP_SKB:
3877 case BPF_PROG_TYPE_CGROUP_SOCK: 3889 case BPF_PROG_TYPE_CGROUP_SOCK:
3890 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3878 case BPF_PROG_TYPE_SOCK_OPS: 3891 case BPF_PROG_TYPE_SOCK_OPS:
3879 case BPF_PROG_TYPE_CGROUP_DEVICE: 3892 case BPF_PROG_TYPE_CGROUP_DEVICE:
3880 break; 3893 break;
@@ -4600,10 +4613,11 @@ static int do_check(struct bpf_verifier_env *env)
4600 if (env->log.level) { 4613 if (env->log.level) {
4601 const struct bpf_insn_cbs cbs = { 4614 const struct bpf_insn_cbs cbs = {
4602 .cb_print = verbose, 4615 .cb_print = verbose,
4616 .private_data = env,
4603 }; 4617 };
4604 4618
4605 verbose(env, "%d: ", insn_idx); 4619 verbose(env, "%d: ", insn_idx);
4606 print_bpf_insn(&cbs, env, insn, env->allow_ptr_leaks); 4620 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
4607 } 4621 }
4608 4622
4609 if (bpf_prog_is_dev_bound(env->prog->aux)) { 4623 if (bpf_prog_is_dev_bound(env->prog->aux)) {
@@ -5559,7 +5573,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
5559 insn = new_prog->insnsi + i + delta; 5573 insn = new_prog->insnsi + i + delta;
5560 } 5574 }
5561patch_call_imm: 5575patch_call_imm:
5562 fn = env->ops->get_func_proto(insn->imm); 5576 fn = env->ops->get_func_proto(insn->imm, env->prog);
5563 /* all functions that have prototype and verifier allowed 5577 /* all functions that have prototype and verifier allowed
5564 * programs to call them, must be real in-kernel functions 5578 * programs to call them, must be real in-kernel functions
5565 */ 5579 */
@@ -5601,7 +5615,7 @@ static void free_states(struct bpf_verifier_env *env)
5601int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) 5615int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
5602{ 5616{
5603 struct bpf_verifier_env *env; 5617 struct bpf_verifier_env *env;
5604 struct bpf_verifer_log *log; 5618 struct bpf_verifier_log *log;
5605 int ret = -EINVAL; 5619 int ret = -EINVAL;
5606 5620
5607 /* no program is valid */ 5621 /* no program is valid */
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 7f9691c86b6e..d88e96d4e12c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -524,7 +524,8 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = {
524 .arg3_type = ARG_ANYTHING, 524 .arg3_type = ARG_ANYTHING,
525}; 525};
526 526
527static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) 527static const struct bpf_func_proto *
528tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
528{ 529{
529 switch (func_id) { 530 switch (func_id) {
530 case BPF_FUNC_map_lookup_elem: 531 case BPF_FUNC_map_lookup_elem:
@@ -568,7 +569,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
568 } 569 }
569} 570}
570 571
571static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) 572static const struct bpf_func_proto *
573kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
572{ 574{
573 switch (func_id) { 575 switch (func_id) {
574 case BPF_FUNC_perf_event_output: 576 case BPF_FUNC_perf_event_output:
@@ -582,12 +584,13 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
582 return &bpf_override_return_proto; 584 return &bpf_override_return_proto;
583#endif 585#endif
584 default: 586 default:
585 return tracing_func_proto(func_id); 587 return tracing_func_proto(func_id, prog);
586 } 588 }
587} 589}
588 590
589/* bpf+kprobe programs can access fields of 'struct pt_regs' */ 591/* bpf+kprobe programs can access fields of 'struct pt_regs' */
590static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type, 592static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
593 const struct bpf_prog *prog,
591 struct bpf_insn_access_aux *info) 594 struct bpf_insn_access_aux *info)
592{ 595{
593 if (off < 0 || off >= sizeof(struct pt_regs)) 596 if (off < 0 || off >= sizeof(struct pt_regs))
@@ -661,7 +664,8 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
661 .arg3_type = ARG_ANYTHING, 664 .arg3_type = ARG_ANYTHING,
662}; 665};
663 666
664static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) 667static const struct bpf_func_proto *
668tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
665{ 669{
666 switch (func_id) { 670 switch (func_id) {
667 case BPF_FUNC_perf_event_output: 671 case BPF_FUNC_perf_event_output:
@@ -669,11 +673,12 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
669 case BPF_FUNC_get_stackid: 673 case BPF_FUNC_get_stackid:
670 return &bpf_get_stackid_proto_tp; 674 return &bpf_get_stackid_proto_tp;
671 default: 675 default:
672 return tracing_func_proto(func_id); 676 return tracing_func_proto(func_id, prog);
673 } 677 }
674} 678}
675 679
676static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, 680static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
681 const struct bpf_prog *prog,
677 struct bpf_insn_access_aux *info) 682 struct bpf_insn_access_aux *info)
678{ 683{
679 if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) 684 if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
@@ -721,7 +726,8 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
721 .arg3_type = ARG_CONST_SIZE, 726 .arg3_type = ARG_CONST_SIZE,
722}; 727};
723 728
724static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id) 729static const struct bpf_func_proto *
730pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
725{ 731{
726 switch (func_id) { 732 switch (func_id) {
727 case BPF_FUNC_perf_event_output: 733 case BPF_FUNC_perf_event_output:
@@ -731,11 +737,94 @@ static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id)
731 case BPF_FUNC_perf_prog_read_value: 737 case BPF_FUNC_perf_prog_read_value:
732 return &bpf_perf_prog_read_value_proto; 738 return &bpf_perf_prog_read_value_proto;
733 default: 739 default:
734 return tracing_func_proto(func_id); 740 return tracing_func_proto(func_id, prog);
735 } 741 }
736} 742}
737 743
744/*
745 * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
746 * to avoid potential recursive reuse issue when/if tracepoints are added
747 * inside bpf_*_event_output and/or bpf_get_stack_id
748 */
749static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
750BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
751 struct bpf_map *, map, u64, flags, void *, data, u64, size)
752{
753 struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
754
755 perf_fetch_caller_regs(regs);
756 return ____bpf_perf_event_output(regs, map, flags, data, size);
757}
758
759static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
760 .func = bpf_perf_event_output_raw_tp,
761 .gpl_only = true,
762 .ret_type = RET_INTEGER,
763 .arg1_type = ARG_PTR_TO_CTX,
764 .arg2_type = ARG_CONST_MAP_PTR,
765 .arg3_type = ARG_ANYTHING,
766 .arg4_type = ARG_PTR_TO_MEM,
767 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
768};
769
770BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
771 struct bpf_map *, map, u64, flags)
772{
773 struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
774
775 perf_fetch_caller_regs(regs);
776 /* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
777 return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
778 flags, 0, 0);
779}
780
781static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
782 .func = bpf_get_stackid_raw_tp,
783 .gpl_only = true,
784 .ret_type = RET_INTEGER,
785 .arg1_type = ARG_PTR_TO_CTX,
786 .arg2_type = ARG_CONST_MAP_PTR,
787 .arg3_type = ARG_ANYTHING,
788};
789
790static const struct bpf_func_proto *
791raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
792{
793 switch (func_id) {
794 case BPF_FUNC_perf_event_output:
795 return &bpf_perf_event_output_proto_raw_tp;
796 case BPF_FUNC_get_stackid:
797 return &bpf_get_stackid_proto_raw_tp;
798 default:
799 return tracing_func_proto(func_id, prog);
800 }
801}
802
803static bool raw_tp_prog_is_valid_access(int off, int size,
804 enum bpf_access_type type,
805 const struct bpf_prog *prog,
806 struct bpf_insn_access_aux *info)
807{
808 /* largest tracepoint in the kernel has 12 args */
809 if (off < 0 || off >= sizeof(__u64) * 12)
810 return false;
811 if (type != BPF_READ)
812 return false;
813 if (off % size != 0)
814 return false;
815 return true;
816}
817
818const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
819 .get_func_proto = raw_tp_prog_func_proto,
820 .is_valid_access = raw_tp_prog_is_valid_access,
821};
822
823const struct bpf_prog_ops raw_tracepoint_prog_ops = {
824};
825
738static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, 826static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
827 const struct bpf_prog *prog,
739 struct bpf_insn_access_aux *info) 828 struct bpf_insn_access_aux *info)
740{ 829{
741 const int size_u64 = sizeof(u64); 830 const int size_u64 = sizeof(u64);
@@ -908,3 +997,106 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
908 997
909 return ret; 998 return ret;
910} 999}
1000
1001extern struct bpf_raw_event_map __start__bpf_raw_tp[];
1002extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
1003
1004struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
1005{
1006 struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
1007
1008 for (; btp < __stop__bpf_raw_tp; btp++) {
1009 if (!strcmp(btp->tp->name, name))
1010 return btp;
1011 }
1012 return NULL;
1013}
1014
1015static __always_inline
1016void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
1017{
1018 rcu_read_lock();
1019 preempt_disable();
1020 (void) BPF_PROG_RUN(prog, args);
1021 preempt_enable();
1022 rcu_read_unlock();
1023}
1024
1025#define UNPACK(...) __VA_ARGS__
1026#define REPEAT_1(FN, DL, X, ...) FN(X)
1027#define REPEAT_2(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
1028#define REPEAT_3(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
1029#define REPEAT_4(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
1030#define REPEAT_5(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
1031#define REPEAT_6(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
1032#define REPEAT_7(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
1033#define REPEAT_8(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
1034#define REPEAT_9(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
1035#define REPEAT_10(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__)
1036#define REPEAT_11(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__)
1037#define REPEAT_12(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__)
1038#define REPEAT(X, FN, DL, ...) REPEAT_##X(FN, DL, __VA_ARGS__)
1039
1040#define SARG(X) u64 arg##X
1041#define COPY(X) args[X] = arg##X
1042
1043#define __DL_COM (,)
1044#define __DL_SEM (;)
1045
1046#define __SEQ_0_11 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
1047
1048#define BPF_TRACE_DEFN_x(x) \
1049 void bpf_trace_run##x(struct bpf_prog *prog, \
1050 REPEAT(x, SARG, __DL_COM, __SEQ_0_11)) \
1051 { \
1052 u64 args[x]; \
1053 REPEAT(x, COPY, __DL_SEM, __SEQ_0_11); \
1054 __bpf_trace_run(prog, args); \
1055 } \
1056 EXPORT_SYMBOL_GPL(bpf_trace_run##x)
1057BPF_TRACE_DEFN_x(1);
1058BPF_TRACE_DEFN_x(2);
1059BPF_TRACE_DEFN_x(3);
1060BPF_TRACE_DEFN_x(4);
1061BPF_TRACE_DEFN_x(5);
1062BPF_TRACE_DEFN_x(6);
1063BPF_TRACE_DEFN_x(7);
1064BPF_TRACE_DEFN_x(8);
1065BPF_TRACE_DEFN_x(9);
1066BPF_TRACE_DEFN_x(10);
1067BPF_TRACE_DEFN_x(11);
1068BPF_TRACE_DEFN_x(12);
1069
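
[Annotation] The REPEAT()/UNPACK() machinery above is dense; for reference, BPF_TRACE_DEFN_x(3) expands to roughly the following (expanded by hand, modulo whitespace):

    void bpf_trace_run3(struct bpf_prog *prog, u64 arg0, u64 arg1, u64 arg2)
    {
            u64 args[3];

            args[0] = arg0;
            args[1] = arg1;
            args[2] = arg2;
            __bpf_trace_run(prog, args);
    }
    EXPORT_SYMBOL_GPL(bpf_trace_run3);

So each bpf_trace_runN() simply marshals the raw tracepoint arguments into a u64 array, which is what the BPF program sees as its context, and runs the program under rcu_read_lock() with preemption disabled.
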
1070static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
1071{
1072 struct tracepoint *tp = btp->tp;
1073
1074 /*
1075 * check that program doesn't access arguments beyond what's
1076 * available in this tracepoint
1077 */
1078 if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
1079 return -EINVAL;
1080
1081 return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
1082}
1083
1084int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
1085{
1086 int err;
1087
1088 mutex_lock(&bpf_event_mutex);
1089 err = __bpf_probe_register(btp, prog);
1090 mutex_unlock(&bpf_event_mutex);
1091 return err;
1092}
1093
1094int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
1095{
1096 int err;
1097
1098 mutex_lock(&bpf_event_mutex);
1099 err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
1100 mutex_unlock(&bpf_event_mutex);
1101 return err;
1102}
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 53728d391d3a..06dad7a072fd 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -132,14 +132,7 @@ EXPORT_SYMBOL(sg_last);
132void sg_init_table(struct scatterlist *sgl, unsigned int nents) 132void sg_init_table(struct scatterlist *sgl, unsigned int nents)
133{ 133{
134 memset(sgl, 0, sizeof(*sgl) * nents); 134 memset(sgl, 0, sizeof(*sgl) * nents);
135#ifdef CONFIG_DEBUG_SG 135 sg_init_marker(sgl, nents);
136 {
137 unsigned int i;
138 for (i = 0; i < nents; i++)
139 sgl[i].sg_magic = SG_MAGIC;
140 }
141#endif
142 sg_mark_end(&sgl[nents - 1]);
143} 136}
144EXPORT_SYMBOL(sg_init_table); 137EXPORT_SYMBOL(sg_init_table);
145 138
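
[Annotation] The sg_init_marker() helper this hunk factors out lives in include/linux/scatterlist.h (added elsewhere in this series, not shown here). Reconstructed from the code it replaces, it is essentially:

    static inline void sg_init_marker(struct scatterlist *sgl,
                                      unsigned int nents)
    {
    #ifdef CONFIG_DEBUG_SG
            unsigned int i;

            /* Stamp each entry so CONFIG_DEBUG_SG checks don't trip. */
            for (i = 0; i < nents; i++)
                    sgl[i].sg_magic = SG_MAGIC;
    #endif
            sg_mark_end(&sgl[nents - 1]);
    }

This lets the sockmap code above set the debug magic and end marker on scatterlists it has already zeroed, without redundantly memset()ing them as sg_init_table() would.
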
diff --git a/net/core/filter.c b/net/core/filter.c
index 00c711c5f1a2..d31aff93270d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -33,6 +33,7 @@
33#include <linux/if_packet.h> 33#include <linux/if_packet.h>
34#include <linux/if_arp.h> 34#include <linux/if_arp.h>
35#include <linux/gfp.h> 35#include <linux/gfp.h>
36#include <net/inet_common.h>
36#include <net/ip.h> 37#include <net/ip.h>
37#include <net/protocol.h> 38#include <net/protocol.h>
38#include <net/netlink.h> 39#include <net/netlink.h>
@@ -1855,7 +1856,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
1855 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); 1856 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
1856 1857
1857 /* If user passes invalid input drop the packet. */ 1858 /* If user passes invalid input drop the packet. */
1858 if (unlikely(flags)) 1859 if (unlikely(flags & ~(BPF_F_INGRESS)))
1859 return SK_DROP; 1860 return SK_DROP;
1860 1861
1861 tcb->bpf.key = key; 1862 tcb->bpf.key = key;
@@ -1894,7 +1895,7 @@ BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
1894 struct bpf_map *, map, u32, key, u64, flags) 1895 struct bpf_map *, map, u32, key, u64, flags)
1895{ 1896{
1896 /* If user passes invalid input drop the packet. */ 1897 /* If user passes invalid input drop the packet. */
1897 if (unlikely(flags)) 1898 if (unlikely(flags & ~(BPF_F_INGRESS)))
1898 return SK_DROP; 1899 return SK_DROP;
1899 1900
1900 msg->key = key; 1901 msg->key = key;
@@ -3462,6 +3463,27 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
3462 ret = -EINVAL; 3463 ret = -EINVAL;
3463 } 3464 }
3464#ifdef CONFIG_INET 3465#ifdef CONFIG_INET
3466 } else if (level == SOL_IP) {
3467 if (optlen != sizeof(int) || sk->sk_family != AF_INET)
3468 return -EINVAL;
3469
3470 val = *((int *)optval);
3471 /* Only some options are supported */
3472 switch (optname) {
3473 case IP_TOS:
3474 if (val < -1 || val > 0xff) {
3475 ret = -EINVAL;
3476 } else {
3477 struct inet_sock *inet = inet_sk(sk);
3478
3479 if (val == -1)
3480 val = 0;
3481 inet->tos = val;
3482 }
3483 break;
3484 default:
3485 ret = -EINVAL;
3486 }
3465#if IS_ENABLED(CONFIG_IPV6) 3487#if IS_ENABLED(CONFIG_IPV6)
3466 } else if (level == SOL_IPV6) { 3488 } else if (level == SOL_IPV6) {
3467 if (optlen != sizeof(int) || sk->sk_family != AF_INET6) 3489 if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
@@ -3561,6 +3583,20 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
3561 } else { 3583 } else {
3562 goto err_clear; 3584 goto err_clear;
3563 } 3585 }
3586 } else if (level == SOL_IP) {
3587 struct inet_sock *inet = inet_sk(sk);
3588
3589 if (optlen != sizeof(int) || sk->sk_family != AF_INET)
3590 goto err_clear;
3591
3592 /* Only some options are supported */
3593 switch (optname) {
3594 case IP_TOS:
3595 *((int *)optval) = (int)inet->tos;
3596 break;
3597 default:
3598 goto err_clear;
3599 }
3564#if IS_ENABLED(CONFIG_IPV6) 3600#if IS_ENABLED(CONFIG_IPV6)
3565 } else if (level == SOL_IPV6) { 3601 } else if (level == SOL_IPV6) {
3566 struct ipv6_pinfo *np = inet6_sk(sk); 3602 struct ipv6_pinfo *np = inet6_sk(sk);
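
[Annotation] A sock_ops program can now manage IPv4 TOS symmetrically with the existing IPV6_TCLASS support. Minimal sketch (the section name and choice of callback are illustrative):

    SEC("sockops")
    int set_tos(struct bpf_sock_ops *skops)
    {
            int tos = 0x10;

            /* Set TOS once the active connection is being established;
             * per the hunk above, values outside -1..0xff get -EINVAL.
             */
            if (skops->op == BPF_SOCK_OPS_TCP_CONNECT_CB)
                    bpf_setsockopt(skops, SOL_IP, IP_TOS, &tos, sizeof(tos));
            return 1;
    }
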
@@ -3621,6 +3657,52 @@ static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
3621 .arg2_type = ARG_ANYTHING, 3657 .arg2_type = ARG_ANYTHING,
3622}; 3658};
3623 3659
3660const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
3661EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
3662
3663BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
3664 int, addr_len)
3665{
3666#ifdef CONFIG_INET
3667 struct sock *sk = ctx->sk;
3668 int err;
3669
3670 /* Binding to port can be expensive so it's prohibited in the helper.
3671 * Only binding to IP is supported.
3672 */
3673 err = -EINVAL;
3674 if (addr->sa_family == AF_INET) {
3675 if (addr_len < sizeof(struct sockaddr_in))
3676 return err;
3677 if (((struct sockaddr_in *)addr)->sin_port != htons(0))
3678 return err;
3679 return __inet_bind(sk, addr, addr_len, true, false);
3680#if IS_ENABLED(CONFIG_IPV6)
3681 } else if (addr->sa_family == AF_INET6) {
3682 if (addr_len < SIN6_LEN_RFC2133)
3683 return err;
3684 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
3685 return err;
3686 /* ipv6_bpf_stub cannot be NULL, since it's called from
3687 * bpf_cgroup_inet6_connect hook and ipv6 is already loaded
3688 */
3689 return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false);
3690#endif /* CONFIG_IPV6 */
3691 }
3692#endif /* CONFIG_INET */
3693
3694 return -EAFNOSUPPORT;
3695}
3696
3697static const struct bpf_func_proto bpf_bind_proto = {
3698 .func = bpf_bind,
3699 .gpl_only = false,
3700 .ret_type = RET_INTEGER,
3701 .arg1_type = ARG_PTR_TO_CTX,
3702 .arg2_type = ARG_PTR_TO_MEM,
3703 .arg3_type = ARG_CONST_SIZE,
3704};
3705
3624static const struct bpf_func_proto * 3706static const struct bpf_func_proto *
3625bpf_base_func_proto(enum bpf_func_id func_id) 3707bpf_base_func_proto(enum bpf_func_id func_id)
3626{ 3708{
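
A hedged, selftest-style sketch of how a connect4 program might use the new bpf_bind() helper to pin the source address before the connect proceeds (the address is illustrative, and the helper declaration plus bpf_endian.h are assumed from the BPF selftest headers, not from this patch):

	#include <uapi/linux/bpf.h>
	#include <sys/socket.h>
	#include <linux/in.h>
	#include "bpf_helpers.h"
	#include "bpf_endian.h"

	SEC("cgroup/connect4")
	int bind_src(struct bpf_sock_addr *ctx)
	{
		struct sockaddr_in sa = {};

		sa.sin_family = AF_INET;
		sa.sin_addr.s_addr = bpf_htonl(0x7f000001); /* 127.0.0.1 */
		/* sin_port stays 0: the helper rejects binds to a port. */

		if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)))
			return 0; /* deny the connect() if the bind fails */
		return 1; /* allow */
	}

Denying on helper failure mirrors how a bind error would otherwise surface later in the connect path.
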
@@ -3650,7 +3732,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
3650} 3732}
3651 3733
3652static const struct bpf_func_proto * 3734static const struct bpf_func_proto *
3653sock_filter_func_proto(enum bpf_func_id func_id) 3735sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3654{ 3736{
3655 switch (func_id) { 3737 switch (func_id) {
3656 /* inet and inet6 sockets are created in a process 3738 /* inet and inet6 sockets are created in a process
@@ -3664,7 +3746,29 @@ sock_filter_func_proto(enum bpf_func_id func_id)
3664} 3746}
3665 3747
3666static const struct bpf_func_proto * 3748static const struct bpf_func_proto *
3667sk_filter_func_proto(enum bpf_func_id func_id) 3749sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3750{
3751 switch (func_id) {
3752 /* inet and inet6 sockets are created in a process
3753 * context so there is always a valid uid/gid
3754 */
3755 case BPF_FUNC_get_current_uid_gid:
3756 return &bpf_get_current_uid_gid_proto;
3757 case BPF_FUNC_bind:
3758 switch (prog->expected_attach_type) {
3759 case BPF_CGROUP_INET4_CONNECT:
3760 case BPF_CGROUP_INET6_CONNECT:
3761 return &bpf_bind_proto;
3762 default:
3763 return NULL;
3764 }
3765 default:
3766 return bpf_base_func_proto(func_id);
3767 }
3768}
3769
3770static const struct bpf_func_proto *
3771sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3668{ 3772{
3669 switch (func_id) { 3773 switch (func_id) {
3670 case BPF_FUNC_skb_load_bytes: 3774 case BPF_FUNC_skb_load_bytes:
@@ -3679,7 +3783,7 @@ sk_filter_func_proto(enum bpf_func_id func_id)
3679} 3783}
3680 3784
3681static const struct bpf_func_proto * 3785static const struct bpf_func_proto *
3682tc_cls_act_func_proto(enum bpf_func_id func_id) 3786tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3683{ 3787{
3684 switch (func_id) { 3788 switch (func_id) {
3685 case BPF_FUNC_skb_store_bytes: 3789 case BPF_FUNC_skb_store_bytes:
@@ -3746,7 +3850,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
3746} 3850}
3747 3851
3748static const struct bpf_func_proto * 3852static const struct bpf_func_proto *
3749xdp_func_proto(enum bpf_func_id func_id) 3853xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3750{ 3854{
3751 switch (func_id) { 3855 switch (func_id) {
3752 case BPF_FUNC_perf_event_output: 3856 case BPF_FUNC_perf_event_output:
@@ -3769,7 +3873,7 @@ xdp_func_proto(enum bpf_func_id func_id)
3769} 3873}
3770 3874
3771static const struct bpf_func_proto * 3875static const struct bpf_func_proto *
3772lwt_inout_func_proto(enum bpf_func_id func_id) 3876lwt_inout_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3773{ 3877{
3774 switch (func_id) { 3878 switch (func_id) {
3775 case BPF_FUNC_skb_load_bytes: 3879 case BPF_FUNC_skb_load_bytes:
@@ -3796,7 +3900,7 @@ lwt_inout_func_proto(enum bpf_func_id func_id)
3796} 3900}
3797 3901
3798static const struct bpf_func_proto * 3902static const struct bpf_func_proto *
3799 sock_ops_func_proto(enum bpf_func_id func_id) 3903sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3800{ 3904{
3801 switch (func_id) { 3905 switch (func_id) {
3802 case BPF_FUNC_setsockopt: 3906 case BPF_FUNC_setsockopt:
@@ -3812,7 +3916,8 @@ static const struct bpf_func_proto *
3812 } 3916 }
3813} 3917}
3814 3918
3815static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id) 3919static const struct bpf_func_proto *
3920sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3816{ 3921{
3817 switch (func_id) { 3922 switch (func_id) {
3818 case BPF_FUNC_msg_redirect_map: 3923 case BPF_FUNC_msg_redirect_map:
@@ -3828,7 +3933,8 @@ static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
3828 } 3933 }
3829} 3934}
3830 3935
3831static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id) 3936static const struct bpf_func_proto *
3937sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3832{ 3938{
3833 switch (func_id) { 3939 switch (func_id) {
3834 case BPF_FUNC_skb_store_bytes: 3940 case BPF_FUNC_skb_store_bytes:
@@ -3853,7 +3959,7 @@ static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
3853} 3959}
3854 3960
3855static const struct bpf_func_proto * 3961static const struct bpf_func_proto *
3856lwt_xmit_func_proto(enum bpf_func_id func_id) 3962lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3857{ 3963{
3858 switch (func_id) { 3964 switch (func_id) {
3859 case BPF_FUNC_skb_get_tunnel_key: 3965 case BPF_FUNC_skb_get_tunnel_key:
@@ -3883,11 +3989,12 @@ lwt_xmit_func_proto(enum bpf_func_id func_id)
3883 case BPF_FUNC_set_hash_invalid: 3989 case BPF_FUNC_set_hash_invalid:
3884 return &bpf_set_hash_invalid_proto; 3990 return &bpf_set_hash_invalid_proto;
3885 default: 3991 default:
3886 return lwt_inout_func_proto(func_id); 3992 return lwt_inout_func_proto(func_id, prog);
3887 } 3993 }
3888} 3994}
3889 3995
3890static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type, 3996static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
3997 const struct bpf_prog *prog,
3891 struct bpf_insn_access_aux *info) 3998 struct bpf_insn_access_aux *info)
3892{ 3999{
3893 const int size_default = sizeof(__u32); 4000 const int size_default = sizeof(__u32);
@@ -3931,6 +4038,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
3931 4038
3932static bool sk_filter_is_valid_access(int off, int size, 4039static bool sk_filter_is_valid_access(int off, int size,
3933 enum bpf_access_type type, 4040 enum bpf_access_type type,
4041 const struct bpf_prog *prog,
3934 struct bpf_insn_access_aux *info) 4042 struct bpf_insn_access_aux *info)
3935{ 4043{
3936 switch (off) { 4044 switch (off) {
@@ -3951,11 +4059,12 @@ static bool sk_filter_is_valid_access(int off, int size,
3951 } 4059 }
3952 } 4060 }
3953 4061
3954 return bpf_skb_is_valid_access(off, size, type, info); 4062 return bpf_skb_is_valid_access(off, size, type, prog, info);
3955} 4063}
3956 4064
3957static bool lwt_is_valid_access(int off, int size, 4065static bool lwt_is_valid_access(int off, int size,
3958 enum bpf_access_type type, 4066 enum bpf_access_type type,
4067 const struct bpf_prog *prog,
3959 struct bpf_insn_access_aux *info) 4068 struct bpf_insn_access_aux *info)
3960{ 4069{
3961 switch (off) { 4070 switch (off) {
@@ -3985,32 +4094,83 @@ static bool lwt_is_valid_access(int off, int size,
3985 break; 4094 break;
3986 } 4095 }
3987 4096
3988 return bpf_skb_is_valid_access(off, size, type, info); 4097 return bpf_skb_is_valid_access(off, size, type, prog, info);
3989} 4098}
3990 4099
3991static bool sock_filter_is_valid_access(int off, int size, 4100
3992 enum bpf_access_type type, 4101/* Attach type specific accesses */
3993 struct bpf_insn_access_aux *info) 4102static bool __sock_filter_check_attach_type(int off,
4103 enum bpf_access_type access_type,
4104 enum bpf_attach_type attach_type)
3994{ 4105{
3995 if (type == BPF_WRITE) { 4106 switch (off) {
3996 switch (off) { 4107 case offsetof(struct bpf_sock, bound_dev_if):
3997 case offsetof(struct bpf_sock, bound_dev_if): 4108 case offsetof(struct bpf_sock, mark):
3998 case offsetof(struct bpf_sock, mark): 4109 case offsetof(struct bpf_sock, priority):
3999 case offsetof(struct bpf_sock, priority): 4110 switch (attach_type) {
4000 break; 4111 case BPF_CGROUP_INET_SOCK_CREATE:
4112 goto full_access;
4001 default: 4113 default:
4002 return false; 4114 return false;
4003 } 4115 }
4116 case bpf_ctx_range(struct bpf_sock, src_ip4):
4117 switch (attach_type) {
4118 case BPF_CGROUP_INET4_POST_BIND:
4119 goto read_only;
4120 default:
4121 return false;
4122 }
4123 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
4124 switch (attach_type) {
4125 case BPF_CGROUP_INET6_POST_BIND:
4126 goto read_only;
4127 default:
4128 return false;
4129 }
4130 case bpf_ctx_range(struct bpf_sock, src_port):
4131 switch (attach_type) {
4132 case BPF_CGROUP_INET4_POST_BIND:
4133 case BPF_CGROUP_INET6_POST_BIND:
4134 goto read_only;
4135 default:
4136 return false;
4137 }
4138 }
4139read_only:
4140 return access_type == BPF_READ;
4141full_access:
4142 return true;
4143}
4144
4145static bool __sock_filter_check_size(int off, int size,
4146 struct bpf_insn_access_aux *info)
4147{
4148 const int size_default = sizeof(__u32);
4149
4150 switch (off) {
4151 case bpf_ctx_range(struct bpf_sock, src_ip4):
4152 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
4153 bpf_ctx_record_field_size(info, size_default);
4154 return bpf_ctx_narrow_access_ok(off, size, size_default);
4004 } 4155 }
4005 4156
4006 if (off < 0 || off + size > sizeof(struct bpf_sock)) 4157 return size == size_default;
4158}
4159
4160static bool sock_filter_is_valid_access(int off, int size,
4161 enum bpf_access_type type,
4162 const struct bpf_prog *prog,
4163 struct bpf_insn_access_aux *info)
4164{
4165 if (off < 0 || off >= sizeof(struct bpf_sock))
4007 return false; 4166 return false;
4008 /* The verifier guarantees that size > 0. */
4009 if (off % size != 0) 4167 if (off % size != 0)
4010 return false; 4168 return false;
4011 if (size != sizeof(__u32)) 4169 if (!__sock_filter_check_attach_type(off, type,
4170 prog->expected_attach_type))
4171 return false;
4172 if (!__sock_filter_check_size(off, size, info))
4012 return false; 4173 return false;
4013
4014 return true; 4174 return true;
4015} 4175}
4016 4176
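
A hedged sketch of what the new read-only bpf_sock fields enable at post-bind time (section name and policy are illustrative; per the checks above, these fields are readable only from the matching POST_BIND attach type):

	#include <uapi/linux/bpf.h>
	#include "bpf_helpers.h"

	SEC("cgroup/post_bind4")
	int deny_privileged_src_port(struct bpf_sock *ctx)
	{
		/* src_port is exposed in host byte order, after the port
		 * has been allocated by get_port().
		 */
		if (ctx->src_port < 1024)
			return 0; /* reject: the bind() fails */
		return 1; /* allow */
	}
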
@@ -4061,6 +4221,7 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
4061 4221
4062static bool tc_cls_act_is_valid_access(int off, int size, 4222static bool tc_cls_act_is_valid_access(int off, int size,
4063 enum bpf_access_type type, 4223 enum bpf_access_type type,
4224 const struct bpf_prog *prog,
4064 struct bpf_insn_access_aux *info) 4225 struct bpf_insn_access_aux *info)
4065{ 4226{
4066 if (type == BPF_WRITE) { 4227 if (type == BPF_WRITE) {
@@ -4090,7 +4251,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
4090 return false; 4251 return false;
4091 } 4252 }
4092 4253
4093 return bpf_skb_is_valid_access(off, size, type, info); 4254 return bpf_skb_is_valid_access(off, size, type, prog, info);
4094} 4255}
4095 4256
4096static bool __is_valid_xdp_access(int off, int size) 4257static bool __is_valid_xdp_access(int off, int size)
@@ -4107,6 +4268,7 @@ static bool __is_valid_xdp_access(int off, int size)
4107 4268
4108static bool xdp_is_valid_access(int off, int size, 4269static bool xdp_is_valid_access(int off, int size,
4109 enum bpf_access_type type, 4270 enum bpf_access_type type,
4271 const struct bpf_prog *prog,
4110 struct bpf_insn_access_aux *info) 4272 struct bpf_insn_access_aux *info)
4111{ 4273{
4112 if (type == BPF_WRITE) 4274 if (type == BPF_WRITE)
@@ -4137,8 +4299,74 @@ void bpf_warn_invalid_xdp_action(u32 act)
4137} 4299}
4138EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); 4300EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
4139 4301
4302static bool sock_addr_is_valid_access(int off, int size,
4303 enum bpf_access_type type,
4304 const struct bpf_prog *prog,
4305 struct bpf_insn_access_aux *info)
4306{
4307 const int size_default = sizeof(__u32);
4308
4309 if (off < 0 || off >= sizeof(struct bpf_sock_addr))
4310 return false;
4311 if (off % size != 0)
4312 return false;
4313
 4314	/* Disallow access to IPv6 fields from IPv4 contex and vice
4315 * versa.
4316 */
4317 switch (off) {
4318 case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
4319 switch (prog->expected_attach_type) {
4320 case BPF_CGROUP_INET4_BIND:
4321 case BPF_CGROUP_INET4_CONNECT:
4322 break;
4323 default:
4324 return false;
4325 }
4326 break;
4327 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
4328 switch (prog->expected_attach_type) {
4329 case BPF_CGROUP_INET6_BIND:
4330 case BPF_CGROUP_INET6_CONNECT:
4331 break;
4332 default:
4333 return false;
4334 }
4335 break;
4336 }
4337
4338 switch (off) {
4339 case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
4340 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
4341 /* Only narrow read access allowed for now. */
4342 if (type == BPF_READ) {
4343 bpf_ctx_record_field_size(info, size_default);
4344 if (!bpf_ctx_narrow_access_ok(off, size, size_default))
4345 return false;
4346 } else {
4347 if (size != size_default)
4348 return false;
4349 }
4350 break;
4351 case bpf_ctx_range(struct bpf_sock_addr, user_port):
4352 if (size != size_default)
4353 return false;
4354 break;
4355 default:
4356 if (type == BPF_READ) {
4357 if (size != size_default)
4358 return false;
4359 } else {
4360 return false;
4361 }
4362 }
4363
4364 return true;
4365}
4366
4140static bool sock_ops_is_valid_access(int off, int size, 4367static bool sock_ops_is_valid_access(int off, int size,
4141 enum bpf_access_type type, 4368 enum bpf_access_type type,
4369 const struct bpf_prog *prog,
4142 struct bpf_insn_access_aux *info) 4370 struct bpf_insn_access_aux *info)
4143{ 4371{
4144 const int size_default = sizeof(__u32); 4372 const int size_default = sizeof(__u32);
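
And a hedged sketch of the writable user_ip4/user_port fields from a connect4 program (endpoints are illustrative; both fields are in network byte order, and full-width writes are required per the checks above):

	#include <uapi/linux/bpf.h>
	#include "bpf_helpers.h"
	#include "bpf_endian.h"

	SEC("cgroup/connect4")
	int rewrite_dst(struct bpf_sock_addr *ctx)
	{
		/* Redirect connects aimed at 10.0.0.1:80 to 127.0.0.1:8080. */
		if (ctx->user_ip4 == bpf_htonl(0x0a000001) &&
		    ctx->user_port == bpf_htons(80)) {
			ctx->user_ip4 = bpf_htonl(0x7f000001);
			ctx->user_port = bpf_htons(8080);
		}
		return 1; /* allow the connect() to proceed */
	}
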
@@ -4185,6 +4413,7 @@ static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
4185 4413
4186static bool sk_skb_is_valid_access(int off, int size, 4414static bool sk_skb_is_valid_access(int off, int size,
4187 enum bpf_access_type type, 4415 enum bpf_access_type type,
4416 const struct bpf_prog *prog,
4188 struct bpf_insn_access_aux *info) 4417 struct bpf_insn_access_aux *info)
4189{ 4418{
4190 switch (off) { 4419 switch (off) {
@@ -4214,11 +4443,12 @@ static bool sk_skb_is_valid_access(int off, int size,
4214 break; 4443 break;
4215 } 4444 }
4216 4445
4217 return bpf_skb_is_valid_access(off, size, type, info); 4446 return bpf_skb_is_valid_access(off, size, type, prog, info);
4218} 4447}
4219 4448
4220static bool sk_msg_is_valid_access(int off, int size, 4449static bool sk_msg_is_valid_access(int off, int size,
4221 enum bpf_access_type type, 4450 enum bpf_access_type type,
4451 const struct bpf_prog *prog,
4222 struct bpf_insn_access_aux *info) 4452 struct bpf_insn_access_aux *info)
4223{ 4453{
4224 if (type == BPF_WRITE) 4454 if (type == BPF_WRITE)
@@ -4548,6 +4778,7 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
4548 struct bpf_prog *prog, u32 *target_size) 4778 struct bpf_prog *prog, u32 *target_size)
4549{ 4779{
4550 struct bpf_insn *insn = insn_buf; 4780 struct bpf_insn *insn = insn_buf;
4781 int off;
4551 4782
4552 switch (si->off) { 4783 switch (si->off) {
4553 case offsetof(struct bpf_sock, bound_dev_if): 4784 case offsetof(struct bpf_sock, bound_dev_if):
@@ -4603,6 +4834,43 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
4603 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); 4834 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
4604 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT); 4835 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
4605 break; 4836 break;
4837
4838 case offsetof(struct bpf_sock, src_ip4):
4839 *insn++ = BPF_LDX_MEM(
4840 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
4841 bpf_target_off(struct sock_common, skc_rcv_saddr,
4842 FIELD_SIZEOF(struct sock_common,
4843 skc_rcv_saddr),
4844 target_size));
4845 break;
4846
4847 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
4848#if IS_ENABLED(CONFIG_IPV6)
4849 off = si->off;
4850 off -= offsetof(struct bpf_sock, src_ip6[0]);
4851 *insn++ = BPF_LDX_MEM(
4852 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
4853 bpf_target_off(
4854 struct sock_common,
4855 skc_v6_rcv_saddr.s6_addr32[0],
4856 FIELD_SIZEOF(struct sock_common,
4857 skc_v6_rcv_saddr.s6_addr32[0]),
4858 target_size) + off);
4859#else
4860 (void)off;
4861 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
4862#endif
4863 break;
4864
4865 case offsetof(struct bpf_sock, src_port):
4866 *insn++ = BPF_LDX_MEM(
4867 BPF_FIELD_SIZEOF(struct sock_common, skc_num),
4868 si->dst_reg, si->src_reg,
4869 bpf_target_off(struct sock_common, skc_num,
4870 FIELD_SIZEOF(struct sock_common,
4871 skc_num),
4872 target_size));
4873 break;
4606 } 4874 }
4607 4875
4608 return insn - insn_buf; 4876 return insn - insn_buf;
@@ -4678,6 +4946,152 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
4678 return insn - insn_buf; 4946 return insn - insn_buf;
4679} 4947}
4680 4948
 4949/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF, where S is the
 4950 * type of the context Structure, F is the Field in the context structure
 4951 * that holds a pointer to a Nested Structure of type NS that has the field NF.
 4952 *
 4953 * SIZE encodes the load size (BPF_B, BPF_H, etc.). It's up to the caller to
 4954 * make sure that SIZE is not greater than the actual size of S.F.NF.
 4955 *
 4956 * If an offset OFF is provided, the load happens at that offset relative to
 4957 * the offset of NF.
4958 */
4959#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \
4960 do { \
4961 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \
4962 si->src_reg, offsetof(S, F)); \
4963 *insn++ = BPF_LDX_MEM( \
4964 SIZE, si->dst_reg, si->dst_reg, \
4965 bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
4966 target_size) \
4967 + OFF); \
4968 } while (0)
4969
4970#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \
4971 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \
4972 BPF_FIELD_SIZEOF(NS, NF), 0)
4973
 4974/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantics similar to
 4975 * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for a store operation.
 4976 *
 4977 * It doesn't support a SIZE argument though, since narrow stores are not
 4978 * supported for now.
 4979 *
 4980 * In addition it uses Temporary Field TF (a member of struct S) as the 3rd
 4981 * "register", since the two registers available in convert_ctx_access are
 4982 * not enough: we can override neither SRC, since it contains the value to
 4983 * store, nor DST, since it contains the pointer to the context that may be
 4984 * used by later instructions. But we need a temporary place to save the
 4985 * pointer to the nested structure whose field we want to store to.
4986 */
4987#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \
4988 do { \
4989 int tmp_reg = BPF_REG_9; \
4990 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
4991 --tmp_reg; \
4992 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
4993 --tmp_reg; \
4994 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \
4995 offsetof(S, TF)); \
4996 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
4997 si->dst_reg, offsetof(S, F)); \
4998 *insn++ = BPF_STX_MEM( \
4999 BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \
5000 bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
5001 target_size) \
5002 + OFF); \
5003 *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
5004 offsetof(S, TF)); \
5005 } while (0)
5006
5007#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
5008 TF) \
5009 do { \
5010 if (type == BPF_WRITE) { \
5011 SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \
5012 TF); \
5013 } else { \
5014 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
5015 S, NS, F, NF, SIZE, OFF); \
5016 } \
5017 } while (0)
5018
5019#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \
5020 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \
5021 S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
5022
5023static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
5024 const struct bpf_insn *si,
5025 struct bpf_insn *insn_buf,
5026 struct bpf_prog *prog, u32 *target_size)
5027{
5028 struct bpf_insn *insn = insn_buf;
5029 int off;
5030
5031 switch (si->off) {
5032 case offsetof(struct bpf_sock_addr, user_family):
5033 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
5034 struct sockaddr, uaddr, sa_family);
5035 break;
5036
5037 case offsetof(struct bpf_sock_addr, user_ip4):
5038 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
5039 struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
5040 sin_addr, BPF_SIZE(si->code), 0, tmp_reg);
5041 break;
5042
5043 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
5044 off = si->off;
5045 off -= offsetof(struct bpf_sock_addr, user_ip6[0]);
5046 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
5047 struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
5048 sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off,
5049 tmp_reg);
5050 break;
5051
5052 case offsetof(struct bpf_sock_addr, user_port):
 5053		/* To get the port we need to know sa_family first and then treat
 5054		 * sockaddr as either sockaddr_in or sockaddr_in6.
 5055		 * Though we can simplify, since the port field has the same offset
 5056		 * and size in both structures.
 5057		 * Here we check this invariant and use just one of the
 5058		 * structures if it's true.
5059 */
5060 BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
5061 offsetof(struct sockaddr_in6, sin6_port));
5062 BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) !=
5063 FIELD_SIZEOF(struct sockaddr_in6, sin6_port));
5064 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern,
5065 struct sockaddr_in6, uaddr,
5066 sin6_port, tmp_reg);
5067 break;
5068
5069 case offsetof(struct bpf_sock_addr, family):
5070 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
5071 struct sock, sk, sk_family);
5072 break;
5073
5074 case offsetof(struct bpf_sock_addr, type):
5075 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
5076 struct bpf_sock_addr_kern, struct sock, sk,
5077 __sk_flags_offset, BPF_W, 0);
5078 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
5079 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
5080 break;
5081
5082 case offsetof(struct bpf_sock_addr, protocol):
5083 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
5084 struct bpf_sock_addr_kern, struct sock, sk,
5085 __sk_flags_offset, BPF_W, 0);
5086 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
5087 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
5088 SK_FL_PROTO_SHIFT);
5089 break;
5090 }
5091
5092 return insn - insn_buf;
5093}
5094
4681static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, 5095static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
4682 const struct bpf_insn *si, 5096 const struct bpf_insn *si,
4683 struct bpf_insn *insn_buf, 5097 struct bpf_insn *insn_buf,
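
For orientation, a hedged sketch of what SOCK_ADDR_LOAD_NESTED_FIELD() boils down to for the user_family case above; the bpf_target_off() bookkeeping is elided, so this is an illustration rather than the generated code verbatim:

	/* dst_reg = ((struct bpf_sock_addr_kern *)src_reg)->uaddr */
	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_addr_kern, uaddr),
			      si->dst_reg, si->src_reg,
			      offsetof(struct bpf_sock_addr_kern, uaddr));
	/* dst_reg = ((struct sockaddr *)dst_reg)->sa_family */
	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sockaddr, sa_family),
			      si->dst_reg, si->dst_reg,
			      offsetof(struct sockaddr, sa_family));
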
@@ -5135,6 +5549,15 @@ const struct bpf_verifier_ops cg_sock_verifier_ops = {
5135const struct bpf_prog_ops cg_sock_prog_ops = { 5549const struct bpf_prog_ops cg_sock_prog_ops = {
5136}; 5550};
5137 5551
5552const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
5553 .get_func_proto = sock_addr_func_proto,
5554 .is_valid_access = sock_addr_is_valid_access,
5555 .convert_ctx_access = sock_addr_convert_ctx_access,
5556};
5557
5558const struct bpf_prog_ops cg_sock_addr_prog_ops = {
5559};
5560
5138const struct bpf_verifier_ops sock_ops_verifier_ops = { 5561const struct bpf_verifier_ops sock_ops_verifier_ops = {
5139 .get_func_proto = sock_ops_func_proto, 5562 .get_func_proto = sock_ops_func_proto,
5140 .is_valid_access = sock_ops_is_valid_access, 5563 .is_valid_access = sock_ops_is_valid_access,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f98e2f0db841..eaed0367e669 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -432,23 +432,37 @@ EXPORT_SYMBOL(inet_release);
432 432
433int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 433int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
434{ 434{
435 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
436 struct sock *sk = sock->sk; 435 struct sock *sk = sock->sk;
437 struct inet_sock *inet = inet_sk(sk);
438 struct net *net = sock_net(sk);
439 unsigned short snum;
440 int chk_addr_ret;
441 u32 tb_id = RT_TABLE_LOCAL;
442 int err; 436 int err;
443 437
444 /* If the socket has its own bind function then use it. (RAW) */ 438 /* If the socket has its own bind function then use it. (RAW) */
445 if (sk->sk_prot->bind) { 439 if (sk->sk_prot->bind) {
446 err = sk->sk_prot->bind(sk, uaddr, addr_len); 440 return sk->sk_prot->bind(sk, uaddr, addr_len);
447 goto out;
448 } 441 }
449 err = -EINVAL;
450 if (addr_len < sizeof(struct sockaddr_in)) 442 if (addr_len < sizeof(struct sockaddr_in))
451 goto out; 443 return -EINVAL;
444
 445	/* The BPF prog is run before any checks are done so that if the prog
 446	 * changes the context in the wrong way it will be caught.
447 */
448 err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr);
449 if (err)
450 return err;
451
452 return __inet_bind(sk, uaddr, addr_len, false, true);
453}
454EXPORT_SYMBOL(inet_bind);
455
456int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
457 bool force_bind_address_no_port, bool with_lock)
458{
459 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
460 struct inet_sock *inet = inet_sk(sk);
461 struct net *net = sock_net(sk);
462 unsigned short snum;
463 int chk_addr_ret;
464 u32 tb_id = RT_TABLE_LOCAL;
465 int err;
452 466
453 if (addr->sin_family != AF_INET) { 467 if (addr->sin_family != AF_INET) {
454 /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET) 468 /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
@@ -492,7 +506,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
492 * would be illegal to use them (multicast/broadcast) in 506 * would be illegal to use them (multicast/broadcast) in
493 * which case the sending device address is used. 507 * which case the sending device address is used.
494 */ 508 */
495 lock_sock(sk); 509 if (with_lock)
510 lock_sock(sk);
496 511
497 /* Check these errors (active socket, double bind). */ 512 /* Check these errors (active socket, double bind). */
498 err = -EINVAL; 513 err = -EINVAL;
@@ -504,11 +519,18 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
504 inet->inet_saddr = 0; /* Use device */ 519 inet->inet_saddr = 0; /* Use device */
505 520
506 /* Make sure we are allowed to bind here. */ 521 /* Make sure we are allowed to bind here. */
507 if ((snum || !inet->bind_address_no_port) && 522 if (snum || !(inet->bind_address_no_port ||
508 sk->sk_prot->get_port(sk, snum)) { 523 force_bind_address_no_port)) {
509 inet->inet_saddr = inet->inet_rcv_saddr = 0; 524 if (sk->sk_prot->get_port(sk, snum)) {
510 err = -EADDRINUSE; 525 inet->inet_saddr = inet->inet_rcv_saddr = 0;
511 goto out_release_sock; 526 err = -EADDRINUSE;
527 goto out_release_sock;
528 }
529 err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
530 if (err) {
531 inet->inet_saddr = inet->inet_rcv_saddr = 0;
532 goto out_release_sock;
533 }
512 } 534 }
513 535
514 if (inet->inet_rcv_saddr) 536 if (inet->inet_rcv_saddr)
@@ -521,22 +543,29 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
521 sk_dst_reset(sk); 543 sk_dst_reset(sk);
522 err = 0; 544 err = 0;
523out_release_sock: 545out_release_sock:
524 release_sock(sk); 546 if (with_lock)
547 release_sock(sk);
525out: 548out:
526 return err; 549 return err;
527} 550}
528EXPORT_SYMBOL(inet_bind);
529 551
530int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, 552int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
531 int addr_len, int flags) 553 int addr_len, int flags)
532{ 554{
533 struct sock *sk = sock->sk; 555 struct sock *sk = sock->sk;
556 int err;
534 557
535 if (addr_len < sizeof(uaddr->sa_family)) 558 if (addr_len < sizeof(uaddr->sa_family))
536 return -EINVAL; 559 return -EINVAL;
537 if (uaddr->sa_family == AF_UNSPEC) 560 if (uaddr->sa_family == AF_UNSPEC)
538 return sk->sk_prot->disconnect(sk, flags); 561 return sk->sk_prot->disconnect(sk, flags);
539 562
563 if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
564 err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
565 if (err)
566 return err;
567 }
568
540 if (!inet_sk(sk)->inet_num && inet_autobind(sk)) 569 if (!inet_sk(sk)->inet_num && inet_autobind(sk))
541 return -EAGAIN; 570 return -EAGAIN;
542 return sk->sk_prot->connect(sk, uaddr, addr_len); 571 return sk->sk_prot->connect(sk, uaddr, addr_len);
@@ -617,6 +646,12 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
617 if (sk->sk_state != TCP_CLOSE) 646 if (sk->sk_state != TCP_CLOSE)
618 goto out; 647 goto out;
619 648
649 if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
650 err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
651 if (err)
652 goto out;
653 }
654
620 err = sk->sk_prot->connect(sk, uaddr, addr_len); 655 err = sk->sk_prot->connect(sk, uaddr, addr_len);
621 if (err < 0) 656 if (err < 0)
622 goto out; 657 goto out;
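
To exercise the new connect hook from userspace, a minimal attach sketch, assuming libbpf's bpf_prog_attach() and an already loaded BPF_PROG_TYPE_CGROUP_SOCK_ADDR program; the cgroup path is hypothetical:

	#include <fcntl.h>
	#include <unistd.h>
	#include <bpf/bpf.h>

	int attach_connect4(int prog_fd)
	{
		int err, cgroup_fd;

		cgroup_fd = open("/mnt/cgroup2/test", O_RDONLY); /* hypothetical path */
		if (cgroup_fd < 0)
			return -1;

		/* The program must have been loaded with expected_attach_type
		 * BPF_CGROUP_INET4_CONNECT; it then runs at connect(2) entry
		 * for AF_INET sockets in this cgroup via the pre_connect hook.
		 */
		err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET4_CONNECT, 0);
		close(cgroup_fd);
		return err;
	}
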
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0c31be306572..bccc4c270087 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -485,6 +485,14 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
485 } 485 }
486} 486}
487 487
488static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
489 int target, struct sock *sk)
490{
491 return (tp->rcv_nxt - tp->copied_seq >= target) ||
492 (sk->sk_prot->stream_memory_read ?
493 sk->sk_prot->stream_memory_read(sk) : false);
494}
495
488/* 496/*
489 * Wait for a TCP event. 497 * Wait for a TCP event.
490 * 498 *
@@ -554,7 +562,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
554 tp->urg_data) 562 tp->urg_data)
555 target++; 563 target++;
556 564
557 if (tp->rcv_nxt - tp->copied_seq >= target) 565 if (tcp_stream_is_readable(tp, target, sk))
558 mask |= EPOLLIN | EPOLLRDNORM; 566 mask |= EPOLLIN | EPOLLRDNORM;
559 567
560 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { 568 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9639334ebb7c..f70586b50838 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -140,6 +140,21 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
140} 140}
141EXPORT_SYMBOL_GPL(tcp_twsk_unique); 141EXPORT_SYMBOL_GPL(tcp_twsk_unique);
142 142
143static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
144 int addr_len)
145{
 146	/* This check is replicated from tcp_v4_connect() and is intended to
 147	 * prevent the BPF program called below from accessing bytes that are
 148	 * outside the bound specified by the user in addr_len.
149 */
150 if (addr_len < sizeof(struct sockaddr_in))
151 return -EINVAL;
152
153 sock_owned_by_me(sk);
154
155 return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
156}
157
143/* This will initiate an outgoing connection. */ 158/* This will initiate an outgoing connection. */
144int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 159int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
145{ 160{
@@ -2408,6 +2423,7 @@ struct proto tcp_prot = {
2408 .name = "TCP", 2423 .name = "TCP",
2409 .owner = THIS_MODULE, 2424 .owner = THIS_MODULE,
2410 .close = tcp_close, 2425 .close = tcp_close,
2426 .pre_connect = tcp_v4_pre_connect,
2411 .connect = tcp_v4_connect, 2427 .connect = tcp_v4_connect,
2412 .disconnect = tcp_disconnect, 2428 .disconnect = tcp_disconnect,
2413 .accept = inet_csk_accept, 2429 .accept = inet_csk_accept,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f49e14cd3891..24b5c59b1c53 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1658,6 +1658,19 @@ csum_copy_err:
1658 goto try_again; 1658 goto try_again;
1659} 1659}
1660 1660
1661int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
1662{
1663	/* This check is replicated from __ip4_datagram_connect() and is
1664	 * intended to prevent the BPF program called below from accessing
1665	 * bytes that are outside the bound specified by the user in addr_len.
1666 */
1667 if (addr_len < sizeof(struct sockaddr_in))
1668 return -EINVAL;
1669
1670 return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
1671}
1672EXPORT_SYMBOL(udp_pre_connect);
1673
1661int __udp_disconnect(struct sock *sk, int flags) 1674int __udp_disconnect(struct sock *sk, int flags)
1662{ 1675{
1663 struct inet_sock *inet = inet_sk(sk); 1676 struct inet_sock *inet = inet_sk(sk);
@@ -2530,6 +2543,7 @@ struct proto udp_prot = {
2530 .name = "UDP", 2543 .name = "UDP",
2531 .owner = THIS_MODULE, 2544 .owner = THIS_MODULE,
2532 .close = udp_lib_close, 2545 .close = udp_lib_close,
2546 .pre_connect = udp_pre_connect,
2533 .connect = ip4_datagram_connect, 2547 .connect = ip4_datagram_connect,
2534 .disconnect = udp_disconnect, 2548 .disconnect = udp_disconnect,
2535 .ioctl = udp_ioctl, 2549 .ioctl = udp_ioctl,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c1e292db04db..8da0b513f188 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -277,15 +277,7 @@ out_rcu_unlock:
277/* bind for INET6 API */ 277/* bind for INET6 API */
278int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 278int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
279{ 279{
280 struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
281 struct sock *sk = sock->sk; 280 struct sock *sk = sock->sk;
282 struct inet_sock *inet = inet_sk(sk);
283 struct ipv6_pinfo *np = inet6_sk(sk);
284 struct net *net = sock_net(sk);
285 __be32 v4addr = 0;
286 unsigned short snum;
287 bool saved_ipv6only;
288 int addr_type = 0;
289 int err = 0; 281 int err = 0;
290 282
291 /* If the socket has its own bind function then use it. */ 283 /* If the socket has its own bind function then use it. */
@@ -295,11 +287,35 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
295 if (addr_len < SIN6_LEN_RFC2133) 287 if (addr_len < SIN6_LEN_RFC2133)
296 return -EINVAL; 288 return -EINVAL;
297 289
 290	/* The BPF prog is run before any checks are done so that if the prog
 291	 * changes the context in the wrong way it will be caught.
292 */
293 err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
294 if (err)
295 return err;
296
297 return __inet6_bind(sk, uaddr, addr_len, false, true);
298}
299EXPORT_SYMBOL(inet6_bind);
300
301int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
302 bool force_bind_address_no_port, bool with_lock)
303{
304 struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
305 struct inet_sock *inet = inet_sk(sk);
306 struct ipv6_pinfo *np = inet6_sk(sk);
307 struct net *net = sock_net(sk);
308 __be32 v4addr = 0;
309 unsigned short snum;
310 bool saved_ipv6only;
311 int addr_type = 0;
312 int err = 0;
313
298 if (addr->sin6_family != AF_INET6) 314 if (addr->sin6_family != AF_INET6)
299 return -EAFNOSUPPORT; 315 return -EAFNOSUPPORT;
300 316
301 addr_type = ipv6_addr_type(&addr->sin6_addr); 317 addr_type = ipv6_addr_type(&addr->sin6_addr);
302 if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) 318 if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM)
303 return -EINVAL; 319 return -EINVAL;
304 320
305 snum = ntohs(addr->sin6_port); 321 snum = ntohs(addr->sin6_port);
@@ -307,7 +323,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
307 !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) 323 !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
308 return -EACCES; 324 return -EACCES;
309 325
310 lock_sock(sk); 326 if (with_lock)
327 lock_sock(sk);
311 328
312 /* Check these errors (active socket, double bind). */ 329 /* Check these errors (active socket, double bind). */
313 if (sk->sk_state != TCP_CLOSE || inet->inet_num) { 330 if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
@@ -395,12 +412,20 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
395 sk->sk_ipv6only = 1; 412 sk->sk_ipv6only = 1;
396 413
397 /* Make sure we are allowed to bind here. */ 414 /* Make sure we are allowed to bind here. */
398 if ((snum || !inet->bind_address_no_port) && 415 if (snum || !(inet->bind_address_no_port ||
399 sk->sk_prot->get_port(sk, snum)) { 416 force_bind_address_no_port)) {
400 sk->sk_ipv6only = saved_ipv6only; 417 if (sk->sk_prot->get_port(sk, snum)) {
401 inet_reset_saddr(sk); 418 sk->sk_ipv6only = saved_ipv6only;
402 err = -EADDRINUSE; 419 inet_reset_saddr(sk);
403 goto out; 420 err = -EADDRINUSE;
421 goto out;
422 }
423 err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
424 if (err) {
425 sk->sk_ipv6only = saved_ipv6only;
426 inet_reset_saddr(sk);
427 goto out;
428 }
404 } 429 }
405 430
406 if (addr_type != IPV6_ADDR_ANY) 431 if (addr_type != IPV6_ADDR_ANY)
@@ -411,13 +436,13 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
411 inet->inet_dport = 0; 436 inet->inet_dport = 0;
412 inet->inet_daddr = 0; 437 inet->inet_daddr = 0;
413out: 438out:
414 release_sock(sk); 439 if (with_lock)
440 release_sock(sk);
415 return err; 441 return err;
416out_unlock: 442out_unlock:
417 rcu_read_unlock(); 443 rcu_read_unlock();
418 goto out; 444 goto out;
419} 445}
420EXPORT_SYMBOL(inet6_bind);
421 446
422int inet6_release(struct socket *sock) 447int inet6_release(struct socket *sock)
423{ 448{
@@ -868,6 +893,10 @@ static const struct ipv6_stub ipv6_stub_impl = {
868 .nd_tbl = &nd_tbl, 893 .nd_tbl = &nd_tbl,
869}; 894};
870 895
896static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
897 .inet6_bind = __inet6_bind,
898};
899
871static int __init inet6_init(void) 900static int __init inet6_init(void)
872{ 901{
873 struct list_head *r; 902 struct list_head *r;
@@ -1024,6 +1053,7 @@ static int __init inet6_init(void)
1024 /* ensure that ipv6 stubs are visible only after ipv6 is ready */ 1053 /* ensure that ipv6 stubs are visible only after ipv6 is ready */
1025 wmb(); 1054 wmb();
1026 ipv6_stub = &ipv6_stub_impl; 1055 ipv6_stub = &ipv6_stub_impl;
1056 ipv6_bpf_stub = &ipv6_bpf_stub_impl;
1027out: 1057out:
1028 return err; 1058 return err;
1029 1059
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 883df0ad5bfe..6d664d83cd16 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -117,6 +117,21 @@ static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
117 ipv6_hdr(skb)->saddr.s6_addr32); 117 ipv6_hdr(skb)->saddr.s6_addr32);
118} 118}
119 119
120static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
121 int addr_len)
122{
 123	/* This check is replicated from tcp_v6_connect() and is intended to
 124	 * prevent the BPF program called below from accessing bytes that are
 125	 * outside the bound specified by the user in addr_len.
126 */
127 if (addr_len < SIN6_LEN_RFC2133)
128 return -EINVAL;
129
130 sock_owned_by_me(sk);
131
132 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
133}
134
120static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 135static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
121 int addr_len) 136 int addr_len)
122{ 137{
@@ -1925,6 +1940,7 @@ struct proto tcpv6_prot = {
1925 .name = "TCPv6", 1940 .name = "TCPv6",
1926 .owner = THIS_MODULE, 1941 .owner = THIS_MODULE,
1927 .close = tcp_close, 1942 .close = tcp_close,
1943 .pre_connect = tcp_v6_pre_connect,
1928 .connect = tcp_v6_connect, 1944 .connect = tcp_v6_connect,
1929 .disconnect = tcp_disconnect, 1945 .disconnect = tcp_disconnect,
1930 .accept = inet_csk_accept, 1946 .accept = inet_csk_accept,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ad30f5e31969..6861ed479469 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -957,6 +957,25 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
957 } 957 }
958} 958}
959 959
960static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
961 int addr_len)
962{
 963	/* The following checks are replicated from __ip6_datagram_connect()
 964	 * and are intended to prevent the BPF program called below from
 965	 * accessing bytes outside the bound specified by the user in addr_len.
966 */
967 if (uaddr->sa_family == AF_INET) {
968 if (__ipv6_only_sock(sk))
969 return -EAFNOSUPPORT;
970 return udp_pre_connect(sk, uaddr, addr_len);
971 }
972
973 if (addr_len < SIN6_LEN_RFC2133)
974 return -EINVAL;
975
976 return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
977}
978
960/** 979/**
961 * udp6_hwcsum_outgoing - handle outgoing HW checksumming 980 * udp6_hwcsum_outgoing - handle outgoing HW checksumming
962 * @sk: socket we are sending on 981 * @sk: socket we are sending on
@@ -1512,6 +1531,7 @@ struct proto udpv6_prot = {
1512 .name = "UDPv6", 1531 .name = "UDPv6",
1513 .owner = THIS_MODULE, 1532 .owner = THIS_MODULE,
1514 .close = udp_lib_close, 1533 .close = udp_lib_close,
1534 .pre_connect = udpv6_pre_connect,
1515 .connect = ip6_datagram_connect, 1535 .connect = ip6_datagram_connect,
1516 .disconnect = udp_disconnect, 1536 .disconnect = udp_disconnect,
1517 .ioctl = udp_ioctl, 1537 .ioctl = udp_ioctl,
diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h
index 2c8a43d3607f..df855c33daf2 100644
--- a/net/mac802154/trace.h
+++ b/net/mac802154/trace.h
@@ -33,7 +33,7 @@
33 33
34/* Tracing for driver callbacks */ 34/* Tracing for driver callbacks */
35 35
36DECLARE_EVENT_CLASS(local_only_evt, 36DECLARE_EVENT_CLASS(local_only_evt4,
37 TP_PROTO(struct ieee802154_local *local), 37 TP_PROTO(struct ieee802154_local *local),
38 TP_ARGS(local), 38 TP_ARGS(local),
39 TP_STRUCT__entry( 39 TP_STRUCT__entry(
@@ -45,7 +45,7 @@ DECLARE_EVENT_CLASS(local_only_evt,
45 TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG) 45 TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG)
46); 46);
47 47
48DEFINE_EVENT(local_only_evt, 802154_drv_return_void, 48DEFINE_EVENT(local_only_evt4, 802154_drv_return_void,
49 TP_PROTO(struct ieee802154_local *local), 49 TP_PROTO(struct ieee802154_local *local),
50 TP_ARGS(local) 50 TP_ARGS(local)
51); 51);
@@ -65,12 +65,12 @@ TRACE_EVENT(802154_drv_return_int,
65 __entry->ret) 65 __entry->ret)
66); 66);
67 67
68DEFINE_EVENT(local_only_evt, 802154_drv_start, 68DEFINE_EVENT(local_only_evt4, 802154_drv_start,
69 TP_PROTO(struct ieee802154_local *local), 69 TP_PROTO(struct ieee802154_local *local),
70 TP_ARGS(local) 70 TP_ARGS(local)
71); 71);
72 72
73DEFINE_EVENT(local_only_evt, 802154_drv_stop, 73DEFINE_EVENT(local_only_evt4, 802154_drv_stop,
74 TP_PROTO(struct ieee802154_local *local), 74 TP_PROTO(struct ieee802154_local *local),
75 TP_ARGS(local) 75 TP_ARGS(local)
76); 76);
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index a64291ae52a6..55fb279a5196 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -3184,7 +3184,7 @@ TRACE_EVENT(rdev_start_radar_detection,
3184 3184
3185TRACE_EVENT(rdev_set_mcast_rate, 3185TRACE_EVENT(rdev_set_mcast_rate,
3186 TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, 3186 TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
3187 int mcast_rate[NUM_NL80211_BANDS]), 3187 int *mcast_rate),
3188 TP_ARGS(wiphy, netdev, mcast_rate), 3188 TP_ARGS(wiphy, netdev, mcast_rate),
3189 TP_STRUCT__entry( 3189 TP_STRUCT__entry(
3190 WIPHY_ENTRY 3190 WIPHY_ENTRY
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 2c2a587e0942..4d6a6edd4bf6 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -119,6 +119,7 @@ always += offwaketime_kern.o
119always += spintest_kern.o 119always += spintest_kern.o
120always += map_perf_test_kern.o 120always += map_perf_test_kern.o
121always += test_overhead_tp_kern.o 121always += test_overhead_tp_kern.o
122always += test_overhead_raw_tp_kern.o
122always += test_overhead_kprobe_kern.o 123always += test_overhead_kprobe_kern.o
123always += parse_varlen.o parse_simple.o parse_ldabs.o 124always += parse_varlen.o parse_simple.o parse_ldabs.o
124always += test_cgrp2_tc_kern.o 125always += test_cgrp2_tc_kern.o
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index b1a310c3ae89..bebe4188b4b3 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -61,6 +61,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
61 bool is_kprobe = strncmp(event, "kprobe/", 7) == 0; 61 bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
62 bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; 62 bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
63 bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; 63 bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
64 bool is_raw_tracepoint = strncmp(event, "raw_tracepoint/", 15) == 0;
64 bool is_xdp = strncmp(event, "xdp", 3) == 0; 65 bool is_xdp = strncmp(event, "xdp", 3) == 0;
65 bool is_perf_event = strncmp(event, "perf_event", 10) == 0; 66 bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
66 bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; 67 bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
@@ -85,6 +86,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
85 prog_type = BPF_PROG_TYPE_KPROBE; 86 prog_type = BPF_PROG_TYPE_KPROBE;
86 } else if (is_tracepoint) { 87 } else if (is_tracepoint) {
87 prog_type = BPF_PROG_TYPE_TRACEPOINT; 88 prog_type = BPF_PROG_TYPE_TRACEPOINT;
89 } else if (is_raw_tracepoint) {
90 prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT;
88 } else if (is_xdp) { 91 } else if (is_xdp) {
89 prog_type = BPF_PROG_TYPE_XDP; 92 prog_type = BPF_PROG_TYPE_XDP;
90 } else if (is_perf_event) { 93 } else if (is_perf_event) {
@@ -131,6 +134,16 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
131 return populate_prog_array(event, fd); 134 return populate_prog_array(event, fd);
132 } 135 }
133 136
137 if (is_raw_tracepoint) {
138 efd = bpf_raw_tracepoint_open(event + 15, fd);
139 if (efd < 0) {
140 printf("tracepoint %s %s\n", event + 15, strerror(errno));
141 return -1;
142 }
143 event_fd[prog_cnt - 1] = efd;
144 return 0;
145 }
146
134 if (is_kprobe || is_kretprobe) { 147 if (is_kprobe || is_kretprobe) {
135 if (is_kprobe) 148 if (is_kprobe)
136 event += 7; 149 event += 7;
@@ -587,6 +600,7 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
587 if (memcmp(shname, "kprobe/", 7) == 0 || 600 if (memcmp(shname, "kprobe/", 7) == 0 ||
588 memcmp(shname, "kretprobe/", 10) == 0 || 601 memcmp(shname, "kretprobe/", 10) == 0 ||
589 memcmp(shname, "tracepoint/", 11) == 0 || 602 memcmp(shname, "tracepoint/", 11) == 0 ||
603 memcmp(shname, "raw_tracepoint/", 15) == 0 ||
590 memcmp(shname, "xdp", 3) == 0 || 604 memcmp(shname, "xdp", 3) == 0 ||
591 memcmp(shname, "perf_event", 10) == 0 || 605 memcmp(shname, "perf_event", 10) == 0 ||
592 memcmp(shname, "socket", 6) == 0 || 606 memcmp(shname, "socket", 6) == 0 ||
diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c
index 9d751e209f31..8eca27e595ae 100644
--- a/samples/bpf/cookie_uid_helper_example.c
+++ b/samples/bpf/cookie_uid_helper_example.c
@@ -246,7 +246,7 @@ static void udp_client(void)
246 recv_len = recvfrom(s_rcv, &buf, sizeof(buf), 0, 246 recv_len = recvfrom(s_rcv, &buf, sizeof(buf), 0,
247 (struct sockaddr *)&si_me, &slen); 247 (struct sockaddr *)&si_me, &slen);
248 if (recv_len < 0) 248 if (recv_len < 0)
249 error(1, errno, "revieve\n"); 249 error(1, errno, "receive\n");
250 res = memcmp(&(si_other.sin_addr), &(si_me.sin_addr), 250 res = memcmp(&(si_other.sin_addr), &(si_me.sin_addr),
251 sizeof(si_me.sin_addr)); 251 sizeof(si_me.sin_addr));
252 if (res != 0) 252 if (res != 0)
diff --git a/samples/bpf/test_overhead_raw_tp_kern.c b/samples/bpf/test_overhead_raw_tp_kern.c
new file mode 100644
index 000000000000..d2af8bc1c805
--- /dev/null
+++ b/samples/bpf/test_overhead_raw_tp_kern.c
@@ -0,0 +1,17 @@
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2018 Facebook */
3#include <uapi/linux/bpf.h>
4#include "bpf_helpers.h"
5
6SEC("raw_tracepoint/task_rename")
7int prog(struct bpf_raw_tracepoint_args *ctx)
8{
9 return 0;
10}
11
12SEC("raw_tracepoint/urandom_read")
13int prog2(struct bpf_raw_tracepoint_args *ctx)
14{
15 return 0;
16}
17char _license[] SEC("license") = "GPL";
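
Beyond returning 0, a raw tracepoint program sees the tracepoint's raw arguments via ctx->args[]; a hedged sketch for task_rename follows (the argument layout is assumed from that tracepoint's TP_PROTO, and is not spelled out in this patch):

	#include <uapi/linux/bpf.h>
	#include "bpf_helpers.h"

	SEC("raw_tracepoint/task_rename")
	int count_renames(struct bpf_raw_tracepoint_args *ctx)
	{
		/* For task_rename, args[0] is the struct task_struct * and
		 * args[1] the new comm; these are raw kernel pointers, so
		 * fields must be read with bpf_probe_read().
		 */
		char comm[16] = {};

		bpf_probe_read(comm, sizeof(comm), (void *)ctx->args[1]);
		return 0;
	}
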
diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c
index d291167fd3c7..e1d35e07a10e 100644
--- a/samples/bpf/test_overhead_user.c
+++ b/samples/bpf/test_overhead_user.c
@@ -158,5 +158,17 @@ int main(int argc, char **argv)
158 unload_progs(); 158 unload_progs();
159 } 159 }
160 160
161 if (test_flags & 0xC0) {
162 snprintf(filename, sizeof(filename),
163 "%s_raw_tp_kern.o", argv[0]);
164 if (load_bpf_file(filename)) {
165 printf("%s", bpf_log_buf);
166 return 1;
167 }
168 printf("w/RAW_TRACEPOINT\n");
169 run_perf_test(num_cpu, test_flags >> 6);
170 unload_progs();
171 }
172
161 return 0; 173 return 0;
162} 174}
diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index 9ad5ba79c85a..9ff8bc5dc206 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -54,7 +54,7 @@ struct bpf_map_def SEC("maps") sock_map_redir = {
54 .type = BPF_MAP_TYPE_SOCKMAP, 54 .type = BPF_MAP_TYPE_SOCKMAP,
55 .key_size = sizeof(int), 55 .key_size = sizeof(int),
56 .value_size = sizeof(int), 56 .value_size = sizeof(int),
57 .max_entries = 1, 57 .max_entries = 20,
58}; 58};
59 59
60struct bpf_map_def SEC("maps") sock_apply_bytes = { 60struct bpf_map_def SEC("maps") sock_apply_bytes = {
@@ -78,6 +78,19 @@ struct bpf_map_def SEC("maps") sock_pull_bytes = {
78 .max_entries = 2 78 .max_entries = 2
79}; 79};
80 80
81struct bpf_map_def SEC("maps") sock_redir_flags = {
82 .type = BPF_MAP_TYPE_ARRAY,
83 .key_size = sizeof(int),
84 .value_size = sizeof(int),
85 .max_entries = 1
86};
87
88struct bpf_map_def SEC("maps") sock_skb_opts = {
89 .type = BPF_MAP_TYPE_ARRAY,
90 .key_size = sizeof(int),
91 .value_size = sizeof(int),
92 .max_entries = 1
93};
81 94
82SEC("sk_skb1") 95SEC("sk_skb1")
83int bpf_prog1(struct __sk_buff *skb) 96int bpf_prog1(struct __sk_buff *skb)
@@ -90,15 +103,24 @@ int bpf_prog2(struct __sk_buff *skb)
90{ 103{
91 __u32 lport = skb->local_port; 104 __u32 lport = skb->local_port;
92 __u32 rport = skb->remote_port; 105 __u32 rport = skb->remote_port;
93 int ret = 0; 106 int len, *f, ret, zero = 0;
107 __u64 flags = 0;
94 108
95 if (lport == 10000) 109 if (lport == 10000)
96 ret = 10; 110 ret = 10;
97 else 111 else
98 ret = 1; 112 ret = 1;
99 113
100 bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret); 114 len = (__u32)skb->data_end - (__u32)skb->data;
101 return bpf_sk_redirect_map(skb, &sock_map, ret, 0); 115 f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
116 if (f && *f) {
117 ret = 3;
118 flags = *f;
119 }
120
121 bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
122 len, flags);
123 return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
102} 124}
103 125
104SEC("sockops") 126SEC("sockops")
@@ -197,8 +219,9 @@ int bpf_prog5(struct sk_msg_md *msg)
197SEC("sk_msg3") 219SEC("sk_msg3")
198int bpf_prog6(struct sk_msg_md *msg) 220int bpf_prog6(struct sk_msg_md *msg)
199{ 221{
200 int *bytes, zero = 0, one = 1; 222 int *bytes, zero = 0, one = 1, key = 0;
201 int *start, *end; 223 int *start, *end, *f;
224 __u64 flags = 0;
202 225
203 bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 226 bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
204 if (bytes) 227 if (bytes)
@@ -210,15 +233,22 @@ int bpf_prog6(struct sk_msg_md *msg)
210 end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 233 end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
211 if (start && end) 234 if (start && end)
212 bpf_msg_pull_data(msg, *start, *end, 0); 235 bpf_msg_pull_data(msg, *start, *end, 0);
213 return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0); 236 f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
237 if (f && *f) {
238 key = 2;
239 flags = *f;
240 }
241 return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
214} 242}
215 243
216SEC("sk_msg4") 244SEC("sk_msg4")
217int bpf_prog7(struct sk_msg_md *msg) 245int bpf_prog7(struct sk_msg_md *msg)
218{ 246{
219 int err1 = 0, err2 = 0, zero = 0, one = 1; 247 int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
220 int *bytes, *start, *end, len1, len2; 248 int *f, *bytes, *start, *end, len1, len2;
249 __u64 flags = 0;
221 250
251 int err;
222 bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 252 bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
223 if (bytes) 253 if (bytes)
224 err1 = bpf_msg_apply_bytes(msg, *bytes); 254 err1 = bpf_msg_apply_bytes(msg, *bytes);
@@ -229,7 +259,6 @@ int bpf_prog7(struct sk_msg_md *msg)
229 start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 259 start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
230 end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 260 end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
231 if (start && end) { 261 if (start && end) {
232 int err;
233 262
234 bpf_printk("sk_msg2: pull(%i:%i)\n", 263 bpf_printk("sk_msg2: pull(%i:%i)\n",
235 start ? *start : 0, end ? *end : 0); 264 start ? *start : 0, end ? *end : 0);
@@ -241,9 +270,16 @@ int bpf_prog7(struct sk_msg_md *msg)
241 bpf_printk("sk_msg2: length update %i->%i\n", 270 bpf_printk("sk_msg2: length update %i->%i\n",
242 len1, len2); 271 len1, len2);
243 } 272 }
244 bpf_printk("sk_msg3: redirect(%iB) err1=%i err2=%i\n", 273 f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
245 len1, err1, err2); 274 if (f && *f) {
246 return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0); 275 key = 2;
276 flags = *f;
277 }
278 bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
279 len1, flags, err1 ? err1 : err2);
280 err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
281 bpf_printk("sk_msg3: err %i\n", err);
282 return err;
247} 283}
248 284
249SEC("sk_msg5") 285SEC("sk_msg5")
diff --git a/samples/sockmap/sockmap_test.sh b/samples/sockmap/sockmap_test.sh
index 6d8cc40cca22..ace75f070eb8 100755
--- a/samples/sockmap/sockmap_test.sh
+++ b/samples/sockmap/sockmap_test.sh
@@ -1,5 +1,5 @@
1#Test a bunch of positive cases to verify basic functionality 1#Test a bunch of positive cases to verify basic functionality
2for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do 2for prog in "--txmsg_redir --txmsg_skb" "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do
3for t in "sendmsg" "sendpage"; do 3for t in "sendmsg" "sendpage"; do
4for r in 1 10 100; do 4for r in 1 10 100; do
5 for i in 1 10 100; do 5 for i in 1 10 100; do
@@ -100,6 +100,25 @@ for t in "sendmsg" "sendpage"; do
100 sleep 2 100 sleep 2
101done 101done
102 102
103prog="--txmsg_redir --txmsg_apply 1 --txmsg_ingress"
104
105for t in "sendmsg" "sendpage"; do
106 TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
107 echo $TEST
108 $TEST
109 sleep 2
110done
111
112prog="--txmsg_redir --txmsg_apply 1 --txmsg_skb"
113
114for t in "sendmsg" "sendpage"; do
115 TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
116 echo $TEST
117 $TEST
118 sleep 2
119done
120
121
103# Test apply and redirect with larger value than send 122# Test apply and redirect with larger value than send
104r=1 123r=1
105i=8 124i=8
@@ -113,6 +132,25 @@ for t in "sendmsg" "sendpage"; do
113 sleep 2 132 sleep 2
114done 133done
115 134
135prog="--txmsg_redir --txmsg_apply 2048 --txmsg_ingress"
136
137for t in "sendmsg" "sendpage"; do
138 TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
139 echo $TEST
140 $TEST
141 sleep 2
142done
143
144prog="--txmsg_redir --txmsg_apply 2048 --txmsg_skb"
145
146for t in "sendmsg" "sendpage"; do
147 TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
148 echo $TEST
149 $TEST
150 sleep 2
151done
152
153
116# Test apply and redirect with apply that never reaches limit 154# Test apply and redirect with apply that never reaches limit
117r=1024 155r=1024
118i=1 156i=1
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index 07aa237221d1..6f2334912283 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -64,6 +64,8 @@ int txmsg_apply;
64int txmsg_cork; 64int txmsg_cork;
65int txmsg_start; 65int txmsg_start;
66int txmsg_end; 66int txmsg_end;
67int txmsg_ingress;
68int txmsg_skb;
67 69
68static const struct option long_options[] = { 70static const struct option long_options[] = {
69 {"help", no_argument, NULL, 'h' }, 71 {"help", no_argument, NULL, 'h' },
@@ -83,6 +85,8 @@ static const struct option long_options[] = {
83 {"txmsg_cork", required_argument, NULL, 'k'}, 85 {"txmsg_cork", required_argument, NULL, 'k'},
84 {"txmsg_start", required_argument, NULL, 's'}, 86 {"txmsg_start", required_argument, NULL, 's'},
85 {"txmsg_end", required_argument, NULL, 'e'}, 87 {"txmsg_end", required_argument, NULL, 'e'},
88 {"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
89 {"txmsg_skb", no_argument, &txmsg_skb, 1 },
86 {0, 0, NULL, 0 } 90 {0, 0, NULL, 0 }
87}; 91};
88 92
@@ -793,6 +797,60 @@ run:
793 return err; 797 return err;
794 } 798 }
795 } 799 }
800
801 if (txmsg_ingress) {
802 int in = BPF_F_INGRESS;
803
804 i = 0;
805 err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
806 if (err) {
807 fprintf(stderr,
808 "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
809 err, strerror(errno));
810 }
811 i = 1;
812 err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
813 if (err) {
814 fprintf(stderr,
815 "ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
816 err, strerror(errno));
817 }
818 err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
819 if (err) {
820 fprintf(stderr,
821 "ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
822 err, strerror(errno));
823 }
824
825 i = 2;
826 err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
827 if (err) {
828 fprintf(stderr,
829 "ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
830 err, strerror(errno));
831 }
832 }
833
834 if (txmsg_skb) {
835 int skb_fd = (test == SENDMSG || test == SENDPAGE) ? p2 : p1;
836 int ingress = BPF_F_INGRESS;
837
838 i = 0;
839 err = bpf_map_update_elem(map_fd[7], &i, &ingress, BPF_ANY);
840 if (err) {
841 fprintf(stderr,
842 "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
843 err, strerror(errno));
844 }
845
846 i = 3;
847 err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
848 if (err) {
849 fprintf(stderr,
850 "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
851 err, strerror(errno));
852 }
853 }
796 } 854 }
797 855
798 if (txmsg_drop) 856 if (txmsg_drop)
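For orientation, the BPF side consumes the BPF_F_INGRESS value written into these flag maps and hands it to the redirect helper. A minimal sk_msg program doing so could look as follows; this is an illustrative sketch only (the map name sock_map_redir is hypothetical here; the real programs live in samples/sockmap/sockmap_kern.c):

#include <linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") sock_map_redir = {
	.type		= BPF_MAP_TYPE_SOCKMAP,
	.key_size	= sizeof(int),
	.value_size	= sizeof(int),
	.max_entries	= 1,
};

SEC("sk_msg")
int msg_redir_ingress(struct sk_msg_md *msg)
{
	/* Redirect the msg to the socket stored at index 0 of the map
	 * and, due to BPF_F_INGRESS, queue it on that socket's ingress
	 * path instead of its egress path.
	 */
	return bpf_msg_redirect_map(msg, &sock_map_redir, 0, BPF_F_INGRESS);
}

char _license[] SEC("license") = "GPL";
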
diff --git a/security/apparmor/include/path.h b/security/apparmor/include/path.h
index 05fb3305671e..e042b994f2b8 100644
--- a/security/apparmor/include/path.h
+++ b/security/apparmor/include/path.h
@@ -43,15 +43,10 @@ struct aa_buffers {
43 43
44DECLARE_PER_CPU(struct aa_buffers, aa_buffers); 44DECLARE_PER_CPU(struct aa_buffers, aa_buffers);
45 45
46#define COUNT_ARGS(X...) COUNT_ARGS_HELPER(, ##X, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
47#define COUNT_ARGS_HELPER(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, n, X...) n
48#define CONCAT(X, Y) X ## Y
49#define CONCAT_AFTER(X, Y) CONCAT(X, Y)
50
51#define ASSIGN(FN, X, N) ((X) = FN(N)) 46#define ASSIGN(FN, X, N) ((X) = FN(N))
52#define EVAL1(FN, X) ASSIGN(FN, X, 0) /*X = FN(0)*/ 47#define EVAL1(FN, X) ASSIGN(FN, X, 0) /*X = FN(0)*/
53#define EVAL2(FN, X, Y...) do { ASSIGN(FN, X, 1); EVAL1(FN, Y); } while (0) 48#define EVAL2(FN, X, Y...) do { ASSIGN(FN, X, 1); EVAL1(FN, Y); } while (0)
54#define EVAL(FN, X...) CONCAT_AFTER(EVAL, COUNT_ARGS(X))(FN, X) 49#define EVAL(FN, X...) CONCATENATE(EVAL, COUNT_ARGS(X))(FN, X)
55 50
56#define for_each_cpu_buffer(I) for ((I) = 0; (I) < MAX_PATH_BUFFERS; (I)++) 51#define for_each_cpu_buffer(I) for ((I) = 0; (I) < MAX_PATH_BUFFERS; (I)++)
57 52
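COUNT_ARGS() and CONCATENATE() are not gone: they are now picked up from <linux/kernel.h>, where they were moved as prep for the raw tracepoint work, so only the EVAL helpers stay local to apparmor. For readers unfamiliar with the trick, here is a small userspace analogue; the macro definitions below are my own stand-ins mirroring the kernel ones:

#include <stdio.h>

#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, n, X...) n
#define __CONCAT(a, b) a ## b
#define CONCATENATE(a, b) __CONCAT(a, b)

#define EVAL3(x, y, z) ((x) + (y) + (z))

int main(void)
{
	/* COUNT_ARGS(1, 2, 3) expands to 3, so CONCATENATE(EVAL, ...)
	 * token-pastes to EVAL3, which then receives the arguments.
	 */
	printf("%d\n", CONCATENATE(EVAL, COUNT_ARGS(1, 2, 3))(1, 2, 3));
	return 0;
}
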
diff --git a/sound/firewire/amdtp-stream-trace.h b/sound/firewire/amdtp-stream-trace.h
index ea0d486652c8..54cdd4ffa9ce 100644
--- a/sound/firewire/amdtp-stream-trace.h
+++ b/sound/firewire/amdtp-stream-trace.h
@@ -14,7 +14,7 @@
14#include <linux/tracepoint.h> 14#include <linux/tracepoint.h>
15 15
16TRACE_EVENT(in_packet, 16TRACE_EVENT(in_packet,
17 TP_PROTO(const struct amdtp_stream *s, u32 cycles, u32 cip_header[2], unsigned int payload_length, unsigned int index), 17 TP_PROTO(const struct amdtp_stream *s, u32 cycles, u32 *cip_header, unsigned int payload_length, unsigned int index),
18 TP_ARGS(s, cycles, cip_header, payload_length, index), 18 TP_ARGS(s, cycles, cip_header, payload_length, index),
19 TP_STRUCT__entry( 19 TP_STRUCT__entry(
20 __field(unsigned int, second) 20 __field(unsigned int, second)
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 20da835e9e38..7a3173b76c16 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -114,7 +114,7 @@ static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
114 sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL; 114 sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL;
115} 115}
116 116
117static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...) 117static void print_insn(void *private_data, const char *fmt, ...)
118{ 118{
119 va_list args; 119 va_list args;
120 120
@@ -124,7 +124,7 @@ static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
124} 124}
125 125
126static void 126static void
127print_insn_for_graph(struct bpf_verifier_env *env, const char *fmt, ...) 127print_insn_for_graph(void *private_data, const char *fmt, ...)
128{ 128{
129 char buf[64], *p; 129 char buf[64], *p;
130 va_list args; 130 va_list args;
@@ -154,7 +154,7 @@ print_insn_for_graph(struct bpf_verifier_env *env, const char *fmt, ...)
154 printf("%s", buf); 154 printf("%s", buf);
155} 155}
156 156
157static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...) 157static void print_insn_json(void *private_data, const char *fmt, ...)
158{ 158{
159 unsigned int l = strlen(fmt); 159 unsigned int l = strlen(fmt);
160 char chomped_fmt[l]; 160 char chomped_fmt[l];
@@ -248,7 +248,7 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
248 248
249 jsonw_start_object(json_wtr); 249 jsonw_start_object(json_wtr);
250 jsonw_name(json_wtr, "disasm"); 250 jsonw_name(json_wtr, "disasm");
251 print_bpf_insn(&cbs, NULL, insn + i, true); 251 print_bpf_insn(&cbs, insn + i, true);
252 252
253 if (opcodes) { 253 if (opcodes) {
254 jsonw_name(json_wtr, "opcodes"); 254 jsonw_name(json_wtr, "opcodes");
@@ -302,7 +302,7 @@ void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
302 double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); 302 double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
303 303
304 printf("% 4d: ", i); 304 printf("% 4d: ", i);
305 print_bpf_insn(&cbs, NULL, insn + i, true); 305 print_bpf_insn(&cbs, insn + i, true);
306 306
307 if (opcodes) { 307 if (opcodes) {
308 printf(" "); 308 printf(" ");
@@ -331,7 +331,7 @@ void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end,
331 331
332 for (; cur <= insn_end; cur++) { 332 for (; cur <= insn_end; cur++) {
333 printf("% 4d: ", (int)(cur - insn_start + start_idx)); 333 printf("% 4d: ", (int)(cur - insn_start + start_idx));
334 print_bpf_insn(&cbs, NULL, cur, true); 334 print_bpf_insn(&cbs, cur, true);
335 if (cur != insn_end) 335 if (cur != insn_end)
336 printf(" | "); 336 printf(" | ");
337 } 337 }
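The pattern after this change: whatever context the dumper needs is stashed in the callback struct up front and handed back as private_data, rather than abusing a verifier env pointer that bpftool never had. Roughly like the sketch below (written against the bpf_insn_cbs layout in kernel/bpf/disasm.h as used by bpftool; treat the details as illustrative, it only compiles inside that tree):

#include <stdarg.h>
#include <stdio.h>

static void my_print_insn(void *private_data, const char *fmt, ...)
{
	struct dump_data *dd = private_data;	/* caller-owned state */
	va_list args;

	va_start(args, fmt);
	vprintf(fmt, args);
	va_end(args);
	(void)dd;	/* e.g. used for kernel symbol lookups */
}

static void dump_one_insn(struct dump_data *dd, const struct bpf_insn *insn)
{
	const struct bpf_insn_cbs cbs = {
		.cb_print	= my_print_insn,
		.private_data	= dd,	/* replaces the old env argument */
	};

	print_bpf_insn(&cbs, insn, true);
}
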
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d245c41213ac..9d07465023a2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -94,6 +94,7 @@ enum bpf_cmd {
94 BPF_MAP_GET_FD_BY_ID, 94 BPF_MAP_GET_FD_BY_ID,
95 BPF_OBJ_GET_INFO_BY_FD, 95 BPF_OBJ_GET_INFO_BY_FD,
96 BPF_PROG_QUERY, 96 BPF_PROG_QUERY,
97 BPF_RAW_TRACEPOINT_OPEN,
97}; 98};
98 99
99enum bpf_map_type { 100enum bpf_map_type {
@@ -134,6 +135,8 @@ enum bpf_prog_type {
134 BPF_PROG_TYPE_SK_SKB, 135 BPF_PROG_TYPE_SK_SKB,
135 BPF_PROG_TYPE_CGROUP_DEVICE, 136 BPF_PROG_TYPE_CGROUP_DEVICE,
136 BPF_PROG_TYPE_SK_MSG, 137 BPF_PROG_TYPE_SK_MSG,
138 BPF_PROG_TYPE_RAW_TRACEPOINT,
139 BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
137}; 140};
138 141
139enum bpf_attach_type { 142enum bpf_attach_type {
@@ -145,6 +148,12 @@ enum bpf_attach_type {
145 BPF_SK_SKB_STREAM_VERDICT, 148 BPF_SK_SKB_STREAM_VERDICT,
146 BPF_CGROUP_DEVICE, 149 BPF_CGROUP_DEVICE,
147 BPF_SK_MSG_VERDICT, 150 BPF_SK_MSG_VERDICT,
151 BPF_CGROUP_INET4_BIND,
152 BPF_CGROUP_INET6_BIND,
153 BPF_CGROUP_INET4_CONNECT,
154 BPF_CGROUP_INET6_CONNECT,
155 BPF_CGROUP_INET4_POST_BIND,
156 BPF_CGROUP_INET6_POST_BIND,
148 __MAX_BPF_ATTACH_TYPE 157 __MAX_BPF_ATTACH_TYPE
149}; 158};
150 159
@@ -294,6 +303,11 @@ union bpf_attr {
294 __u32 prog_flags; 303 __u32 prog_flags;
295 char prog_name[BPF_OBJ_NAME_LEN]; 304 char prog_name[BPF_OBJ_NAME_LEN];
296 __u32 prog_ifindex; /* ifindex of netdev to prep for */ 305 __u32 prog_ifindex; /* ifindex of netdev to prep for */
306 /* For some prog types expected attach type must be known at
307 * load time to verify attach type specific parts of prog
308 * (context accesses, allowed helpers, etc).
309 */
310 __u32 expected_attach_type;
297 }; 311 };
298 312
299 struct { /* anonymous struct used by BPF_OBJ_* commands */ 313 struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -344,6 +358,11 @@ union bpf_attr {
344 __aligned_u64 prog_ids; 358 __aligned_u64 prog_ids;
345 __u32 prog_cnt; 359 __u32 prog_cnt;
346 } query; 360 } query;
361
362 struct {
363 __u64 name;
364 __u32 prog_fd;
365 } raw_tracepoint;
347} __attribute__((aligned(8))); 366} __attribute__((aligned(8)));
348 367
349/* BPF helper function descriptions: 368/* BPF helper function descriptions:
@@ -729,6 +748,13 @@ union bpf_attr {
729 * @flags: reserved for future use 748 * @flags: reserved for future use
730 * Return: SK_PASS 749 * Return: SK_PASS
731 * 750 *
751 * int bpf_bind(ctx, addr, addr_len)
 752 * Bind socket to address. Only binding to an IP address is supported; no
 753 * port can be set in addr.
754 * @ctx: pointer to context of type bpf_sock_addr
755 * @addr: pointer to struct sockaddr to bind socket to
756 * @addr_len: length of sockaddr structure
757 * Return: 0 on success or negative error code
732 */ 758 */
733#define __BPF_FUNC_MAPPER(FN) \ 759#define __BPF_FUNC_MAPPER(FN) \
734 FN(unspec), \ 760 FN(unspec), \
@@ -794,7 +820,8 @@ union bpf_attr {
794 FN(msg_redirect_map), \ 820 FN(msg_redirect_map), \
795 FN(msg_apply_bytes), \ 821 FN(msg_apply_bytes), \
796 FN(msg_cork_bytes), \ 822 FN(msg_cork_bytes), \
797 FN(msg_pull_data), 823 FN(msg_pull_data), \
824 FN(bind),
798 825
799/* integer value in 'imm' field of BPF_CALL instruction selects which helper 826/* integer value in 'imm' field of BPF_CALL instruction selects which helper
800 * function eBPF program intends to call 827 * function eBPF program intends to call
@@ -922,6 +949,15 @@ struct bpf_sock {
922 __u32 protocol; 949 __u32 protocol;
923 __u32 mark; 950 __u32 mark;
924 __u32 priority; 951 __u32 priority;
952 __u32 src_ip4; /* Allows 1,2,4-byte read.
953 * Stored in network byte order.
954 */
955 __u32 src_ip6[4]; /* Allows 1,2,4-byte read.
956 * Stored in network byte order.
957 */
958 __u32 src_port; /* Allows 4-byte read.
959 * Stored in host byte order
960 */
925}; 961};
926 962
927#define XDP_PACKET_HEADROOM 256 963#define XDP_PACKET_HEADROOM 256
@@ -997,6 +1033,26 @@ struct bpf_map_info {
997 __u64 netns_ino; 1033 __u64 netns_ino;
998} __attribute__((aligned(8))); 1034} __attribute__((aligned(8)));
999 1035
1036/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
1037 * by user and intended to be used by the socket (e.g. to bind to; depends
1038 * on the attach type).
1039 */
1040struct bpf_sock_addr {
1041 __u32 user_family; /* Allows 4-byte read, but no write. */
1042 __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write.
1043 * Stored in network byte order.
1044 */
1045 __u32 user_ip6[4]; /* Allows 1,2,4-byte read and 4-byte write.
1046 * Stored in network byte order.
1047 */
1048 __u32 user_port; /* Allows 4-byte read and write.
1049 * Stored in network byte order
1050 */
1051 __u32 family; /* Allows 4-byte read, but no write */
1052 __u32 type; /* Allows 4-byte read, but no write */
1053 __u32 protocol; /* Allows 4-byte read, but no write */
1054};
1055
1000/* User bpf_sock_ops struct to access socket values and specify request ops 1056/* User bpf_sock_ops struct to access socket values and specify request ops
1001 * and their replies. 1057 * and their replies.
1002 * Some of this fields are in network (bigendian) byte order and may need 1058 * Some of this fields are in network (bigendian) byte order and may need
@@ -1151,4 +1207,8 @@ struct bpf_cgroup_dev_ctx {
1151 __u32 minor; 1207 __u32 minor;
1152}; 1208};
1153 1209
1210struct bpf_raw_tracepoint_args {
1211 __u64 args[0];
1212};
1213
1154#endif /* _UAPI__LINUX_BPF_H__ */ 1214#endif /* _UAPI__LINUX_BPF_H__ */
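Two of the additions above benefit from a worked example. struct bpf_sock_addr and bpf_bind() are exercised end to end by connect4_prog.c/connect6_prog.c further below. For BPF_PROG_TYPE_RAW_TRACEPOINT, the program context is struct bpf_raw_tracepoint_args, i.e. the tracepoint's arguments in raw form as an array of __u64. A sketch follows; the sched_switch argument layout is my reading of its TP_PROTO (bool preempt, struct task_struct *prev, struct task_struct *next), so verify against the tracepoint definition, and the section name is arbitrary since the selftest below loads such objects with the program type given explicitly:

#include <linux/bpf.h>
#include "bpf_helpers.h"

SEC("raw_tracepoint/sched_switch")
int on_sched_switch(struct bpf_raw_tracepoint_args *ctx)
{
	long prev = ctx->args[1];	/* struct task_struct *prev */
	long next = ctx->args[2];	/* struct task_struct *next */

	/* A real program would bpf_probe_read() fields out of prev/next
	 * or update a map here; the return value is not acted upon.
	 */
	return prev != next;
}

char _license[] SEC("license") = "GPL";
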
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 592a58a2b681..acbb3f8b3bec 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -146,26 +146,30 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
146 -1); 146 -1);
147} 147}
148 148
149int bpf_load_program_name(enum bpf_prog_type type, const char *name, 149int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
150 const struct bpf_insn *insns, 150 char *log_buf, size_t log_buf_sz)
151 size_t insns_cnt, const char *license,
152 __u32 kern_version, char *log_buf,
153 size_t log_buf_sz)
154{ 151{
155 int fd;
156 union bpf_attr attr; 152 union bpf_attr attr;
157 __u32 name_len = name ? strlen(name) : 0; 153 __u32 name_len;
154 int fd;
155
156 if (!load_attr)
157 return -EINVAL;
158
159 name_len = load_attr->name ? strlen(load_attr->name) : 0;
158 160
159 bzero(&attr, sizeof(attr)); 161 bzero(&attr, sizeof(attr));
160 attr.prog_type = type; 162 attr.prog_type = load_attr->prog_type;
161 attr.insn_cnt = (__u32)insns_cnt; 163 attr.expected_attach_type = load_attr->expected_attach_type;
162 attr.insns = ptr_to_u64(insns); 164 attr.insn_cnt = (__u32)load_attr->insns_cnt;
163 attr.license = ptr_to_u64(license); 165 attr.insns = ptr_to_u64(load_attr->insns);
166 attr.license = ptr_to_u64(load_attr->license);
164 attr.log_buf = ptr_to_u64(NULL); 167 attr.log_buf = ptr_to_u64(NULL);
165 attr.log_size = 0; 168 attr.log_size = 0;
166 attr.log_level = 0; 169 attr.log_level = 0;
167 attr.kern_version = kern_version; 170 attr.kern_version = load_attr->kern_version;
168 memcpy(attr.prog_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); 171 memcpy(attr.prog_name, load_attr->name,
172 min(name_len, BPF_OBJ_NAME_LEN - 1));
169 173
170 fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); 174 fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
171 if (fd >= 0 || !log_buf || !log_buf_sz) 175 if (fd >= 0 || !log_buf || !log_buf_sz)
@@ -184,8 +188,18 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
184 __u32 kern_version, char *log_buf, 188 __u32 kern_version, char *log_buf,
185 size_t log_buf_sz) 189 size_t log_buf_sz)
186{ 190{
187 return bpf_load_program_name(type, NULL, insns, insns_cnt, license, 191 struct bpf_load_program_attr load_attr;
188 kern_version, log_buf, log_buf_sz); 192
193 memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
194 load_attr.prog_type = type;
195 load_attr.expected_attach_type = 0;
196 load_attr.name = NULL;
197 load_attr.insns = insns;
198 load_attr.insns_cnt = insns_cnt;
199 load_attr.license = license;
200 load_attr.kern_version = kern_version;
201
202 return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
189} 203}
190 204
191int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, 205int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
@@ -428,6 +442,17 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
428 return err; 442 return err;
429} 443}
430 444
445int bpf_raw_tracepoint_open(const char *name, int prog_fd)
446{
447 union bpf_attr attr;
448
449 bzero(&attr, sizeof(attr));
450 attr.raw_tracepoint.name = ptr_to_u64(name);
451 attr.raw_tracepoint.prog_fd = prog_fd;
452
453 return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
454}
455
431int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) 456int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
432{ 457{
433 struct sockaddr_nl sa; 458 struct sockaddr_nl sa;
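Putting the new wrapper to use is a two-step affair, mirrored by the selftest later in this series: load a BPF_PROG_TYPE_RAW_TRACEPOINT program, then open the tracepoint by name. A sketch with error handling trimmed; to my understanding the attachment lives exactly as long as the returned fd stays open:

#include <stdio.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int attach_raw_tp(void)
{
	struct bpf_object *obj;
	int prog_fd, tp_fd;

	/* Type passed explicitly; raw tracepoints have no section-name
	 * mapping in libbpf at this point.
	 */
	if (bpf_prog_load("./test_stacktrace_map.o",
			  BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd))
		return -1;

	tp_fd = bpf_raw_tracepoint_open("sched_switch", prog_fd);
	if (tp_fd < 0)
		fprintf(stderr, "raw tracepoint open failed\n");

	return tp_fd;
}
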
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 8d18fb73d7fb..39f6a0d64a3b 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -41,13 +41,20 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
41 int key_size, int inner_map_fd, int max_entries, 41 int key_size, int inner_map_fd, int max_entries,
42 __u32 map_flags); 42 __u32 map_flags);
43 43
44struct bpf_load_program_attr {
45 enum bpf_prog_type prog_type;
46 enum bpf_attach_type expected_attach_type;
47 const char *name;
48 const struct bpf_insn *insns;
49 size_t insns_cnt;
50 const char *license;
51 __u32 kern_version;
52};
53
44/* Recommend log buffer size */ 54/* Recommend log buffer size */
45#define BPF_LOG_BUF_SIZE (256 * 1024) 55#define BPF_LOG_BUF_SIZE (256 * 1024)
46int bpf_load_program_name(enum bpf_prog_type type, const char *name, 56int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
47 const struct bpf_insn *insns, 57 char *log_buf, size_t log_buf_sz);
48 size_t insns_cnt, const char *license,
49 __u32 kern_version, char *log_buf,
50 size_t log_buf_sz);
51int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, 58int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
52 size_t insns_cnt, const char *license, 59 size_t insns_cnt, const char *license,
53 __u32 kern_version, char *log_buf, 60 __u32 kern_version, char *log_buf,
@@ -79,4 +86,5 @@ int bpf_map_get_fd_by_id(__u32 id);
79int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); 86int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
80int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, 87int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
81 __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); 88 __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt);
89int bpf_raw_tracepoint_open(const char *name, int prog_fd);
82#endif 90#endif
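Migration from the removed bpf_load_program_name() is mechanical: zero a struct bpf_load_program_attr, move the old arguments into fields, and set expected_attach_type where the program type requires one. A sketch modeled on load_sock_prog() in the new test_sock.c below, with the program name and attach type picked purely for illustration:

#include <string.h>
#include <bpf/bpf.h>

static char log_buf[BPF_LOG_BUF_SIZE];

static int load_post_bind4(const struct bpf_insn *insns, size_t insns_cnt)
{
	struct bpf_load_program_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
	/* Must be consistent with the attach type used later in
	 * bpf_prog_attach(), otherwise the attachment is rejected.
	 */
	attr.expected_attach_type = BPF_CGROUP_INET4_POST_BIND;
	attr.name = "post_bind4";
	attr.insns = insns;
	attr.insns_cnt = insns_cnt;
	attr.license = "GPL";

	return bpf_load_program_xattr(&attr, log_buf, BPF_LOG_BUF_SIZE);
}
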
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 64a8fc384186..5922443063f0 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -203,6 +203,8 @@ struct bpf_program {
203 struct bpf_object *obj; 203 struct bpf_object *obj;
204 void *priv; 204 void *priv;
205 bpf_program_clear_priv_t clear_priv; 205 bpf_program_clear_priv_t clear_priv;
206
207 enum bpf_attach_type expected_attach_type;
206}; 208};
207 209
208struct bpf_map { 210struct bpf_map {
@@ -1162,21 +1164,31 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
1162} 1164}
1163 1165
1164static int 1166static int
1165load_program(enum bpf_prog_type type, const char *name, struct bpf_insn *insns, 1167load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
1166 int insns_cnt, char *license, u32 kern_version, int *pfd) 1168 const char *name, struct bpf_insn *insns, int insns_cnt,
1169 char *license, u32 kern_version, int *pfd)
1167{ 1170{
1168 int ret; 1171 struct bpf_load_program_attr load_attr;
1169 char *log_buf; 1172 char *log_buf;
1173 int ret;
1170 1174
1171 if (!insns || !insns_cnt) 1175 memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
1176 load_attr.prog_type = type;
1177 load_attr.expected_attach_type = expected_attach_type;
1178 load_attr.name = name;
1179 load_attr.insns = insns;
1180 load_attr.insns_cnt = insns_cnt;
1181 load_attr.license = license;
1182 load_attr.kern_version = kern_version;
1183
1184 if (!load_attr.insns || !load_attr.insns_cnt)
1172 return -EINVAL; 1185 return -EINVAL;
1173 1186
1174 log_buf = malloc(BPF_LOG_BUF_SIZE); 1187 log_buf = malloc(BPF_LOG_BUF_SIZE);
1175 if (!log_buf) 1188 if (!log_buf)
1176 pr_warning("Alloc log buffer for bpf loader error, continue without log\n"); 1189 pr_warning("Alloc log buffer for bpf loader error, continue without log\n");
1177 1190
1178 ret = bpf_load_program_name(type, name, insns, insns_cnt, license, 1191 ret = bpf_load_program_xattr(&load_attr, log_buf, BPF_LOG_BUF_SIZE);
1179 kern_version, log_buf, BPF_LOG_BUF_SIZE);
1180 1192
1181 if (ret >= 0) { 1193 if (ret >= 0) {
1182 *pfd = ret; 1194 *pfd = ret;
@@ -1192,18 +1204,18 @@ load_program(enum bpf_prog_type type, const char *name, struct bpf_insn *insns,
1192 pr_warning("-- BEGIN DUMP LOG ---\n"); 1204 pr_warning("-- BEGIN DUMP LOG ---\n");
1193 pr_warning("\n%s\n", log_buf); 1205 pr_warning("\n%s\n", log_buf);
1194 pr_warning("-- END LOG --\n"); 1206 pr_warning("-- END LOG --\n");
1195 } else if (insns_cnt >= BPF_MAXINSNS) { 1207 } else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
1196 pr_warning("Program too large (%d insns), at most %d insns\n", 1208 pr_warning("Program too large (%zu insns), at most %d insns\n",
1197 insns_cnt, BPF_MAXINSNS); 1209 load_attr.insns_cnt, BPF_MAXINSNS);
1198 ret = -LIBBPF_ERRNO__PROG2BIG; 1210 ret = -LIBBPF_ERRNO__PROG2BIG;
1199 } else { 1211 } else {
1200 /* Wrong program type? */ 1212 /* Wrong program type? */
1201 if (type != BPF_PROG_TYPE_KPROBE) { 1213 if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
1202 int fd; 1214 int fd;
1203 1215
1204 fd = bpf_load_program_name(BPF_PROG_TYPE_KPROBE, name, 1216 load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
1205 insns, insns_cnt, license, 1217 load_attr.expected_attach_type = 0;
1206 kern_version, NULL, 0); 1218 fd = bpf_load_program_xattr(&load_attr, NULL, 0);
1207 if (fd >= 0) { 1219 if (fd >= 0) {
1208 close(fd); 1220 close(fd);
1209 ret = -LIBBPF_ERRNO__PROGTYPE; 1221 ret = -LIBBPF_ERRNO__PROGTYPE;
@@ -1247,8 +1259,9 @@ bpf_program__load(struct bpf_program *prog,
1247 pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n", 1259 pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
1248 prog->section_name, prog->instances.nr); 1260 prog->section_name, prog->instances.nr);
1249 } 1261 }
1250 err = load_program(prog->type, prog->name, prog->insns, 1262 err = load_program(prog->type, prog->expected_attach_type,
1251 prog->insns_cnt, license, kern_version, &fd); 1263 prog->name, prog->insns, prog->insns_cnt,
1264 license, kern_version, &fd);
1252 if (!err) 1265 if (!err)
1253 prog->instances.fds[0] = fd; 1266 prog->instances.fds[0] = fd;
1254 goto out; 1267 goto out;
@@ -1276,8 +1289,8 @@ bpf_program__load(struct bpf_program *prog,
1276 continue; 1289 continue;
1277 } 1290 }
1278 1291
1279 err = load_program(prog->type, prog->name, 1292 err = load_program(prog->type, prog->expected_attach_type,
1280 result.new_insn_ptr, 1293 prog->name, result.new_insn_ptr,
1281 result.new_insn_cnt, 1294 result.new_insn_cnt,
1282 license, kern_version, &fd); 1295 license, kern_version, &fd);
1283 1296
@@ -1835,11 +1848,25 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
1835BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); 1848BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
1836BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); 1849BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
1837 1850
1838#define BPF_PROG_SEC(string, type) { string, sizeof(string) - 1, type } 1851static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
1852 enum bpf_attach_type type)
1853{
1854 prog->expected_attach_type = type;
1855}
1856
1857#define BPF_PROG_SEC_FULL(string, ptype, atype) \
1858 { string, sizeof(string) - 1, ptype, atype }
1859
1860#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
1861
1862#define BPF_SA_PROG_SEC(string, ptype) \
1863 BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype)
1864
1839static const struct { 1865static const struct {
1840 const char *sec; 1866 const char *sec;
1841 size_t len; 1867 size_t len;
1842 enum bpf_prog_type prog_type; 1868 enum bpf_prog_type prog_type;
1869 enum bpf_attach_type expected_attach_type;
1843} section_names[] = { 1870} section_names[] = {
1844 BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), 1871 BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
1845 BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), 1872 BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE),
@@ -1858,10 +1885,17 @@ static const struct {
1858 BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS), 1885 BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS),
1859 BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB), 1886 BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB),
1860 BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG), 1887 BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG),
1888 BPF_SA_PROG_SEC("cgroup/bind4", BPF_CGROUP_INET4_BIND),
1889 BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
1890 BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
1891 BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
1861}; 1892};
1893
1862#undef BPF_PROG_SEC 1894#undef BPF_PROG_SEC
1895#undef BPF_PROG_SEC_FULL
1896#undef BPF_SA_PROG_SEC
1863 1897
1864static enum bpf_prog_type bpf_program__guess_type(struct bpf_program *prog) 1898static int bpf_program__identify_section(struct bpf_program *prog)
1865{ 1899{
1866 int i; 1900 int i;
1867 1901
@@ -1871,13 +1905,13 @@ static enum bpf_prog_type bpf_program__guess_type(struct bpf_program *prog)
1871 for (i = 0; i < ARRAY_SIZE(section_names); i++) 1905 for (i = 0; i < ARRAY_SIZE(section_names); i++)
1872 if (strncmp(prog->section_name, section_names[i].sec, 1906 if (strncmp(prog->section_name, section_names[i].sec,
1873 section_names[i].len) == 0) 1907 section_names[i].len) == 0)
1874 return section_names[i].prog_type; 1908 return i;
1875 1909
1876err: 1910err:
1877 pr_warning("failed to guess program type based on section name %s\n", 1911 pr_warning("failed to guess program type based on section name %s\n",
1878 prog->section_name); 1912 prog->section_name);
1879 1913
1880 return BPF_PROG_TYPE_UNSPEC; 1914 return -1;
1881} 1915}
1882 1916
1883int bpf_map__fd(struct bpf_map *map) 1917int bpf_map__fd(struct bpf_map *map)
@@ -1977,11 +2011,30 @@ long libbpf_get_error(const void *ptr)
1977int bpf_prog_load(const char *file, enum bpf_prog_type type, 2011int bpf_prog_load(const char *file, enum bpf_prog_type type,
1978 struct bpf_object **pobj, int *prog_fd) 2012 struct bpf_object **pobj, int *prog_fd)
1979{ 2013{
2014 struct bpf_prog_load_attr attr;
2015
2016 memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
2017 attr.file = file;
2018 attr.prog_type = type;
2019 attr.expected_attach_type = 0;
2020
2021 return bpf_prog_load_xattr(&attr, pobj, prog_fd);
2022}
2023
2024int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
2025 struct bpf_object **pobj, int *prog_fd)
2026{
1980 struct bpf_program *prog, *first_prog = NULL; 2027 struct bpf_program *prog, *first_prog = NULL;
2028 enum bpf_attach_type expected_attach_type;
2029 enum bpf_prog_type prog_type;
1981 struct bpf_object *obj; 2030 struct bpf_object *obj;
2031 int section_idx;
1982 int err; 2032 int err;
1983 2033
1984 obj = bpf_object__open(file); 2034 if (!attr)
2035 return -EINVAL;
2036
2037 obj = bpf_object__open(attr->file);
1985 if (IS_ERR(obj)) 2038 if (IS_ERR(obj))
1986 return -ENOENT; 2039 return -ENOENT;
1987 2040
@@ -1990,15 +2043,23 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
1990 * If type is not specified, try to guess it based on 2043 * If type is not specified, try to guess it based on
1991 * section name. 2044 * section name.
1992 */ 2045 */
1993 if (type == BPF_PROG_TYPE_UNSPEC) { 2046 prog_type = attr->prog_type;
1994 type = bpf_program__guess_type(prog); 2047 expected_attach_type = attr->expected_attach_type;
1995 if (type == BPF_PROG_TYPE_UNSPEC) { 2048 if (prog_type == BPF_PROG_TYPE_UNSPEC) {
2049 section_idx = bpf_program__identify_section(prog);
2050 if (section_idx < 0) {
1996 bpf_object__close(obj); 2051 bpf_object__close(obj);
1997 return -EINVAL; 2052 return -EINVAL;
1998 } 2053 }
2054 prog_type = section_names[section_idx].prog_type;
2055 expected_attach_type =
2056 section_names[section_idx].expected_attach_type;
1999 } 2057 }
2000 2058
2001 bpf_program__set_type(prog, type); 2059 bpf_program__set_type(prog, prog_type);
2060 bpf_program__set_expected_attach_type(prog,
2061 expected_attach_type);
2062
2002 if (prog->idx != obj->efile.text_shndx && !first_prog) 2063 if (prog->idx != obj->efile.text_shndx && !first_prog)
2003 first_prog = prog; 2064 first_prog = prog;
2004 } 2065 }
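Net effect for libbpf users: a section name like "cgroup/connect4" now selects both the program type and the expected attach type, and bpf_prog_load_xattr() lets a caller set them explicitly instead. Loading and attaching one of the new connect programs then looks roughly like this sketch, which mirrors what test_sock_addr.c below does:

#include <string.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int load_and_attach_connect4(int cgroup_fd)
{
	struct bpf_prog_load_attr attr;
	struct bpf_object *obj;
	int prog_fd;

	memset(&attr, 0, sizeof(attr));
	attr.file = "./connect4_prog.o";
	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
	attr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;

	if (bpf_prog_load_xattr(&attr, &obj, &prog_fd))
		return -1;

	/* From now on every connect() on an AF_INET socket in this
	 * cgroup runs the program first.
	 */
	return bpf_prog_attach(prog_fd, cgroup_fd,
			       BPF_CGROUP_INET4_CONNECT, 0);
}
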
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index f85906533cdd..a3a62a583f27 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -248,6 +248,14 @@ int bpf_map__pin(struct bpf_map *map, const char *path);
248 248
249long libbpf_get_error(const void *ptr); 249long libbpf_get_error(const void *ptr);
250 250
251struct bpf_prog_load_attr {
252 const char *file;
253 enum bpf_prog_type prog_type;
254 enum bpf_attach_type expected_attach_type;
255};
256
257int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
258 struct bpf_object **pobj, int *prog_fd);
251int bpf_prog_load(const char *file, enum bpf_prog_type type, 259int bpf_prog_load(const char *file, enum bpf_prog_type type,
252 struct bpf_object **pobj, int *prog_fd); 260 struct bpf_object **pobj, int *prog_fd);
253 261
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f35fb02bdf56..0a315ddabbf4 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,21 +23,23 @@ urandom_read: urandom_read.c
23 23
24# Order correspond to 'make run_tests' order 24# Order correspond to 'make run_tests' order
25TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ 25TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
26 test_align test_verifier_log test_dev_cgroup test_tcpbpf_user 26 test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
27 test_sock test_sock_addr
27 28
28TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ 29TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
29 test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ 30 test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
30 sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ 31 sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
31 test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ 32 test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
32 sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ 33 sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
33 sockmap_tcp_msg_prog.o 34 sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o
34 35
35# Order correspond to 'make run_tests' order 36# Order correspond to 'make run_tests' order
36TEST_PROGS := test_kmod.sh \ 37TEST_PROGS := test_kmod.sh \
37 test_libbpf.sh \ 38 test_libbpf.sh \
38 test_xdp_redirect.sh \ 39 test_xdp_redirect.sh \
39 test_xdp_meta.sh \ 40 test_xdp_meta.sh \
40 test_offload.py 41 test_offload.py \
42 test_sock_addr.sh
41 43
42# Compile but not part of 'make run_tests' 44# Compile but not part of 'make run_tests'
43TEST_GEN_PROGS_EXTENDED = test_libbpf_open 45TEST_GEN_PROGS_EXTENDED = test_libbpf_open
@@ -51,6 +53,8 @@ $(TEST_GEN_PROGS): $(BPFOBJ)
51$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a 53$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
52 54
53$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c 55$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
56$(OUTPUT)/test_sock: cgroup_helpers.c
57$(OUTPUT)/test_sock_addr: cgroup_helpers.c
54 58
55.PHONY: force 59.PHONY: force
56 60
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 7cae376d8d0c..d8223d99f96d 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -94,6 +94,8 @@ static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
94 (void *) BPF_FUNC_msg_cork_bytes; 94 (void *) BPF_FUNC_msg_cork_bytes;
95static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = 95static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
96 (void *) BPF_FUNC_msg_pull_data; 96 (void *) BPF_FUNC_msg_pull_data;
97static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
98 (void *) BPF_FUNC_bind;
97 99
98/* llvm builtin functions that eBPF C program may use to 100/* llvm builtin functions that eBPF C program may use to
99 * emit BPF_LD_ABS and BPF_LD_IND instructions 101 * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/connect4_prog.c b/tools/testing/selftests/bpf/connect4_prog.c
new file mode 100644
index 000000000000..5a88a681d2ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/connect4_prog.c
@@ -0,0 +1,45 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2018 Facebook
3
4#include <string.h>
5
6#include <linux/stddef.h>
7#include <linux/bpf.h>
8#include <linux/in.h>
9#include <linux/in6.h>
10#include <sys/socket.h>
11
12#include "bpf_helpers.h"
13#include "bpf_endian.h"
14
15#define SRC_REWRITE_IP4 0x7f000004U
16#define DST_REWRITE_IP4 0x7f000001U
17#define DST_REWRITE_PORT4 4444
18
19int _version SEC("version") = 1;
20
21SEC("cgroup/connect4")
22int connect_v4_prog(struct bpf_sock_addr *ctx)
23{
24 struct sockaddr_in sa;
25
26 /* Rewrite destination. */
27 ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
28 ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
29
30 if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
31 /* Rewrite source. */
32 memset(&sa, 0, sizeof(sa));
33
34 sa.sin_family = AF_INET;
35 sa.sin_port = bpf_htons(0);
36 sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
37
38 if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
39 return 0;
40 }
41
42 return 1;
43}
44
45char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/connect6_prog.c b/tools/testing/selftests/bpf/connect6_prog.c
new file mode 100644
index 000000000000..8ea3f7d12dee
--- /dev/null
+++ b/tools/testing/selftests/bpf/connect6_prog.c
@@ -0,0 +1,61 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2018 Facebook
3
4#include <string.h>
5
6#include <linux/stddef.h>
7#include <linux/bpf.h>
8#include <linux/in.h>
9#include <linux/in6.h>
10#include <sys/socket.h>
11
12#include "bpf_helpers.h"
13#include "bpf_endian.h"
14
15#define SRC_REWRITE_IP6_0 0
16#define SRC_REWRITE_IP6_1 0
17#define SRC_REWRITE_IP6_2 0
18#define SRC_REWRITE_IP6_3 6
19
20#define DST_REWRITE_IP6_0 0
21#define DST_REWRITE_IP6_1 0
22#define DST_REWRITE_IP6_2 0
23#define DST_REWRITE_IP6_3 1
24
25#define DST_REWRITE_PORT6 6666
26
27int _version SEC("version") = 1;
28
29SEC("cgroup/connect6")
30int connect_v6_prog(struct bpf_sock_addr *ctx)
31{
32 struct sockaddr_in6 sa;
33
34 /* Rewrite destination. */
35 ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
36 ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
37 ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
38 ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
39
40 ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
41
42 if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
43 /* Rewrite source. */
44 memset(&sa, 0, sizeof(sa));
45
46 sa.sin6_family = AF_INET6;
47 sa.sin6_port = bpf_htons(0);
48
49 sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0);
50 sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1);
51 sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2);
52 sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3);
53
54 if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
55 return 0;
56 }
57
58 return 1;
59}
60
61char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index e9df48b306df..faadbe233966 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -877,7 +877,7 @@ static void test_stacktrace_map()
877 877
878 err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); 878 err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
879 if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) 879 if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
880 goto out; 880 return;
881 881
882 /* Get the ID for the sched/sched_switch tracepoint */ 882 /* Get the ID for the sched/sched_switch tracepoint */
883 snprintf(buf, sizeof(buf), 883 snprintf(buf, sizeof(buf),
@@ -888,8 +888,7 @@ static void test_stacktrace_map()
888 888
889 bytes = read(efd, buf, sizeof(buf)); 889 bytes = read(efd, buf, sizeof(buf));
890 close(efd); 890 close(efd);
891 if (CHECK(bytes <= 0 || bytes >= sizeof(buf), 891 if (bytes <= 0 || bytes >= sizeof(buf))
892 "read", "bytes %d errno %d\n", bytes, errno))
893 goto close_prog; 892 goto close_prog;
894 893
895 /* Open the perf event and attach bpf program */ 894 /* Open the perf event and attach bpf program */
@@ -906,29 +905,24 @@ static void test_stacktrace_map()
906 goto close_prog; 905 goto close_prog;
907 906
908 err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); 907 err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
909 if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", 908 if (err)
910 err, errno)) 909 goto disable_pmu;
911 goto close_pmu;
912 910
913 err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); 911 err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
914 if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", 912 if (err)
915 err, errno))
916 goto disable_pmu; 913 goto disable_pmu;
917 914
918 /* find map fds */ 915 /* find map fds */
919 control_map_fd = bpf_find_map(__func__, obj, "control_map"); 916 control_map_fd = bpf_find_map(__func__, obj, "control_map");
920 if (CHECK(control_map_fd < 0, "bpf_find_map control_map", 917 if (control_map_fd < 0)
921 "err %d errno %d\n", err, errno))
922 goto disable_pmu; 918 goto disable_pmu;
923 919
924 stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); 920 stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
925 if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap", 921 if (stackid_hmap_fd < 0)
926 "err %d errno %d\n", err, errno))
927 goto disable_pmu; 922 goto disable_pmu;
928 923
929 stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); 924 stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
930 if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n", 925 if (stackmap_fd < 0)
931 err, errno))
932 goto disable_pmu; 926 goto disable_pmu;
933 927
934 /* give some time for bpf program run */ 928 /* give some time for bpf program run */
@@ -945,24 +939,78 @@ static void test_stacktrace_map()
945 err = compare_map_keys(stackid_hmap_fd, stackmap_fd); 939 err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
946 if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", 940 if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
947 "err %d errno %d\n", err, errno)) 941 "err %d errno %d\n", err, errno))
948 goto disable_pmu; 942 goto disable_pmu_noerr;
949 943
950 err = compare_map_keys(stackmap_fd, stackid_hmap_fd); 944 err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
951 if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", 945 if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
952 "err %d errno %d\n", err, errno)) 946 "err %d errno %d\n", err, errno))
953 ; /* fall through */ 947 goto disable_pmu_noerr;
954 948
949 goto disable_pmu_noerr;
955disable_pmu: 950disable_pmu:
951 error_cnt++;
952disable_pmu_noerr:
956 ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); 953 ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
957
958close_pmu:
959 close(pmu_fd); 954 close(pmu_fd);
960
961close_prog: 955close_prog:
962 bpf_object__close(obj); 956 bpf_object__close(obj);
957}
963 958
964out: 959static void test_stacktrace_map_raw_tp()
965 return; 960{
961 int control_map_fd, stackid_hmap_fd, stackmap_fd;
962 const char *file = "./test_stacktrace_map.o";
963 int efd, err, prog_fd;
964 __u32 key, val, duration = 0;
965 struct bpf_object *obj;
966
967 err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
968 if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
969 return;
970
971 efd = bpf_raw_tracepoint_open("sched_switch", prog_fd);
972 if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
973 goto close_prog;
974
975 /* find map fds */
976 control_map_fd = bpf_find_map(__func__, obj, "control_map");
977 if (control_map_fd < 0)
978 goto close_prog;
979
980 stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
981 if (stackid_hmap_fd < 0)
982 goto close_prog;
983
984 stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
985 if (stackmap_fd < 0)
986 goto close_prog;
987
988 /* give some time for bpf program run */
989 sleep(1);
990
991 /* disable stack trace collection */
992 key = 0;
993 val = 1;
994 bpf_map_update_elem(control_map_fd, &key, &val, 0);
995
996 /* for every element in stackid_hmap, we can find a corresponding one
997 * in stackmap, and vice versa.
998 */
999 err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
1000 if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
1001 "err %d errno %d\n", err, errno))
1002 goto close_prog;
1003
1004 err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
1005 if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
1006 "err %d errno %d\n", err, errno))
1007 goto close_prog;
1008
1009 goto close_prog_noerr;
1010close_prog:
1011 error_cnt++;
1012close_prog_noerr:
1013 bpf_object__close(obj);
966} 1014}
967 1015
968static int extract_build_id(char *build_id, size_t size) 1016static int extract_build_id(char *build_id, size_t size)
@@ -1138,6 +1186,7 @@ int main(void)
1138 test_tp_attach_query(); 1186 test_tp_attach_query();
1139 test_stacktrace_map(); 1187 test_stacktrace_map();
1140 test_stacktrace_build_id(); 1188 test_stacktrace_build_id();
1189 test_stacktrace_map_raw_tp();
1141 1190
1142 printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); 1191 printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
1143 return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; 1192 return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
new file mode 100644
index 000000000000..73bb20cfb9b7
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -0,0 +1,479 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2018 Facebook
3
4#include <stdio.h>
5#include <unistd.h>
6
7#include <arpa/inet.h>
8#include <sys/types.h>
9#include <sys/socket.h>
10
11#include <linux/filter.h>
12
13#include <bpf/bpf.h>
14
15#include "cgroup_helpers.h"
16
17#ifndef ARRAY_SIZE
18# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
19#endif
20
21#define CG_PATH "/foo"
22#define MAX_INSNS 512
23
24char bpf_log_buf[BPF_LOG_BUF_SIZE];
25
26struct sock_test {
27 const char *descr;
28 /* BPF prog properties */
29 struct bpf_insn insns[MAX_INSNS];
30 enum bpf_attach_type expected_attach_type;
31 enum bpf_attach_type attach_type;
32 /* Socket properties */
33 int domain;
34 int type;
35 /* Endpoint to bind() to */
36 const char *ip;
37 unsigned short port;
38 /* Expected test result */
39 enum {
40 LOAD_REJECT,
41 ATTACH_REJECT,
42 BIND_REJECT,
43 SUCCESS,
44 } result;
45};
46
47static struct sock_test tests[] = {
48 {
49 "bind4 load with invalid access: src_ip6",
50 .insns = {
51 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
52 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
53 offsetof(struct bpf_sock, src_ip6[0])),
54 BPF_MOV64_IMM(BPF_REG_0, 1),
55 BPF_EXIT_INSN(),
56 },
57 BPF_CGROUP_INET4_POST_BIND,
58 BPF_CGROUP_INET4_POST_BIND,
59 0,
60 0,
61 NULL,
62 0,
63 LOAD_REJECT,
64 },
65 {
66 "bind4 load with invalid access: mark",
67 .insns = {
68 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
69 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
70 offsetof(struct bpf_sock, mark)),
71 BPF_MOV64_IMM(BPF_REG_0, 1),
72 BPF_EXIT_INSN(),
73 },
74 BPF_CGROUP_INET4_POST_BIND,
75 BPF_CGROUP_INET4_POST_BIND,
76 0,
77 0,
78 NULL,
79 0,
80 LOAD_REJECT,
81 },
82 {
83 "bind6 load with invalid access: src_ip4",
84 .insns = {
85 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
86 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
87 offsetof(struct bpf_sock, src_ip4)),
88 BPF_MOV64_IMM(BPF_REG_0, 1),
89 BPF_EXIT_INSN(),
90 },
91 BPF_CGROUP_INET6_POST_BIND,
92 BPF_CGROUP_INET6_POST_BIND,
93 0,
94 0,
95 NULL,
96 0,
97 LOAD_REJECT,
98 },
99 {
100 "sock_create load with invalid access: src_port",
101 .insns = {
102 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
103 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
104 offsetof(struct bpf_sock, src_port)),
105 BPF_MOV64_IMM(BPF_REG_0, 1),
106 BPF_EXIT_INSN(),
107 },
108 BPF_CGROUP_INET_SOCK_CREATE,
109 BPF_CGROUP_INET_SOCK_CREATE,
110 0,
111 0,
112 NULL,
113 0,
114 LOAD_REJECT,
115 },
116 {
117 "sock_create load w/o expected_attach_type (compat mode)",
118 .insns = {
119 BPF_MOV64_IMM(BPF_REG_0, 1),
120 BPF_EXIT_INSN(),
121 },
122 0,
123 BPF_CGROUP_INET_SOCK_CREATE,
124 AF_INET,
125 SOCK_STREAM,
126 "127.0.0.1",
127 8097,
128 SUCCESS,
129 },
130 {
131 "sock_create load w/ expected_attach_type",
132 .insns = {
133 BPF_MOV64_IMM(BPF_REG_0, 1),
134 BPF_EXIT_INSN(),
135 },
136 BPF_CGROUP_INET_SOCK_CREATE,
137 BPF_CGROUP_INET_SOCK_CREATE,
138 AF_INET,
139 SOCK_STREAM,
140 "127.0.0.1",
141 8097,
142 SUCCESS,
143 },
144 {
145 "attach type mismatch bind4 vs bind6",
146 .insns = {
147 BPF_MOV64_IMM(BPF_REG_0, 1),
148 BPF_EXIT_INSN(),
149 },
150 BPF_CGROUP_INET4_POST_BIND,
151 BPF_CGROUP_INET6_POST_BIND,
152 0,
153 0,
154 NULL,
155 0,
156 ATTACH_REJECT,
157 },
158 {
159 "attach type mismatch bind6 vs bind4",
160 .insns = {
161 BPF_MOV64_IMM(BPF_REG_0, 1),
162 BPF_EXIT_INSN(),
163 },
164 BPF_CGROUP_INET6_POST_BIND,
165 BPF_CGROUP_INET4_POST_BIND,
166 0,
167 0,
168 NULL,
169 0,
170 ATTACH_REJECT,
171 },
172 {
173 "attach type mismatch default vs bind4",
174 .insns = {
175 BPF_MOV64_IMM(BPF_REG_0, 1),
176 BPF_EXIT_INSN(),
177 },
178 0,
179 BPF_CGROUP_INET4_POST_BIND,
180 0,
181 0,
182 NULL,
183 0,
184 ATTACH_REJECT,
185 },
186 {
187 "attach type mismatch bind6 vs sock_create",
188 .insns = {
189 BPF_MOV64_IMM(BPF_REG_0, 1),
190 BPF_EXIT_INSN(),
191 },
192 BPF_CGROUP_INET6_POST_BIND,
193 BPF_CGROUP_INET_SOCK_CREATE,
194 0,
195 0,
196 NULL,
197 0,
198 ATTACH_REJECT,
199 },
200 {
201 "bind4 reject all",
202 .insns = {
203 BPF_MOV64_IMM(BPF_REG_0, 0),
204 BPF_EXIT_INSN(),
205 },
206 BPF_CGROUP_INET4_POST_BIND,
207 BPF_CGROUP_INET4_POST_BIND,
208 AF_INET,
209 SOCK_STREAM,
210 "0.0.0.0",
211 0,
212 BIND_REJECT,
213 },
214 {
215 "bind6 reject all",
216 .insns = {
217 BPF_MOV64_IMM(BPF_REG_0, 0),
218 BPF_EXIT_INSN(),
219 },
220 BPF_CGROUP_INET6_POST_BIND,
221 BPF_CGROUP_INET6_POST_BIND,
222 AF_INET6,
223 SOCK_STREAM,
224 "::",
225 0,
226 BIND_REJECT,
227 },
228 {
229 "bind6 deny specific IP & port",
230 .insns = {
231 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
232
233 /* if (ip == expected && port == expected) */
234 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
235 offsetof(struct bpf_sock, src_ip6[3])),
236 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x01000000, 4),
237 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
238 offsetof(struct bpf_sock, src_port)),
239 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
240
241 /* return DENY; */
242 BPF_MOV64_IMM(BPF_REG_0, 0),
243 BPF_JMP_A(1),
244
245 /* else return ALLOW; */
246 BPF_MOV64_IMM(BPF_REG_0, 1),
247 BPF_EXIT_INSN(),
248 },
249 BPF_CGROUP_INET6_POST_BIND,
250 BPF_CGROUP_INET6_POST_BIND,
251 AF_INET6,
252 SOCK_STREAM,
253 "::1",
254 8193,
255 BIND_REJECT,
256 },
257 {
258 "bind4 allow specific IP & port",
259 .insns = {
260 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
261
262 /* if (ip == expected && port == expected) */
263 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
264 offsetof(struct bpf_sock, src_ip4)),
265 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x0100007F, 4),
266 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
267 offsetof(struct bpf_sock, src_port)),
268 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
269
270 /* return ALLOW; */
271 BPF_MOV64_IMM(BPF_REG_0, 1),
272 BPF_JMP_A(1),
273
274 /* else return DENY; */
275 BPF_MOV64_IMM(BPF_REG_0, 0),
276 BPF_EXIT_INSN(),
277 },
278 BPF_CGROUP_INET4_POST_BIND,
279 BPF_CGROUP_INET4_POST_BIND,
280 AF_INET,
281 SOCK_STREAM,
282 "127.0.0.1",
283 4098,
284 SUCCESS,
285 },
286 {
287 "bind4 allow all",
288 .insns = {
289 BPF_MOV64_IMM(BPF_REG_0, 1),
290 BPF_EXIT_INSN(),
291 },
292 BPF_CGROUP_INET4_POST_BIND,
293 BPF_CGROUP_INET4_POST_BIND,
294 AF_INET,
295 SOCK_STREAM,
296 "0.0.0.0",
297 0,
298 SUCCESS,
299 },
300 {
301 "bind6 allow all",
302 .insns = {
303 BPF_MOV64_IMM(BPF_REG_0, 1),
304 BPF_EXIT_INSN(),
305 },
306 BPF_CGROUP_INET6_POST_BIND,
307 BPF_CGROUP_INET6_POST_BIND,
308 AF_INET6,
309 SOCK_STREAM,
310 "::",
311 0,
312 SUCCESS,
313 },
314};
315
316static size_t probe_prog_length(const struct bpf_insn *fp)
317{
318 size_t len;
319
320 for (len = MAX_INSNS - 1; len > 0; --len)
321 if (fp[len].code != 0 || fp[len].imm != 0)
322 break;
323 return len + 1;
324}
325
326static int load_sock_prog(const struct bpf_insn *prog,
327 enum bpf_attach_type attach_type)
328{
329 struct bpf_load_program_attr attr;
330
331 memset(&attr, 0, sizeof(struct bpf_load_program_attr));
332 attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
333 attr.expected_attach_type = attach_type;
334 attr.insns = prog;
335 attr.insns_cnt = probe_prog_length(attr.insns);
336 attr.license = "GPL";
337
338 return bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
339}
340
341static int attach_sock_prog(int cgfd, int progfd,
342 enum bpf_attach_type attach_type)
343{
344 return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE);
345}
346
347static int bind_sock(int domain, int type, const char *ip, unsigned short port)
348{
349 struct sockaddr_storage addr;
350 struct sockaddr_in6 *addr6;
351 struct sockaddr_in *addr4;
352 int sockfd = -1;
353 socklen_t len;
354 int err = 0;
355
356 sockfd = socket(domain, type, 0);
357 if (sockfd < 0)
358 goto err;
359
360 memset(&addr, 0, sizeof(addr));
361
362 if (domain == AF_INET) {
363 len = sizeof(struct sockaddr_in);
364 addr4 = (struct sockaddr_in *)&addr;
365 addr4->sin_family = domain;
366 addr4->sin_port = htons(port);
367 if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1)
368 goto err;
369 } else if (domain == AF_INET6) {
370 len = sizeof(struct sockaddr_in6);
371 addr6 = (struct sockaddr_in6 *)&addr;
372 addr6->sin6_family = domain;
373 addr6->sin6_port = htons(port);
374 if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1)
375 goto err;
376 } else {
377 goto err;
378 }
379
380 if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1)
381 goto err;
382
383 goto out;
384err:
385 err = -1;
386out:
387 close(sockfd);
388 return err;
389}
390
391static int run_test_case(int cgfd, const struct sock_test *test)
392{
393 int progfd = -1;
394 int err = 0;
395
396 printf("Test case: %s .. ", test->descr);
397 progfd = load_sock_prog(test->insns, test->expected_attach_type);
398 if (progfd < 0) {
399 if (test->result == LOAD_REJECT)
400 goto out;
401 else
402 goto err;
403 }
404
405 if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) {
406 if (test->result == ATTACH_REJECT)
407 goto out;
408 else
409 goto err;
410 }
411
412 if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) {
 413 /* sys_bind() may fail for different reasons; errno has to be
 414 * checked to confirm that the BPF program rejected it.
415 */
416 if (test->result == BIND_REJECT && errno == EPERM)
417 goto out;
418 else
419 goto err;
420 }
421
422
423 if (test->result != SUCCESS)
424 goto err;
425
426 goto out;
427err:
428 err = -1;
429out:
430 /* Detaching w/o checking return code: best effort attempt. */
431 if (progfd != -1)
432 bpf_prog_detach(cgfd, test->attach_type);
433 close(progfd);
434 printf("[%s]\n", err ? "FAIL" : "PASS");
435 return err;
436}
437
438static int run_tests(int cgfd)
439{
440 int passes = 0;
441 int fails = 0;
442 int i;
443
444 for (i = 0; i < ARRAY_SIZE(tests); ++i) {
445 if (run_test_case(cgfd, &tests[i]))
446 ++fails;
447 else
448 ++passes;
449 }
450 printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
451 return fails ? -1 : 0;
452}
453
454int main(int argc, char **argv)
455{
456 int cgfd = -1;
457 int err = 0;
458
459 if (setup_cgroup_environment())
460 goto err;
461
462 cgfd = create_and_get_cgroup(CG_PATH);
463 if (!cgfd)
464 goto err;
465
466 if (join_cgroup(CG_PATH))
467 goto err;
468
469 if (run_tests(cgfd))
470 goto err;
471
472 goto out;
473err:
474 err = -1;
475out:
476 close(cgfd);
477 cleanup_cgroup_environment();
478 return err;
479}
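For readability, the raw instruction arrays above encode what a small restricted-C program would express; the "bind4 allow specific IP & port" case corresponds roughly to the following sketch (section name illustrative only, since the test loads these with the program type and expected attach type set explicitly; byte order per the struct bpf_sock comments):

#include <linux/bpf.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"

SEC("cgroup/post_bind4")
int allow_loopback_4098(struct bpf_sock *sk)
{
	/* src_ip4 is network byte order, src_port host byte order. */
	if (sk->src_ip4 == bpf_htonl(0x7f000001) && sk->src_port == 4098)
		return 1;	/* allow the bind */
	return 0;		/* deny: bind() fails with EPERM */
}

char _license[] SEC("license") = "GPL";
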
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
new file mode 100644
index 000000000000..d488f20926e8
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -0,0 +1,588 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2018 Facebook
3
4#include <stdio.h>
5#include <stdlib.h>
6#include <unistd.h>
7
8#include <arpa/inet.h>
9#include <sys/types.h>
10#include <sys/socket.h>
11
12#include <linux/filter.h>
13
14#include <bpf/bpf.h>
15#include <bpf/libbpf.h>
16
17#include "cgroup_helpers.h"
18
19#define CG_PATH "/foo"
20#define CONNECT4_PROG_PATH "./connect4_prog.o"
21#define CONNECT6_PROG_PATH "./connect6_prog.o"
22
23#define SERV4_IP "192.168.1.254"
24#define SERV4_REWRITE_IP "127.0.0.1"
25#define SERV4_PORT 4040
26#define SERV4_REWRITE_PORT 4444
27
28#define SERV6_IP "face:b00c:1234:5678::abcd"
29#define SERV6_REWRITE_IP "::1"
30#define SERV6_PORT 6060
31#define SERV6_REWRITE_PORT 6666
32
33#define INET_NTOP_BUF 40
34
35typedef int (*load_fn)(enum bpf_attach_type, const char *comment);
36typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
37
38struct program {
39 enum bpf_attach_type type;
40 load_fn loadfn;
41 int fd;
42 const char *name;
43 enum bpf_attach_type invalid_type;
44};
45
46char bpf_log_buf[BPF_LOG_BUF_SIZE];
47
48static int mk_sockaddr(int domain, const char *ip, unsigned short port,
49 struct sockaddr *addr, socklen_t addr_len)
50{
51 struct sockaddr_in6 *addr6;
52 struct sockaddr_in *addr4;
53
54 if (domain != AF_INET && domain != AF_INET6) {
55 log_err("Unsupported address family");
56 return -1;
57 }
58
59 memset(addr, 0, addr_len);
60
61 if (domain == AF_INET) {
62 if (addr_len < sizeof(struct sockaddr_in))
63 return -1;
64 addr4 = (struct sockaddr_in *)addr;
65 addr4->sin_family = domain;
66 addr4->sin_port = htons(port);
67 if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1) {
68 log_err("Invalid IPv4: %s", ip);
69 return -1;
70 }
71 } else if (domain == AF_INET6) {
72 if (addr_len < sizeof(struct sockaddr_in6))
73 return -1;
74 addr6 = (struct sockaddr_in6 *)addr;
75 addr6->sin6_family = domain;
76 addr6->sin6_port = htons(port);
77 if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1) {
78 log_err("Invalid IPv6: %s", ip);
79 return -1;
80 }
81 }
82
83 return 0;
84}
85
86static int load_insns(enum bpf_attach_type attach_type,
87 const struct bpf_insn *insns, size_t insns_cnt,
88 const char *comment)
89{
90 struct bpf_load_program_attr load_attr;
91 int ret;
92
93 memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
94 load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
95 load_attr.expected_attach_type = attach_type;
96 load_attr.insns = insns;
97 load_attr.insns_cnt = insns_cnt;
98 load_attr.license = "GPL";
99
100 ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
101 if (ret < 0 && comment) {
102 log_err(">>> Loading %s program error.\n"
103 ">>> Output from verifier:\n%s\n-------\n",
104 comment, bpf_log_buf);
105 }
106
107 return ret;
108}
109
110/* [1] These test programs read various context fields, including narrow
111 * loads of different sizes from user_ip4 and user_ip6, and they write to
112 * the fields that are allowed to be overridden.
113 *
114 * [2] BPF_LD_IMM64 & BPF_JMP_REG are used below whenever there is a need to
115 * compare a register with an unsigned 32bit integer. BPF_JMP_IMM can't be
116 * used in such cases since it accepts only a _signed_ 32bit integer as its
117 * IMM argument. Also note that BPF_LD_IMM64 consists of 2 instructions,
118 * which matters when counting jump offsets.
119 */
120
121static int bind4_prog_load(enum bpf_attach_type attach_type,
122 const char *comment)
123{
124 union {
125 uint8_t u4_addr8[4];
126 uint16_t u4_addr16[2];
127 uint32_t u4_addr32;
128 } ip4;
129 struct sockaddr_in addr4_rw;
130
131 if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) {
132 log_err("Invalid IPv4: %s", SERV4_IP);
133 return -1;
134 }
135
136 if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
137 (struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1)
138 return -1;
139
140 /* See [1]. */
141 struct bpf_insn insns[] = {
142 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
143
144 /* if (sk.family == AF_INET && */
145 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
146 offsetof(struct bpf_sock_addr, family)),
147 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 16),
148
149 /* (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */
150 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
151 offsetof(struct bpf_sock_addr, type)),
152 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1),
153 BPF_JMP_A(1),
154 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 12),
155
156 /* 1st_byte_of_user_ip4 == expected && */
157 BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
158 offsetof(struct bpf_sock_addr, user_ip4)),
159 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 10),
160
161 /* 1st_half_of_user_ip4 == expected && */
162 BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
163 offsetof(struct bpf_sock_addr, user_ip4)),
164 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 8),
165
166 /* whole_user_ip4 == expected) { */
167 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
168 offsetof(struct bpf_sock_addr, user_ip4)),
169 BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */
170 BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4),
171
172 /* user_ip4 = addr4_rw.sin_addr */
173 BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_addr.s_addr),
174 BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
175 offsetof(struct bpf_sock_addr, user_ip4)),
176
177 /* user_port = addr4_rw.sin_port */
178 BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_port),
179 BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
180 offsetof(struct bpf_sock_addr, user_port)),
181 /* } */
182
183 /* return 1 */
184 BPF_MOV64_IMM(BPF_REG_0, 1),
185 BPF_EXIT_INSN(),
186 };
187
188 return load_insns(attach_type, insns,
189 sizeof(insns) / sizeof(struct bpf_insn), comment);
190}
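A manual cross-check of the offsets above: the byte-compare BPF_JMP_IMM with offset 10 must skip the half-word load and compare (2), the word load (1), both slots of BPF_LD_IMM64 (2), the register compare (1), and the two MOV32/STX store pairs (4) before landing on the final "return 1" — exactly 10 instructions. Counting BPF_LD_IMM64 as a single instruction would leave every jump that crosses it short by one.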
191
192static int bind6_prog_load(enum bpf_attach_type attach_type,
193 const char *comment)
194{
195 struct sockaddr_in6 addr6_rw;
196 struct in6_addr ip6;
197
198 if (inet_pton(AF_INET6, SERV6_IP, (void *)&ip6) != 1) {
199 log_err("Invalid IPv6: %s", SERV6_IP);
200 return -1;
201 }
202
203 if (mk_sockaddr(AF_INET6, SERV6_REWRITE_IP, SERV6_REWRITE_PORT,
204 (struct sockaddr *)&addr6_rw, sizeof(addr6_rw)) == -1)
205 return -1;
206
207 /* See [1]. */
208 struct bpf_insn insns[] = {
209 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
210
211 /* if (sk.family == AF_INET6 && */
212 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
213 offsetof(struct bpf_sock_addr, family)),
214 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
215
216 /* 5th_byte_of_user_ip6 == expected && */
217 BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
218 offsetof(struct bpf_sock_addr, user_ip6[1])),
219 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr[4], 16),
220
221 /* 3rd_half_of_user_ip6 == expected && */
222 BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
223 offsetof(struct bpf_sock_addr, user_ip6[1])),
224 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr16[2], 14),
225
226 /* last_word_of_user_ip6 == expected) { */
227 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
228 offsetof(struct bpf_sock_addr, user_ip6[3])),
229 BPF_LD_IMM64(BPF_REG_8, ip6.s6_addr32[3]), /* See [2]. */
230 BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 10),
231
232
233#define STORE_IPV6_WORD(N) \
234 BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_addr.s6_addr32[N]), \
235 BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \
236 offsetof(struct bpf_sock_addr, user_ip6[N]))
237
238 /* user_ip6 = addr6_rw.sin6_addr */
239 STORE_IPV6_WORD(0),
240 STORE_IPV6_WORD(1),
241 STORE_IPV6_WORD(2),
242 STORE_IPV6_WORD(3),
243
244 /* user_port = addr6_rw.sin6_port */
245 BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_port),
246 BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
247 offsetof(struct bpf_sock_addr, user_port)),
248
249 /* } */
250
251 /* return 1 */
252 BPF_MOV64_IMM(BPF_REG_0, 1),
253 BPF_EXIT_INSN(),
254 };
255
256 return load_insns(attach_type, insns,
257 sizeof(insns) / sizeof(struct bpf_insn), comment);
258}
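Each STORE_IPV6_WORD(N) above expands to a MOV32/STX pair, so the four invocations contribute eight instructions; together with the MOV32/STX pair for the port they account for the offset of 10 on the preceding BPF_JMP_REG. For example, STORE_IPV6_WORD(0) expands to:

	BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_addr.s6_addr32[0]),
	BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
		    offsetof(struct bpf_sock_addr, user_ip6[0]))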
259
260static int connect_prog_load_path(const char *path,
261 enum bpf_attach_type attach_type,
262 const char *comment)
263{
264 struct bpf_prog_load_attr attr;
265 struct bpf_object *obj;
266 int prog_fd;
267
268 memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
269 attr.file = path;
270 attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
271 attr.expected_attach_type = attach_type;
272
273 if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
274 if (comment)
275			log_err(">>> Failed to load %s program at %s.\n",
276 comment, path);
277 return -1;
278 }
279
280 return prog_fd;
281}
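Unlike the bind programs above, which are assembled from raw instructions, the connect programs are loaded from pre-built ELF objects via bpf_prog_load_xattr(). A rough usage sketch (cgfd is assumed to be an open cgroup fd; this mirrors what load_and_attach_progs() does below):

	int prog_fd = connect_prog_load_path(CONNECT4_PROG_PATH,
					     BPF_CGROUP_INET4_CONNECT,
					     "connect4");
	if (prog_fd == -1 ||
	    bpf_prog_attach(prog_fd, cgfd, BPF_CGROUP_INET4_CONNECT,
			    BPF_F_ALLOW_OVERRIDE) == -1)
		log_err("Failed to load/attach connect4 program");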
282
283static int connect4_prog_load(enum bpf_attach_type attach_type,
284 const char *comment)
285{
286 return connect_prog_load_path(CONNECT4_PROG_PATH, attach_type, comment);
287}
288
289static int connect6_prog_load(enum bpf_attach_type attach_type,
290 const char *comment)
291{
292 return connect_prog_load_path(CONNECT6_PROG_PATH, attach_type, comment);
293}
294
295static void print_ip_port(int sockfd, info_fn fn, const char *fmt)
296{
297 char addr_buf[INET_NTOP_BUF];
298 struct sockaddr_storage addr;
299 struct sockaddr_in6 *addr6;
300 struct sockaddr_in *addr4;
301 socklen_t addr_len;
302 unsigned short port;
303 void *nip;
304
305 addr_len = sizeof(struct sockaddr_storage);
306 memset(&addr, 0, addr_len);
307
308 if (fn(sockfd, (struct sockaddr *)&addr, (socklen_t *)&addr_len) == 0) {
309 if (addr.ss_family == AF_INET) {
310 addr4 = (struct sockaddr_in *)&addr;
311 nip = (void *)&addr4->sin_addr;
312 port = ntohs(addr4->sin_port);
313 } else if (addr.ss_family == AF_INET6) {
314 addr6 = (struct sockaddr_in6 *)&addr;
315 nip = (void *)&addr6->sin6_addr;
316 port = ntohs(addr6->sin6_port);
317 } else {
318 return;
319 }
320 const char *addr_str =
321 inet_ntop(addr.ss_family, nip, addr_buf, INET_NTOP_BUF);
322 printf(fmt, addr_str ? addr_str : "??", port);
323 }
324}
325
326static void print_local_ip_port(int sockfd, const char *fmt)
327{
328 print_ip_port(sockfd, getsockname, fmt);
329}
330
331static void print_remote_ip_port(int sockfd, const char *fmt)
332{
333 print_ip_port(sockfd, getpeername, fmt);
334}
335
336static int start_server(int type, const struct sockaddr_storage *addr,
337 socklen_t addr_len)
338{
339
340 int fd;
341
342 fd = socket(addr->ss_family, type, 0);
343 if (fd == -1) {
344 log_err("Failed to create server socket");
345 goto out;
346 }
347
348 if (bind(fd, (const struct sockaddr *)addr, addr_len) == -1) {
349 log_err("Failed to bind server socket");
350 goto close_out;
351 }
352
353 if (type == SOCK_STREAM) {
354 if (listen(fd, 128) == -1) {
355 log_err("Failed to listen on server socket");
356 goto close_out;
357 }
358 }
359
360 print_local_ip_port(fd, "\t Actual: bind(%s, %d)\n");
361
362 goto out;
363close_out:
364 close(fd);
365 fd = -1;
366out:
367 return fd;
368}
369
370static int connect_to_server(int type, const struct sockaddr_storage *addr,
371 socklen_t addr_len)
372{
373 int domain;
374 int fd;
375
376 domain = addr->ss_family;
377
378 if (domain != AF_INET && domain != AF_INET6) {
379 log_err("Unsupported address family");
380 return -1;
381 }
382
383 fd = socket(domain, type, 0);
384 if (fd == -1) {
385		log_err("Failed to create client socket");
386 return -1;
387 }
388
389 if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) {
390		log_err("Failed to connect to server");
391 goto err;
392 }
393
394 print_remote_ip_port(fd, "\t Actual: connect(%s, %d)");
395 print_local_ip_port(fd, " from (%s, %d)\n");
396
397 return 0;
398err:
399 close(fd);
400 return -1;
401}
402
403static void print_test_case_num(int domain, int type)
404{
405 static int test_num;
406
407 printf("Test case #%d (%s/%s):\n", ++test_num,
408 (domain == AF_INET ? "IPv4" :
409 domain == AF_INET6 ? "IPv6" :
410 "unknown_domain"),
411 (type == SOCK_STREAM ? "TCP" :
412 type == SOCK_DGRAM ? "UDP" :
413 "unknown_type"));
414}
415
416static int run_test_case(int domain, int type, const char *ip,
417 unsigned short port)
418{
419 struct sockaddr_storage addr;
420 socklen_t addr_len = sizeof(addr);
421 int servfd = -1;
422 int err = 0;
423
424 print_test_case_num(domain, type);
425
426 if (mk_sockaddr(domain, ip, port, (struct sockaddr *)&addr,
427 addr_len) == -1)
428 return -1;
429
430 printf("\tRequested: bind(%s, %d) ..\n", ip, port);
431 servfd = start_server(type, &addr, addr_len);
432 if (servfd == -1)
433 goto err;
434
435 printf("\tRequested: connect(%s, %d) from (*, *) ..\n", ip, port);
436 if (connect_to_server(type, &addr, addr_len))
437 goto err;
438
439 goto out;
440err:
441 err = -1;
442out:
443 close(servfd);
444 return err;
445}
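For orientation, a passing IPv4/TCP case prints output shaped roughly like the following (placeholders instead of the SERV4_* and SERV4_REWRITE_* constants defined earlier in the file):

	Test case #1 (IPv4/TCP):
		Requested: bind(<SERV4_IP>, <SERV4_PORT>) ..
		 Actual: bind(<SERV4_REWRITE_IP>, <SERV4_REWRITE_PORT>)
		Requested: connect(<SERV4_IP>, <SERV4_PORT>) from (*, *) ..
		 Actual: connect(<SERV4_REWRITE_IP>, <SERV4_REWRITE_PORT>) from (<local-ip>, <local-port>)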
446
447static void close_progs_fds(struct program *progs, size_t prog_cnt)
448{
449 size_t i;
450
451 for (i = 0; i < prog_cnt; ++i) {
452 close(progs[i].fd);
453 progs[i].fd = -1;
454 }
455}
456
457static int load_and_attach_progs(int cgfd, struct program *progs,
458 size_t prog_cnt)
459{
460 size_t i;
461
462 for (i = 0; i < prog_cnt; ++i) {
463 printf("Load %s with invalid type (can pollute stderr) ",
464 progs[i].name);
465 fflush(stdout);
466 progs[i].fd = progs[i].loadfn(progs[i].invalid_type, NULL);
467 if (progs[i].fd != -1) {
468 log_err("Load with invalid type accepted for %s",
469 progs[i].name);
470 goto err;
471 }
472 printf("... REJECTED\n");
473
474 printf("Load %s with valid type", progs[i].name);
475 progs[i].fd = progs[i].loadfn(progs[i].type, progs[i].name);
476 if (progs[i].fd == -1) {
477 log_err("Failed to load program %s", progs[i].name);
478 goto err;
479 }
480 printf(" ... OK\n");
481
482 printf("Attach %s with invalid type", progs[i].name);
483 if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].invalid_type,
484 BPF_F_ALLOW_OVERRIDE) != -1) {
485 log_err("Attach with invalid type accepted for %s",
486 progs[i].name);
487 goto err;
488 }
489 printf(" ... REJECTED\n");
490
491 printf("Attach %s with valid type", progs[i].name);
492 if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].type,
493 BPF_F_ALLOW_OVERRIDE) == -1) {
494 log_err("Failed to attach program %s", progs[i].name);
495 goto err;
496 }
497 printf(" ... OK\n");
498 }
499
500 return 0;
501err:
502 close_progs_fds(progs, prog_cnt);
503 return -1;
504}
505
506static int run_domain_test(int domain, int cgfd, struct program *progs,
507 size_t prog_cnt, const char *ip, unsigned short port)
508{
509 int err = 0;
510
511 if (load_and_attach_progs(cgfd, progs, prog_cnt) == -1)
512 goto err;
513
514 if (run_test_case(domain, SOCK_STREAM, ip, port) == -1)
515 goto err;
516
517 if (run_test_case(domain, SOCK_DGRAM, ip, port) == -1)
518 goto err;
519
520 goto out;
521err:
522 err = -1;
523out:
524 close_progs_fds(progs, prog_cnt);
525 return err;
526}
527
528static int run_test(void)
529{
530 size_t inet6_prog_cnt;
531 size_t inet_prog_cnt;
532 int cgfd = -1;
533 int err = 0;
534
535 struct program inet6_progs[] = {
536 {BPF_CGROUP_INET6_BIND, bind6_prog_load, -1, "bind6",
537 BPF_CGROUP_INET4_BIND},
538 {BPF_CGROUP_INET6_CONNECT, connect6_prog_load, -1, "connect6",
539 BPF_CGROUP_INET4_CONNECT},
540 };
541 inet6_prog_cnt = sizeof(inet6_progs) / sizeof(struct program);
542
543 struct program inet_progs[] = {
544 {BPF_CGROUP_INET4_BIND, bind4_prog_load, -1, "bind4",
545 BPF_CGROUP_INET6_BIND},
546 {BPF_CGROUP_INET4_CONNECT, connect4_prog_load, -1, "connect4",
547 BPF_CGROUP_INET6_CONNECT},
548 };
549 inet_prog_cnt = sizeof(inet_progs) / sizeof(struct program);
550
551 if (setup_cgroup_environment())
552 goto err;
553
554 cgfd = create_and_get_cgroup(CG_PATH);
555 if (!cgfd)
556 goto err;
557
558 if (join_cgroup(CG_PATH))
559 goto err;
560
561 if (run_domain_test(AF_INET, cgfd, inet_progs, inet_prog_cnt, SERV4_IP,
562 SERV4_PORT) == -1)
563 goto err;
564
565 if (run_domain_test(AF_INET6, cgfd, inet6_progs, inet6_prog_cnt,
566 SERV6_IP, SERV6_PORT) == -1)
567 goto err;
568
569 goto out;
570err:
571 err = -1;
572out:
573 close(cgfd);
574 cleanup_cgroup_environment();
575 printf(err ? "### FAIL\n" : "### SUCCESS\n");
576 return err;
577}
578
579int main(int argc, char **argv)
580{
581 if (argc < 2) {
582 fprintf(stderr,
583 "%s has to be run via %s.sh. Skip direct run.\n",
584 argv[0], argv[0]);
585 exit(0);
586 }
587	return run_test();
588}
588}
diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh
new file mode 100755
index 000000000000..c6e1dcf992c4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock_addr.sh
@@ -0,0 +1,57 @@
1#!/bin/sh
2
3set -eu
4
5ping_once()
6{
7 ping -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1
8}
9
10wait_for_ip()
11{
12 local _i
13	echo -n "Waiting for test IPv4/IPv6 to become available "
14 for _i in $(seq ${MAX_PING_TRIES}); do
15 echo -n "."
16 if ping_once ${TEST_IPv4} && ping_once ${TEST_IPv6}; then
17 echo " OK"
18 return
19 fi
20 done
21 echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
22 exit 1
23}
24
25setup()
26{
27	# Create test interfaces so as not to interfere with the current environment.
28 ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
29 ip link set ${TEST_IF} up
30 ip link set ${TEST_IF_PEER} up
31
32 ip -4 addr add ${TEST_IPv4} dev ${TEST_IF}
33 ip -6 addr add ${TEST_IPv6} dev ${TEST_IF}
34 wait_for_ip
35}
36
37cleanup()
38{
39 ip link del ${TEST_IF} 2>/dev/null || :
40 ip link del ${TEST_IF_PEER} 2>/dev/null || :
41}
42
43main()
44{
45 trap cleanup EXIT 2 3 6 15
46 setup
47 ./test_sock_addr setup_done
48}
49
50BASENAME=$(basename $0 .sh)
51TEST_IF="${BASENAME}1"
52TEST_IF_PEER="${BASENAME}2"
53TEST_IPv4="127.0.0.4/8"
54TEST_IPv6="::6/128"
55MAX_PING_TRIES=5
56
57main
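A note on invocation (an assumption based on the setup above, not stated in the patch): creating the veth pair and the cgroup environment requires root, and the wrapper lives next to the test binary, so a typical run would look like:

	# assumed invocation; requires root for veth and cgroup setup
	cd tools/testing/selftests/bpf
	sudo ./test_sock_addr.sh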