aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2018-03-30 20:17:57 -0400
committerDaniel Borkmann <daniel@iogearbox.net>2018-03-30 20:18:07 -0400
commit7828f20e3779e4e85e55371e0e43f5006a15fb41 (patch)
tree48f4977b0b8e69bd6432b18556ad9ac7ca7728eb
parent807ae7daf5fb9ba9ef688344ae7c0d8cbebd211c (diff)
parent1d436885b23bf4474617914d7eb15e039c83ed99 (diff)
Merge branch 'bpf-cgroup-bind-connect'
Andrey Ignatov says: ==================== v2->v3: - rebase due to conflicts - fix ipv6=m build v1->v2: - support expected_attach_type at prog load time so that prog (incl. context accesses and calls to helpers) can be validated with regard to specific attach point it is supposed to be attached to. Later, at attach time, attach type is checked so that it must be same as at load time if it was provided - reworked hooks to rely on expected_attach_type, and reduced number of new prog types from 6 to just 1: BPF_PROG_TYPE_CGROUP_SOCK_ADDR - reused BPF_PROG_TYPE_CGROUP_SOCK for sys_bind post-hooks - add selftests for post-sys_bind hook For our container management we've been using complicated and fragile setup consisting of LD_PRELOAD wrapper intercepting bind and connect calls from all containerized applications. Unfortunately it doesn't work for apps that don't use glibc and changing all applications that run in the datacenter is not possible due to 3rd party code and libraries (despite being open source code) and sheer amount of legacy code that has to be rewritten (we're rewriting what we can in parallel) These applications are written without containers in mind and have builtin assumptions about network services. Like an application X expects to connect localhost:special_port and find service Y in there. To move application X and service Y into two different containers LD_PRELOAD approach is used to help one service connect to another without rewriting them. Moving these two applications into different L2 (netns) or L3 (vrf) network isolation scopes doesn't help to solve the problem, since applications need to see each other like they were running on the host without containers. So if app X and app Y would run in different netns something would need to punch a connectivity hole in those namespaces. 
That would be real layering violation (with corresponding network debugging pains), since clean l2, l3 abstraction would suddenly support something that breaks through the layers. Instead we used LD_PRELOAD (and now bpf programs) at bind/connect time to help applications discover and connect to each other. All applications are running in init_netns and there are no vrfs. After bind/connect the normal fib/neighbor core networking logic works as it should always do and the whole system is clean from network point of view and can be debugged with standard tools. We also considered resurrecting Hannes's afnetns work, but all hierarchical namespace abstractions don't work due to these builtin networking assumptions inside the apps. To run an application inside cgroup container that was not written with containers in mind we have to make an illusion of running in non-containerized environment. In some cases we remember the port and container id in the post-bind hook in a bpf map and when some other task in a different container is trying to connect to a service we need to know where this service is running. It can be remote and can be local. Both client and service may or may not be written with containers in mind and this sockaddr rewrite is providing connectivity and load balancing feature. BPF+cgroup looks to be the best solution for this problem. Hence we introduce 3 hooks: - at entry into sys_bind and sys_connect to let bpf prog look and modify 'struct sockaddr' provided by user space and fail bind/connect when appropriate - post sys_bind after port is allocated The approach works great and has zero overhead for anyone who doesn't use it and very low overhead when deployed. Different use case for this feature is to do low overhead firewall that doesn't need to inspect all packets and works at bind/connect time. ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r--include/linux/bpf-cgroup.h68
-rw-r--r--include/linux/bpf.h5
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/filter.h11
-rw-r--r--include/net/addrconf.h7
-rw-r--r--include/net/inet_common.h2
-rw-r--r--include/net/ipv6.h2
-rw-r--r--include/net/sock.h3
-rw-r--r--include/net/udp.h1
-rw-r--r--include/uapi/linux/bpf.h51
-rw-r--r--kernel/bpf/cgroup.c39
-rw-r--r--kernel/bpf/syscall.c102
-rw-r--r--kernel/bpf/verifier.c7
-rw-r--r--kernel/trace/bpf_trace.c27
-rw-r--r--net/core/filter.c442
-rw-r--r--net/ipv4/af_inet.c71
-rw-r--r--net/ipv4/tcp_ipv4.c16
-rw-r--r--net/ipv4/udp.c14
-rw-r--r--net/ipv6/af_inet6.c66
-rw-r--r--net/ipv6/tcp_ipv6.c16
-rw-r--r--net/ipv6/udp.c20
-rw-r--r--tools/include/uapi/linux/bpf.h51
-rw-r--r--tools/lib/bpf/bpf.c44
-rw-r--r--tools/lib/bpf/bpf.h17
-rw-r--r--tools/lib/bpf/libbpf.c113
-rw-r--r--tools/lib/bpf/libbpf.h8
-rw-r--r--tools/testing/selftests/bpf/Makefile10
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h2
-rw-r--r--tools/testing/selftests/bpf/connect4_prog.c45
-rw-r--r--tools/testing/selftests/bpf/connect6_prog.c61
-rw-r--r--tools/testing/selftests/bpf/test_sock.c479
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c588
-rwxr-xr-xtools/testing/selftests/bpf/test_sock_addr.sh57
33 files changed, 2314 insertions, 132 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 8a4566691c8f..30d15e64b993 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -6,6 +6,7 @@
6#include <uapi/linux/bpf.h> 6#include <uapi/linux/bpf.h>
7 7
8struct sock; 8struct sock;
9struct sockaddr;
9struct cgroup; 10struct cgroup;
10struct sk_buff; 11struct sk_buff;
11struct bpf_sock_ops_kern; 12struct bpf_sock_ops_kern;
@@ -63,6 +64,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
63int __cgroup_bpf_run_filter_sk(struct sock *sk, 64int __cgroup_bpf_run_filter_sk(struct sock *sk,
64 enum bpf_attach_type type); 65 enum bpf_attach_type type);
65 66
67int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
68 struct sockaddr *uaddr,
69 enum bpf_attach_type type);
70
66int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, 71int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
67 struct bpf_sock_ops_kern *sock_ops, 72 struct bpf_sock_ops_kern *sock_ops,
68 enum bpf_attach_type type); 73 enum bpf_attach_type type);
@@ -93,16 +98,64 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
93 __ret; \ 98 __ret; \
94}) 99})
95 100
96#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \ 101#define BPF_CGROUP_RUN_SK_PROG(sk, type) \
97({ \ 102({ \
98 int __ret = 0; \ 103 int __ret = 0; \
99 if (cgroup_bpf_enabled) { \ 104 if (cgroup_bpf_enabled) { \
100 __ret = __cgroup_bpf_run_filter_sk(sk, \ 105 __ret = __cgroup_bpf_run_filter_sk(sk, type); \
101 BPF_CGROUP_INET_SOCK_CREATE); \ 106 } \
107 __ret; \
108})
109
110#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
111 BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)
112
113#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \
114 BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)
115
116#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \
117 BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND)
118
119#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \
120({ \
121 int __ret = 0; \
122 if (cgroup_bpf_enabled) \
123 __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
124 __ret; \
125})
126
127#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type) \
128({ \
129 int __ret = 0; \
130 if (cgroup_bpf_enabled) { \
131 lock_sock(sk); \
132 __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
133 release_sock(sk); \
102 } \ 134 } \
103 __ret; \ 135 __ret; \
104}) 136})
105 137
138#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) \
139 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND)
140
141#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) \
142 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND)
143
144#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \
145 sk->sk_prot->pre_connect)
146
147#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
148 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
149
150#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \
151 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
152
153#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
154 BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
155
156#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
157 BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
158
106#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \ 159#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
107({ \ 160({ \
108 int __ret = 0; \ 161 int __ret = 0; \
@@ -132,9 +185,18 @@ struct cgroup_bpf {};
132static inline void cgroup_bpf_put(struct cgroup *cgrp) {} 185static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
133static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } 186static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
134 187
188#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
135#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) 189#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
136#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) 190#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
137#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) 191#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
192#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
193#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
194#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
195#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
196#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
197#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
198#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
199#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
138#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) 200#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
139#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) 201#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
140 202
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 819229c80eca..95a7abd0ee92 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -208,12 +208,15 @@ struct bpf_prog_ops {
208 208
209struct bpf_verifier_ops { 209struct bpf_verifier_ops {
210 /* return eBPF function prototype for verification */ 210 /* return eBPF function prototype for verification */
211 const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); 211 const struct bpf_func_proto *
212 (*get_func_proto)(enum bpf_func_id func_id,
213 const struct bpf_prog *prog);
212 214
213 /* return true if 'size' wide access at offset 'off' within bpf_context 215 /* return true if 'size' wide access at offset 'off' within bpf_context
214 * with 'type' (read or write) is allowed 216 * with 'type' (read or write) is allowed
215 */ 217 */
216 bool (*is_valid_access)(int off, int size, enum bpf_access_type type, 218 bool (*is_valid_access)(int off, int size, enum bpf_access_type type,
219 const struct bpf_prog *prog,
217 struct bpf_insn_access_aux *info); 220 struct bpf_insn_access_aux *info);
218 int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, 221 int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
219 const struct bpf_prog *prog); 222 const struct bpf_prog *prog);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 6d7243bfb0ff..2b28fcf6f6ae 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -8,6 +8,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act)
8BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp) 8BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
9BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb) 9BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
10BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock) 10BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
11BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
11BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout) 12BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout)
12BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout) 13BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
13BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit) 14BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 897ff3d95968..fc4e8f91b03d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -469,6 +469,7 @@ struct bpf_prog {
469 is_func:1, /* program is a bpf function */ 469 is_func:1, /* program is a bpf function */
470 kprobe_override:1; /* Do we override a kprobe? */ 470 kprobe_override:1; /* Do we override a kprobe? */
471 enum bpf_prog_type type; /* Type of BPF program */ 471 enum bpf_prog_type type; /* Type of BPF program */
472 enum bpf_attach_type expected_attach_type; /* For some prog types */
472 u32 len; /* Number of filter blocks */ 473 u32 len; /* Number of filter blocks */
473 u32 jited_len; /* Size of jited insns in bytes */ 474 u32 jited_len; /* Size of jited insns in bytes */
474 u8 tag[BPF_TAG_SIZE]; 475 u8 tag[BPF_TAG_SIZE];
@@ -1020,6 +1021,16 @@ static inline int bpf_tell_extensions(void)
1020 return SKF_AD_MAX; 1021 return SKF_AD_MAX;
1021} 1022}
1022 1023
1024struct bpf_sock_addr_kern {
1025 struct sock *sk;
1026 struct sockaddr *uaddr;
1027 /* Temporary "register" to make indirect stores to nested structures
1028 * defined above. We need three registers to make such a store, but
1029 * only two (src and dst) are available at convert_ctx_access time
1030 */
1031 u64 tmp_reg;
1032};
1033
1023struct bpf_sock_ops_kern { 1034struct bpf_sock_ops_kern {
1024 struct sock *sk; 1035 struct sock *sk;
1025 u32 op; 1036 u32 op;
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 132e5b95167a..378d601258be 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -231,6 +231,13 @@ struct ipv6_stub {
231}; 231};
232extern const struct ipv6_stub *ipv6_stub __read_mostly; 232extern const struct ipv6_stub *ipv6_stub __read_mostly;
233 233
234/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */
235struct ipv6_bpf_stub {
236 int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
237 bool force_bind_address_no_port, bool with_lock);
238};
239extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
240
234/* 241/*
235 * identify MLD packets for MLD filter exceptions 242 * identify MLD packets for MLD filter exceptions
236 */ 243 */
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 500f81375200..384b90c62c0b 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -32,6 +32,8 @@ int inet_shutdown(struct socket *sock, int how);
32int inet_listen(struct socket *sock, int backlog); 32int inet_listen(struct socket *sock, int backlog);
33void inet_sock_destruct(struct sock *sk); 33void inet_sock_destruct(struct sock *sk);
34int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); 34int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
35int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
36 bool force_bind_address_no_port, bool with_lock);
35int inet_getname(struct socket *sock, struct sockaddr *uaddr, 37int inet_getname(struct socket *sock, struct sockaddr *uaddr,
36 int peer); 38 int peer);
37int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 39int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 50a6f0ddb878..2e5fedc56e59 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1066,6 +1066,8 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info);
1066void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); 1066void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
1067 1067
1068int inet6_release(struct socket *sock); 1068int inet6_release(struct socket *sock);
1069int __inet6_bind(struct sock *sock, struct sockaddr *uaddr, int addr_len,
1070 bool force_bind_address_no_port, bool with_lock);
1069int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); 1071int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
1070int inet6_getname(struct socket *sock, struct sockaddr *uaddr, 1072int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
1071 int peer); 1073 int peer);
diff --git a/include/net/sock.h b/include/net/sock.h
index b8ff435fa96e..49bd2c1796b0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1026,6 +1026,9 @@ static inline void sk_prot_clear_nulls(struct sock *sk, int size)
1026struct proto { 1026struct proto {
1027 void (*close)(struct sock *sk, 1027 void (*close)(struct sock *sk,
1028 long timeout); 1028 long timeout);
1029 int (*pre_connect)(struct sock *sk,
1030 struct sockaddr *uaddr,
1031 int addr_len);
1029 int (*connect)(struct sock *sk, 1032 int (*connect)(struct sock *sk,
1030 struct sockaddr *uaddr, 1033 struct sockaddr *uaddr,
1031 int addr_len); 1034 int addr_len);
diff --git a/include/net/udp.h b/include/net/udp.h
index 850a8e581cce..0676b272f6ac 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -273,6 +273,7 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
273int udp_rcv(struct sk_buff *skb); 273int udp_rcv(struct sk_buff *skb);
274int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); 274int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
275int udp_init_sock(struct sock *sk); 275int udp_init_sock(struct sock *sk);
276int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
276int __udp_disconnect(struct sock *sk, int flags); 277int __udp_disconnect(struct sock *sk, int flags);
277int udp_disconnect(struct sock *sk, int flags); 278int udp_disconnect(struct sock *sk, int flags);
278__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait); 279__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1878201c2d77..c5ec89732a8d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -136,6 +136,7 @@ enum bpf_prog_type {
136 BPF_PROG_TYPE_CGROUP_DEVICE, 136 BPF_PROG_TYPE_CGROUP_DEVICE,
137 BPF_PROG_TYPE_SK_MSG, 137 BPF_PROG_TYPE_SK_MSG,
138 BPF_PROG_TYPE_RAW_TRACEPOINT, 138 BPF_PROG_TYPE_RAW_TRACEPOINT,
139 BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
139}; 140};
140 141
141enum bpf_attach_type { 142enum bpf_attach_type {
@@ -147,6 +148,12 @@ enum bpf_attach_type {
147 BPF_SK_SKB_STREAM_VERDICT, 148 BPF_SK_SKB_STREAM_VERDICT,
148 BPF_CGROUP_DEVICE, 149 BPF_CGROUP_DEVICE,
149 BPF_SK_MSG_VERDICT, 150 BPF_SK_MSG_VERDICT,
151 BPF_CGROUP_INET4_BIND,
152 BPF_CGROUP_INET6_BIND,
153 BPF_CGROUP_INET4_CONNECT,
154 BPF_CGROUP_INET6_CONNECT,
155 BPF_CGROUP_INET4_POST_BIND,
156 BPF_CGROUP_INET6_POST_BIND,
150 __MAX_BPF_ATTACH_TYPE 157 __MAX_BPF_ATTACH_TYPE
151}; 158};
152 159
@@ -296,6 +303,11 @@ union bpf_attr {
296 __u32 prog_flags; 303 __u32 prog_flags;
297 char prog_name[BPF_OBJ_NAME_LEN]; 304 char prog_name[BPF_OBJ_NAME_LEN];
298 __u32 prog_ifindex; /* ifindex of netdev to prep for */ 305 __u32 prog_ifindex; /* ifindex of netdev to prep for */
306 /* For some prog types expected attach type must be known at
307 * load time to verify attach type specific parts of prog
308 * (context accesses, allowed helpers, etc).
309 */
310 __u32 expected_attach_type;
299 }; 311 };
300 312
301 struct { /* anonymous struct used by BPF_OBJ_* commands */ 313 struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -736,6 +748,13 @@ union bpf_attr {
736 * @flags: reserved for future use 748 * @flags: reserved for future use
737 * Return: SK_PASS 749 * Return: SK_PASS
738 * 750 *
751 * int bpf_bind(ctx, addr, addr_len)
752 * Bind socket to address. Only binding to IP is supported, no port can be
753 * set in addr.
754 * @ctx: pointer to context of type bpf_sock_addr
755 * @addr: pointer to struct sockaddr to bind socket to
756 * @addr_len: length of sockaddr structure
757 * Return: 0 on success or negative error code
739 */ 758 */
740#define __BPF_FUNC_MAPPER(FN) \ 759#define __BPF_FUNC_MAPPER(FN) \
741 FN(unspec), \ 760 FN(unspec), \
@@ -801,7 +820,8 @@ union bpf_attr {
801 FN(msg_redirect_map), \ 820 FN(msg_redirect_map), \
802 FN(msg_apply_bytes), \ 821 FN(msg_apply_bytes), \
803 FN(msg_cork_bytes), \ 822 FN(msg_cork_bytes), \
804 FN(msg_pull_data), 823 FN(msg_pull_data), \
824 FN(bind),
805 825
806/* integer value in 'imm' field of BPF_CALL instruction selects which helper 826/* integer value in 'imm' field of BPF_CALL instruction selects which helper
807 * function eBPF program intends to call 827 * function eBPF program intends to call
@@ -930,6 +950,15 @@ struct bpf_sock {
930 __u32 protocol; 950 __u32 protocol;
931 __u32 mark; 951 __u32 mark;
932 __u32 priority; 952 __u32 priority;
953 __u32 src_ip4; /* Allows 1,2,4-byte read.
954 * Stored in network byte order.
955 */
956 __u32 src_ip6[4]; /* Allows 1,2,4-byte read.
957 * Stored in network byte order.
958 */
959 __u32 src_port; /* Allows 4-byte read.
960 * Stored in host byte order
961 */
933}; 962};
934 963
935#define XDP_PACKET_HEADROOM 256 964#define XDP_PACKET_HEADROOM 256
@@ -1005,6 +1034,26 @@ struct bpf_map_info {
1005 __u64 netns_ino; 1034 __u64 netns_ino;
1006} __attribute__((aligned(8))); 1035} __attribute__((aligned(8)));
1007 1036
1037/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
1038 * by user and intended to be used by socket (e.g. to bind to, depends on
1039 * attach attach type).
1040 */
1041struct bpf_sock_addr {
1042 __u32 user_family; /* Allows 4-byte read, but no write. */
1043 __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write.
1044 * Stored in network byte order.
1045 */
1046 __u32 user_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
1047 * Stored in network byte order.
1048 */
1049 __u32 user_port; /* Allows 4-byte read and write.
1050 * Stored in network byte order
1051 */
1052 __u32 family; /* Allows 4-byte read, but no write */
1053 __u32 type; /* Allows 4-byte read, but no write */
1054 __u32 protocol; /* Allows 4-byte read, but no write */
1055};
1056
1008/* User bpf_sock_ops struct to access socket values and specify request ops 1057/* User bpf_sock_ops struct to access socket values and specify request ops
1009 * and their replies. 1058 * and their replies.
1010 * Some of this fields are in network (bigendian) byte order and may need 1059 * Some of this fields are in network (bigendian) byte order and may need
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index c1c0b60d3f2f..43171a0bb02b 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -495,6 +495,42 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
495EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); 495EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
496 496
497/** 497/**
498 * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
499 * provided by user sockaddr
500 * @sk: sock struct that will use sockaddr
501 * @uaddr: sockaddr struct provided by user
502 * @type: The type of program to be exectuted
503 *
504 * socket is expected to be of type INET or INET6.
505 *
506 * This function will return %-EPERM if an attached program is found and
507 * returned value != 1 during execution. In all other cases, 0 is returned.
508 */
509int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
510 struct sockaddr *uaddr,
511 enum bpf_attach_type type)
512{
513 struct bpf_sock_addr_kern ctx = {
514 .sk = sk,
515 .uaddr = uaddr,
516 };
517 struct cgroup *cgrp;
518 int ret;
519
520 /* Check socket family since not all sockets represent network
521 * endpoint (e.g. AF_UNIX).
522 */
523 if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
524 return 0;
525
526 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
527 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
528
529 return ret == 1 ? 0 : -EPERM;
530}
531EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
532
533/**
498 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock 534 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
499 * @sk: socket to get cgroup from 535 * @sk: socket to get cgroup from
500 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains 536 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
@@ -545,7 +581,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
545EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission); 581EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
546 582
547static const struct bpf_func_proto * 583static const struct bpf_func_proto *
548cgroup_dev_func_proto(enum bpf_func_id func_id) 584cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
549{ 585{
550 switch (func_id) { 586 switch (func_id) {
551 case BPF_FUNC_map_lookup_elem: 587 case BPF_FUNC_map_lookup_elem:
@@ -566,6 +602,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id)
566 602
567static bool cgroup_dev_is_valid_access(int off, int size, 603static bool cgroup_dev_is_valid_access(int off, int size,
568 enum bpf_access_type type, 604 enum bpf_access_type type,
605 const struct bpf_prog *prog,
569 struct bpf_insn_access_aux *info) 606 struct bpf_insn_access_aux *info)
570{ 607{
571 const int size_default = sizeof(__u32); 608 const int size_default = sizeof(__u32);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 95ca2523fa6e..0244973ee544 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1171,8 +1171,75 @@ struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
1171} 1171}
1172EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); 1172EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
1173 1173
1174/* Initially all BPF programs could be loaded w/o specifying
1175 * expected_attach_type. Later for some of them specifying expected_attach_type
1176 * at load time became required so that program could be validated properly.
1177 * Programs of types that are allowed to be loaded both w/ and w/o (for
1178 * backward compatibility) expected_attach_type, should have the default attach
1179 * type assigned to expected_attach_type for the latter case, so that it can be
1180 * validated later at attach time.
1181 *
1182 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if
1183 * prog type requires it but has some attach types that have to be backward
1184 * compatible.
1185 */
1186static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
1187{
1188 switch (attr->prog_type) {
1189 case BPF_PROG_TYPE_CGROUP_SOCK:
1190 /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't
1191 * exist so checking for non-zero is the way to go here.
1192 */
1193 if (!attr->expected_attach_type)
1194 attr->expected_attach_type =
1195 BPF_CGROUP_INET_SOCK_CREATE;
1196 break;
1197 }
1198}
1199
1200static int
1201bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
1202 enum bpf_attach_type expected_attach_type)
1203{
1204 switch (prog_type) {
1205 case BPF_PROG_TYPE_CGROUP_SOCK:
1206 switch (expected_attach_type) {
1207 case BPF_CGROUP_INET_SOCK_CREATE:
1208 case BPF_CGROUP_INET4_POST_BIND:
1209 case BPF_CGROUP_INET6_POST_BIND:
1210 return 0;
1211 default:
1212 return -EINVAL;
1213 }
1214 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
1215 switch (expected_attach_type) {
1216 case BPF_CGROUP_INET4_BIND:
1217 case BPF_CGROUP_INET6_BIND:
1218 case BPF_CGROUP_INET4_CONNECT:
1219 case BPF_CGROUP_INET6_CONNECT:
1220 return 0;
1221 default:
1222 return -EINVAL;
1223 }
1224 default:
1225 return 0;
1226 }
1227}
1228
1229static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
1230 enum bpf_attach_type attach_type)
1231{
1232 switch (prog->type) {
1233 case BPF_PROG_TYPE_CGROUP_SOCK:
1234 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
1235 return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
1236 default:
1237 return 0;
1238 }
1239}
1240
1174/* last field in 'union bpf_attr' used by this command */ 1241/* last field in 'union bpf_attr' used by this command */
1175#define BPF_PROG_LOAD_LAST_FIELD prog_ifindex 1242#define BPF_PROG_LOAD_LAST_FIELD expected_attach_type
1176 1243
1177static int bpf_prog_load(union bpf_attr *attr) 1244static int bpf_prog_load(union bpf_attr *attr)
1178{ 1245{
@@ -1209,11 +1276,17 @@ static int bpf_prog_load(union bpf_attr *attr)
1209 !capable(CAP_SYS_ADMIN)) 1276 !capable(CAP_SYS_ADMIN))
1210 return -EPERM; 1277 return -EPERM;
1211 1278
1279 bpf_prog_load_fixup_attach_type(attr);
1280 if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type))
1281 return -EINVAL;
1282
1212 /* plain bpf_prog allocation */ 1283 /* plain bpf_prog allocation */
1213 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 1284 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
1214 if (!prog) 1285 if (!prog)
1215 return -ENOMEM; 1286 return -ENOMEM;
1216 1287
1288 prog->expected_attach_type = attr->expected_attach_type;
1289
1217 prog->aux->offload_requested = !!attr->prog_ifindex; 1290 prog->aux->offload_requested = !!attr->prog_ifindex;
1218 1291
1219 err = security_bpf_prog_alloc(prog->aux); 1292 err = security_bpf_prog_alloc(prog->aux);
@@ -1453,8 +1526,16 @@ static int bpf_prog_attach(const union bpf_attr *attr)
1453 ptype = BPF_PROG_TYPE_CGROUP_SKB; 1526 ptype = BPF_PROG_TYPE_CGROUP_SKB;
1454 break; 1527 break;
1455 case BPF_CGROUP_INET_SOCK_CREATE: 1528 case BPF_CGROUP_INET_SOCK_CREATE:
1529 case BPF_CGROUP_INET4_POST_BIND:
1530 case BPF_CGROUP_INET6_POST_BIND:
1456 ptype = BPF_PROG_TYPE_CGROUP_SOCK; 1531 ptype = BPF_PROG_TYPE_CGROUP_SOCK;
1457 break; 1532 break;
1533 case BPF_CGROUP_INET4_BIND:
1534 case BPF_CGROUP_INET6_BIND:
1535 case BPF_CGROUP_INET4_CONNECT:
1536 case BPF_CGROUP_INET6_CONNECT:
1537 ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
1538 break;
1458 case BPF_CGROUP_SOCK_OPS: 1539 case BPF_CGROUP_SOCK_OPS:
1459 ptype = BPF_PROG_TYPE_SOCK_OPS; 1540 ptype = BPF_PROG_TYPE_SOCK_OPS;
1460 break; 1541 break;
@@ -1474,6 +1555,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
1474 if (IS_ERR(prog)) 1555 if (IS_ERR(prog))
1475 return PTR_ERR(prog); 1556 return PTR_ERR(prog);
1476 1557
1558 if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
1559 bpf_prog_put(prog);
1560 return -EINVAL;
1561 }
1562
1477 cgrp = cgroup_get_from_fd(attr->target_fd); 1563 cgrp = cgroup_get_from_fd(attr->target_fd);
1478 if (IS_ERR(cgrp)) { 1564 if (IS_ERR(cgrp)) {
1479 bpf_prog_put(prog); 1565 bpf_prog_put(prog);
@@ -1510,8 +1596,16 @@ static int bpf_prog_detach(const union bpf_attr *attr)
1510 ptype = BPF_PROG_TYPE_CGROUP_SKB; 1596 ptype = BPF_PROG_TYPE_CGROUP_SKB;
1511 break; 1597 break;
1512 case BPF_CGROUP_INET_SOCK_CREATE: 1598 case BPF_CGROUP_INET_SOCK_CREATE:
1599 case BPF_CGROUP_INET4_POST_BIND:
1600 case BPF_CGROUP_INET6_POST_BIND:
1513 ptype = BPF_PROG_TYPE_CGROUP_SOCK; 1601 ptype = BPF_PROG_TYPE_CGROUP_SOCK;
1514 break; 1602 break;
1603 case BPF_CGROUP_INET4_BIND:
1604 case BPF_CGROUP_INET6_BIND:
1605 case BPF_CGROUP_INET4_CONNECT:
1606 case BPF_CGROUP_INET6_CONNECT:
1607 ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
1608 break;
1515 case BPF_CGROUP_SOCK_OPS: 1609 case BPF_CGROUP_SOCK_OPS:
1516 ptype = BPF_PROG_TYPE_SOCK_OPS; 1610 ptype = BPF_PROG_TYPE_SOCK_OPS;
1517 break; 1611 break;
@@ -1561,6 +1655,12 @@ static int bpf_prog_query(const union bpf_attr *attr,
1561 case BPF_CGROUP_INET_INGRESS: 1655 case BPF_CGROUP_INET_INGRESS:
1562 case BPF_CGROUP_INET_EGRESS: 1656 case BPF_CGROUP_INET_EGRESS:
1563 case BPF_CGROUP_INET_SOCK_CREATE: 1657 case BPF_CGROUP_INET_SOCK_CREATE:
1658 case BPF_CGROUP_INET4_BIND:
1659 case BPF_CGROUP_INET6_BIND:
1660 case BPF_CGROUP_INET4_POST_BIND:
1661 case BPF_CGROUP_INET6_POST_BIND:
1662 case BPF_CGROUP_INET4_CONNECT:
1663 case BPF_CGROUP_INET6_CONNECT:
1564 case BPF_CGROUP_SOCK_OPS: 1664 case BPF_CGROUP_SOCK_OPS:
1565 case BPF_CGROUP_DEVICE: 1665 case BPF_CGROUP_DEVICE:
1566 break; 1666 break;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8acd2207e412..5dd1dcb902bf 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1323,7 +1323,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
1323 }; 1323 };
1324 1324
1325 if (env->ops->is_valid_access && 1325 if (env->ops->is_valid_access &&
1326 env->ops->is_valid_access(off, size, t, &info)) { 1326 env->ops->is_valid_access(off, size, t, env->prog, &info)) {
1327 /* A non zero info.ctx_field_size indicates that this field is a 1327 /* A non zero info.ctx_field_size indicates that this field is a
1328 * candidate for later verifier transformation to load the whole 1328 * candidate for later verifier transformation to load the whole
1329 * field and then apply a mask when accessed with a narrower 1329 * field and then apply a mask when accessed with a narrower
@@ -2349,7 +2349,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
2349 } 2349 }
2350 2350
2351 if (env->ops->get_func_proto) 2351 if (env->ops->get_func_proto)
2352 fn = env->ops->get_func_proto(func_id); 2352 fn = env->ops->get_func_proto(func_id, env->prog);
2353 if (!fn) { 2353 if (!fn) {
2354 verbose(env, "unknown func %s#%d\n", func_id_name(func_id), 2354 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
2355 func_id); 2355 func_id);
@@ -3887,6 +3887,7 @@ static int check_return_code(struct bpf_verifier_env *env)
3887 switch (env->prog->type) { 3887 switch (env->prog->type) {
3888 case BPF_PROG_TYPE_CGROUP_SKB: 3888 case BPF_PROG_TYPE_CGROUP_SKB:
3889 case BPF_PROG_TYPE_CGROUP_SOCK: 3889 case BPF_PROG_TYPE_CGROUP_SOCK:
3890 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3890 case BPF_PROG_TYPE_SOCK_OPS: 3891 case BPF_PROG_TYPE_SOCK_OPS:
3891 case BPF_PROG_TYPE_CGROUP_DEVICE: 3892 case BPF_PROG_TYPE_CGROUP_DEVICE:
3892 break; 3893 break;
@@ -5572,7 +5573,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
5572 insn = new_prog->insnsi + i + delta; 5573 insn = new_prog->insnsi + i + delta;
5573 } 5574 }
5574patch_call_imm: 5575patch_call_imm:
5575 fn = env->ops->get_func_proto(insn->imm); 5576 fn = env->ops->get_func_proto(insn->imm, env->prog);
5576 /* all functions that have prototype and verifier allowed 5577 /* all functions that have prototype and verifier allowed
5577 * programs to call them, must be real in-kernel functions 5578 * programs to call them, must be real in-kernel functions
5578 */ 5579 */
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 463e72d18c4c..d88e96d4e12c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -524,7 +524,8 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = {
524 .arg3_type = ARG_ANYTHING, 524 .arg3_type = ARG_ANYTHING,
525}; 525};
526 526
527static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) 527static const struct bpf_func_proto *
528tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
528{ 529{
529 switch (func_id) { 530 switch (func_id) {
530 case BPF_FUNC_map_lookup_elem: 531 case BPF_FUNC_map_lookup_elem:
@@ -568,7 +569,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
568 } 569 }
569} 570}
570 571
571static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) 572static const struct bpf_func_proto *
573kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
572{ 574{
573 switch (func_id) { 575 switch (func_id) {
574 case BPF_FUNC_perf_event_output: 576 case BPF_FUNC_perf_event_output:
@@ -582,12 +584,13 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
582 return &bpf_override_return_proto; 584 return &bpf_override_return_proto;
583#endif 585#endif
584 default: 586 default:
585 return tracing_func_proto(func_id); 587 return tracing_func_proto(func_id, prog);
586 } 588 }
587} 589}
588 590
589/* bpf+kprobe programs can access fields of 'struct pt_regs' */ 591/* bpf+kprobe programs can access fields of 'struct pt_regs' */
590static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type, 592static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
593 const struct bpf_prog *prog,
591 struct bpf_insn_access_aux *info) 594 struct bpf_insn_access_aux *info)
592{ 595{
593 if (off < 0 || off >= sizeof(struct pt_regs)) 596 if (off < 0 || off >= sizeof(struct pt_regs))
@@ -661,7 +664,8 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
661 .arg3_type = ARG_ANYTHING, 664 .arg3_type = ARG_ANYTHING,
662}; 665};
663 666
664static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) 667static const struct bpf_func_proto *
668tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
665{ 669{
666 switch (func_id) { 670 switch (func_id) {
667 case BPF_FUNC_perf_event_output: 671 case BPF_FUNC_perf_event_output:
@@ -669,11 +673,12 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
669 case BPF_FUNC_get_stackid: 673 case BPF_FUNC_get_stackid:
670 return &bpf_get_stackid_proto_tp; 674 return &bpf_get_stackid_proto_tp;
671 default: 675 default:
672 return tracing_func_proto(func_id); 676 return tracing_func_proto(func_id, prog);
673 } 677 }
674} 678}
675 679
676static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, 680static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
681 const struct bpf_prog *prog,
677 struct bpf_insn_access_aux *info) 682 struct bpf_insn_access_aux *info)
678{ 683{
679 if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) 684 if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
@@ -721,7 +726,8 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
721 .arg3_type = ARG_CONST_SIZE, 726 .arg3_type = ARG_CONST_SIZE,
722}; 727};
723 728
724static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id) 729static const struct bpf_func_proto *
730pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
725{ 731{
726 switch (func_id) { 732 switch (func_id) {
727 case BPF_FUNC_perf_event_output: 733 case BPF_FUNC_perf_event_output:
@@ -731,7 +737,7 @@ static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id)
731 case BPF_FUNC_perf_prog_read_value: 737 case BPF_FUNC_perf_prog_read_value:
732 return &bpf_perf_prog_read_value_proto; 738 return &bpf_perf_prog_read_value_proto;
733 default: 739 default:
734 return tracing_func_proto(func_id); 740 return tracing_func_proto(func_id, prog);
735 } 741 }
736} 742}
737 743
@@ -781,7 +787,8 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
781 .arg3_type = ARG_ANYTHING, 787 .arg3_type = ARG_ANYTHING,
782}; 788};
783 789
784static const struct bpf_func_proto *raw_tp_prog_func_proto(enum bpf_func_id func_id) 790static const struct bpf_func_proto *
791raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
785{ 792{
786 switch (func_id) { 793 switch (func_id) {
787 case BPF_FUNC_perf_event_output: 794 case BPF_FUNC_perf_event_output:
@@ -789,12 +796,13 @@ static const struct bpf_func_proto *raw_tp_prog_func_proto(enum bpf_func_id func
789 case BPF_FUNC_get_stackid: 796 case BPF_FUNC_get_stackid:
790 return &bpf_get_stackid_proto_raw_tp; 797 return &bpf_get_stackid_proto_raw_tp;
791 default: 798 default:
792 return tracing_func_proto(func_id); 799 return tracing_func_proto(func_id, prog);
793 } 800 }
794} 801}
795 802
796static bool raw_tp_prog_is_valid_access(int off, int size, 803static bool raw_tp_prog_is_valid_access(int off, int size,
797 enum bpf_access_type type, 804 enum bpf_access_type type,
805 const struct bpf_prog *prog,
798 struct bpf_insn_access_aux *info) 806 struct bpf_insn_access_aux *info)
799{ 807{
800 /* largest tracepoint in the kernel has 12 args */ 808 /* largest tracepoint in the kernel has 12 args */
@@ -816,6 +824,7 @@ const struct bpf_prog_ops raw_tracepoint_prog_ops = {
816}; 824};
817 825
818static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, 826static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
827 const struct bpf_prog *prog,
819 struct bpf_insn_access_aux *info) 828 struct bpf_insn_access_aux *info)
820{ 829{
821 const int size_u64 = sizeof(u64); 830 const int size_u64 = sizeof(u64);
diff --git a/net/core/filter.c b/net/core/filter.c
index e989bf313195..d31aff93270d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -33,6 +33,7 @@
33#include <linux/if_packet.h> 33#include <linux/if_packet.h>
34#include <linux/if_arp.h> 34#include <linux/if_arp.h>
35#include <linux/gfp.h> 35#include <linux/gfp.h>
36#include <net/inet_common.h>
36#include <net/ip.h> 37#include <net/ip.h>
37#include <net/protocol.h> 38#include <net/protocol.h>
38#include <net/netlink.h> 39#include <net/netlink.h>
@@ -3656,6 +3657,52 @@ static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
3656 .arg2_type = ARG_ANYTHING, 3657 .arg2_type = ARG_ANYTHING,
3657}; 3658};
3658 3659
3660const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
3661EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
3662
3663BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
3664 int, addr_len)
3665{
3666#ifdef CONFIG_INET
3667 struct sock *sk = ctx->sk;
3668 int err;
3669
3670 /* Binding to port can be expensive so it's prohibited in the helper.
3671 * Only binding to IP is supported.
3672 */
3673 err = -EINVAL;
3674 if (addr->sa_family == AF_INET) {
3675 if (addr_len < sizeof(struct sockaddr_in))
3676 return err;
3677 if (((struct sockaddr_in *)addr)->sin_port != htons(0))
3678 return err;
3679 return __inet_bind(sk, addr, addr_len, true, false);
3680#if IS_ENABLED(CONFIG_IPV6)
3681 } else if (addr->sa_family == AF_INET6) {
3682 if (addr_len < SIN6_LEN_RFC2133)
3683 return err;
3684 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
3685 return err;
3686 /* ipv6_bpf_stub cannot be NULL, since it's called from
3687 * bpf_cgroup_inet6_connect hook and ipv6 is already loaded
3688 */
3689 return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false);
3690#endif /* CONFIG_IPV6 */
3691 }
3692#endif /* CONFIG_INET */
3693
3694 return -EAFNOSUPPORT;
3695}
3696
3697static const struct bpf_func_proto bpf_bind_proto = {
3698 .func = bpf_bind,
3699 .gpl_only = false,
3700 .ret_type = RET_INTEGER,
3701 .arg1_type = ARG_PTR_TO_CTX,
3702 .arg2_type = ARG_PTR_TO_MEM,
3703 .arg3_type = ARG_CONST_SIZE,
3704};
3705
3659static const struct bpf_func_proto * 3706static const struct bpf_func_proto *
3660bpf_base_func_proto(enum bpf_func_id func_id) 3707bpf_base_func_proto(enum bpf_func_id func_id)
3661{ 3708{
@@ -3685,7 +3732,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
3685} 3732}
3686 3733
3687static const struct bpf_func_proto * 3734static const struct bpf_func_proto *
3688sock_filter_func_proto(enum bpf_func_id func_id) 3735sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3689{ 3736{
3690 switch (func_id) { 3737 switch (func_id) {
3691 /* inet and inet6 sockets are created in a process 3738 /* inet and inet6 sockets are created in a process
@@ -3699,7 +3746,29 @@ sock_filter_func_proto(enum bpf_func_id func_id)
3699} 3746}
3700 3747
3701static const struct bpf_func_proto * 3748static const struct bpf_func_proto *
3702sk_filter_func_proto(enum bpf_func_id func_id) 3749sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3750{
3751 switch (func_id) {
3752 /* inet and inet6 sockets are created in a process
3753 * context so there is always a valid uid/gid
3754 */
3755 case BPF_FUNC_get_current_uid_gid:
3756 return &bpf_get_current_uid_gid_proto;
3757 case BPF_FUNC_bind:
3758 switch (prog->expected_attach_type) {
3759 case BPF_CGROUP_INET4_CONNECT:
3760 case BPF_CGROUP_INET6_CONNECT:
3761 return &bpf_bind_proto;
3762 default:
3763 return NULL;
3764 }
3765 default:
3766 return bpf_base_func_proto(func_id);
3767 }
3768}
3769
3770static const struct bpf_func_proto *
3771sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3703{ 3772{
3704 switch (func_id) { 3773 switch (func_id) {
3705 case BPF_FUNC_skb_load_bytes: 3774 case BPF_FUNC_skb_load_bytes:
@@ -3714,7 +3783,7 @@ sk_filter_func_proto(enum bpf_func_id func_id)
3714} 3783}
3715 3784
3716static const struct bpf_func_proto * 3785static const struct bpf_func_proto *
3717tc_cls_act_func_proto(enum bpf_func_id func_id) 3786tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3718{ 3787{
3719 switch (func_id) { 3788 switch (func_id) {
3720 case BPF_FUNC_skb_store_bytes: 3789 case BPF_FUNC_skb_store_bytes:
@@ -3781,7 +3850,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
3781} 3850}
3782 3851
3783static const struct bpf_func_proto * 3852static const struct bpf_func_proto *
3784xdp_func_proto(enum bpf_func_id func_id) 3853xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3785{ 3854{
3786 switch (func_id) { 3855 switch (func_id) {
3787 case BPF_FUNC_perf_event_output: 3856 case BPF_FUNC_perf_event_output:
@@ -3804,7 +3873,7 @@ xdp_func_proto(enum bpf_func_id func_id)
3804} 3873}
3805 3874
3806static const struct bpf_func_proto * 3875static const struct bpf_func_proto *
3807lwt_inout_func_proto(enum bpf_func_id func_id) 3876lwt_inout_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3808{ 3877{
3809 switch (func_id) { 3878 switch (func_id) {
3810 case BPF_FUNC_skb_load_bytes: 3879 case BPF_FUNC_skb_load_bytes:
@@ -3831,7 +3900,7 @@ lwt_inout_func_proto(enum bpf_func_id func_id)
3831} 3900}
3832 3901
3833static const struct bpf_func_proto * 3902static const struct bpf_func_proto *
3834 sock_ops_func_proto(enum bpf_func_id func_id) 3903sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3835{ 3904{
3836 switch (func_id) { 3905 switch (func_id) {
3837 case BPF_FUNC_setsockopt: 3906 case BPF_FUNC_setsockopt:
@@ -3847,7 +3916,8 @@ static const struct bpf_func_proto *
3847 } 3916 }
3848} 3917}
3849 3918
3850static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id) 3919static const struct bpf_func_proto *
3920sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3851{ 3921{
3852 switch (func_id) { 3922 switch (func_id) {
3853 case BPF_FUNC_msg_redirect_map: 3923 case BPF_FUNC_msg_redirect_map:
@@ -3863,7 +3933,8 @@ static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
3863 } 3933 }
3864} 3934}
3865 3935
3866static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id) 3936static const struct bpf_func_proto *
3937sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3867{ 3938{
3868 switch (func_id) { 3939 switch (func_id) {
3869 case BPF_FUNC_skb_store_bytes: 3940 case BPF_FUNC_skb_store_bytes:
@@ -3888,7 +3959,7 @@ static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
3888} 3959}
3889 3960
3890static const struct bpf_func_proto * 3961static const struct bpf_func_proto *
3891lwt_xmit_func_proto(enum bpf_func_id func_id) 3962lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3892{ 3963{
3893 switch (func_id) { 3964 switch (func_id) {
3894 case BPF_FUNC_skb_get_tunnel_key: 3965 case BPF_FUNC_skb_get_tunnel_key:
@@ -3918,11 +3989,12 @@ lwt_xmit_func_proto(enum bpf_func_id func_id)
3918 case BPF_FUNC_set_hash_invalid: 3989 case BPF_FUNC_set_hash_invalid:
3919 return &bpf_set_hash_invalid_proto; 3990 return &bpf_set_hash_invalid_proto;
3920 default: 3991 default:
3921 return lwt_inout_func_proto(func_id); 3992 return lwt_inout_func_proto(func_id, prog);
3922 } 3993 }
3923} 3994}
3924 3995
3925static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type, 3996static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
3997 const struct bpf_prog *prog,
3926 struct bpf_insn_access_aux *info) 3998 struct bpf_insn_access_aux *info)
3927{ 3999{
3928 const int size_default = sizeof(__u32); 4000 const int size_default = sizeof(__u32);
@@ -3966,6 +4038,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
3966 4038
3967static bool sk_filter_is_valid_access(int off, int size, 4039static bool sk_filter_is_valid_access(int off, int size,
3968 enum bpf_access_type type, 4040 enum bpf_access_type type,
4041 const struct bpf_prog *prog,
3969 struct bpf_insn_access_aux *info) 4042 struct bpf_insn_access_aux *info)
3970{ 4043{
3971 switch (off) { 4044 switch (off) {
@@ -3986,11 +4059,12 @@ static bool sk_filter_is_valid_access(int off, int size,
3986 } 4059 }
3987 } 4060 }
3988 4061
3989 return bpf_skb_is_valid_access(off, size, type, info); 4062 return bpf_skb_is_valid_access(off, size, type, prog, info);
3990} 4063}
3991 4064
3992static bool lwt_is_valid_access(int off, int size, 4065static bool lwt_is_valid_access(int off, int size,
3993 enum bpf_access_type type, 4066 enum bpf_access_type type,
4067 const struct bpf_prog *prog,
3994 struct bpf_insn_access_aux *info) 4068 struct bpf_insn_access_aux *info)
3995{ 4069{
3996 switch (off) { 4070 switch (off) {
@@ -4020,32 +4094,83 @@ static bool lwt_is_valid_access(int off, int size,
4020 break; 4094 break;
4021 } 4095 }
4022 4096
4023 return bpf_skb_is_valid_access(off, size, type, info); 4097 return bpf_skb_is_valid_access(off, size, type, prog, info);
4024} 4098}
4025 4099
4026static bool sock_filter_is_valid_access(int off, int size, 4100
4027 enum bpf_access_type type, 4101/* Attach type specific accesses */
4028 struct bpf_insn_access_aux *info) 4102static bool __sock_filter_check_attach_type(int off,
4103 enum bpf_access_type access_type,
4104 enum bpf_attach_type attach_type)
4029{ 4105{
4030 if (type == BPF_WRITE) { 4106 switch (off) {
4031 switch (off) { 4107 case offsetof(struct bpf_sock, bound_dev_if):
4032 case offsetof(struct bpf_sock, bound_dev_if): 4108 case offsetof(struct bpf_sock, mark):
4033 case offsetof(struct bpf_sock, mark): 4109 case offsetof(struct bpf_sock, priority):
4034 case offsetof(struct bpf_sock, priority): 4110 switch (attach_type) {
4035 break; 4111 case BPF_CGROUP_INET_SOCK_CREATE:
4112 goto full_access;
4113 default:
4114 return false;
4115 }
4116 case bpf_ctx_range(struct bpf_sock, src_ip4):
4117 switch (attach_type) {
4118 case BPF_CGROUP_INET4_POST_BIND:
4119 goto read_only;
4120 default:
4121 return false;
4122 }
4123 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
4124 switch (attach_type) {
4125 case BPF_CGROUP_INET6_POST_BIND:
4126 goto read_only;
4036 default: 4127 default:
4037 return false; 4128 return false;
4038 } 4129 }
4130 case bpf_ctx_range(struct bpf_sock, src_port):
4131 switch (attach_type) {
4132 case BPF_CGROUP_INET4_POST_BIND:
4133 case BPF_CGROUP_INET6_POST_BIND:
4134 goto read_only;
4135 default:
4136 return false;
4137 }
4138 }
4139read_only:
4140 return access_type == BPF_READ;
4141full_access:
4142 return true;
4143}
4144
4145static bool __sock_filter_check_size(int off, int size,
4146 struct bpf_insn_access_aux *info)
4147{
4148 const int size_default = sizeof(__u32);
4149
4150 switch (off) {
4151 case bpf_ctx_range(struct bpf_sock, src_ip4):
4152 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
4153 bpf_ctx_record_field_size(info, size_default);
4154 return bpf_ctx_narrow_access_ok(off, size, size_default);
4039 } 4155 }
4040 4156
4041 if (off < 0 || off + size > sizeof(struct bpf_sock)) 4157 return size == size_default;
4158}
4159
4160static bool sock_filter_is_valid_access(int off, int size,
4161 enum bpf_access_type type,
4162 const struct bpf_prog *prog,
4163 struct bpf_insn_access_aux *info)
4164{
4165 if (off < 0 || off >= sizeof(struct bpf_sock))
4042 return false; 4166 return false;
4043 /* The verifier guarantees that size > 0. */
4044 if (off % size != 0) 4167 if (off % size != 0)
4045 return false; 4168 return false;
4046 if (size != sizeof(__u32)) 4169 if (!__sock_filter_check_attach_type(off, type,
4170 prog->expected_attach_type))
4171 return false;
4172 if (!__sock_filter_check_size(off, size, info))
4047 return false; 4173 return false;
4048
4049 return true; 4174 return true;
4050} 4175}
4051 4176
@@ -4096,6 +4221,7 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
4096 4221
4097static bool tc_cls_act_is_valid_access(int off, int size, 4222static bool tc_cls_act_is_valid_access(int off, int size,
4098 enum bpf_access_type type, 4223 enum bpf_access_type type,
4224 const struct bpf_prog *prog,
4099 struct bpf_insn_access_aux *info) 4225 struct bpf_insn_access_aux *info)
4100{ 4226{
4101 if (type == BPF_WRITE) { 4227 if (type == BPF_WRITE) {
@@ -4125,7 +4251,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
4125 return false; 4251 return false;
4126 } 4252 }
4127 4253
4128 return bpf_skb_is_valid_access(off, size, type, info); 4254 return bpf_skb_is_valid_access(off, size, type, prog, info);
4129} 4255}
4130 4256
4131static bool __is_valid_xdp_access(int off, int size) 4257static bool __is_valid_xdp_access(int off, int size)
@@ -4142,6 +4268,7 @@ static bool __is_valid_xdp_access(int off, int size)
4142 4268
4143static bool xdp_is_valid_access(int off, int size, 4269static bool xdp_is_valid_access(int off, int size,
4144 enum bpf_access_type type, 4270 enum bpf_access_type type,
4271 const struct bpf_prog *prog,
4145 struct bpf_insn_access_aux *info) 4272 struct bpf_insn_access_aux *info)
4146{ 4273{
4147 if (type == BPF_WRITE) 4274 if (type == BPF_WRITE)
@@ -4172,8 +4299,74 @@ void bpf_warn_invalid_xdp_action(u32 act)
4172} 4299}
4173EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); 4300EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
4174 4301
4302static bool sock_addr_is_valid_access(int off, int size,
4303 enum bpf_access_type type,
4304 const struct bpf_prog *prog,
4305 struct bpf_insn_access_aux *info)
4306{
4307 const int size_default = sizeof(__u32);
4308
4309 if (off < 0 || off >= sizeof(struct bpf_sock_addr))
4310 return false;
4311 if (off % size != 0)
4312 return false;
4313
4314 /* Disallow access to IPv6 fields from IPv4 contex and vise
4315 * versa.
4316 */
4317 switch (off) {
4318 case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
4319 switch (prog->expected_attach_type) {
4320 case BPF_CGROUP_INET4_BIND:
4321 case BPF_CGROUP_INET4_CONNECT:
4322 break;
4323 default:
4324 return false;
4325 }
4326 break;
4327 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
4328 switch (prog->expected_attach_type) {
4329 case BPF_CGROUP_INET6_BIND:
4330 case BPF_CGROUP_INET6_CONNECT:
4331 break;
4332 default:
4333 return false;
4334 }
4335 break;
4336 }
4337
4338 switch (off) {
4339 case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
4340 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
4341 /* Only narrow read access allowed for now. */
4342 if (type == BPF_READ) {
4343 bpf_ctx_record_field_size(info, size_default);
4344 if (!bpf_ctx_narrow_access_ok(off, size, size_default))
4345 return false;
4346 } else {
4347 if (size != size_default)
4348 return false;
4349 }
4350 break;
4351 case bpf_ctx_range(struct bpf_sock_addr, user_port):
4352 if (size != size_default)
4353 return false;
4354 break;
4355 default:
4356 if (type == BPF_READ) {
4357 if (size != size_default)
4358 return false;
4359 } else {
4360 return false;
4361 }
4362 }
4363
4364 return true;
4365}
4366
4175static bool sock_ops_is_valid_access(int off, int size, 4367static bool sock_ops_is_valid_access(int off, int size,
4176 enum bpf_access_type type, 4368 enum bpf_access_type type,
4369 const struct bpf_prog *prog,
4177 struct bpf_insn_access_aux *info) 4370 struct bpf_insn_access_aux *info)
4178{ 4371{
4179 const int size_default = sizeof(__u32); 4372 const int size_default = sizeof(__u32);
@@ -4220,6 +4413,7 @@ static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
4220 4413
4221static bool sk_skb_is_valid_access(int off, int size, 4414static bool sk_skb_is_valid_access(int off, int size,
4222 enum bpf_access_type type, 4415 enum bpf_access_type type,
4416 const struct bpf_prog *prog,
4223 struct bpf_insn_access_aux *info) 4417 struct bpf_insn_access_aux *info)
4224{ 4418{
4225 switch (off) { 4419 switch (off) {
@@ -4249,11 +4443,12 @@ static bool sk_skb_is_valid_access(int off, int size,
4249 break; 4443 break;
4250 } 4444 }
4251 4445
4252 return bpf_skb_is_valid_access(off, size, type, info); 4446 return bpf_skb_is_valid_access(off, size, type, prog, info);
4253} 4447}
4254 4448
4255static bool sk_msg_is_valid_access(int off, int size, 4449static bool sk_msg_is_valid_access(int off, int size,
4256 enum bpf_access_type type, 4450 enum bpf_access_type type,
4451 const struct bpf_prog *prog,
4257 struct bpf_insn_access_aux *info) 4452 struct bpf_insn_access_aux *info)
4258{ 4453{
4259 if (type == BPF_WRITE) 4454 if (type == BPF_WRITE)
@@ -4583,6 +4778,7 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
4583 struct bpf_prog *prog, u32 *target_size) 4778 struct bpf_prog *prog, u32 *target_size)
4584{ 4779{
4585 struct bpf_insn *insn = insn_buf; 4780 struct bpf_insn *insn = insn_buf;
4781 int off;
4586 4782
4587 switch (si->off) { 4783 switch (si->off) {
4588 case offsetof(struct bpf_sock, bound_dev_if): 4784 case offsetof(struct bpf_sock, bound_dev_if):
@@ -4638,6 +4834,43 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
4638 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); 4834 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
4639 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT); 4835 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
4640 break; 4836 break;
4837
4838 case offsetof(struct bpf_sock, src_ip4):
4839 *insn++ = BPF_LDX_MEM(
4840 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
4841 bpf_target_off(struct sock_common, skc_rcv_saddr,
4842 FIELD_SIZEOF(struct sock_common,
4843 skc_rcv_saddr),
4844 target_size));
4845 break;
4846
4847 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
4848#if IS_ENABLED(CONFIG_IPV6)
4849 off = si->off;
4850 off -= offsetof(struct bpf_sock, src_ip6[0]);
4851 *insn++ = BPF_LDX_MEM(
4852 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
4853 bpf_target_off(
4854 struct sock_common,
4855 skc_v6_rcv_saddr.s6_addr32[0],
4856 FIELD_SIZEOF(struct sock_common,
4857 skc_v6_rcv_saddr.s6_addr32[0]),
4858 target_size) + off);
4859#else
4860 (void)off;
4861 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
4862#endif
4863 break;
4864
4865 case offsetof(struct bpf_sock, src_port):
4866 *insn++ = BPF_LDX_MEM(
4867 BPF_FIELD_SIZEOF(struct sock_common, skc_num),
4868 si->dst_reg, si->src_reg,
4869 bpf_target_off(struct sock_common, skc_num,
4870 FIELD_SIZEOF(struct sock_common,
4871 skc_num),
4872 target_size));
4873 break;
4641 } 4874 }
4642 4875
4643 return insn - insn_buf; 4876 return insn - insn_buf;
@@ -4713,6 +4946,152 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
4713 return insn - insn_buf; 4946 return insn - insn_buf;
4714} 4947}
4715 4948
4949/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of
4950 * context Structure, F is Field in context structure that contains a pointer
4951 * to Nested Structure of type NS that has the field NF.
4952 *
4953 * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to caller to make
4954 * sure that SIZE is not greater than actual size of S.F.NF.
4955 *
4956 * If offset OFF is provided, the load happens from that offset relative to
4957 * offset of NF.
4958 */
4959#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \
4960 do { \
4961 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \
4962 si->src_reg, offsetof(S, F)); \
4963 *insn++ = BPF_LDX_MEM( \
4964 SIZE, si->dst_reg, si->dst_reg, \
4965 bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
4966 target_size) \
4967 + OFF); \
4968 } while (0)
4969
4970#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \
4971 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \
4972 BPF_FIELD_SIZEOF(NS, NF), 0)
4973
4974/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to
4975 * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
4976 *
4977 * It doesn't support SIZE argument though since narrow stores are not
4978 * supported for now.
4979 *
4980 * In addition it uses Temporary Field TF (member of struct S) as the 3rd
4981 * "register" since two registers available in convert_ctx_access are not
4982 * enough: we can't override neither SRC, since it contains value to store, nor
4983 * DST since it contains pointer to context that may be used by later
4984 * instructions. But we need a temporary place to save pointer to nested
4985 * structure whose field we want to store to.
4986 */
4987#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \
4988 do { \
4989 int tmp_reg = BPF_REG_9; \
4990 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
4991 --tmp_reg; \
4992 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
4993 --tmp_reg; \
4994 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \
4995 offsetof(S, TF)); \
4996 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
4997 si->dst_reg, offsetof(S, F)); \
4998 *insn++ = BPF_STX_MEM( \
4999 BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \
5000 bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
5001 target_size) \
5002 + OFF); \
5003 *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
5004 offsetof(S, TF)); \
5005 } while (0)
5006
5007#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
5008 TF) \
5009 do { \
5010 if (type == BPF_WRITE) { \
5011 SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \
5012 TF); \
5013 } else { \
5014 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
5015 S, NS, F, NF, SIZE, OFF); \
5016 } \
5017 } while (0)
5018
5019#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \
5020 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \
5021 S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
5022
5023static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
5024 const struct bpf_insn *si,
5025 struct bpf_insn *insn_buf,
5026 struct bpf_prog *prog, u32 *target_size)
5027{
5028 struct bpf_insn *insn = insn_buf;
5029 int off;
5030
5031 switch (si->off) {
5032 case offsetof(struct bpf_sock_addr, user_family):
5033 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
5034 struct sockaddr, uaddr, sa_family);
5035 break;
5036
5037 case offsetof(struct bpf_sock_addr, user_ip4):
5038 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
5039 struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
5040 sin_addr, BPF_SIZE(si->code), 0, tmp_reg);
5041 break;
5042
5043 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
5044 off = si->off;
5045 off -= offsetof(struct bpf_sock_addr, user_ip6[0]);
5046 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
5047 struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
5048 sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off,
5049 tmp_reg);
5050 break;
5051
5052 case offsetof(struct bpf_sock_addr, user_port):
5053 /* To get port we need to know sa_family first and then treat
5054 * sockaddr as either sockaddr_in or sockaddr_in6.
5055 * Though we can simplify since port field has same offset and
5056 * size in both structures.
5057 * Here we check this invariant and use just one of the
5058 * structures if it's true.
5059 */
5060 BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
5061 offsetof(struct sockaddr_in6, sin6_port));
5062 BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) !=
5063 FIELD_SIZEOF(struct sockaddr_in6, sin6_port));
5064 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern,
5065 struct sockaddr_in6, uaddr,
5066 sin6_port, tmp_reg);
5067 break;
5068
5069 case offsetof(struct bpf_sock_addr, family):
5070 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
5071 struct sock, sk, sk_family);
5072 break;
5073
5074 case offsetof(struct bpf_sock_addr, type):
5075 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
5076 struct bpf_sock_addr_kern, struct sock, sk,
5077 __sk_flags_offset, BPF_W, 0);
5078 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
5079 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
5080 break;
5081
5082 case offsetof(struct bpf_sock_addr, protocol):
5083 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
5084 struct bpf_sock_addr_kern, struct sock, sk,
5085 __sk_flags_offset, BPF_W, 0);
5086 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
5087 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
5088 SK_FL_PROTO_SHIFT);
5089 break;
5090 }
5091
5092 return insn - insn_buf;
5093}
5094
4716static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, 5095static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
4717 const struct bpf_insn *si, 5096 const struct bpf_insn *si,
4718 struct bpf_insn *insn_buf, 5097 struct bpf_insn *insn_buf,
@@ -5170,6 +5549,15 @@ const struct bpf_verifier_ops cg_sock_verifier_ops = {
5170const struct bpf_prog_ops cg_sock_prog_ops = { 5549const struct bpf_prog_ops cg_sock_prog_ops = {
5171}; 5550};
5172 5551
5552const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
5553 .get_func_proto = sock_addr_func_proto,
5554 .is_valid_access = sock_addr_is_valid_access,
5555 .convert_ctx_access = sock_addr_convert_ctx_access,
5556};
5557
5558const struct bpf_prog_ops cg_sock_addr_prog_ops = {
5559};
5560
5173const struct bpf_verifier_ops sock_ops_verifier_ops = { 5561const struct bpf_verifier_ops sock_ops_verifier_ops = {
5174 .get_func_proto = sock_ops_func_proto, 5562 .get_func_proto = sock_ops_func_proto,
5175 .is_valid_access = sock_ops_is_valid_access, 5563 .is_valid_access = sock_ops_is_valid_access,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e8c7fad8c329..142d4c35b493 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -432,23 +432,37 @@ EXPORT_SYMBOL(inet_release);
432 432
433int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 433int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
434{ 434{
435 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
436 struct sock *sk = sock->sk; 435 struct sock *sk = sock->sk;
437 struct inet_sock *inet = inet_sk(sk);
438 struct net *net = sock_net(sk);
439 unsigned short snum;
440 int chk_addr_ret;
441 u32 tb_id = RT_TABLE_LOCAL;
442 int err; 436 int err;
443 437
444 /* If the socket has its own bind function then use it. (RAW) */ 438 /* If the socket has its own bind function then use it. (RAW) */
445 if (sk->sk_prot->bind) { 439 if (sk->sk_prot->bind) {
446 err = sk->sk_prot->bind(sk, uaddr, addr_len); 440 return sk->sk_prot->bind(sk, uaddr, addr_len);
447 goto out;
448 } 441 }
449 err = -EINVAL;
450 if (addr_len < sizeof(struct sockaddr_in)) 442 if (addr_len < sizeof(struct sockaddr_in))
451 goto out; 443 return -EINVAL;
444
445 /* BPF prog is run before any checks are done so that if the prog
446 * changes context in a wrong way it will be caught.
447 */
448 err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr);
449 if (err)
450 return err;
451
452 return __inet_bind(sk, uaddr, addr_len, false, true);
453}
454EXPORT_SYMBOL(inet_bind);
455
456int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
457 bool force_bind_address_no_port, bool with_lock)
458{
459 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
460 struct inet_sock *inet = inet_sk(sk);
461 struct net *net = sock_net(sk);
462 unsigned short snum;
463 int chk_addr_ret;
464 u32 tb_id = RT_TABLE_LOCAL;
465 int err;
452 466
453 if (addr->sin_family != AF_INET) { 467 if (addr->sin_family != AF_INET) {
454 /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET) 468 /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
@@ -492,7 +506,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
492 * would be illegal to use them (multicast/broadcast) in 506 * would be illegal to use them (multicast/broadcast) in
493 * which case the sending device address is used. 507 * which case the sending device address is used.
494 */ 508 */
495 lock_sock(sk); 509 if (with_lock)
510 lock_sock(sk);
496 511
497 /* Check these errors (active socket, double bind). */ 512 /* Check these errors (active socket, double bind). */
498 err = -EINVAL; 513 err = -EINVAL;
@@ -504,11 +519,18 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
504 inet->inet_saddr = 0; /* Use device */ 519 inet->inet_saddr = 0; /* Use device */
505 520
506 /* Make sure we are allowed to bind here. */ 521 /* Make sure we are allowed to bind here. */
507 if ((snum || !inet->bind_address_no_port) && 522 if (snum || !(inet->bind_address_no_port ||
508 sk->sk_prot->get_port(sk, snum)) { 523 force_bind_address_no_port)) {
509 inet->inet_saddr = inet->inet_rcv_saddr = 0; 524 if (sk->sk_prot->get_port(sk, snum)) {
510 err = -EADDRINUSE; 525 inet->inet_saddr = inet->inet_rcv_saddr = 0;
511 goto out_release_sock; 526 err = -EADDRINUSE;
527 goto out_release_sock;
528 }
529 err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
530 if (err) {
531 inet->inet_saddr = inet->inet_rcv_saddr = 0;
532 goto out_release_sock;
533 }
512 } 534 }
513 535
514 if (inet->inet_rcv_saddr) 536 if (inet->inet_rcv_saddr)
@@ -521,22 +543,29 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
521 sk_dst_reset(sk); 543 sk_dst_reset(sk);
522 err = 0; 544 err = 0;
523out_release_sock: 545out_release_sock:
524 release_sock(sk); 546 if (with_lock)
547 release_sock(sk);
525out: 548out:
526 return err; 549 return err;
527} 550}
528EXPORT_SYMBOL(inet_bind);
529 551
530int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, 552int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
531 int addr_len, int flags) 553 int addr_len, int flags)
532{ 554{
533 struct sock *sk = sock->sk; 555 struct sock *sk = sock->sk;
556 int err;
534 557
535 if (addr_len < sizeof(uaddr->sa_family)) 558 if (addr_len < sizeof(uaddr->sa_family))
536 return -EINVAL; 559 return -EINVAL;
537 if (uaddr->sa_family == AF_UNSPEC) 560 if (uaddr->sa_family == AF_UNSPEC)
538 return sk->sk_prot->disconnect(sk, flags); 561 return sk->sk_prot->disconnect(sk, flags);
539 562
563 if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
564 err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
565 if (err)
566 return err;
567 }
568
540 if (!inet_sk(sk)->inet_num && inet_autobind(sk)) 569 if (!inet_sk(sk)->inet_num && inet_autobind(sk))
541 return -EAGAIN; 570 return -EAGAIN;
542 return sk->sk_prot->connect(sk, uaddr, addr_len); 571 return sk->sk_prot->connect(sk, uaddr, addr_len);
@@ -617,6 +646,12 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
617 if (sk->sk_state != TCP_CLOSE) 646 if (sk->sk_state != TCP_CLOSE)
618 goto out; 647 goto out;
619 648
649 if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
650 err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
651 if (err)
652 goto out;
653 }
654
620 err = sk->sk_prot->connect(sk, uaddr, addr_len); 655 err = sk->sk_prot->connect(sk, uaddr, addr_len);
621 if (err < 0) 656 if (err < 0)
622 goto out; 657 goto out;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2c6aec2643e8..3c11d992d784 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -140,6 +140,21 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
140} 140}
141EXPORT_SYMBOL_GPL(tcp_twsk_unique); 141EXPORT_SYMBOL_GPL(tcp_twsk_unique);
142 142
143static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
144 int addr_len)
145{
146 /* This check is replicated from tcp_v4_connect() and intended to
147 * prevent BPF program called below from accessing bytes that are out
148 * of the bound specified by user in addr_len.
149 */
150 if (addr_len < sizeof(struct sockaddr_in))
151 return -EINVAL;
152
153 sock_owned_by_me(sk);
154
155 return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
156}
157
143/* This will initiate an outgoing connection. */ 158/* This will initiate an outgoing connection. */
144int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 159int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
145{ 160{
@@ -2409,6 +2424,7 @@ struct proto tcp_prot = {
2409 .name = "TCP", 2424 .name = "TCP",
2410 .owner = THIS_MODULE, 2425 .owner = THIS_MODULE,
2411 .close = tcp_close, 2426 .close = tcp_close,
2427 .pre_connect = tcp_v4_pre_connect,
2412 .connect = tcp_v4_connect, 2428 .connect = tcp_v4_connect,
2413 .disconnect = tcp_disconnect, 2429 .disconnect = tcp_disconnect,
2414 .accept = inet_csk_accept, 2430 .accept = inet_csk_accept,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 908fc02fb4f8..9c6c77fec963 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1658,6 +1658,19 @@ csum_copy_err:
1658 goto try_again; 1658 goto try_again;
1659} 1659}
1660 1660
1661int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
1662{
1663 /* This check is replicated from __ip4_datagram_connect() and
1664 * intended to prevent BPF program called below from accessing bytes
1665 * that are out of the bound specified by user in addr_len.
1666 */
1667 if (addr_len < sizeof(struct sockaddr_in))
1668 return -EINVAL;
1669
1670 return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
1671}
1672EXPORT_SYMBOL(udp_pre_connect);
1673
1661int __udp_disconnect(struct sock *sk, int flags) 1674int __udp_disconnect(struct sock *sk, int flags)
1662{ 1675{
1663 struct inet_sock *inet = inet_sk(sk); 1676 struct inet_sock *inet = inet_sk(sk);
@@ -2530,6 +2543,7 @@ struct proto udp_prot = {
2530 .name = "UDP", 2543 .name = "UDP",
2531 .owner = THIS_MODULE, 2544 .owner = THIS_MODULE,
2532 .close = udp_lib_close, 2545 .close = udp_lib_close,
2546 .pre_connect = udp_pre_connect,
2533 .connect = ip4_datagram_connect, 2547 .connect = ip4_datagram_connect,
2534 .disconnect = udp_disconnect, 2548 .disconnect = udp_disconnect,
2535 .ioctl = udp_ioctl, 2549 .ioctl = udp_ioctl,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index dbbe04018813..41f50472679d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -277,15 +277,7 @@ out_rcu_unlock:
277/* bind for INET6 API */ 277/* bind for INET6 API */
278int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 278int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
279{ 279{
280 struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
281 struct sock *sk = sock->sk; 280 struct sock *sk = sock->sk;
282 struct inet_sock *inet = inet_sk(sk);
283 struct ipv6_pinfo *np = inet6_sk(sk);
284 struct net *net = sock_net(sk);
285 __be32 v4addr = 0;
286 unsigned short snum;
287 bool saved_ipv6only;
288 int addr_type = 0;
289 int err = 0; 281 int err = 0;
290 282
291 /* If the socket has its own bind function then use it. */ 283 /* If the socket has its own bind function then use it. */
@@ -295,11 +287,35 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
295 if (addr_len < SIN6_LEN_RFC2133) 287 if (addr_len < SIN6_LEN_RFC2133)
296 return -EINVAL; 288 return -EINVAL;
297 289
290 /* BPF prog is run before any checks are done so that if the prog
291 * changes context in a wrong way it will be caught.
292 */
293 err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
294 if (err)
295 return err;
296
297 return __inet6_bind(sk, uaddr, addr_len, false, true);
298}
299EXPORT_SYMBOL(inet6_bind);
300
301int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
302 bool force_bind_address_no_port, bool with_lock)
303{
304 struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
305 struct inet_sock *inet = inet_sk(sk);
306 struct ipv6_pinfo *np = inet6_sk(sk);
307 struct net *net = sock_net(sk);
308 __be32 v4addr = 0;
309 unsigned short snum;
310 bool saved_ipv6only;
311 int addr_type = 0;
312 int err = 0;
313
298 if (addr->sin6_family != AF_INET6) 314 if (addr->sin6_family != AF_INET6)
299 return -EAFNOSUPPORT; 315 return -EAFNOSUPPORT;
300 316
301 addr_type = ipv6_addr_type(&addr->sin6_addr); 317 addr_type = ipv6_addr_type(&addr->sin6_addr);
302 if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) 318 if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM)
303 return -EINVAL; 319 return -EINVAL;
304 320
305 snum = ntohs(addr->sin6_port); 321 snum = ntohs(addr->sin6_port);
@@ -307,7 +323,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
307 !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) 323 !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
308 return -EACCES; 324 return -EACCES;
309 325
310 lock_sock(sk); 326 if (with_lock)
327 lock_sock(sk);
311 328
312 /* Check these errors (active socket, double bind). */ 329 /* Check these errors (active socket, double bind). */
313 if (sk->sk_state != TCP_CLOSE || inet->inet_num) { 330 if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
@@ -395,12 +412,20 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
395 sk->sk_ipv6only = 1; 412 sk->sk_ipv6only = 1;
396 413
397 /* Make sure we are allowed to bind here. */ 414 /* Make sure we are allowed to bind here. */
398 if ((snum || !inet->bind_address_no_port) && 415 if (snum || !(inet->bind_address_no_port ||
399 sk->sk_prot->get_port(sk, snum)) { 416 force_bind_address_no_port)) {
400 sk->sk_ipv6only = saved_ipv6only; 417 if (sk->sk_prot->get_port(sk, snum)) {
401 inet_reset_saddr(sk); 418 sk->sk_ipv6only = saved_ipv6only;
402 err = -EADDRINUSE; 419 inet_reset_saddr(sk);
403 goto out; 420 err = -EADDRINUSE;
421 goto out;
422 }
423 err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
424 if (err) {
425 sk->sk_ipv6only = saved_ipv6only;
426 inet_reset_saddr(sk);
427 goto out;
428 }
404 } 429 }
405 430
406 if (addr_type != IPV6_ADDR_ANY) 431 if (addr_type != IPV6_ADDR_ANY)
@@ -411,13 +436,13 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
411 inet->inet_dport = 0; 436 inet->inet_dport = 0;
412 inet->inet_daddr = 0; 437 inet->inet_daddr = 0;
413out: 438out:
414 release_sock(sk); 439 if (with_lock)
440 release_sock(sk);
415 return err; 441 return err;
416out_unlock: 442out_unlock:
417 rcu_read_unlock(); 443 rcu_read_unlock();
418 goto out; 444 goto out;
419} 445}
420EXPORT_SYMBOL(inet6_bind);
421 446
422int inet6_release(struct socket *sock) 447int inet6_release(struct socket *sock)
423{ 448{
@@ -869,6 +894,10 @@ static const struct ipv6_stub ipv6_stub_impl = {
869 .nd_tbl = &nd_tbl, 894 .nd_tbl = &nd_tbl,
870}; 895};
871 896
897static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
898 .inet6_bind = __inet6_bind,
899};
900
872static int __init inet6_init(void) 901static int __init inet6_init(void)
873{ 902{
874 struct list_head *r; 903 struct list_head *r;
@@ -1025,6 +1054,7 @@ static int __init inet6_init(void)
1025 /* ensure that ipv6 stubs are visible only after ipv6 is ready */ 1054 /* ensure that ipv6 stubs are visible only after ipv6 is ready */
1026 wmb(); 1055 wmb();
1027 ipv6_stub = &ipv6_stub_impl; 1056 ipv6_stub = &ipv6_stub_impl;
1057 ipv6_bpf_stub = &ipv6_bpf_stub_impl;
1028out: 1058out:
1029 return err; 1059 return err;
1030 1060
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5425d7b100ee..6469b741cf5a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -117,6 +117,21 @@ static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
117 ipv6_hdr(skb)->saddr.s6_addr32); 117 ipv6_hdr(skb)->saddr.s6_addr32);
118} 118}
119 119
120static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
121 int addr_len)
122{
123 /* This check is replicated from tcp_v6_connect() and intended to
124 * prevent BPF program called below from accessing bytes that are out
125 * of the bound specified by user in addr_len.
126 */
127 if (addr_len < SIN6_LEN_RFC2133)
128 return -EINVAL;
129
130 sock_owned_by_me(sk);
131
132 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
133}
134
120static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 135static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
121 int addr_len) 136 int addr_len)
122{ 137{
@@ -1925,6 +1940,7 @@ struct proto tcpv6_prot = {
1925 .name = "TCPv6", 1940 .name = "TCPv6",
1926 .owner = THIS_MODULE, 1941 .owner = THIS_MODULE,
1927 .close = tcp_close, 1942 .close = tcp_close,
1943 .pre_connect = tcp_v6_pre_connect,
1928 .connect = tcp_v6_connect, 1944 .connect = tcp_v6_connect,
1929 .disconnect = tcp_disconnect, 1945 .disconnect = tcp_disconnect,
1930 .accept = inet_csk_accept, 1946 .accept = inet_csk_accept,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ad30f5e31969..6861ed479469 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -957,6 +957,25 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
957 } 957 }
958} 958}
959 959
960static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
961 int addr_len)
962{
963 /* The following checks are replicated from __ip6_datagram_connect()
964 * and intended to prevent BPF program called below from accessing
965 * bytes that are out of the bound specified by user in addr_len.
966 */
967 if (uaddr->sa_family == AF_INET) {
968 if (__ipv6_only_sock(sk))
969 return -EAFNOSUPPORT;
970 return udp_pre_connect(sk, uaddr, addr_len);
971 }
972
973 if (addr_len < SIN6_LEN_RFC2133)
974 return -EINVAL;
975
976 return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
977}
978
960/** 979/**
961 * udp6_hwcsum_outgoing - handle outgoing HW checksumming 980 * udp6_hwcsum_outgoing - handle outgoing HW checksumming
962 * @sk: socket we are sending on 981 * @sk: socket we are sending on
@@ -1512,6 +1531,7 @@ struct proto udpv6_prot = {
1512 .name = "UDPv6", 1531 .name = "UDPv6",
1513 .owner = THIS_MODULE, 1532 .owner = THIS_MODULE,
1514 .close = udp_lib_close, 1533 .close = udp_lib_close,
1534 .pre_connect = udpv6_pre_connect,
1515 .connect = ip6_datagram_connect, 1535 .connect = ip6_datagram_connect,
1516 .disconnect = udp_disconnect, 1536 .disconnect = udp_disconnect,
1517 .ioctl = udp_ioctl, 1537 .ioctl = udp_ioctl,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 58060bec999d..9d07465023a2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -136,6 +136,7 @@ enum bpf_prog_type {
136 BPF_PROG_TYPE_CGROUP_DEVICE, 136 BPF_PROG_TYPE_CGROUP_DEVICE,
137 BPF_PROG_TYPE_SK_MSG, 137 BPF_PROG_TYPE_SK_MSG,
138 BPF_PROG_TYPE_RAW_TRACEPOINT, 138 BPF_PROG_TYPE_RAW_TRACEPOINT,
139 BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
139}; 140};
140 141
141enum bpf_attach_type { 142enum bpf_attach_type {
@@ -147,6 +148,12 @@ enum bpf_attach_type {
147 BPF_SK_SKB_STREAM_VERDICT, 148 BPF_SK_SKB_STREAM_VERDICT,
148 BPF_CGROUP_DEVICE, 149 BPF_CGROUP_DEVICE,
149 BPF_SK_MSG_VERDICT, 150 BPF_SK_MSG_VERDICT,
151 BPF_CGROUP_INET4_BIND,
152 BPF_CGROUP_INET6_BIND,
153 BPF_CGROUP_INET4_CONNECT,
154 BPF_CGROUP_INET6_CONNECT,
155 BPF_CGROUP_INET4_POST_BIND,
156 BPF_CGROUP_INET6_POST_BIND,
150 __MAX_BPF_ATTACH_TYPE 157 __MAX_BPF_ATTACH_TYPE
151}; 158};
152 159
@@ -296,6 +303,11 @@ union bpf_attr {
296 __u32 prog_flags; 303 __u32 prog_flags;
297 char prog_name[BPF_OBJ_NAME_LEN]; 304 char prog_name[BPF_OBJ_NAME_LEN];
298 __u32 prog_ifindex; /* ifindex of netdev to prep for */ 305 __u32 prog_ifindex; /* ifindex of netdev to prep for */
306 /* For some prog types expected attach type must be known at
307 * load time to verify attach type specific parts of prog
308 * (context accesses, allowed helpers, etc).
309 */
310 __u32 expected_attach_type;
299 }; 311 };
300 312
301 struct { /* anonymous struct used by BPF_OBJ_* commands */ 313 struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -736,6 +748,13 @@ union bpf_attr {
736 * @flags: reserved for future use 748 * @flags: reserved for future use
737 * Return: SK_PASS 749 * Return: SK_PASS
738 * 750 *
751 * int bpf_bind(ctx, addr, addr_len)
752 * Bind socket to address. Only binding to IP is supported, no port can be
753 * set in addr.
754 * @ctx: pointer to context of type bpf_sock_addr
755 * @addr: pointer to struct sockaddr to bind socket to
756 * @addr_len: length of sockaddr structure
757 * Return: 0 on success or negative error code
739 */ 758 */
740#define __BPF_FUNC_MAPPER(FN) \ 759#define __BPF_FUNC_MAPPER(FN) \
741 FN(unspec), \ 760 FN(unspec), \
@@ -801,7 +820,8 @@ union bpf_attr {
801 FN(msg_redirect_map), \ 820 FN(msg_redirect_map), \
802 FN(msg_apply_bytes), \ 821 FN(msg_apply_bytes), \
803 FN(msg_cork_bytes), \ 822 FN(msg_cork_bytes), \
804 FN(msg_pull_data), 823 FN(msg_pull_data), \
824 FN(bind),
805 825
806/* integer value in 'imm' field of BPF_CALL instruction selects which helper 826/* integer value in 'imm' field of BPF_CALL instruction selects which helper
807 * function eBPF program intends to call 827 * function eBPF program intends to call
@@ -929,6 +949,15 @@ struct bpf_sock {
929 __u32 protocol; 949 __u32 protocol;
930 __u32 mark; 950 __u32 mark;
931 __u32 priority; 951 __u32 priority;
952 __u32 src_ip4; /* Allows 1,2,4-byte read.
953 * Stored in network byte order.
954 */
955 __u32 src_ip6[4]; /* Allows 1,2,4-byte read.
956 * Stored in network byte order.
957 */
958 __u32 src_port; /* Allows 4-byte read.
959 * Stored in host byte order
960 */
932}; 961};
933 962
934#define XDP_PACKET_HEADROOM 256 963#define XDP_PACKET_HEADROOM 256
@@ -1004,6 +1033,26 @@ struct bpf_map_info {
1004 __u64 netns_ino; 1033 __u64 netns_ino;
1005} __attribute__((aligned(8))); 1034} __attribute__((aligned(8)));
1006 1035
1036/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
1037 * by user and intended to be used by socket (e.g. to bind to, depends on
1038 * attach type).
1039 */
1040struct bpf_sock_addr {
1041 __u32 user_family; /* Allows 4-byte read, but no write. */
1042 __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write.
1043 * Stored in network byte order.
1044 */
1045 __u32 user_ip6[4]; /* Allows 1,2,4-byte read and 4-byte write.
1046 * Stored in network byte order.
1047 */
1048 __u32 user_port; /* Allows 4-byte read and write.
1049 * Stored in network byte order
1050 */
1051 __u32 family; /* Allows 4-byte read, but no write */
1052 __u32 type; /* Allows 4-byte read, but no write */
1053 __u32 protocol; /* Allows 4-byte read, but no write */
1054};
1055
1007/* User bpf_sock_ops struct to access socket values and specify request ops 1056/* User bpf_sock_ops struct to access socket values and specify request ops
1008 * and their replies. 1057 * and their replies.
1009 * Some of this fields are in network (bigendian) byte order and may need 1058 * Some of this fields are in network (bigendian) byte order and may need
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index e0500055f1a6..acbb3f8b3bec 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -146,26 +146,30 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
146 -1); 146 -1);
147} 147}
148 148
149int bpf_load_program_name(enum bpf_prog_type type, const char *name, 149int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
150 const struct bpf_insn *insns, 150 char *log_buf, size_t log_buf_sz)
151 size_t insns_cnt, const char *license,
152 __u32 kern_version, char *log_buf,
153 size_t log_buf_sz)
154{ 151{
155 int fd;
156 union bpf_attr attr; 152 union bpf_attr attr;
157 __u32 name_len = name ? strlen(name) : 0; 153 __u32 name_len;
154 int fd;
155
156 if (!load_attr)
157 return -EINVAL;
158
159 name_len = load_attr->name ? strlen(load_attr->name) : 0;
158 160
159 bzero(&attr, sizeof(attr)); 161 bzero(&attr, sizeof(attr));
160 attr.prog_type = type; 162 attr.prog_type = load_attr->prog_type;
161 attr.insn_cnt = (__u32)insns_cnt; 163 attr.expected_attach_type = load_attr->expected_attach_type;
162 attr.insns = ptr_to_u64(insns); 164 attr.insn_cnt = (__u32)load_attr->insns_cnt;
163 attr.license = ptr_to_u64(license); 165 attr.insns = ptr_to_u64(load_attr->insns);
166 attr.license = ptr_to_u64(load_attr->license);
164 attr.log_buf = ptr_to_u64(NULL); 167 attr.log_buf = ptr_to_u64(NULL);
165 attr.log_size = 0; 168 attr.log_size = 0;
166 attr.log_level = 0; 169 attr.log_level = 0;
167 attr.kern_version = kern_version; 170 attr.kern_version = load_attr->kern_version;
168 memcpy(attr.prog_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); 171 memcpy(attr.prog_name, load_attr->name,
172 min(name_len, BPF_OBJ_NAME_LEN - 1));
169 173
170 fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); 174 fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
171 if (fd >= 0 || !log_buf || !log_buf_sz) 175 if (fd >= 0 || !log_buf || !log_buf_sz)
@@ -184,8 +188,18 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
184 __u32 kern_version, char *log_buf, 188 __u32 kern_version, char *log_buf,
185 size_t log_buf_sz) 189 size_t log_buf_sz)
186{ 190{
187 return bpf_load_program_name(type, NULL, insns, insns_cnt, license, 191 struct bpf_load_program_attr load_attr;
188 kern_version, log_buf, log_buf_sz); 192
193 memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
194 load_attr.prog_type = type;
195 load_attr.expected_attach_type = 0;
196 load_attr.name = NULL;
197 load_attr.insns = insns;
198 load_attr.insns_cnt = insns_cnt;
199 load_attr.license = license;
200 load_attr.kern_version = kern_version;
201
202 return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
189} 203}
190 204
191int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, 205int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index ee59342c6f42..39f6a0d64a3b 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -41,13 +41,20 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
41 int key_size, int inner_map_fd, int max_entries, 41 int key_size, int inner_map_fd, int max_entries,
42 __u32 map_flags); 42 __u32 map_flags);
43 43
44struct bpf_load_program_attr {
45 enum bpf_prog_type prog_type;
46 enum bpf_attach_type expected_attach_type;
47 const char *name;
48 const struct bpf_insn *insns;
49 size_t insns_cnt;
50 const char *license;
51 __u32 kern_version;
52};
53
44/* Recommend log buffer size */ 54/* Recommend log buffer size */
45#define BPF_LOG_BUF_SIZE (256 * 1024) 55#define BPF_LOG_BUF_SIZE (256 * 1024)
46int bpf_load_program_name(enum bpf_prog_type type, const char *name, 56int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
47 const struct bpf_insn *insns, 57 char *log_buf, size_t log_buf_sz);
48 size_t insns_cnt, const char *license,
49 __u32 kern_version, char *log_buf,
50 size_t log_buf_sz);
51int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, 58int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
52 size_t insns_cnt, const char *license, 59 size_t insns_cnt, const char *license,
53 __u32 kern_version, char *log_buf, 60 __u32 kern_version, char *log_buf,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 64a8fc384186..5922443063f0 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -203,6 +203,8 @@ struct bpf_program {
203 struct bpf_object *obj; 203 struct bpf_object *obj;
204 void *priv; 204 void *priv;
205 bpf_program_clear_priv_t clear_priv; 205 bpf_program_clear_priv_t clear_priv;
206
207 enum bpf_attach_type expected_attach_type;
206}; 208};
207 209
208struct bpf_map { 210struct bpf_map {
@@ -1162,21 +1164,31 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
1162} 1164}
1163 1165
1164static int 1166static int
1165load_program(enum bpf_prog_type type, const char *name, struct bpf_insn *insns, 1167load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
1166 int insns_cnt, char *license, u32 kern_version, int *pfd) 1168 const char *name, struct bpf_insn *insns, int insns_cnt,
1169 char *license, u32 kern_version, int *pfd)
1167{ 1170{
1168 int ret; 1171 struct bpf_load_program_attr load_attr;
1169 char *log_buf; 1172 char *log_buf;
1173 int ret;
1170 1174
1171 if (!insns || !insns_cnt) 1175 memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
1176 load_attr.prog_type = type;
1177 load_attr.expected_attach_type = expected_attach_type;
1178 load_attr.name = name;
1179 load_attr.insns = insns;
1180 load_attr.insns_cnt = insns_cnt;
1181 load_attr.license = license;
1182 load_attr.kern_version = kern_version;
1183
1184 if (!load_attr.insns || !load_attr.insns_cnt)
1172 return -EINVAL; 1185 return -EINVAL;
1173 1186
1174 log_buf = malloc(BPF_LOG_BUF_SIZE); 1187 log_buf = malloc(BPF_LOG_BUF_SIZE);
1175 if (!log_buf) 1188 if (!log_buf)
1176 pr_warning("Alloc log buffer for bpf loader error, continue without log\n"); 1189 pr_warning("Alloc log buffer for bpf loader error, continue without log\n");
1177 1190
1178 ret = bpf_load_program_name(type, name, insns, insns_cnt, license, 1191 ret = bpf_load_program_xattr(&load_attr, log_buf, BPF_LOG_BUF_SIZE);
1179 kern_version, log_buf, BPF_LOG_BUF_SIZE);
1180 1192
1181 if (ret >= 0) { 1193 if (ret >= 0) {
1182 *pfd = ret; 1194 *pfd = ret;
@@ -1192,18 +1204,18 @@ load_program(enum bpf_prog_type type, const char *name, struct bpf_insn *insns,
1192 pr_warning("-- BEGIN DUMP LOG ---\n"); 1204 pr_warning("-- BEGIN DUMP LOG ---\n");
1193 pr_warning("\n%s\n", log_buf); 1205 pr_warning("\n%s\n", log_buf);
1194 pr_warning("-- END LOG --\n"); 1206 pr_warning("-- END LOG --\n");
1195 } else if (insns_cnt >= BPF_MAXINSNS) { 1207 } else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
1196 pr_warning("Program too large (%d insns), at most %d insns\n", 1208 pr_warning("Program too large (%zu insns), at most %d insns\n",
1197 insns_cnt, BPF_MAXINSNS); 1209 load_attr.insns_cnt, BPF_MAXINSNS);
1198 ret = -LIBBPF_ERRNO__PROG2BIG; 1210 ret = -LIBBPF_ERRNO__PROG2BIG;
1199 } else { 1211 } else {
1200 /* Wrong program type? */ 1212 /* Wrong program type? */
1201 if (type != BPF_PROG_TYPE_KPROBE) { 1213 if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
1202 int fd; 1214 int fd;
1203 1215
1204 fd = bpf_load_program_name(BPF_PROG_TYPE_KPROBE, name, 1216 load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
1205 insns, insns_cnt, license, 1217 load_attr.expected_attach_type = 0;
1206 kern_version, NULL, 0); 1218 fd = bpf_load_program_xattr(&load_attr, NULL, 0);
1207 if (fd >= 0) { 1219 if (fd >= 0) {
1208 close(fd); 1220 close(fd);
1209 ret = -LIBBPF_ERRNO__PROGTYPE; 1221 ret = -LIBBPF_ERRNO__PROGTYPE;
@@ -1247,8 +1259,9 @@ bpf_program__load(struct bpf_program *prog,
1247 pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n", 1259 pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
1248 prog->section_name, prog->instances.nr); 1260 prog->section_name, prog->instances.nr);
1249 } 1261 }
1250 err = load_program(prog->type, prog->name, prog->insns, 1262 err = load_program(prog->type, prog->expected_attach_type,
1251 prog->insns_cnt, license, kern_version, &fd); 1263 prog->name, prog->insns, prog->insns_cnt,
1264 license, kern_version, &fd);
1252 if (!err) 1265 if (!err)
1253 prog->instances.fds[0] = fd; 1266 prog->instances.fds[0] = fd;
1254 goto out; 1267 goto out;
@@ -1276,8 +1289,8 @@ bpf_program__load(struct bpf_program *prog,
1276 continue; 1289 continue;
1277 } 1290 }
1278 1291
1279 err = load_program(prog->type, prog->name, 1292 err = load_program(prog->type, prog->expected_attach_type,
1280 result.new_insn_ptr, 1293 prog->name, result.new_insn_ptr,
1281 result.new_insn_cnt, 1294 result.new_insn_cnt,
1282 license, kern_version, &fd); 1295 license, kern_version, &fd);
1283 1296
@@ -1835,11 +1848,25 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
1835BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); 1848BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
1836BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); 1849BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
1837 1850
1838#define BPF_PROG_SEC(string, type) { string, sizeof(string) - 1, type } 1851static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
1852 enum bpf_attach_type type)
1853{
1854 prog->expected_attach_type = type;
1855}
1856
1857#define BPF_PROG_SEC_FULL(string, ptype, atype) \
1858 { string, sizeof(string) - 1, ptype, atype }
1859
1860#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
1861
1862#define BPF_SA_PROG_SEC(string, ptype) \
1863 BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype)
1864
1839static const struct { 1865static const struct {
1840 const char *sec; 1866 const char *sec;
1841 size_t len; 1867 size_t len;
1842 enum bpf_prog_type prog_type; 1868 enum bpf_prog_type prog_type;
1869 enum bpf_attach_type expected_attach_type;
1843} section_names[] = { 1870} section_names[] = {
1844 BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), 1871 BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
1845 BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), 1872 BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE),
@@ -1858,10 +1885,17 @@ static const struct {
1858 BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS), 1885 BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS),
1859 BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB), 1886 BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB),
1860 BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG), 1887 BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG),
1888 BPF_SA_PROG_SEC("cgroup/bind4", BPF_CGROUP_INET4_BIND),
1889 BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
1890 BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
1891 BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
1861}; 1892};
1893
1862#undef BPF_PROG_SEC 1894#undef BPF_PROG_SEC
1895#undef BPF_PROG_SEC_FULL
1896#undef BPF_SA_PROG_SEC
1863 1897
1864static enum bpf_prog_type bpf_program__guess_type(struct bpf_program *prog) 1898static int bpf_program__identify_section(struct bpf_program *prog)
1865{ 1899{
1866 int i; 1900 int i;
1867 1901
@@ -1871,13 +1905,13 @@ static enum bpf_prog_type bpf_program__guess_type(struct bpf_program *prog)
1871 for (i = 0; i < ARRAY_SIZE(section_names); i++) 1905 for (i = 0; i < ARRAY_SIZE(section_names); i++)
1872 if (strncmp(prog->section_name, section_names[i].sec, 1906 if (strncmp(prog->section_name, section_names[i].sec,
1873 section_names[i].len) == 0) 1907 section_names[i].len) == 0)
1874 return section_names[i].prog_type; 1908 return i;
1875 1909
1876err: 1910err:
1877 pr_warning("failed to guess program type based on section name %s\n", 1911 pr_warning("failed to guess program type based on section name %s\n",
1878 prog->section_name); 1912 prog->section_name);
1879 1913
1880 return BPF_PROG_TYPE_UNSPEC; 1914 return -1;
1881} 1915}
1882 1916
1883int bpf_map__fd(struct bpf_map *map) 1917int bpf_map__fd(struct bpf_map *map)
@@ -1977,11 +2011,30 @@ long libbpf_get_error(const void *ptr)
1977int bpf_prog_load(const char *file, enum bpf_prog_type type, 2011int bpf_prog_load(const char *file, enum bpf_prog_type type,
1978 struct bpf_object **pobj, int *prog_fd) 2012 struct bpf_object **pobj, int *prog_fd)
1979{ 2013{
2014 struct bpf_prog_load_attr attr;
2015
2016 memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
2017 attr.file = file;
2018 attr.prog_type = type;
2019 attr.expected_attach_type = 0;
2020
2021 return bpf_prog_load_xattr(&attr, pobj, prog_fd);
2022}
2023
2024int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
2025 struct bpf_object **pobj, int *prog_fd)
2026{
1980 struct bpf_program *prog, *first_prog = NULL; 2027 struct bpf_program *prog, *first_prog = NULL;
2028 enum bpf_attach_type expected_attach_type;
2029 enum bpf_prog_type prog_type;
1981 struct bpf_object *obj; 2030 struct bpf_object *obj;
2031 int section_idx;
1982 int err; 2032 int err;
1983 2033
1984 obj = bpf_object__open(file); 2034 if (!attr)
2035 return -EINVAL;
2036
2037 obj = bpf_object__open(attr->file);
1985 if (IS_ERR(obj)) 2038 if (IS_ERR(obj))
1986 return -ENOENT; 2039 return -ENOENT;
1987 2040
@@ -1990,15 +2043,23 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
1990 * If type is not specified, try to guess it based on 2043 * If type is not specified, try to guess it based on
1991 * section name. 2044 * section name.
1992 */ 2045 */
1993 if (type == BPF_PROG_TYPE_UNSPEC) { 2046 prog_type = attr->prog_type;
1994 type = bpf_program__guess_type(prog); 2047 expected_attach_type = attr->expected_attach_type;
1995 if (type == BPF_PROG_TYPE_UNSPEC) { 2048 if (prog_type == BPF_PROG_TYPE_UNSPEC) {
2049 section_idx = bpf_program__identify_section(prog);
2050 if (section_idx < 0) {
1996 bpf_object__close(obj); 2051 bpf_object__close(obj);
1997 return -EINVAL; 2052 return -EINVAL;
1998 } 2053 }
2054 prog_type = section_names[section_idx].prog_type;
2055 expected_attach_type =
2056 section_names[section_idx].expected_attach_type;
1999 } 2057 }
2000 2058
2001 bpf_program__set_type(prog, type); 2059 bpf_program__set_type(prog, prog_type);
2060 bpf_program__set_expected_attach_type(prog,
2061 expected_attach_type);
2062
2002 if (prog->idx != obj->efile.text_shndx && !first_prog) 2063 if (prog->idx != obj->efile.text_shndx && !first_prog)
2003 first_prog = prog; 2064 first_prog = prog;
2004 } 2065 }
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index f85906533cdd..a3a62a583f27 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -248,6 +248,14 @@ int bpf_map__pin(struct bpf_map *map, const char *path);
248 248
249long libbpf_get_error(const void *ptr); 249long libbpf_get_error(const void *ptr);
250 250
251struct bpf_prog_load_attr {
252 const char *file;
253 enum bpf_prog_type prog_type;
254 enum bpf_attach_type expected_attach_type;
255};
256
257int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
258 struct bpf_object **pobj, int *prog_fd);
251int bpf_prog_load(const char *file, enum bpf_prog_type type, 259int bpf_prog_load(const char *file, enum bpf_prog_type type,
252 struct bpf_object **pobj, int *prog_fd); 260 struct bpf_object **pobj, int *prog_fd);
253 261
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f35fb02bdf56..0a315ddabbf4 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,21 +23,23 @@ urandom_read: urandom_read.c
23 23
24# Order correspond to 'make run_tests' order 24# Order correspond to 'make run_tests' order
25TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ 25TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
26 test_align test_verifier_log test_dev_cgroup test_tcpbpf_user 26 test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
27 test_sock test_sock_addr
27 28
28TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ 29TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
29 test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ 30 test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
30 sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ 31 sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
31 test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ 32 test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
32 sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ 33 sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
33 sockmap_tcp_msg_prog.o 34 sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o
34 35
35# Order correspond to 'make run_tests' order 36# Order correspond to 'make run_tests' order
36TEST_PROGS := test_kmod.sh \ 37TEST_PROGS := test_kmod.sh \
37 test_libbpf.sh \ 38 test_libbpf.sh \
38 test_xdp_redirect.sh \ 39 test_xdp_redirect.sh \
39 test_xdp_meta.sh \ 40 test_xdp_meta.sh \
40 test_offload.py 41 test_offload.py \
42 test_sock_addr.sh
41 43
42# Compile but not part of 'make run_tests' 44# Compile but not part of 'make run_tests'
43TEST_GEN_PROGS_EXTENDED = test_libbpf_open 45TEST_GEN_PROGS_EXTENDED = test_libbpf_open
@@ -51,6 +53,8 @@ $(TEST_GEN_PROGS): $(BPFOBJ)
51$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a 53$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
52 54
53$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c 55$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
56$(OUTPUT)/test_sock: cgroup_helpers.c
57$(OUTPUT)/test_sock_addr: cgroup_helpers.c
54 58
55.PHONY: force 59.PHONY: force
56 60
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 7cae376d8d0c..d8223d99f96d 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -94,6 +94,8 @@ static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
94 (void *) BPF_FUNC_msg_cork_bytes; 94 (void *) BPF_FUNC_msg_cork_bytes;
95static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = 95static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
96 (void *) BPF_FUNC_msg_pull_data; 96 (void *) BPF_FUNC_msg_pull_data;
97static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
98 (void *) BPF_FUNC_bind;
97 99
98/* llvm builtin functions that eBPF C program may use to 100/* llvm builtin functions that eBPF C program may use to
99 * emit BPF_LD_ABS and BPF_LD_IND instructions 101 * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/connect4_prog.c b/tools/testing/selftests/bpf/connect4_prog.c
new file mode 100644
index 000000000000..5a88a681d2ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/connect4_prog.c
@@ -0,0 +1,45 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2018 Facebook
3
4#include <string.h>
5
6#include <linux/stddef.h>
7#include <linux/bpf.h>
8#include <linux/in.h>
9#include <linux/in6.h>
10#include <sys/socket.h>
11
12#include "bpf_helpers.h"
13#include "bpf_endian.h"
14
15#define SRC_REWRITE_IP4 0x7f000004U
16#define DST_REWRITE_IP4 0x7f000001U
17#define DST_REWRITE_PORT4 4444
18
19int _version SEC("version") = 1;
20
21SEC("cgroup/connect4")
22int connect_v4_prog(struct bpf_sock_addr *ctx)
23{
24 struct sockaddr_in sa;
25
26 /* Rewrite destination. */
27 ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
28 ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
29
30 if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
31 /* Rewrite source. */
32 memset(&sa, 0, sizeof(sa));
33
34 sa.sin_family = AF_INET;
35 sa.sin_port = bpf_htons(0);
36 sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
37
38 if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
39 return 0;
40 }
41
42 return 1;
43}
44
45char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/connect6_prog.c b/tools/testing/selftests/bpf/connect6_prog.c
new file mode 100644
index 000000000000..8ea3f7d12dee
--- /dev/null
+++ b/tools/testing/selftests/bpf/connect6_prog.c
@@ -0,0 +1,61 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2018 Facebook
3
4#include <string.h>
5
6#include <linux/stddef.h>
7#include <linux/bpf.h>
8#include <linux/in.h>
9#include <linux/in6.h>
10#include <sys/socket.h>
11
12#include "bpf_helpers.h"
13#include "bpf_endian.h"
14
15#define SRC_REWRITE_IP6_0 0
16#define SRC_REWRITE_IP6_1 0
17#define SRC_REWRITE_IP6_2 0
18#define SRC_REWRITE_IP6_3 6
19
20#define DST_REWRITE_IP6_0 0
21#define DST_REWRITE_IP6_1 0
22#define DST_REWRITE_IP6_2 0
23#define DST_REWRITE_IP6_3 1
24
25#define DST_REWRITE_PORT6 6666
26
27int _version SEC("version") = 1;
28
29SEC("cgroup/connect6")
30int connect_v6_prog(struct bpf_sock_addr *ctx)
31{
32 struct sockaddr_in6 sa;
33
34 /* Rewrite destination. */
35 ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
36 ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
37 ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
38 ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
39
40 ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
41
42 if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
43 /* Rewrite source. */
44 memset(&sa, 0, sizeof(sa));
45
46 sa.sin6_family = AF_INET6;
47 sa.sin6_port = bpf_htons(0);
48
49 sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0);
50 sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1);
51 sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2);
52 sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3);
53
54 if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
55 return 0;
56 }
57
58 return 1;
59}
60
61char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
new file mode 100644
index 000000000000..73bb20cfb9b7
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -0,0 +1,479 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2018 Facebook
3
4#include <stdio.h>
5#include <unistd.h>
6
7#include <arpa/inet.h>
8#include <sys/types.h>
9#include <sys/socket.h>
10
11#include <linux/filter.h>
12
13#include <bpf/bpf.h>
14
15#include "cgroup_helpers.h"
16
17#ifndef ARRAY_SIZE
18# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
19#endif
20
21#define CG_PATH "/foo"
22#define MAX_INSNS 512
23
24char bpf_log_buf[BPF_LOG_BUF_SIZE];
25
26struct sock_test {
27 const char *descr;
28 /* BPF prog properties */
29 struct bpf_insn insns[MAX_INSNS];
30 enum bpf_attach_type expected_attach_type;
31 enum bpf_attach_type attach_type;
32 /* Socket properties */
33 int domain;
34 int type;
35 /* Endpoint to bind() to */
36 const char *ip;
37 unsigned short port;
38 /* Expected test result */
39 enum {
40 LOAD_REJECT,
41 ATTACH_REJECT,
42 BIND_REJECT,
43 SUCCESS,
44 } result;
45};
46
47static struct sock_test tests[] = {
48 {
49 "bind4 load with invalid access: src_ip6",
50 .insns = {
51 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
52 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
53 offsetof(struct bpf_sock, src_ip6[0])),
54 BPF_MOV64_IMM(BPF_REG_0, 1),
55 BPF_EXIT_INSN(),
56 },
57 BPF_CGROUP_INET4_POST_BIND,
58 BPF_CGROUP_INET4_POST_BIND,
59 0,
60 0,
61 NULL,
62 0,
63 LOAD_REJECT,
64 },
65 {
66 "bind4 load with invalid access: mark",
67 .insns = {
68 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
69 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
70 offsetof(struct bpf_sock, mark)),
71 BPF_MOV64_IMM(BPF_REG_0, 1),
72 BPF_EXIT_INSN(),
73 },
74 BPF_CGROUP_INET4_POST_BIND,
75 BPF_CGROUP_INET4_POST_BIND,
76 0,
77 0,
78 NULL,
79 0,
80 LOAD_REJECT,
81 },
82 {
83 "bind6 load with invalid access: src_ip4",
84 .insns = {
85 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
86 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
87 offsetof(struct bpf_sock, src_ip4)),
88 BPF_MOV64_IMM(BPF_REG_0, 1),
89 BPF_EXIT_INSN(),
90 },
91 BPF_CGROUP_INET6_POST_BIND,
92 BPF_CGROUP_INET6_POST_BIND,
93 0,
94 0,
95 NULL,
96 0,
97 LOAD_REJECT,
98 },
99 {
100 "sock_create load with invalid access: src_port",
101 .insns = {
102 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
103 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
104 offsetof(struct bpf_sock, src_port)),
105 BPF_MOV64_IMM(BPF_REG_0, 1),
106 BPF_EXIT_INSN(),
107 },
108 BPF_CGROUP_INET_SOCK_CREATE,
109 BPF_CGROUP_INET_SOCK_CREATE,
110 0,
111 0,
112 NULL,
113 0,
114 LOAD_REJECT,
115 },
116 {
117 "sock_create load w/o expected_attach_type (compat mode)",
118 .insns = {
119 BPF_MOV64_IMM(BPF_REG_0, 1),
120 BPF_EXIT_INSN(),
121 },
122 0,
123 BPF_CGROUP_INET_SOCK_CREATE,
124 AF_INET,
125 SOCK_STREAM,
126 "127.0.0.1",
127 8097,
128 SUCCESS,
129 },
130 {
131 "sock_create load w/ expected_attach_type",
132 .insns = {
133 BPF_MOV64_IMM(BPF_REG_0, 1),
134 BPF_EXIT_INSN(),
135 },
136 BPF_CGROUP_INET_SOCK_CREATE,
137 BPF_CGROUP_INET_SOCK_CREATE,
138 AF_INET,
139 SOCK_STREAM,
140 "127.0.0.1",
141 8097,
142 SUCCESS,
143 },
144 {
145 "attach type mismatch bind4 vs bind6",
146 .insns = {
147 BPF_MOV64_IMM(BPF_REG_0, 1),
148 BPF_EXIT_INSN(),
149 },
150 BPF_CGROUP_INET4_POST_BIND,
151 BPF_CGROUP_INET6_POST_BIND,
152 0,
153 0,
154 NULL,
155 0,
156 ATTACH_REJECT,
157 },
158 {
159 "attach type mismatch bind6 vs bind4",
160 .insns = {
161 BPF_MOV64_IMM(BPF_REG_0, 1),
162 BPF_EXIT_INSN(),
163 },
164 BPF_CGROUP_INET6_POST_BIND,
165 BPF_CGROUP_INET4_POST_BIND,
166 0,
167 0,
168 NULL,
169 0,
170 ATTACH_REJECT,
171 },
172 {
173 "attach type mismatch default vs bind4",
174 .insns = {
175 BPF_MOV64_IMM(BPF_REG_0, 1),
176 BPF_EXIT_INSN(),
177 },
178 0,
179 BPF_CGROUP_INET4_POST_BIND,
180 0,
181 0,
182 NULL,
183 0,
184 ATTACH_REJECT,
185 },
186 {
187 "attach type mismatch bind6 vs sock_create",
188 .insns = {
189 BPF_MOV64_IMM(BPF_REG_0, 1),
190 BPF_EXIT_INSN(),
191 },
192 BPF_CGROUP_INET6_POST_BIND,
193 BPF_CGROUP_INET_SOCK_CREATE,
194 0,
195 0,
196 NULL,
197 0,
198 ATTACH_REJECT,
199 },
200 {
201 "bind4 reject all",
202 .insns = {
203 BPF_MOV64_IMM(BPF_REG_0, 0),
204 BPF_EXIT_INSN(),
205 },
206 BPF_CGROUP_INET4_POST_BIND,
207 BPF_CGROUP_INET4_POST_BIND,
208 AF_INET,
209 SOCK_STREAM,
210 "0.0.0.0",
211 0,
212 BIND_REJECT,
213 },
214 {
215 "bind6 reject all",
216 .insns = {
217 BPF_MOV64_IMM(BPF_REG_0, 0),
218 BPF_EXIT_INSN(),
219 },
220 BPF_CGROUP_INET6_POST_BIND,
221 BPF_CGROUP_INET6_POST_BIND,
222 AF_INET6,
223 SOCK_STREAM,
224 "::",
225 0,
226 BIND_REJECT,
227 },
228 {
229 "bind6 deny specific IP & port",
230 .insns = {
231 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
232
233 /* if (ip == expected && port == expected) */
234 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
235 offsetof(struct bpf_sock, src_ip6[3])),
236 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x01000000, 4),
237 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
238 offsetof(struct bpf_sock, src_port)),
239 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
240
241 /* return DENY; */
242 BPF_MOV64_IMM(BPF_REG_0, 0),
243 BPF_JMP_A(1),
244
245 /* else return ALLOW; */
246 BPF_MOV64_IMM(BPF_REG_0, 1),
247 BPF_EXIT_INSN(),
248 },
249 BPF_CGROUP_INET6_POST_BIND,
250 BPF_CGROUP_INET6_POST_BIND,
251 AF_INET6,
252 SOCK_STREAM,
253 "::1",
254 8193,
255 BIND_REJECT,
256 },
257 {
258 "bind4 allow specific IP & port",
259 .insns = {
260 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
261
262 /* if (ip == expected && port == expected) */
263 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
264 offsetof(struct bpf_sock, src_ip4)),
265 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x0100007F, 4),
266 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
267 offsetof(struct bpf_sock, src_port)),
268 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
269
270 /* return ALLOW; */
271 BPF_MOV64_IMM(BPF_REG_0, 1),
272 BPF_JMP_A(1),
273
274 /* else return DENY; */
275 BPF_MOV64_IMM(BPF_REG_0, 0),
276 BPF_EXIT_INSN(),
277 },
278 BPF_CGROUP_INET4_POST_BIND,
279 BPF_CGROUP_INET4_POST_BIND,
280 AF_INET,
281 SOCK_STREAM,
282 "127.0.0.1",
283 4098,
284 SUCCESS,
285 },
286 {
287 "bind4 allow all",
288 .insns = {
289 BPF_MOV64_IMM(BPF_REG_0, 1),
290 BPF_EXIT_INSN(),
291 },
292 BPF_CGROUP_INET4_POST_BIND,
293 BPF_CGROUP_INET4_POST_BIND,
294 AF_INET,
295 SOCK_STREAM,
296 "0.0.0.0",
297 0,
298 SUCCESS,
299 },
300 {
301 "bind6 allow all",
302 .insns = {
303 BPF_MOV64_IMM(BPF_REG_0, 1),
304 BPF_EXIT_INSN(),
305 },
306 BPF_CGROUP_INET6_POST_BIND,
307 BPF_CGROUP_INET6_POST_BIND,
308 AF_INET6,
309 SOCK_STREAM,
310 "::",
311 0,
312 SUCCESS,
313 },
314};
315
316static size_t probe_prog_length(const struct bpf_insn *fp)
317{
318 size_t len;
319
320 for (len = MAX_INSNS - 1; len > 0; --len)
321 if (fp[len].code != 0 || fp[len].imm != 0)
322 break;
323 return len + 1;
324}
325
326static int load_sock_prog(const struct bpf_insn *prog,
327 enum bpf_attach_type attach_type)
328{
329 struct bpf_load_program_attr attr;
330
331 memset(&attr, 0, sizeof(struct bpf_load_program_attr));
332 attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
333 attr.expected_attach_type = attach_type;
334 attr.insns = prog;
335 attr.insns_cnt = probe_prog_length(attr.insns);
336 attr.license = "GPL";
337
338 return bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
339}
340
341static int attach_sock_prog(int cgfd, int progfd,
342 enum bpf_attach_type attach_type)
343{
344 return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE);
345}
346
347static int bind_sock(int domain, int type, const char *ip, unsigned short port)
348{
349 struct sockaddr_storage addr;
350 struct sockaddr_in6 *addr6;
351 struct sockaddr_in *addr4;
352 int sockfd = -1;
353 socklen_t len;
354 int err = 0;
355
356 sockfd = socket(domain, type, 0);
357 if (sockfd < 0)
358 goto err;
359
360 memset(&addr, 0, sizeof(addr));
361
362 if (domain == AF_INET) {
363 len = sizeof(struct sockaddr_in);
364 addr4 = (struct sockaddr_in *)&addr;
365 addr4->sin_family = domain;
366 addr4->sin_port = htons(port);
367 if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1)
368 goto err;
369 } else if (domain == AF_INET6) {
370 len = sizeof(struct sockaddr_in6);
371 addr6 = (struct sockaddr_in6 *)&addr;
372 addr6->sin6_family = domain;
373 addr6->sin6_port = htons(port);
374 if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1)
375 goto err;
376 } else {
377 goto err;
378 }
379
380 if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1)
381 goto err;
382
383 goto out;
384err:
385 err = -1;
386out:
387 close(sockfd);
388 return err;
389}
390
391static int run_test_case(int cgfd, const struct sock_test *test)
392{
393 int progfd = -1;
394 int err = 0;
395
396 printf("Test case: %s .. ", test->descr);
397 progfd = load_sock_prog(test->insns, test->expected_attach_type);
398 if (progfd < 0) {
399 if (test->result == LOAD_REJECT)
400 goto out;
401 else
402 goto err;
403 }
404
405 if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) {
406 if (test->result == ATTACH_REJECT)
407 goto out;
408 else
409 goto err;
410 }
411
412 if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) {
413 /* sys_bind() may fail for different reasons, errno has to be
414 * checked to confirm that BPF program rejected it.
415 */
416 if (test->result == BIND_REJECT && errno == EPERM)
417 goto out;
418 else
419 goto err;
420 }
421
422
423 if (test->result != SUCCESS)
424 goto err;
425
426 goto out;
427err:
428 err = -1;
429out:
430 /* Detaching w/o checking return code: best effort attempt. */
431 if (progfd != -1)
432 bpf_prog_detach(cgfd, test->attach_type);
433 close(progfd);
434 printf("[%s]\n", err ? "FAIL" : "PASS");
435 return err;
436}
437
438static int run_tests(int cgfd)
439{
440 int passes = 0;
441 int fails = 0;
442 int i;
443
444 for (i = 0; i < ARRAY_SIZE(tests); ++i) {
445 if (run_test_case(cgfd, &tests[i]))
446 ++fails;
447 else
448 ++passes;
449 }
450 printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
451 return fails ? -1 : 0;
452}
453
454int main(int argc, char **argv)
455{
456 int cgfd = -1;
457 int err = 0;
458
459 if (setup_cgroup_environment())
460 goto err;
461
462 cgfd = create_and_get_cgroup(CG_PATH);
463 if (!cgfd)
464 goto err;
465
466 if (join_cgroup(CG_PATH))
467 goto err;
468
469 if (run_tests(cgfd))
470 goto err;
471
472 goto out;
473err:
474 err = -1;
475out:
476 close(cgfd);
477 cleanup_cgroup_environment();
478 return err;
479}
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
new file mode 100644
index 000000000000..d488f20926e8
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -0,0 +1,588 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2018 Facebook
3
4#include <stdio.h>
5#include <stdlib.h>
6#include <unistd.h>
7
8#include <arpa/inet.h>
9#include <sys/types.h>
10#include <sys/socket.h>
11
12#include <linux/filter.h>
13
14#include <bpf/bpf.h>
15#include <bpf/libbpf.h>
16
17#include "cgroup_helpers.h"
18
19#define CG_PATH "/foo"
20#define CONNECT4_PROG_PATH "./connect4_prog.o"
21#define CONNECT6_PROG_PATH "./connect6_prog.o"
22
23#define SERV4_IP "192.168.1.254"
24#define SERV4_REWRITE_IP "127.0.0.1"
25#define SERV4_PORT 4040
26#define SERV4_REWRITE_PORT 4444
27
28#define SERV6_IP "face:b00c:1234:5678::abcd"
29#define SERV6_REWRITE_IP "::1"
30#define SERV6_PORT 6060
31#define SERV6_REWRITE_PORT 6666
32
33#define INET_NTOP_BUF 40
34
35typedef int (*load_fn)(enum bpf_attach_type, const char *comment);
36typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
37
38struct program {
39 enum bpf_attach_type type;
40 load_fn loadfn;
41 int fd;
42 const char *name;
43 enum bpf_attach_type invalid_type;
44};
45
46char bpf_log_buf[BPF_LOG_BUF_SIZE];
47
48static int mk_sockaddr(int domain, const char *ip, unsigned short port,
49 struct sockaddr *addr, socklen_t addr_len)
50{
51 struct sockaddr_in6 *addr6;
52 struct sockaddr_in *addr4;
53
54 if (domain != AF_INET && domain != AF_INET6) {
55 log_err("Unsupported address family");
56 return -1;
57 }
58
59 memset(addr, 0, addr_len);
60
61 if (domain == AF_INET) {
62 if (addr_len < sizeof(struct sockaddr_in))
63 return -1;
64 addr4 = (struct sockaddr_in *)addr;
65 addr4->sin_family = domain;
66 addr4->sin_port = htons(port);
67 if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1) {
68 log_err("Invalid IPv4: %s", ip);
69 return -1;
70 }
71 } else if (domain == AF_INET6) {
72 if (addr_len < sizeof(struct sockaddr_in6))
73 return -1;
74 addr6 = (struct sockaddr_in6 *)addr;
75 addr6->sin6_family = domain;
76 addr6->sin6_port = htons(port);
77 if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1) {
78 log_err("Invalid IPv6: %s", ip);
79 return -1;
80 }
81 }
82
83 return 0;
84}
85
86static int load_insns(enum bpf_attach_type attach_type,
87 const struct bpf_insn *insns, size_t insns_cnt,
88 const char *comment)
89{
90 struct bpf_load_program_attr load_attr;
91 int ret;
92
93 memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
94 load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
95 load_attr.expected_attach_type = attach_type;
96 load_attr.insns = insns;
97 load_attr.insns_cnt = insns_cnt;
98 load_attr.license = "GPL";
99
100 ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
101 if (ret < 0 && comment) {
102 log_err(">>> Loading %s program error.\n"
103 ">>> Output from verifier:\n%s\n-------\n",
104 comment, bpf_log_buf);
105 }
106
107 return ret;
108}
109
110/* [1] These testing programs try to read different context fields, including
111 * narrow loads of different sizes from user_ip4 and user_ip6, and write to
112 * those allowed to be overridden.
113 *
114 * [2] BPF_LD_IMM64 & BPF_JMP_REG are used below whenever there is a need to
115 * compare a register with unsigned 32bit integer. BPF_JMP_IMM can't be used
116 * in such cases since it accepts only _signed_ 32bit integer as IMM
117 * argument. Also note that BPF_LD_IMM64 contains 2 instructions what matters
118 * to count jumps properly.
119 */
120
121static int bind4_prog_load(enum bpf_attach_type attach_type,
122 const char *comment)
123{
124 union {
125 uint8_t u4_addr8[4];
126 uint16_t u4_addr16[2];
127 uint32_t u4_addr32;
128 } ip4;
129 struct sockaddr_in addr4_rw;
130
131 if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) {
132 log_err("Invalid IPv4: %s", SERV4_IP);
133 return -1;
134 }
135
136 if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
137 (struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1)
138 return -1;
139
140 /* See [1]. */
141 struct bpf_insn insns[] = {
142 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
143
144 /* if (sk.family == AF_INET && */
145 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
146 offsetof(struct bpf_sock_addr, family)),
147 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 16),
148
149 /* (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */
150 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
151 offsetof(struct bpf_sock_addr, type)),
152 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1),
153 BPF_JMP_A(1),
154 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 12),
155
156 /* 1st_byte_of_user_ip4 == expected && */
157 BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
158 offsetof(struct bpf_sock_addr, user_ip4)),
159 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 10),
160
161 /* 1st_half_of_user_ip4 == expected && */
162 BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
163 offsetof(struct bpf_sock_addr, user_ip4)),
164 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 8),
165
166 /* whole_user_ip4 == expected) { */
167 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
168 offsetof(struct bpf_sock_addr, user_ip4)),
169 BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */
170 BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4),
171
172 /* user_ip4 = addr4_rw.sin_addr */
173 BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_addr.s_addr),
174 BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
175 offsetof(struct bpf_sock_addr, user_ip4)),
176
177 /* user_port = addr4_rw.sin_port */
178 BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_port),
179 BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
180 offsetof(struct bpf_sock_addr, user_port)),
181 /* } */
182
183 /* return 1 */
184 BPF_MOV64_IMM(BPF_REG_0, 1),
185 BPF_EXIT_INSN(),
186 };
187
188 return load_insns(attach_type, insns,
189 sizeof(insns) / sizeof(struct bpf_insn), comment);
190}
191
192static int bind6_prog_load(enum bpf_attach_type attach_type,
193 const char *comment)
194{
195 struct sockaddr_in6 addr6_rw;
196 struct in6_addr ip6;
197
198 if (inet_pton(AF_INET6, SERV6_IP, (void *)&ip6) != 1) {
199 log_err("Invalid IPv6: %s", SERV6_IP);
200 return -1;
201 }
202
203 if (mk_sockaddr(AF_INET6, SERV6_REWRITE_IP, SERV6_REWRITE_PORT,
204 (struct sockaddr *)&addr6_rw, sizeof(addr6_rw)) == -1)
205 return -1;
206
207 /* See [1]. */
208 struct bpf_insn insns[] = {
209 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
210
211 /* if (sk.family == AF_INET6 && */
212 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
213 offsetof(struct bpf_sock_addr, family)),
214 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
215
216 /* 5th_byte_of_user_ip6 == expected && */
217 BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
218 offsetof(struct bpf_sock_addr, user_ip6[1])),
219 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr[4], 16),
220
221 /* 3rd_half_of_user_ip6 == expected && */
222 BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
223 offsetof(struct bpf_sock_addr, user_ip6[1])),
224 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr16[2], 14),
225
226 /* last_word_of_user_ip6 == expected) { */
227 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
228 offsetof(struct bpf_sock_addr, user_ip6[3])),
229 BPF_LD_IMM64(BPF_REG_8, ip6.s6_addr32[3]), /* See [2]. */
230 BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 10),
231
232
233#define STORE_IPV6_WORD(N) \
234 BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_addr.s6_addr32[N]), \
235 BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \
236 offsetof(struct bpf_sock_addr, user_ip6[N]))
237
238 /* user_ip6 = addr6_rw.sin6_addr */
239 STORE_IPV6_WORD(0),
240 STORE_IPV6_WORD(1),
241 STORE_IPV6_WORD(2),
242 STORE_IPV6_WORD(3),
243
244 /* user_port = addr6_rw.sin6_port */
245 BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_port),
246 BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
247 offsetof(struct bpf_sock_addr, user_port)),
248
249 /* } */
250
251 /* return 1 */
252 BPF_MOV64_IMM(BPF_REG_0, 1),
253 BPF_EXIT_INSN(),
254 };
255
256 return load_insns(attach_type, insns,
257 sizeof(insns) / sizeof(struct bpf_insn), comment);
258}
259
260static int connect_prog_load_path(const char *path,
261 enum bpf_attach_type attach_type,
262 const char *comment)
263{
264 struct bpf_prog_load_attr attr;
265 struct bpf_object *obj;
266 int prog_fd;
267
268 memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
269 attr.file = path;
270 attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
271 attr.expected_attach_type = attach_type;
272
273 if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
274 if (comment)
275 log_err(">>> Loading %s program at %s error.\n",
276 comment, path);
277 return -1;
278 }
279
280 return prog_fd;
281}
282
283static int connect4_prog_load(enum bpf_attach_type attach_type,
284 const char *comment)
285{
286 return connect_prog_load_path(CONNECT4_PROG_PATH, attach_type, comment);
287}
288
289static int connect6_prog_load(enum bpf_attach_type attach_type,
290 const char *comment)
291{
292 return connect_prog_load_path(CONNECT6_PROG_PATH, attach_type, comment);
293}
294
295static void print_ip_port(int sockfd, info_fn fn, const char *fmt)
296{
297 char addr_buf[INET_NTOP_BUF];
298 struct sockaddr_storage addr;
299 struct sockaddr_in6 *addr6;
300 struct sockaddr_in *addr4;
301 socklen_t addr_len;
302 unsigned short port;
303 void *nip;
304
305 addr_len = sizeof(struct sockaddr_storage);
306 memset(&addr, 0, addr_len);
307
308 if (fn(sockfd, (struct sockaddr *)&addr, (socklen_t *)&addr_len) == 0) {
309 if (addr.ss_family == AF_INET) {
310 addr4 = (struct sockaddr_in *)&addr;
311 nip = (void *)&addr4->sin_addr;
312 port = ntohs(addr4->sin_port);
313 } else if (addr.ss_family == AF_INET6) {
314 addr6 = (struct sockaddr_in6 *)&addr;
315 nip = (void *)&addr6->sin6_addr;
316 port = ntohs(addr6->sin6_port);
317 } else {
318 return;
319 }
320 const char *addr_str =
321 inet_ntop(addr.ss_family, nip, addr_buf, INET_NTOP_BUF);
322 printf(fmt, addr_str ? addr_str : "??", port);
323 }
324}
325
326static void print_local_ip_port(int sockfd, const char *fmt)
327{
328 print_ip_port(sockfd, getsockname, fmt);
329}
330
/* Print the remote (peer) address of @sockfd using @fmt. */
static void print_remote_ip_port(int sockfd, const char *fmt)
{
	print_ip_port(sockfd, getpeername, fmt);
}
335
/* Create a socket of the given @type, bind it to @addr and, for TCP,
 * start listening.  Returns the server fd on success, -1 on error.
 */
static int start_server(int type, const struct sockaddr_storage *addr,
			socklen_t addr_len)
{
	int fd = socket(addr->ss_family, type, 0);

	if (fd == -1) {
		log_err("Failed to create server socket");
		return -1;
	}

	if (bind(fd, (const struct sockaddr *)addr, addr_len) == -1) {
		log_err("Failed to bind server socket");
		goto err;
	}

	/* UDP servers have nothing to listen on. */
	if (type == SOCK_STREAM && listen(fd, 128) == -1) {
		log_err("Failed to listen on server socket");
		goto err;
	}

	print_local_ip_port(fd, "\t Actual: bind(%s, %d)\n");
	return fd;
err:
	close(fd);
	return -1;
}
369
/* Open a client socket of @type and connect it to @addr, printing the
 * resulting remote/local addresses.  Returns 0 on success, -1 on error.
 */
static int connect_to_server(int type, const struct sockaddr_storage *addr,
			     socklen_t addr_len)
{
	int domain;
	int fd;

	domain = addr->ss_family;

	if (domain != AF_INET && domain != AF_INET6) {
		log_err("Unsupported address family");
		return -1;
	}

	fd = socket(domain, type, 0);
	if (fd == -1) {
		/* Fixed message grammar: was "Failed to creating ...". */
		log_err("Failed to create client socket");
		return -1;
	}

	if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) {
		log_err("Fail to connect to server");
		goto err;
	}

	print_remote_ip_port(fd, "\t Actual: connect(%s, %d)");
	print_local_ip_port(fd, " from (%s, %d)\n");

	return 0;
err:
	close(fd);
	return -1;
}
402
/* Print a running test-case header, e.g. "Test case #1 (IPv4/TCP):". */
static void print_test_case_num(int domain, int type)
{
	static int test_num;
	const char *domain_str;
	const char *type_str;

	domain_str = domain == AF_INET ? "IPv4" :
		     domain == AF_INET6 ? "IPv6" : "unknown_domain";
	type_str = type == SOCK_STREAM ? "TCP" :
		   type == SOCK_DGRAM ? "UDP" : "unknown_type";

	printf("Test case #%d (%s/%s):\n", ++test_num, domain_str, type_str);
}
415
/* Run one bind+connect round-trip for (@domain, @type) against @ip:@port.
 * Returns 0 on success, -1 on error.
 */
static int run_test_case(int domain, int type, const char *ip,
			 unsigned short port)
{
	struct sockaddr_storage addr;
	socklen_t addr_len = sizeof(addr);
	int servfd;
	int err = 0;

	print_test_case_num(domain, type);

	if (mk_sockaddr(domain, ip, port, (struct sockaddr *)&addr,
			addr_len) == -1)
		return -1;

	printf("\tRequested: bind(%s, %d) ..\n", ip, port);
	servfd = start_server(type, &addr, addr_len);
	if (servfd == -1)
		return -1;	/* fix: previously fell through to close(-1) */

	printf("\tRequested: connect(%s, %d) from (*, *) ..\n", ip, port);
	if (connect_to_server(type, &addr, addr_len))
		err = -1;

	close(servfd);
	return err;
}
446
447static void close_progs_fds(struct program *progs, size_t prog_cnt)
448{
449 size_t i;
450
451 for (i = 0; i < prog_cnt; ++i) {
452 close(progs[i].fd);
453 progs[i].fd = -1;
454 }
455}
456
457static int load_and_attach_progs(int cgfd, struct program *progs,
458 size_t prog_cnt)
459{
460 size_t i;
461
462 for (i = 0; i < prog_cnt; ++i) {
463 printf("Load %s with invalid type (can pollute stderr) ",
464 progs[i].name);
465 fflush(stdout);
466 progs[i].fd = progs[i].loadfn(progs[i].invalid_type, NULL);
467 if (progs[i].fd != -1) {
468 log_err("Load with invalid type accepted for %s",
469 progs[i].name);
470 goto err;
471 }
472 printf("... REJECTED\n");
473
474 printf("Load %s with valid type", progs[i].name);
475 progs[i].fd = progs[i].loadfn(progs[i].type, progs[i].name);
476 if (progs[i].fd == -1) {
477 log_err("Failed to load program %s", progs[i].name);
478 goto err;
479 }
480 printf(" ... OK\n");
481
482 printf("Attach %s with invalid type", progs[i].name);
483 if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].invalid_type,
484 BPF_F_ALLOW_OVERRIDE) != -1) {
485 log_err("Attach with invalid type accepted for %s",
486 progs[i].name);
487 goto err;
488 }
489 printf(" ... REJECTED\n");
490
491 printf("Attach %s with valid type", progs[i].name);
492 if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].type,
493 BPF_F_ALLOW_OVERRIDE) == -1) {
494 log_err("Failed to attach program %s", progs[i].name);
495 goto err;
496 }
497 printf(" ... OK\n");
498 }
499
500 return 0;
501err:
502 close_progs_fds(progs, prog_cnt);
503 return -1;
504}
505
/* Attach @progs to the cgroup and run the TCP and UDP test cases for
 * @domain against @ip:@port.  All program fds are closed before return.
 * Returns 0 on success, -1 on error.
 */
static int run_domain_test(int domain, int cgfd, struct program *progs,
			   size_t prog_cnt, const char *ip,
			   unsigned short port)
{
	int err = -1;

	if (load_and_attach_progs(cgfd, progs, prog_cnt) == -1)
		goto out;

	if (run_test_case(domain, SOCK_STREAM, ip, port) == -1)
		goto out;

	if (run_test_case(domain, SOCK_DGRAM, ip, port) == -1)
		goto out;

	err = 0;
out:
	close_progs_fds(progs, prog_cnt);
	return err;
}
527
/* Top-level driver: set up a cgroup, attach bind/connect programs for
 * both address families and run the TCP/UDP test cases against them.
 * Returns 0 on success, -1 on failure.
 */
static int run_test(void)
{
	size_t inet6_prog_cnt;
	size_t inet_prog_cnt;
	int cgfd = -1;
	int err = 0;

	/* Initializer order: expected attach type, loader, fd (unset),
	 * name, and the wrong attach type used for negative testing.
	 */
	struct program inet6_progs[] = {
		{BPF_CGROUP_INET6_BIND, bind6_prog_load, -1, "bind6",
		 BPF_CGROUP_INET4_BIND},
		{BPF_CGROUP_INET6_CONNECT, connect6_prog_load, -1, "connect6",
		 BPF_CGROUP_INET4_CONNECT},
	};
	inet6_prog_cnt = sizeof(inet6_progs) / sizeof(struct program);

	struct program inet_progs[] = {
		{BPF_CGROUP_INET4_BIND, bind4_prog_load, -1, "bind4",
		 BPF_CGROUP_INET6_BIND},
		{BPF_CGROUP_INET4_CONNECT, connect4_prog_load, -1, "connect4",
		 BPF_CGROUP_INET6_CONNECT},
	};
	inet_prog_cnt = sizeof(inet_progs) / sizeof(struct program);

	if (setup_cgroup_environment())
		goto err;

	/* NOTE(review): failure is detected with !cgfd, implying
	 * create_and_get_cgroup() returns 0 (not -1) on error -- confirm
	 * against cgroup_helpers.c before changing this check.
	 */
	cgfd = create_and_get_cgroup(CG_PATH);
	if (!cgfd)
		goto err;

	if (join_cgroup(CG_PATH))
		goto err;

	if (run_domain_test(AF_INET, cgfd, inet_progs, inet_prog_cnt, SERV4_IP,
			    SERV4_PORT) == -1)
		goto err;

	if (run_domain_test(AF_INET6, cgfd, inet6_progs, inet6_prog_cnt,
			    SERV6_IP, SERV6_PORT) == -1)
		goto err;

	goto out;
err:
	err = -1;
out:
	/* cgfd may still be -1 on early failure; close(-1) is harmless. */
	close(cgfd);
	cleanup_cgroup_environment();
	printf(err ? "### FAIL\n" : "### SUCCESS\n");
	return err;
}
578
/* Entry point: this binary expects to be launched by test_sock_addr.sh,
 * which creates the test interfaces and passes an extra argument.
 */
int main(int argc, char **argv)
{
	if (argc >= 2)
		return run_test();

	fprintf(stderr,
		"%s has to be run via %s.sh. Skip direct run.\n",
		argv[0], argv[0]);
	exit(0);
}
diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh
new file mode 100755
index 000000000000..c6e1dcf992c4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock_addr.sh
@@ -0,0 +1,57 @@
1#!/bin/sh
2
3set -eu
4
# Ping a single time, quietly; the argument may carry a prefix length
# ("addr/plen") which is stripped.  Plain `ping` cannot reach IPv6
# addresses on systems that still ship a separate ping6 binary, so pick
# the tool by address family (a colon marks IPv6).
ping_once()
{
	case "$1" in
	*:*)
		if type ping6 >/dev/null 2>&1; then
			ping6 -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1
		else
			ping -6 -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1
		fi
		;;
	*)
		ping -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1
		;;
	esac
}
9
# Poll until both test addresses answer ping, up to MAX_PING_TRIES
# attempts; exit 1 on timeout.  Uses printf instead of `echo -n`:
# under #!/bin/sh (e.g. dash) XSI echo prints "-n" literally.
wait_for_ip()
{
	local _i
	printf "Wait for testing IPv4/IPv6 to become available "
	for _i in $(seq ${MAX_PING_TRIES}); do
		printf "."
		if ping_once ${TEST_IPv4} && ping_once ${TEST_IPv6}; then
			echo " OK"
			return
		fi
	done
	echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
	exit 1
}
24
# Build an isolated veth pair for the test, bring both ends up and put
# the testing IPv4/IPv6 addresses on one side, then wait until they are
# reachable.
setup()
{
	# Create testing interfaces not to interfere with current environment.
	ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
	ip link set dev ${TEST_IF} up
	ip link set dev ${TEST_IF_PEER} up

	ip -4 addr add ${TEST_IPv4} dev ${TEST_IF}
	ip -6 addr add ${TEST_IPv6} dev ${TEST_IF}
	wait_for_ip
}
36
# Tear down the test interfaces.  Deleting one end of a veth pair also
# removes the peer, so the second del is expected to fail; errors are
# suppressed and `|| :` keeps `set -e` from aborting the script.
cleanup()
{
	ip link del ${TEST_IF} 2>/dev/null || :
	ip link del ${TEST_IF_PEER} 2>/dev/null || :
}
42
main()
{
	# Run cleanup on normal exit and on INT/QUIT/ABRT/TERM (2 3 6 15).
	trap cleanup EXIT 2 3 6 15
	setup
	# The extra argument tells the binary it was launched via this wrapper.
	./test_sock_addr setup_done
}
49
50BASENAME=$(basename $0 .sh)
51TEST_IF="${BASENAME}1"
52TEST_IF_PEER="${BASENAME}2"
53TEST_IPv4="127.0.0.4/8"
54TEST_IPv6="::6/128"
55MAX_PING_TRIES=5
56
57main