aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/bpf/syscall.c
diff options
context:
space:
mode:
author Andrey Ignatov <rdna@fb.com> 2018-03-30 18:08:05 -0400
committer Daniel Borkmann <daniel@iogearbox.net> 2018-03-30 20:15:54 -0400
commit d74bad4e74ee373787a9ae24197c17b7cdc428d5 (patch)
tree 568fbc1dc8c41b35e442226ca15ef0c16cc2996e /kernel/bpf/syscall.c
parent 3679d585bbc07a1ac4448d5b478b492cad3587ce (diff)
bpf: Hooks for sys_connect
== The problem == See the description of the problem in the initial patch of this patch set. == The solution == The patch provides a much more reliable in-kernel solution for the 2nd part of the problem: making an outgoing connection from a desired IP. It adds new attach types `BPF_CGROUP_INET4_CONNECT` and `BPF_CGROUP_INET6_CONNECT` for program type `BPF_PROG_TYPE_CGROUP_SOCK_ADDR` that can be used to override both the source and the destination of a connection at connect(2) time. The local end of the connection can be bound to a desired IP using the newly introduced BPF helper `bpf_bind()`. It only allows binding to an IP though, and doesn't support binding to a port, i.e. it leverages the `IP_BIND_ADDRESS_NO_PORT` socket option. There are two reasons for this: * looking for a free port is expensive and can affect performance significantly; * there is no use case for binding to a port. As for the remote end (`struct sockaddr *` passed by the user), both parts of it can be overridden: remote IP and remote port. This is useful if an application inside a cgroup wants to connect to another application inside the same cgroup or to itself, but knows nothing about the IP assigned to the cgroup. Support is added for IPv4 and IPv6, for TCP and UDP. IPv4 and IPv6 have separate attach types for the same reason as the sys_bind hooks, i.e. to prevent reading from / writing to e.g. user_ip6 fields when the user passes sockaddr_in, since that would be out of bounds. == Implementation notes == The patch introduces a new field in `struct proto`: `pre_connect`, a pointer to a function with the same signature as `connect` that is called before it. The reason is that in some cases BPF hooks should be called well before control is passed to `sk->sk_prot->connect`. Specifically, `inet_dgram_connect` autobinds the socket before calling `sk->sk_prot->connect`, and there is no way to call `bpf_bind()` from hooks in e.g. `ip4_datagram_connect` or `ip6_datagram_connect` since it would cause a double-bind.
On the other hand, `proto.pre_connect` provides a flexible way to add BPF hooks for connect only for the necessary `proto` and to call them at the desired time before `connect`. Since `bpf_bind()` is allowed to bind only to an IP and the autobind in `inet_dgram_connect` binds only a port, there is no chance of a double-bind. bpf_bind() sets `force_bind_address_no_port` to bind to only the IP regardless of the value of the `bind_address_no_port` socket field. bpf_bind() sets `with_lock` to `false` when calling __inet_bind() and __inet6_bind() since all call sites where bpf_bind() is called already hold the socket lock. Signed-off-by: Andrey Ignatov <rdna@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r-- kernel/bpf/syscall.c 8
1 files changed, 8 insertions, 0 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2cad66a4cacb..cf1b29bc0ab8 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1180,6 +1180,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
1180 switch (expected_attach_type) { 1180 switch (expected_attach_type) {
1181 case BPF_CGROUP_INET4_BIND: 1181 case BPF_CGROUP_INET4_BIND:
1182 case BPF_CGROUP_INET6_BIND: 1182 case BPF_CGROUP_INET6_BIND:
1183 case BPF_CGROUP_INET4_CONNECT:
1184 case BPF_CGROUP_INET6_CONNECT:
1183 return 0; 1185 return 0;
1184 default: 1186 default:
1185 return -EINVAL; 1187 return -EINVAL;
@@ -1491,6 +1493,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
1491 break; 1493 break;
1492 case BPF_CGROUP_INET4_BIND: 1494 case BPF_CGROUP_INET4_BIND:
1493 case BPF_CGROUP_INET6_BIND: 1495 case BPF_CGROUP_INET6_BIND:
1496 case BPF_CGROUP_INET4_CONNECT:
1497 case BPF_CGROUP_INET6_CONNECT:
1494 ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; 1498 ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
1495 break; 1499 break;
1496 case BPF_CGROUP_SOCK_OPS: 1500 case BPF_CGROUP_SOCK_OPS:
@@ -1557,6 +1561,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
1557 break; 1561 break;
1558 case BPF_CGROUP_INET4_BIND: 1562 case BPF_CGROUP_INET4_BIND:
1559 case BPF_CGROUP_INET6_BIND: 1563 case BPF_CGROUP_INET6_BIND:
1564 case BPF_CGROUP_INET4_CONNECT:
1565 case BPF_CGROUP_INET6_CONNECT:
1560 ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; 1566 ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
1561 break; 1567 break;
1562 case BPF_CGROUP_SOCK_OPS: 1568 case BPF_CGROUP_SOCK_OPS:
@@ -1610,6 +1616,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
1610 case BPF_CGROUP_INET_SOCK_CREATE: 1616 case BPF_CGROUP_INET_SOCK_CREATE:
1611 case BPF_CGROUP_INET4_BIND: 1617 case BPF_CGROUP_INET4_BIND:
1612 case BPF_CGROUP_INET6_BIND: 1618 case BPF_CGROUP_INET6_BIND:
1619 case BPF_CGROUP_INET4_CONNECT:
1620 case BPF_CGROUP_INET6_CONNECT:
1613 case BPF_CGROUP_SOCK_OPS: 1621 case BPF_CGROUP_SOCK_OPS:
1614 case BPF_CGROUP_DEVICE: 1622 case BPF_CGROUP_DEVICE:
1615 break; 1623 break;