diff options
author | Eric Dumazet <edumazet@google.com> | 2019-07-18 22:28:14 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-07-18 23:33:48 -0400 |
commit | 8d650cdedaabb33e85e9b7c517c0c71fcecc1de9 (patch) | |
tree | ab0428e5e973eb2b4e8b1f4859a508e2c0d07859 | |
parent | 269b7c5ff78264e3728b95828d219e0e0eeaec94 (diff) |
tcp: fix tcp_set_congestion_control() use from bpf hook
Neal reported incorrect use of ns_capable() from bpf hook.
bpf_setsockopt(...TCP_CONGESTION...)
  -> tcp_set_congestion_control()
    -> ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)
      -> ns_capable_common()
        -> current_cred()
          -> rcu_dereference_protected(current->cred, 1)
Accessing 'current' in bpf context makes no sense, since packets
are processed from softirq context.
As Neal stated: the capability check in tcp_set_congestion_control()
was written assuming a system call context, and then was reused from
a BPF call site.
The fix is to add a new parameter to tcp_set_congestion_control(),
so that the ns_capable() call is only performed under the right
context.
Fixes: 91b5b21c7c16 ("bpf: Add support for changing congestion control")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Lawrence Brakmo <brakmo@fb.com>
Reported-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/tcp.h | 3 | ||||
-rw-r--r-- | net/core/filter.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 6 |
4 files changed, 9 insertions, 6 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h index cca3c59b98bf..f42d300f0cfa 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -1064,7 +1064,8 @@ void tcp_get_default_congestion_control(struct net *net, char *name); | |||
1064 | void tcp_get_available_congestion_control(char *buf, size_t len); | 1064 | void tcp_get_available_congestion_control(char *buf, size_t len); |
1065 | void tcp_get_allowed_congestion_control(char *buf, size_t len); | 1065 | void tcp_get_allowed_congestion_control(char *buf, size_t len); |
1066 | int tcp_set_allowed_congestion_control(char *allowed); | 1066 | int tcp_set_allowed_congestion_control(char *allowed); |
1067 | int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit); | 1067 | int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, |
1068 | bool reinit, bool cap_net_admin); | ||
1068 | u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); | 1069 | u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); |
1069 | void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); | 1070 | void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); |
1070 | 1071 | ||
diff --git a/net/core/filter.c b/net/core/filter.c index 0f6854ccf894..4e2a79b2fd77 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -4335,7 +4335,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, | |||
4335 | TCP_CA_NAME_MAX-1)); | 4335 | TCP_CA_NAME_MAX-1)); |
4336 | name[TCP_CA_NAME_MAX-1] = 0; | 4336 | name[TCP_CA_NAME_MAX-1] = 0; |
4337 | ret = tcp_set_congestion_control(sk, name, false, | 4337 | ret = tcp_set_congestion_control(sk, name, false, |
4338 | reinit); | 4338 | reinit, true); |
4339 | } else { | 4339 | } else { |
4340 | struct tcp_sock *tp = tcp_sk(sk); | 4340 | struct tcp_sock *tp = tcp_sk(sk); |
4341 | 4341 | ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7846afacdf0b..776905899ac0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -2785,7 +2785,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2785 | name[val] = 0; | 2785 | name[val] = 0; |
2786 | 2786 | ||
2787 | lock_sock(sk); | 2787 | lock_sock(sk); |
2788 | err = tcp_set_congestion_control(sk, name, true, true); | 2788 | err = tcp_set_congestion_control(sk, name, true, true, |
2789 | ns_capable(sock_net(sk)->user_ns, | ||
2790 | CAP_NET_ADMIN)); | ||
2789 | release_sock(sk); | 2791 | release_sock(sk); |
2790 | return err; | 2792 | return err; |
2791 | } | 2793 | } |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index e1862b64a90f..c445a81d144e 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
@@ -333,7 +333,8 @@ out: | |||
333 | * tcp_reinit_congestion_control (if the current congestion control was | 333 | * tcp_reinit_congestion_control (if the current congestion control was |
334 | * already initialized. | 334 | * already initialized. |
335 | */ | 335 | */ |
336 | int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit) | 336 | int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, |
337 | bool reinit, bool cap_net_admin) | ||
337 | { | 338 | { |
338 | struct inet_connection_sock *icsk = inet_csk(sk); | 339 | struct inet_connection_sock *icsk = inet_csk(sk); |
339 | const struct tcp_congestion_ops *ca; | 340 | const struct tcp_congestion_ops *ca; |
@@ -369,8 +370,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo | |||
369 | } else { | 370 | } else { |
370 | err = -EBUSY; | 371 | err = -EBUSY; |
371 | } | 372 | } |
372 | } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || | 373 | } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) { |
373 | ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) { | ||
374 | err = -EPERM; | 374 | err = -EPERM; |
375 | } else if (!try_module_get(ca->owner)) { | 375 | } else if (!try_module_get(ca->owner)) { |
376 | err = -EBUSY; | 376 | err = -EBUSY; |