summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2019-07-18 22:28:14 -0400
committer David S. Miller <davem@davemloft.net> 2019-07-18 23:33:48 -0400
commit 8d650cdedaabb33e85e9b7c517c0c71fcecc1de9 (patch)
tree ab0428e5e973eb2b4e8b1f4859a508e2c0d07859
parent 269b7c5ff78264e3728b95828d219e0e0eeaec94 (diff)
tcp: fix tcp_set_congestion_control() use from bpf hook
Neal reported incorrect use of ns_capable() from bpf hook.

bpf_setsockopt(...TCP_CONGESTION...)
  -> tcp_set_congestion_control()
   -> ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)
    -> ns_capable_common()
     -> current_cred()
      -> rcu_dereference_protected(current->cred, 1)

Accessing 'current' in bpf context makes no sense, since packets
are processed from softirq context.

As Neal stated:
  The capability check in tcp_set_congestion_control() was written
  assuming a system call context, and then was reused from a BPF
  call site.

The fix is to add a new parameter to tcp_set_congestion_control(),
so that the ns_capable() call is only performed under the right
context.

Fixes: 91b5b21c7c16 ("bpf: Add support for changing congestion control")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Lawrence Brakmo <brakmo@fb.com>
Reported-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  include/net/tcp.h    3
-rw-r--r--  net/core/filter.c    2
-rw-r--r--  net/ipv4/tcp.c       4
-rw-r--r--  net/ipv4/tcp_cong.c  6
4 files changed, 9 insertions, 6 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index cca3c59b98bf..f42d300f0cfa 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1064,7 +1064,8 @@ void tcp_get_default_congestion_control(struct net *net, char *name);
1064void tcp_get_available_congestion_control(char *buf, size_t len); 1064void tcp_get_available_congestion_control(char *buf, size_t len);
1065void tcp_get_allowed_congestion_control(char *buf, size_t len); 1065void tcp_get_allowed_congestion_control(char *buf, size_t len);
1066int tcp_set_allowed_congestion_control(char *allowed); 1066int tcp_set_allowed_congestion_control(char *allowed);
1067int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit); 1067int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
1068 bool reinit, bool cap_net_admin);
1068u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); 1069u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
1069void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); 1070void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
1070 1071
diff --git a/net/core/filter.c b/net/core/filter.c
index 0f6854ccf894..4e2a79b2fd77 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4335,7 +4335,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
4335 TCP_CA_NAME_MAX-1)); 4335 TCP_CA_NAME_MAX-1));
4336 name[TCP_CA_NAME_MAX-1] = 0; 4336 name[TCP_CA_NAME_MAX-1] = 0;
4337 ret = tcp_set_congestion_control(sk, name, false, 4337 ret = tcp_set_congestion_control(sk, name, false,
4338 reinit); 4338 reinit, true);
4339 } else { 4339 } else {
4340 struct tcp_sock *tp = tcp_sk(sk); 4340 struct tcp_sock *tp = tcp_sk(sk);
4341 4341
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7846afacdf0b..776905899ac0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2785,7 +2785,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2785 name[val] = 0; 2785 name[val] = 0;
2786 2786
2787 lock_sock(sk); 2787 lock_sock(sk);
2788 err = tcp_set_congestion_control(sk, name, true, true); 2788 err = tcp_set_congestion_control(sk, name, true, true,
2789 ns_capable(sock_net(sk)->user_ns,
2790 CAP_NET_ADMIN));
2789 release_sock(sk); 2791 release_sock(sk);
2790 return err; 2792 return err;
2791 } 2793 }
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index e1862b64a90f..c445a81d144e 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -333,7 +333,8 @@ out:
333 * tcp_reinit_congestion_control (if the current congestion control was 333 * tcp_reinit_congestion_control (if the current congestion control was
334 * already initialized. 334 * already initialized.
335 */ 335 */
336int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit) 336int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
337 bool reinit, bool cap_net_admin)
337{ 338{
338 struct inet_connection_sock *icsk = inet_csk(sk); 339 struct inet_connection_sock *icsk = inet_csk(sk);
339 const struct tcp_congestion_ops *ca; 340 const struct tcp_congestion_ops *ca;
@@ -369,8 +370,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo
369 } else { 370 } else {
370 err = -EBUSY; 371 err = -EBUSY;
371 } 372 }
372 } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || 373 } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) {
373 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
374 err = -EPERM; 374 err = -EPERM;
375 } else if (!try_module_get(ca->owner)) { 375 } else if (!try_module_get(ca->owner)) {
376 err = -EBUSY; 376 err = -EBUSY;