aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSabrina Dubroca <sd@queasysnail.net>2017-08-25 07:10:12 -0400
committerDavid S. Miller <davem@davemloft.net>2017-08-25 20:16:27 -0400
commitebfa00c5745660fe7f0a91eea88d4dff658486c4 (patch)
tree70f899bdadb25a073dc98f97bac41e60a149077a
parent3614364527daa870264f6dde77f02853cdecd02c (diff)
tcp: fix refcnt leak with ebpf congestion control
There are a few bugs around refcnt handling in the new BPF congestion control setsockopt: - The new ca is assigned to icsk->icsk_ca_ops even in the case where we cannot get a reference on it. This would lead to a use after free, since that ca is going away soon. - Changing the congestion control case doesn't release the refcnt on the previous ca. - In the reinit case, we first leak a reference on the old ca, then we call tcp_reinit_congestion_control on the ca that we have just assigned, leading to deinitializing the wrong ca (->release of the new ca on the old ca's data) and releasing the refcount on the ca that we actually want to use. This is visible by building (for example) BIC as a module and setting net.ipv4.tcp_congestion_control=bic, and using tcp_cong_kern.c from samples/bpf. This patch fixes the refcount issues, and moves reinit back into tcp core to avoid passing a ca pointer back to BPF. Fixes: 91b5b21c7c16 ("bpf: Add support for changing congestion control") Signed-off-by: Sabrina Dubroca <sd@queasysnail.net> Acked-by: Lawrence Brakmo <brakmo@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/tcp.h4
-rw-r--r--net/core/filter.c7
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_cong.c19
4 files changed, 18 insertions, 14 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ada65e767b28..f642a39f9eee 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1004,9 +1004,7 @@ void tcp_get_default_congestion_control(char *name);
1004void tcp_get_available_congestion_control(char *buf, size_t len); 1004void tcp_get_available_congestion_control(char *buf, size_t len);
1005void tcp_get_allowed_congestion_control(char *buf, size_t len); 1005void tcp_get_allowed_congestion_control(char *buf, size_t len);
1006int tcp_set_allowed_congestion_control(char *allowed); 1006int tcp_set_allowed_congestion_control(char *allowed);
1007int tcp_set_congestion_control(struct sock *sk, const char *name, bool load); 1007int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit);
1008void tcp_reinit_congestion_control(struct sock *sk,
1009 const struct tcp_congestion_ops *ca);
1010u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); 1008u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
1011void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); 1009void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
1012 1010
diff --git a/net/core/filter.c b/net/core/filter.c
index 8eb81e5fae08..169974998c76 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2836,15 +2836,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
2836 sk->sk_prot->setsockopt == tcp_setsockopt) { 2836 sk->sk_prot->setsockopt == tcp_setsockopt) {
2837 if (optname == TCP_CONGESTION) { 2837 if (optname == TCP_CONGESTION) {
2838 char name[TCP_CA_NAME_MAX]; 2838 char name[TCP_CA_NAME_MAX];
2839 bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN;
2839 2840
2840 strncpy(name, optval, min_t(long, optlen, 2841 strncpy(name, optval, min_t(long, optlen,
2841 TCP_CA_NAME_MAX-1)); 2842 TCP_CA_NAME_MAX-1));
2842 name[TCP_CA_NAME_MAX-1] = 0; 2843 name[TCP_CA_NAME_MAX-1] = 0;
2843 ret = tcp_set_congestion_control(sk, name, false); 2844 ret = tcp_set_congestion_control(sk, name, false, reinit);
2844 if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
2845 /* replacing an existing ca */
2846 tcp_reinit_congestion_control(sk,
2847 inet_csk(sk)->icsk_ca_ops);
2848 } else { 2845 } else {
2849 struct tcp_sock *tp = tcp_sk(sk); 2846 struct tcp_sock *tp = tcp_sk(sk);
2850 2847
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 71ce33decd97..a3e91b552edc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2481,7 +2481,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2481 name[val] = 0; 2481 name[val] = 0;
2482 2482
2483 lock_sock(sk); 2483 lock_sock(sk);
2484 err = tcp_set_congestion_control(sk, name, true); 2484 err = tcp_set_congestion_control(sk, name, true, true);
2485 release_sock(sk); 2485 release_sock(sk);
2486 return err; 2486 return err;
2487 } 2487 }
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index fde983f6376b..421ea1b918da 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk)
189 INET_ECN_dontxmit(sk); 189 INET_ECN_dontxmit(sk);
190} 190}
191 191
192void tcp_reinit_congestion_control(struct sock *sk, 192static void tcp_reinit_congestion_control(struct sock *sk,
193 const struct tcp_congestion_ops *ca) 193 const struct tcp_congestion_ops *ca)
194{ 194{
195 struct inet_connection_sock *icsk = inet_csk(sk); 195 struct inet_connection_sock *icsk = inet_csk(sk);
196 196
@@ -338,7 +338,7 @@ out:
338 * tcp_reinit_congestion_control (if the current congestion control was 338 * tcp_reinit_congestion_control (if the current congestion control was
339 * already initialized. 339 * already initialized.
340 */ 340 */
341int tcp_set_congestion_control(struct sock *sk, const char *name, bool load) 341int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit)
342{ 342{
343 struct inet_connection_sock *icsk = inet_csk(sk); 343 struct inet_connection_sock *icsk = inet_csk(sk);
344 const struct tcp_congestion_ops *ca; 344 const struct tcp_congestion_ops *ca;
@@ -360,9 +360,18 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
360 if (!ca) { 360 if (!ca) {
361 err = -ENOENT; 361 err = -ENOENT;
362 } else if (!load) { 362 } else if (!load) {
363 icsk->icsk_ca_ops = ca; 363 const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops;
364 if (!try_module_get(ca->owner)) 364
365 if (try_module_get(ca->owner)) {
366 if (reinit) {
367 tcp_reinit_congestion_control(sk, ca);
368 } else {
369 icsk->icsk_ca_ops = ca;
370 module_put(old_ca->owner);
371 }
372 } else {
365 err = -EBUSY; 373 err = -EBUSY;
374 }
366 } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || 375 } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
367 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) { 376 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
368 err = -EPERM; 377 err = -EPERM;