aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_cong.c
diff options
context:
space:
mode:
author Sabrina Dubroca <sd@queasysnail.net> 2017-08-25 07:10:12 -0400
committer David S. Miller <davem@davemloft.net> 2017-08-25 20:16:27 -0400
commit ebfa00c5745660fe7f0a91eea88d4dff658486c4 (patch)
tree 70f899bdadb25a073dc98f97bac41e60a149077a /net/ipv4/tcp_cong.c
parent 3614364527daa870264f6dde77f02853cdecd02c (diff)
tcp: fix refcnt leak with ebpf congestion control
There are a few bugs around refcnt handling in the new BPF congestion
control setsockopt:

 - The new ca is assigned to icsk->icsk_ca_ops even in the case where
   we cannot get a reference on it. This would lead to a use after
   free, since that ca is going away soon.

 - Changing the congestion control case doesn't release the refcnt on
   the previous ca.

 - In the reinit case, we first leak a reference on the old ca, then we
   call tcp_reinit_congestion_control on the ca that we have just
   assigned, leading to deinitializing the wrong ca (->release of the
   new ca on the old ca's data) and releasing the refcount on the ca
   that we actually want to use.

This is visible by building (for example) BIC as a module and setting
net.ipv4.tcp_congestion_control=bic, and using tcp_cong_kern.c from
samples/bpf.

This patch fixes the refcount issues, and moves reinit back into tcp
core to avoid passing a ca pointer back to BPF.

Fixes: 91b5b21c7c16 ("bpf: Add support for changing congestion control")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_cong.c')
-rw-r--r-- net/ipv4/tcp_cong.c 19
1 files changed, 14 insertions, 5 deletions
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index fde983f6376b..421ea1b918da 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk)
189 INET_ECN_dontxmit(sk); 189 INET_ECN_dontxmit(sk);
190} 190}
191 191
192void tcp_reinit_congestion_control(struct sock *sk, 192static void tcp_reinit_congestion_control(struct sock *sk,
193 const struct tcp_congestion_ops *ca) 193 const struct tcp_congestion_ops *ca)
194{ 194{
195 struct inet_connection_sock *icsk = inet_csk(sk); 195 struct inet_connection_sock *icsk = inet_csk(sk);
196 196
@@ -338,7 +338,7 @@ out:
338 * tcp_reinit_congestion_control (if the current congestion control was 338 * tcp_reinit_congestion_control (if the current congestion control was
339 * already initialized. 339 * already initialized.
340 */ 340 */
341int tcp_set_congestion_control(struct sock *sk, const char *name, bool load) 341int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit)
342{ 342{
343 struct inet_connection_sock *icsk = inet_csk(sk); 343 struct inet_connection_sock *icsk = inet_csk(sk);
344 const struct tcp_congestion_ops *ca; 344 const struct tcp_congestion_ops *ca;
@@ -360,9 +360,18 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
360 if (!ca) { 360 if (!ca) {
361 err = -ENOENT; 361 err = -ENOENT;
362 } else if (!load) { 362 } else if (!load) {
363 icsk->icsk_ca_ops = ca; 363 const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops;
364 if (!try_module_get(ca->owner)) 364
365 if (try_module_get(ca->owner)) {
366 if (reinit) {
367 tcp_reinit_congestion_control(sk, ca);
368 } else {
369 icsk->icsk_ca_ops = ca;
370 module_put(old_ca->owner);
371 }
372 } else {
365 err = -EBUSY; 373 err = -EBUSY;
374 }
366 } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || 375 } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
367 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) { 376 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
368 err = -EPERM; 377 err = -EPERM;