tcp: fix refcnt leak with ebpf congestion control

There are a few bugs around refcnt handling in the new BPF congestion
control setsockopt:

 - The new ca is assigned to icsk->icsk_ca_ops even in the case where we
   cannot get a reference on it. This would lead to a use after free,
   since that ca is going away soon.

 - Changing the congestion control case doesn't release the refcnt on
   the previous ca.

 - In the reinit case, we first leak a reference on the old ca, then we
   call tcp_reinit_congestion_control on the ca that we have just
   assigned, leading to deinitializing the wrong ca (->release of the
   new ca on the old ca's data) and releasing the refcount on the ca
   that we actually want to use.

This is visible by building (for example) BIC as a module and setting
net.ipv4.tcp_congestion_control=bic, and using tcp_cong_kern.c from
samples/bpf.

This patch fixes the refcount issues, and moves reinit back into tcp
core to avoid passing a ca pointer back to BPF.

Fixes: 91b5b21c7c16 ("bpf: Add support for changing congestion control")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
author: Sabrina Dubroca <sd@queasysnail.net> 2017-08-25 07:10:12 -0400
committer: David S. Miller <davem@davemloft.net> 2017-08-25 20:16:27 -0400
commit: ebfa00c5745660fe7f0a91eea88d4dff658486c4 (patch)
tree: 70f899bdadb25a073dc98f97bac41e60a149077a /net/ipv4/tcp_cong.c
parent: 3614364527daa870264f6dde77f02853cdecd02c (diff)
1 files changed, 14 insertions, 5 deletions
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index fde983f6376b..421ea1b918da 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk)
                INET_ECN_dontxmit(sk);
 }
-void tcp_reinit_congestion_control(struct sock *sk,
+static void tcp_reinit_congestion_control(struct sock *sk,
-                                   const struct tcp_congestion_ops *ca)
+                                          const struct tcp_congestion_ops *ca)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
@@ -338,7 +338,7 @@ out:
 * tcp_reinit_congestion_control (if the current congestion control was
 * already initialized.
 */
-int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
        const struct tcp_congestion_ops *ca;
@@ -360,9 +360,18 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
        if (!ca) {
                err = -ENOENT;
        } else if (!load) {
-                icsk->icsk_ca_ops = ca;
+                const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops;
-                if (!try_module_get(ca->owner))
+                if (try_module_get(ca->owner)) {
+                        if (reinit) {
+                                tcp_reinit_congestion_control(sk, ca);
+                        } else {
+                                icsk->icsk_ca_ops = ca;
+                                module_put(old_ca->owner);
+                        }
+                } else {
                        err = -EBUSY;
+                }
        } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
                     ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
                err = -EPERM;
author	Sabrina Dubroca <sd@queasysnail.net>	2017-08-25 07:10:12 -0400
committer	David S. Miller <davem@davemloft.net>	2017-08-25 20:16:27 -0400
commit	ebfa00c5745660fe7f0a91eea88d4dff658486c4 (patch)
tree	70f899bdadb25a073dc98f97bac41e60a149077a /net/ipv4/tcp_cong.c
parent	3614364527daa870264f6dde77f02853cdecd02c (diff)

diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index fde983f6376b..421ea1b918da 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk)
189	INET_ECN_dontxmit(sk);	189	INET_ECN_dontxmit(sk);
190	}	190	}
191		191
192	void tcp_reinit_congestion_control(struct sock *sk,	192	static void tcp_reinit_congestion_control(struct sock *sk,
193	const struct tcp_congestion_ops *ca)	193	const struct tcp_congestion_ops *ca)
194	{	194	{
195	struct inet_connection_sock *icsk = inet_csk(sk);	195	struct inet_connection_sock *icsk = inet_csk(sk);
196		196
@@ -338,7 +338,7 @@ out:
338	* tcp_reinit_congestion_control (if the current congestion control was	338	* tcp_reinit_congestion_control (if the current congestion control was
339	* already initialized.	339	* already initialized.
340	*/	340	*/
341	int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)	341	int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit)
342	{	342	{
343	struct inet_connection_sock *icsk = inet_csk(sk);	343	struct inet_connection_sock *icsk = inet_csk(sk);
344	const struct tcp_congestion_ops *ca;	344	const struct tcp_congestion_ops *ca;
@@ -360,9 +360,18 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
360	if (!ca) {	360	if (!ca) {
361	err = -ENOENT;	361	err = -ENOENT;
362	} else if (!load) {	362	} else if (!load) {
363	icsk->icsk_ca_ops = ca;	363	const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops;
364	if (!try_module_get(ca->owner))	364
		365	if (try_module_get(ca->owner)) {
		366	if (reinit) {
		367	tcp_reinit_congestion_control(sk, ca);
		368	} else {
		369	icsk->icsk_ca_ops = ca;
		370	module_put(old_ca->owner);
		371	}
		372	} else {
365	err = -EBUSY;	373	err = -EBUSY;
		374	}
366	} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||	375	} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
367	ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {	376	ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
368	err = -EPERM;	377	err = -EPERM;