aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorBalazs Scheidler <bazsi@balabit.hu>2010-10-21 07:06:43 -0400
committerPatrick McHardy <kaber@trash.net>2010-10-21 07:06:43 -0400
commit093d282321daeb19c107e5f1f16d7f68484f3ade (patch)
tree36e9eed23573068819bf67a91caac6ebf60d0d7c /net
parent6006db84a91838813cdad8a6622a4e39efe9ea47 (diff)
tproxy: fix hash locking issue when using port redirection in __inet_inherit_port()
When __inet_inherit_port() is called on a tproxy connection the wrong locks are held for the inet_bind_bucket it is added to. __inet_inherit_port() made an implicit assumption that the listener's port number (and thus its bind bucket). Unfortunately, if you're using the TPROXY target to redirect skbs to a transparent proxy that assumption is not true anymore and things break. This patch adds code to __inet_inherit_port() so that it can handle this case by looking up or creating a new bind bucket for the child socket and updates callers of __inet_inherit_port() to gracefully handle __inet_inherit_port() failing. Reported by and original patch from Stephen Buck <stephen.buck@exinda.com>. See http://marc.info/?t=128169268200001&r=1&w=2 for the original discussion. Signed-off-by: KOVACS Krisztian <hidden@balabit.hu> Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'net')
-rw-r--r--net/dccp/ipv4.c10
-rw-r--r--net/dccp/ipv6.c10
-rw-r--r--net/ipv4/inet_hashtables.c28
-rw-r--r--net/ipv4/tcp_ipv4.c10
-rw-r--r--net/ipv6/tcp_ipv6.c12
5 files changed, 55 insertions, 15 deletions
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index d4a166f0f391..3f69ea114829 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -392,7 +392,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
392 392
393 newsk = dccp_create_openreq_child(sk, req, skb); 393 newsk = dccp_create_openreq_child(sk, req, skb);
394 if (newsk == NULL) 394 if (newsk == NULL)
395 goto exit; 395 goto exit_nonewsk;
396 396
397 sk_setup_caps(newsk, dst); 397 sk_setup_caps(newsk, dst);
398 398
@@ -409,16 +409,20 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
409 409
410 dccp_sync_mss(newsk, dst_mtu(dst)); 410 dccp_sync_mss(newsk, dst_mtu(dst));
411 411
412 if (__inet_inherit_port(sk, newsk) < 0) {
413 sock_put(newsk);
414 goto exit;
415 }
412 __inet_hash_nolisten(newsk, NULL); 416 __inet_hash_nolisten(newsk, NULL);
413 __inet_inherit_port(sk, newsk);
414 417
415 return newsk; 418 return newsk;
416 419
417exit_overflow: 420exit_overflow:
418 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 421 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
422exit_nonewsk:
423 dst_release(dst);
419exit: 424exit:
420 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 425 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
421 dst_release(dst);
422 return NULL; 426 return NULL;
423} 427}
424 428
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6e3f32575df7..dca711df9b60 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -564,7 +564,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
564 564
565 newsk = dccp_create_openreq_child(sk, req, skb); 565 newsk = dccp_create_openreq_child(sk, req, skb);
566 if (newsk == NULL) 566 if (newsk == NULL)
567 goto out; 567 goto out_nonewsk;
568 568
569 /* 569 /*
570 * No need to charge this sock to the relevant IPv6 refcnt debug socks 570 * No need to charge this sock to the relevant IPv6 refcnt debug socks
@@ -632,18 +632,22 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
632 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 632 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
633 newinet->inet_rcv_saddr = LOOPBACK4_IPV6; 633 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
634 634
635 if (__inet_inherit_port(sk, newsk) < 0) {
636 sock_put(newsk);
637 goto out;
638 }
635 __inet6_hash(newsk, NULL); 639 __inet6_hash(newsk, NULL);
636 __inet_inherit_port(sk, newsk);
637 640
638 return newsk; 641 return newsk;
639 642
640out_overflow: 643out_overflow:
641 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 644 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
645out_nonewsk:
646 dst_release(dst);
642out: 647out:
643 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 648 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
644 if (opt != NULL && opt != np->opt) 649 if (opt != NULL && opt != np->opt)
645 sock_kfree_s(sk, opt, opt->tot_len); 650 sock_kfree_s(sk, opt, opt->tot_len);
646 dst_release(dst);
647 return NULL; 651 return NULL;
648} 652}
649 653
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index fb7ad5a21ff3..1b344f30b463 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -101,19 +101,43 @@ void inet_put_port(struct sock *sk)
101} 101}
102EXPORT_SYMBOL(inet_put_port); 102EXPORT_SYMBOL(inet_put_port);
103 103
104void __inet_inherit_port(struct sock *sk, struct sock *child) 104int __inet_inherit_port(struct sock *sk, struct sock *child)
105{ 105{
106 struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; 106 struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
107 const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num, 107 unsigned short port = inet_sk(child)->inet_num;
108 const int bhash = inet_bhashfn(sock_net(sk), port,
108 table->bhash_size); 109 table->bhash_size);
109 struct inet_bind_hashbucket *head = &table->bhash[bhash]; 110 struct inet_bind_hashbucket *head = &table->bhash[bhash];
110 struct inet_bind_bucket *tb; 111 struct inet_bind_bucket *tb;
111 112
112 spin_lock(&head->lock); 113 spin_lock(&head->lock);
113 tb = inet_csk(sk)->icsk_bind_hash; 114 tb = inet_csk(sk)->icsk_bind_hash;
115 if (tb->port != port) {
116 /* NOTE: using tproxy and redirecting skbs to a proxy
117 * on a different listener port breaks the assumption
118 * that the listener socket's icsk_bind_hash is the same
119 * as that of the child socket. We have to look up or
120 * create a new bind bucket for the child here. */
121 struct hlist_node *node;
122 inet_bind_bucket_for_each(tb, node, &head->chain) {
123 if (net_eq(ib_net(tb), sock_net(sk)) &&
124 tb->port == port)
125 break;
126 }
127 if (!node) {
128 tb = inet_bind_bucket_create(table->bind_bucket_cachep,
129 sock_net(sk), head, port);
130 if (!tb) {
131 spin_unlock(&head->lock);
132 return -ENOMEM;
133 }
134 }
135 }
114 sk_add_bind_node(child, &tb->owners); 136 sk_add_bind_node(child, &tb->owners);
115 inet_csk(child)->icsk_bind_hash = tb; 137 inet_csk(child)->icsk_bind_hash = tb;
116 spin_unlock(&head->lock); 138 spin_unlock(&head->lock);
139
140 return 0;
117} 141}
118EXPORT_SYMBOL_GPL(__inet_inherit_port); 142EXPORT_SYMBOL_GPL(__inet_inherit_port);
119 143
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a0232f3a358b..8f8527d41682 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1422,7 +1422,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1422 1422
1423 newsk = tcp_create_openreq_child(sk, req, skb); 1423 newsk = tcp_create_openreq_child(sk, req, skb);
1424 if (!newsk) 1424 if (!newsk)
1425 goto exit; 1425 goto exit_nonewsk;
1426 1426
1427 newsk->sk_gso_type = SKB_GSO_TCPV4; 1427 newsk->sk_gso_type = SKB_GSO_TCPV4;
1428 sk_setup_caps(newsk, dst); 1428 sk_setup_caps(newsk, dst);
@@ -1469,16 +1469,20 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1469 } 1469 }
1470#endif 1470#endif
1471 1471
1472 if (__inet_inherit_port(sk, newsk) < 0) {
1473 sock_put(newsk);
1474 goto exit;
1475 }
1472 __inet_hash_nolisten(newsk, NULL); 1476 __inet_hash_nolisten(newsk, NULL);
1473 __inet_inherit_port(sk, newsk);
1474 1477
1475 return newsk; 1478 return newsk;
1476 1479
1477exit_overflow: 1480exit_overflow:
1478 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1481 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1482exit_nonewsk:
1483 dst_release(dst);
1479exit: 1484exit:
1480 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 1485 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1481 dst_release(dst);
1482 return NULL; 1486 return NULL;
1483} 1487}
1484EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 1488EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index fe6d40418c0b..ba5258ef1c57 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1409,7 +1409,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1409 1409
1410 newsk = tcp_create_openreq_child(sk, req, skb); 1410 newsk = tcp_create_openreq_child(sk, req, skb);
1411 if (newsk == NULL) 1411 if (newsk == NULL)
1412 goto out; 1412 goto out_nonewsk;
1413 1413
1414 /* 1414 /*
1415 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1415 * No need to charge this sock to the relevant IPv6 refcnt debug socks
@@ -1497,18 +1497,22 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1497 } 1497 }
1498#endif 1498#endif
1499 1499
1500 if (__inet_inherit_port(sk, newsk) < 0) {
1501 sock_put(newsk);
1502 goto out;
1503 }
1500 __inet6_hash(newsk, NULL); 1504 __inet6_hash(newsk, NULL);
1501 __inet_inherit_port(sk, newsk);
1502 1505
1503 return newsk; 1506 return newsk;
1504 1507
1505out_overflow: 1508out_overflow:
1506 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1509 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1507out: 1510out_nonewsk:
1508 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1509 if (opt && opt != np->opt) 1511 if (opt && opt != np->opt)
1510 sock_kfree_s(sk, opt, opt->tot_len); 1512 sock_kfree_s(sk, opt, opt->tot_len);
1511 dst_release(dst); 1513 dst_release(dst);
1514out:
1515 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1512 return NULL; 1516 return NULL;
1513} 1517}
1514 1518