diff options
author | Balazs Scheidler <bazsi@balabit.hu> | 2010-10-21 07:06:43 -0400 |
---|---|---|
committer | Patrick McHardy <kaber@trash.net> | 2010-10-21 07:06:43 -0400 |
commit | 093d282321daeb19c107e5f1f16d7f68484f3ade (patch) | |
tree | 36e9eed23573068819bf67a91caac6ebf60d0d7c /net | |
parent | 6006db84a91838813cdad8a6622a4e39efe9ea47 (diff) |
tproxy: fix hash locking issue when using port redirection in __inet_inherit_port()
When __inet_inherit_port() is called on a tproxy connection the wrong locks are
held for the inet_bind_bucket it is added to. __inet_inherit_port() made an
implicit assumption that the listener's port number (and thus its bind bucket).
Unfortunately, if you're using the TPROXY target to redirect skbs to a
transparent proxy that assumption is not true anymore and things break.
This patch adds code to __inet_inherit_port() so that it can handle this case
by looking up or creating a new bind bucket for the child socket and updates
callers of __inet_inherit_port() to gracefully handle __inet_inherit_port()
failing.
Reported by and original patch from Stephen Buck <stephen.buck@exinda.com>.
See http://marc.info/?t=128169268200001&r=1&w=2 for the original discussion.
Signed-off-by: KOVACS Krisztian <hidden@balabit.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/dccp/ipv4.c | 10 | ||||
-rw-r--r-- | net/dccp/ipv6.c | 10 | ||||
-rw-r--r-- | net/ipv4/inet_hashtables.c | 28 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 10 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 12 |
5 files changed, 55 insertions, 15 deletions
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d4a166f0f391..3f69ea114829 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
@@ -392,7 +392,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
392 | 392 | ||
393 | newsk = dccp_create_openreq_child(sk, req, skb); | 393 | newsk = dccp_create_openreq_child(sk, req, skb); |
394 | if (newsk == NULL) | 394 | if (newsk == NULL) |
395 | goto exit; | 395 | goto exit_nonewsk; |
396 | 396 | ||
397 | sk_setup_caps(newsk, dst); | 397 | sk_setup_caps(newsk, dst); |
398 | 398 | ||
@@ -409,16 +409,20 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
409 | 409 | ||
410 | dccp_sync_mss(newsk, dst_mtu(dst)); | 410 | dccp_sync_mss(newsk, dst_mtu(dst)); |
411 | 411 | ||
412 | if (__inet_inherit_port(sk, newsk) < 0) { | ||
413 | sock_put(newsk); | ||
414 | goto exit; | ||
415 | } | ||
412 | __inet_hash_nolisten(newsk, NULL); | 416 | __inet_hash_nolisten(newsk, NULL); |
413 | __inet_inherit_port(sk, newsk); | ||
414 | 417 | ||
415 | return newsk; | 418 | return newsk; |
416 | 419 | ||
417 | exit_overflow: | 420 | exit_overflow: |
418 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); | 421 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); |
422 | exit_nonewsk: | ||
423 | dst_release(dst); | ||
419 | exit: | 424 | exit: |
420 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); | 425 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); |
421 | dst_release(dst); | ||
422 | return NULL; | 426 | return NULL; |
423 | } | 427 | } |
424 | 428 | ||
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6e3f32575df7..dca711df9b60 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c | |||
@@ -564,7 +564,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, | |||
564 | 564 | ||
565 | newsk = dccp_create_openreq_child(sk, req, skb); | 565 | newsk = dccp_create_openreq_child(sk, req, skb); |
566 | if (newsk == NULL) | 566 | if (newsk == NULL) |
567 | goto out; | 567 | goto out_nonewsk; |
568 | 568 | ||
569 | /* | 569 | /* |
570 | * No need to charge this sock to the relevant IPv6 refcnt debug socks | 570 | * No need to charge this sock to the relevant IPv6 refcnt debug socks |
@@ -632,18 +632,22 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, | |||
632 | newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; | 632 | newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; |
633 | newinet->inet_rcv_saddr = LOOPBACK4_IPV6; | 633 | newinet->inet_rcv_saddr = LOOPBACK4_IPV6; |
634 | 634 | ||
635 | if (__inet_inherit_port(sk, newsk) < 0) { | ||
636 | sock_put(newsk); | ||
637 | goto out; | ||
638 | } | ||
635 | __inet6_hash(newsk, NULL); | 639 | __inet6_hash(newsk, NULL); |
636 | __inet_inherit_port(sk, newsk); | ||
637 | 640 | ||
638 | return newsk; | 641 | return newsk; |
639 | 642 | ||
640 | out_overflow: | 643 | out_overflow: |
641 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); | 644 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); |
645 | out_nonewsk: | ||
646 | dst_release(dst); | ||
642 | out: | 647 | out: |
643 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); | 648 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); |
644 | if (opt != NULL && opt != np->opt) | 649 | if (opt != NULL && opt != np->opt) |
645 | sock_kfree_s(sk, opt, opt->tot_len); | 650 | sock_kfree_s(sk, opt, opt->tot_len); |
646 | dst_release(dst); | ||
647 | return NULL; | 651 | return NULL; |
648 | } | 652 | } |
649 | 653 | ||
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fb7ad5a21ff3..1b344f30b463 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -101,19 +101,43 @@ void inet_put_port(struct sock *sk) | |||
101 | } | 101 | } |
102 | EXPORT_SYMBOL(inet_put_port); | 102 | EXPORT_SYMBOL(inet_put_port); |
103 | 103 | ||
104 | void __inet_inherit_port(struct sock *sk, struct sock *child) | 104 | int __inet_inherit_port(struct sock *sk, struct sock *child) |
105 | { | 105 | { |
106 | struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; | 106 | struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; |
107 | const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num, | 107 | unsigned short port = inet_sk(child)->inet_num; |
108 | const int bhash = inet_bhashfn(sock_net(sk), port, | ||
108 | table->bhash_size); | 109 | table->bhash_size); |
109 | struct inet_bind_hashbucket *head = &table->bhash[bhash]; | 110 | struct inet_bind_hashbucket *head = &table->bhash[bhash]; |
110 | struct inet_bind_bucket *tb; | 111 | struct inet_bind_bucket *tb; |
111 | 112 | ||
112 | spin_lock(&head->lock); | 113 | spin_lock(&head->lock); |
113 | tb = inet_csk(sk)->icsk_bind_hash; | 114 | tb = inet_csk(sk)->icsk_bind_hash; |
115 | if (tb->port != port) { | ||
116 | /* NOTE: using tproxy and redirecting skbs to a proxy | ||
117 | * on a different listener port breaks the assumption | ||
118 | * that the listener socket's icsk_bind_hash is the same | ||
119 | * as that of the child socket. We have to look up or | ||
120 | * create a new bind bucket for the child here. */ | ||
121 | struct hlist_node *node; | ||
122 | inet_bind_bucket_for_each(tb, node, &head->chain) { | ||
123 | if (net_eq(ib_net(tb), sock_net(sk)) && | ||
124 | tb->port == port) | ||
125 | break; | ||
126 | } | ||
127 | if (!node) { | ||
128 | tb = inet_bind_bucket_create(table->bind_bucket_cachep, | ||
129 | sock_net(sk), head, port); | ||
130 | if (!tb) { | ||
131 | spin_unlock(&head->lock); | ||
132 | return -ENOMEM; | ||
133 | } | ||
134 | } | ||
135 | } | ||
114 | sk_add_bind_node(child, &tb->owners); | 136 | sk_add_bind_node(child, &tb->owners); |
115 | inet_csk(child)->icsk_bind_hash = tb; | 137 | inet_csk(child)->icsk_bind_hash = tb; |
116 | spin_unlock(&head->lock); | 138 | spin_unlock(&head->lock); |
139 | |||
140 | return 0; | ||
117 | } | 141 | } |
118 | EXPORT_SYMBOL_GPL(__inet_inherit_port); | 142 | EXPORT_SYMBOL_GPL(__inet_inherit_port); |
119 | 143 | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a0232f3a358b..8f8527d41682 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -1422,7 +1422,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1422 | 1422 | ||
1423 | newsk = tcp_create_openreq_child(sk, req, skb); | 1423 | newsk = tcp_create_openreq_child(sk, req, skb); |
1424 | if (!newsk) | 1424 | if (!newsk) |
1425 | goto exit; | 1425 | goto exit_nonewsk; |
1426 | 1426 | ||
1427 | newsk->sk_gso_type = SKB_GSO_TCPV4; | 1427 | newsk->sk_gso_type = SKB_GSO_TCPV4; |
1428 | sk_setup_caps(newsk, dst); | 1428 | sk_setup_caps(newsk, dst); |
@@ -1469,16 +1469,20 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1469 | } | 1469 | } |
1470 | #endif | 1470 | #endif |
1471 | 1471 | ||
1472 | if (__inet_inherit_port(sk, newsk) < 0) { | ||
1473 | sock_put(newsk); | ||
1474 | goto exit; | ||
1475 | } | ||
1472 | __inet_hash_nolisten(newsk, NULL); | 1476 | __inet_hash_nolisten(newsk, NULL); |
1473 | __inet_inherit_port(sk, newsk); | ||
1474 | 1477 | ||
1475 | return newsk; | 1478 | return newsk; |
1476 | 1479 | ||
1477 | exit_overflow: | 1480 | exit_overflow: |
1478 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); | 1481 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); |
1482 | exit_nonewsk: | ||
1483 | dst_release(dst); | ||
1479 | exit: | 1484 | exit: |
1480 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); | 1485 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); |
1481 | dst_release(dst); | ||
1482 | return NULL; | 1486 | return NULL; |
1483 | } | 1487 | } |
1484 | EXPORT_SYMBOL(tcp_v4_syn_recv_sock); | 1488 | EXPORT_SYMBOL(tcp_v4_syn_recv_sock); |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fe6d40418c0b..ba5258ef1c57 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -1409,7 +1409,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1409 | 1409 | ||
1410 | newsk = tcp_create_openreq_child(sk, req, skb); | 1410 | newsk = tcp_create_openreq_child(sk, req, skb); |
1411 | if (newsk == NULL) | 1411 | if (newsk == NULL) |
1412 | goto out; | 1412 | goto out_nonewsk; |
1413 | 1413 | ||
1414 | /* | 1414 | /* |
1415 | * No need to charge this sock to the relevant IPv6 refcnt debug socks | 1415 | * No need to charge this sock to the relevant IPv6 refcnt debug socks |
@@ -1497,18 +1497,22 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1497 | } | 1497 | } |
1498 | #endif | 1498 | #endif |
1499 | 1499 | ||
1500 | if (__inet_inherit_port(sk, newsk) < 0) { | ||
1501 | sock_put(newsk); | ||
1502 | goto out; | ||
1503 | } | ||
1500 | __inet6_hash(newsk, NULL); | 1504 | __inet6_hash(newsk, NULL); |
1501 | __inet_inherit_port(sk, newsk); | ||
1502 | 1505 | ||
1503 | return newsk; | 1506 | return newsk; |
1504 | 1507 | ||
1505 | out_overflow: | 1508 | out_overflow: |
1506 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); | 1509 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); |
1507 | out: | 1510 | out_nonewsk: |
1508 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); | ||
1509 | if (opt && opt != np->opt) | 1511 | if (opt && opt != np->opt) |
1510 | sock_kfree_s(sk, opt, opt->tot_len); | 1512 | sock_kfree_s(sk, opt, opt->tot_len); |
1511 | dst_release(dst); | 1513 | dst_release(dst); |
1514 | out: | ||
1515 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); | ||
1512 | return NULL; | 1516 | return NULL; |
1513 | } | 1517 | } |
1514 | 1518 | ||