diff options
author | Balazs Scheidler <bazsi@balabit.hu> | 2010-10-21 07:06:43 -0400 |
---|---|---|
committer | Patrick McHardy <kaber@trash.net> | 2010-10-21 07:06:43 -0400 |
commit | 093d282321daeb19c107e5f1f16d7f68484f3ade (patch) | |
tree | 36e9eed23573068819bf67a91caac6ebf60d0d7c /net/ipv4 | |
parent | 6006db84a91838813cdad8a6622a4e39efe9ea47 (diff) |
tproxy: fix hash locking issue when using port redirection in __inet_inherit_port()
When __inet_inherit_port() is called on a tproxy connection the wrong locks are
held for the inet_bind_bucket it is added to. __inet_inherit_port() made an
implicit assumption that the listener's port number (and thus its bind bucket)
is the same as that of the child socket.
Unfortunately, if you're using the TPROXY target to redirect skbs to a
transparent proxy that assumption is not true anymore and things break.
This patch adds code to __inet_inherit_port() so that it can handle this case
by looking up or creating a new bind bucket for the child socket and updates
callers of __inet_inherit_port() to gracefully handle __inet_inherit_port()
failing.
Reported by and original patch from Stephen Buck <stephen.buck@exinda.com>.
See http://marc.info/?t=128169268200001&r=1&w=2 for the original discussion.
Signed-off-by: KOVACS Krisztian <hidden@balabit.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/inet_hashtables.c | 28 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 10 |
2 files changed, 33 insertions, 5 deletions
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fb7ad5a21ff3..1b344f30b463 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -101,19 +101,43 @@ void inet_put_port(struct sock *sk) | |||
101 | } | 101 | } |
102 | EXPORT_SYMBOL(inet_put_port); | 102 | EXPORT_SYMBOL(inet_put_port); |
103 | 103 | ||
104 | void __inet_inherit_port(struct sock *sk, struct sock *child) | 104 | int __inet_inherit_port(struct sock *sk, struct sock *child) |
105 | { | 105 | { |
106 | struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; | 106 | struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; |
107 | const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num, | 107 | unsigned short port = inet_sk(child)->inet_num; |
108 | const int bhash = inet_bhashfn(sock_net(sk), port, | ||
108 | table->bhash_size); | 109 | table->bhash_size); |
109 | struct inet_bind_hashbucket *head = &table->bhash[bhash]; | 110 | struct inet_bind_hashbucket *head = &table->bhash[bhash]; |
110 | struct inet_bind_bucket *tb; | 111 | struct inet_bind_bucket *tb; |
111 | 112 | ||
112 | spin_lock(&head->lock); | 113 | spin_lock(&head->lock); |
113 | tb = inet_csk(sk)->icsk_bind_hash; | 114 | tb = inet_csk(sk)->icsk_bind_hash; |
115 | if (tb->port != port) { | ||
116 | /* NOTE: using tproxy and redirecting skbs to a proxy | ||
117 | * on a different listener port breaks the assumption | ||
118 | * that the listener socket's icsk_bind_hash is the same | ||
119 | * as that of the child socket. We have to look up or | ||
120 | * create a new bind bucket for the child here. */ | ||
121 | struct hlist_node *node; | ||
122 | inet_bind_bucket_for_each(tb, node, &head->chain) { | ||
123 | if (net_eq(ib_net(tb), sock_net(sk)) && | ||
124 | tb->port == port) | ||
125 | break; | ||
126 | } | ||
127 | if (!node) { | ||
128 | tb = inet_bind_bucket_create(table->bind_bucket_cachep, | ||
129 | sock_net(sk), head, port); | ||
130 | if (!tb) { | ||
131 | spin_unlock(&head->lock); | ||
132 | return -ENOMEM; | ||
133 | } | ||
134 | } | ||
135 | } | ||
114 | sk_add_bind_node(child, &tb->owners); | 136 | sk_add_bind_node(child, &tb->owners); |
115 | inet_csk(child)->icsk_bind_hash = tb; | 137 | inet_csk(child)->icsk_bind_hash = tb; |
116 | spin_unlock(&head->lock); | 138 | spin_unlock(&head->lock); |
139 | |||
140 | return 0; | ||
117 | } | 141 | } |
118 | EXPORT_SYMBOL_GPL(__inet_inherit_port); | 142 | EXPORT_SYMBOL_GPL(__inet_inherit_port); |
119 | 143 | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a0232f3a358b..8f8527d41682 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -1422,7 +1422,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1422 | 1422 | ||
1423 | newsk = tcp_create_openreq_child(sk, req, skb); | 1423 | newsk = tcp_create_openreq_child(sk, req, skb); |
1424 | if (!newsk) | 1424 | if (!newsk) |
1425 | goto exit; | 1425 | goto exit_nonewsk; |
1426 | 1426 | ||
1427 | newsk->sk_gso_type = SKB_GSO_TCPV4; | 1427 | newsk->sk_gso_type = SKB_GSO_TCPV4; |
1428 | sk_setup_caps(newsk, dst); | 1428 | sk_setup_caps(newsk, dst); |
@@ -1469,16 +1469,20 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1469 | } | 1469 | } |
1470 | #endif | 1470 | #endif |
1471 | 1471 | ||
1472 | if (__inet_inherit_port(sk, newsk) < 0) { | ||
1473 | sock_put(newsk); | ||
1474 | goto exit; | ||
1475 | } | ||
1472 | __inet_hash_nolisten(newsk, NULL); | 1476 | __inet_hash_nolisten(newsk, NULL); |
1473 | __inet_inherit_port(sk, newsk); | ||
1474 | 1477 | ||
1475 | return newsk; | 1478 | return newsk; |
1476 | 1479 | ||
1477 | exit_overflow: | 1480 | exit_overflow: |
1478 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); | 1481 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); |
1482 | exit_nonewsk: | ||
1483 | dst_release(dst); | ||
1479 | exit: | 1484 | exit: |
1480 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); | 1485 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); |
1481 | dst_release(dst); | ||
1482 | return NULL; | 1486 | return NULL; |
1483 | } | 1487 | } |
1484 | EXPORT_SYMBOL(tcp_v4_syn_recv_sock); | 1488 | EXPORT_SYMBOL(tcp_v4_syn_recv_sock); |