aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Alex Copot <alex.mihai.c@gmail.com> 2012-04-12 18:21:45 -0400
committer: David S. Miller <davem@davemloft.net> 2012-04-14 15:28:55 -0400
commit: aacd9289af8b82f5fb01bcdd53d0e3406d1333c7 (patch)
tree: d918d09ce942b52f707676e0a648e5c6a84de1b4
parent: c72e118334a2590f4f07d9e51490b902c33f5280 (diff)
tcp: bind() use stronger condition for bind_conflict
We must try harder to get unique (addr, port) pairs when doing port autoselection for sockets with the SO_REUSEADDR option set.

We achieve this by adding a relaxation parameter to inet_csk_bind_conflict. When the 'relax' parameter is off, we return a conflict whenever the currently searched (addr, port) pair is not unique.

This tries to address the problems reported in patch:
    8d238b25b1ec22a73b1c2206f111df2faaff8285
    Revert "tcp: bind() fix when many ports are bound"

Tests were run for creating and binding(0) many sockets on 100 IPs. The results are, on average:

* 60000 sockets, 600 ports / IP:
    * 0.210 s, 620 (IP, port) duplicates without patch
    * 0.219 s, no duplicates with patch
* 100000 sockets, 1000 ports / IP:
    * 0.371 s, 1720 duplicates without patch
    * 0.373 s, no duplicates with patch
* 200000 sockets, 2000 ports / IP:
    * 0.766 s, 6900 duplicates without patch
    * 0.768 s, no duplicates with patch
* 500000 sockets, 5000 ports / IP:
    * 2.227 s, 41500 duplicates without patch
    * 2.284 s, no duplicates with patch

Signed-off-by: Alex Copot <alex.mihai.c@gmail.com>
Signed-off-by: Daniel Baluta <dbaluta@ixiacom.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/inet6_connection_sock.h 2
-rw-r--r-- include/net/inet_connection_sock.h 4
-rw-r--r-- net/ipv4/inet_connection_sock.c 18
-rw-r--r-- net/ipv6/inet6_connection_sock.c 2
4 files changed, 18 insertions, 8 deletions
diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index 3207e58ee019..1866a676c810 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -23,7 +23,7 @@ struct sock;
23struct sockaddr; 23struct sockaddr;
24 24
25extern int inet6_csk_bind_conflict(const struct sock *sk, 25extern int inet6_csk_bind_conflict(const struct sock *sk,
26 const struct inet_bind_bucket *tb); 26 const struct inet_bind_bucket *tb, bool relax);
27 27
28extern struct dst_entry* inet6_csk_route_req(struct sock *sk, 28extern struct dst_entry* inet6_csk_route_req(struct sock *sk,
29 const struct request_sock *req); 29 const struct request_sock *req);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index dbf9aab34c82..46c9e2ccdf02 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -60,7 +60,7 @@ struct inet_connection_sock_af_ops {
60#endif 60#endif
61 void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); 61 void (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
62 int (*bind_conflict)(const struct sock *sk, 62 int (*bind_conflict)(const struct sock *sk,
63 const struct inet_bind_bucket *tb); 63 const struct inet_bind_bucket *tb, bool relax);
64}; 64};
65 65
66/** inet_connection_sock - INET connection oriented sock 66/** inet_connection_sock - INET connection oriented sock
@@ -245,7 +245,7 @@ extern struct request_sock *inet_csk_search_req(const struct sock *sk,
245 const __be32 raddr, 245 const __be32 raddr,
246 const __be32 laddr); 246 const __be32 laddr);
247extern int inet_csk_bind_conflict(const struct sock *sk, 247extern int inet_csk_bind_conflict(const struct sock *sk,
248 const struct inet_bind_bucket *tb); 248 const struct inet_bind_bucket *tb, bool relax);
249extern int inet_csk_get_port(struct sock *sk, unsigned short snum); 249extern int inet_csk_get_port(struct sock *sk, unsigned short snum);
250 250
251extern struct dst_entry* inet_csk_route_req(struct sock *sk, 251extern struct dst_entry* inet_csk_route_req(struct sock *sk,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d19f32aca6ca..14409f111bc2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -53,7 +53,7 @@ void inet_get_local_port_range(int *low, int *high)
53EXPORT_SYMBOL(inet_get_local_port_range); 53EXPORT_SYMBOL(inet_get_local_port_range);
54 54
55int inet_csk_bind_conflict(const struct sock *sk, 55int inet_csk_bind_conflict(const struct sock *sk,
56 const struct inet_bind_bucket *tb) 56 const struct inet_bind_bucket *tb, bool relax)
57{ 57{
58 struct sock *sk2; 58 struct sock *sk2;
59 struct hlist_node *node; 59 struct hlist_node *node;
@@ -79,6 +79,14 @@ int inet_csk_bind_conflict(const struct sock *sk,
79 sk2_rcv_saddr == sk_rcv_saddr(sk)) 79 sk2_rcv_saddr == sk_rcv_saddr(sk))
80 break; 80 break;
81 } 81 }
82 if (!relax && reuse && sk2->sk_reuse &&
83 sk2->sk_state != TCP_LISTEN) {
84 const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
85
86 if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
87 sk2_rcv_saddr == sk_rcv_saddr(sk))
88 break;
89 }
82 } 90 }
83 } 91 }
84 return node != NULL; 92 return node != NULL;
@@ -122,12 +130,13 @@ again:
122 (tb->num_owners < smallest_size || smallest_size == -1)) { 130 (tb->num_owners < smallest_size || smallest_size == -1)) {
123 smallest_size = tb->num_owners; 131 smallest_size = tb->num_owners;
124 smallest_rover = rover; 132 smallest_rover = rover;
125 if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { 133 if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 &&
134 !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
126 snum = smallest_rover; 135 snum = smallest_rover;
127 goto tb_found; 136 goto tb_found;
128 } 137 }
129 } 138 }
130 if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { 139 if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
131 snum = rover; 140 snum = rover;
132 goto tb_found; 141 goto tb_found;
133 } 142 }
@@ -178,12 +187,13 @@ tb_found:
178 goto success; 187 goto success;
179 } else { 188 } else {
180 ret = 1; 189 ret = 1;
181 if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { 190 if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
182 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && 191 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
183 smallest_size != -1 && --attempts >= 0) { 192 smallest_size != -1 && --attempts >= 0) {
184 spin_unlock(&head->lock); 193 spin_unlock(&head->lock);
185 goto again; 194 goto again;
186 } 195 }
196
187 goto fail_unlock; 197 goto fail_unlock;
188 } 198 }
189 } 199 }
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 02dd203d9eac..e6cee5292a0b 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -28,7 +28,7 @@
28#include <net/inet6_connection_sock.h> 28#include <net/inet6_connection_sock.h>
29 29
30int inet6_csk_bind_conflict(const struct sock *sk, 30int inet6_csk_bind_conflict(const struct sock *sk,
31 const struct inet_bind_bucket *tb) 31 const struct inet_bind_bucket *tb, bool relax)
32{ 32{
33 const struct sock *sk2; 33 const struct sock *sk2;
34 const struct hlist_node *node; 34 const struct hlist_node *node;