diff options
author | Alex Copot <alex.mihai.c@gmail.com> | 2012-04-12 18:21:45 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-04-14 15:28:55 -0400 |
commit | aacd9289af8b82f5fb01bcdd53d0e3406d1333c7 (patch) | |
tree | d918d09ce942b52f707676e0a648e5c6a84de1b4 | |
parent | c72e118334a2590f4f07d9e51490b902c33f5280 (diff) |
tcp: bind() use stronger condition for bind_conflict
We must try harder to get unique (addr, port) pairs when
doing port autoselection for sockets with the SO_REUSEADDR
option set.
We achieve this by adding a relaxation parameter to
inet_csk_bind_conflict. When the 'relax' parameter is off,
we return a conflict whenever the (addr, port) pair
currently being examined is not unique.
This tries to address the problems reported in patch:
8d238b25b1ec22a73b1c2206f111df2faaff8285
Revert "tcp: bind() fix when many ports are bound"
Tests were run that create many sockets and bind them to port 0
(autoselection) on 100 IPs. The results are, on average:
* 60000 sockets, 600 ports / IP:
* 0.210 s, 620 (IP, port) duplicates without patch
* 0.219 s, no duplicates with patch
* 100000 sockets, 1000 ports / IP:
* 0.371 s, 1720 duplicates without patch
* 0.373 s, no duplicates with patch
* 200000 sockets, 2000 ports / IP:
* 0.766 s, 6900 duplicates without patch
* 0.768 s, no duplicates with patch
* 500000 sockets, 5000 ports / IP:
* 2.227 s, 41500 duplicates without patch
* 2.284 s, no duplicates with patch
Signed-off-by: Alex Copot <alex.mihai.c@gmail.com>
Signed-off-by: Daniel Baluta <dbaluta@ixiacom.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/inet6_connection_sock.h | 2 | ||||
-rw-r--r-- | include/net/inet_connection_sock.h | 4 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 18 | ||||
-rw-r--r-- | net/ipv6/inet6_connection_sock.c | 2 |
4 files changed, 18 insertions, 8 deletions
diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 3207e58ee019..1866a676c810 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h | |||
@@ -23,7 +23,7 @@ struct sock; | |||
23 | struct sockaddr; | 23 | struct sockaddr; |
24 | 24 | ||
25 | extern int inet6_csk_bind_conflict(const struct sock *sk, | 25 | extern int inet6_csk_bind_conflict(const struct sock *sk, |
26 | const struct inet_bind_bucket *tb); | 26 | const struct inet_bind_bucket *tb, bool relax); |
27 | 27 | ||
28 | extern struct dst_entry* inet6_csk_route_req(struct sock *sk, | 28 | extern struct dst_entry* inet6_csk_route_req(struct sock *sk, |
29 | const struct request_sock *req); | 29 | const struct request_sock *req); |
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index dbf9aab34c82..46c9e2ccdf02 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h | |||
@@ -60,7 +60,7 @@ struct inet_connection_sock_af_ops { | |||
60 | #endif | 60 | #endif |
61 | void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); | 61 | void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); |
62 | int (*bind_conflict)(const struct sock *sk, | 62 | int (*bind_conflict)(const struct sock *sk, |
63 | const struct inet_bind_bucket *tb); | 63 | const struct inet_bind_bucket *tb, bool relax); |
64 | }; | 64 | }; |
65 | 65 | ||
66 | /** inet_connection_sock - INET connection oriented sock | 66 | /** inet_connection_sock - INET connection oriented sock |
@@ -245,7 +245,7 @@ extern struct request_sock *inet_csk_search_req(const struct sock *sk, | |||
245 | const __be32 raddr, | 245 | const __be32 raddr, |
246 | const __be32 laddr); | 246 | const __be32 laddr); |
247 | extern int inet_csk_bind_conflict(const struct sock *sk, | 247 | extern int inet_csk_bind_conflict(const struct sock *sk, |
248 | const struct inet_bind_bucket *tb); | 248 | const struct inet_bind_bucket *tb, bool relax); |
249 | extern int inet_csk_get_port(struct sock *sk, unsigned short snum); | 249 | extern int inet_csk_get_port(struct sock *sk, unsigned short snum); |
250 | 250 | ||
251 | extern struct dst_entry* inet_csk_route_req(struct sock *sk, | 251 | extern struct dst_entry* inet_csk_route_req(struct sock *sk, |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d19f32aca6ca..14409f111bc2 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -53,7 +53,7 @@ void inet_get_local_port_range(int *low, int *high) | |||
53 | EXPORT_SYMBOL(inet_get_local_port_range); | 53 | EXPORT_SYMBOL(inet_get_local_port_range); |
54 | 54 | ||
55 | int inet_csk_bind_conflict(const struct sock *sk, | 55 | int inet_csk_bind_conflict(const struct sock *sk, |
56 | const struct inet_bind_bucket *tb) | 56 | const struct inet_bind_bucket *tb, bool relax) |
57 | { | 57 | { |
58 | struct sock *sk2; | 58 | struct sock *sk2; |
59 | struct hlist_node *node; | 59 | struct hlist_node *node; |
@@ -79,6 +79,14 @@ int inet_csk_bind_conflict(const struct sock *sk, | |||
79 | sk2_rcv_saddr == sk_rcv_saddr(sk)) | 79 | sk2_rcv_saddr == sk_rcv_saddr(sk)) |
80 | break; | 80 | break; |
81 | } | 81 | } |
82 | if (!relax && reuse && sk2->sk_reuse && | ||
83 | sk2->sk_state != TCP_LISTEN) { | ||
84 | const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); | ||
85 | |||
86 | if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || | ||
87 | sk2_rcv_saddr == sk_rcv_saddr(sk)) | ||
88 | break; | ||
89 | } | ||
82 | } | 90 | } |
83 | } | 91 | } |
84 | return node != NULL; | 92 | return node != NULL; |
@@ -122,12 +130,13 @@ again: | |||
122 | (tb->num_owners < smallest_size || smallest_size == -1)) { | 130 | (tb->num_owners < smallest_size || smallest_size == -1)) { |
123 | smallest_size = tb->num_owners; | 131 | smallest_size = tb->num_owners; |
124 | smallest_rover = rover; | 132 | smallest_rover = rover; |
125 | if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { | 133 | if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 && |
134 | !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { | ||
126 | snum = smallest_rover; | 135 | snum = smallest_rover; |
127 | goto tb_found; | 136 | goto tb_found; |
128 | } | 137 | } |
129 | } | 138 | } |
130 | if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { | 139 | if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { |
131 | snum = rover; | 140 | snum = rover; |
132 | goto tb_found; | 141 | goto tb_found; |
133 | } | 142 | } |
@@ -178,12 +187,13 @@ tb_found: | |||
178 | goto success; | 187 | goto success; |
179 | } else { | 188 | } else { |
180 | ret = 1; | 189 | ret = 1; |
181 | if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { | 190 | if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { |
182 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && | 191 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && |
183 | smallest_size != -1 && --attempts >= 0) { | 192 | smallest_size != -1 && --attempts >= 0) { |
184 | spin_unlock(&head->lock); | 193 | spin_unlock(&head->lock); |
185 | goto again; | 194 | goto again; |
186 | } | 195 | } |
196 | |||
187 | goto fail_unlock; | 197 | goto fail_unlock; |
188 | } | 198 | } |
189 | } | 199 | } |
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 02dd203d9eac..e6cee5292a0b 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c | |||
@@ -28,7 +28,7 @@ | |||
28 | #include <net/inet6_connection_sock.h> | 28 | #include <net/inet6_connection_sock.h> |
29 | 29 | ||
30 | int inet6_csk_bind_conflict(const struct sock *sk, | 30 | int inet6_csk_bind_conflict(const struct sock *sk, |
31 | const struct inet_bind_bucket *tb) | 31 | const struct inet_bind_bucket *tb, bool relax) |
32 | { | 32 | { |
33 | const struct sock *sk2; | 33 | const struct sock *sk2; |
34 | const struct hlist_node *node; | 34 | const struct hlist_node *node; |