diff options
author | Eric Dumazet <dada1@cosmosbay.com> | 2009-01-27 00:35:35 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-01-27 00:35:35 -0500 |
commit | 98322f22eca889478045cf896b572250d03dc45f (patch) | |
tree | 22a06e97ece02db900f7d4f496639582b828a4ee /net/ipv4 | |
parent | 8527bec548e01a29c6d1928d20d6d3be71861482 (diff) |
udp: optimize bind(0) if many ports are in use
commit 9088c5609584684149f3fb5b065aa7f18dcb03ff
(udp: Improve port randomization) introduced a regression for UDP bind() syscall
to null port (getting a random port) in case lot of ports are already in use.
This is because we do about 28000 scans of very long chains (220 sockets per chain),
with many spin_lock_bh()/spin_unlock_bh() calls.
Fix this using a bitmap (64 bytes for current value of UDP_HTABLE_SIZE)
so that we scan chains at most once.
Instead of 250 ms per bind() call, we get after patch a time of 2.9 ms
Based on a report from Vitaly Mayatskikh
Reported-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Tested-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/udp.c | 55 |
1 files changed, 39 insertions, 16 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf5ab0581eba..b7faffe5c029 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -120,8 +120,11 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); | |||
120 | atomic_t udp_memory_allocated; | 120 | atomic_t udp_memory_allocated; |
121 | EXPORT_SYMBOL(udp_memory_allocated); | 121 | EXPORT_SYMBOL(udp_memory_allocated); |
122 | 122 | ||
123 | #define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE) | ||
124 | |||
123 | static int udp_lib_lport_inuse(struct net *net, __u16 num, | 125 | static int udp_lib_lport_inuse(struct net *net, __u16 num, |
124 | const struct udp_hslot *hslot, | 126 | const struct udp_hslot *hslot, |
127 | unsigned long *bitmap, | ||
125 | struct sock *sk, | 128 | struct sock *sk, |
126 | int (*saddr_comp)(const struct sock *sk1, | 129 | int (*saddr_comp)(const struct sock *sk1, |
127 | const struct sock *sk2)) | 130 | const struct sock *sk2)) |
@@ -132,12 +135,17 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, | |||
132 | sk_nulls_for_each(sk2, node, &hslot->head) | 135 | sk_nulls_for_each(sk2, node, &hslot->head) |
133 | if (net_eq(sock_net(sk2), net) && | 136 | if (net_eq(sock_net(sk2), net) && |
134 | sk2 != sk && | 137 | sk2 != sk && |
135 | sk2->sk_hash == num && | 138 | (bitmap || sk2->sk_hash == num) && |
136 | (!sk2->sk_reuse || !sk->sk_reuse) && | 139 | (!sk2->sk_reuse || !sk->sk_reuse) && |
137 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if | 140 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if |
138 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && | 141 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && |
139 | (*saddr_comp)(sk, sk2)) | 142 | (*saddr_comp)(sk, sk2)) { |
140 | return 1; | 143 | if (bitmap) |
144 | __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE, | ||
145 | bitmap); | ||
146 | else | ||
147 | return 1; | ||
148 | } | ||
141 | return 0; | 149 | return 0; |
142 | } | 150 | } |
143 | 151 | ||
@@ -160,32 +168,47 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
160 | if (!snum) { | 168 | if (!snum) { |
161 | int low, high, remaining; | 169 | int low, high, remaining; |
162 | unsigned rand; | 170 | unsigned rand; |
163 | unsigned short first; | 171 | unsigned short first, last; |
172 | DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); | ||
164 | 173 | ||
165 | inet_get_local_port_range(&low, &high); | 174 | inet_get_local_port_range(&low, &high); |
166 | remaining = (high - low) + 1; | 175 | remaining = (high - low) + 1; |
167 | 176 | ||
168 | rand = net_random(); | 177 | rand = net_random(); |
169 | snum = first = rand % remaining + low; | 178 | first = (((u64)rand * remaining) >> 32) + low; |
170 | rand |= 1; | 179 | /* |
171 | for (;;) { | 180 | * force rand to be an odd multiple of UDP_HTABLE_SIZE |
172 | hslot = &udptable->hash[udp_hashfn(net, snum)]; | 181 | */ |
182 | rand = (rand | 1) * UDP_HTABLE_SIZE; | ||
183 | for (last = first + UDP_HTABLE_SIZE; first != last; first++) { | ||
184 | hslot = &udptable->hash[udp_hashfn(net, first)]; | ||
185 | bitmap_zero(bitmap, PORTS_PER_CHAIN); | ||
173 | spin_lock_bh(&hslot->lock); | 186 | spin_lock_bh(&hslot->lock); |
174 | if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) | 187 | udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, |
175 | break; | 188 | saddr_comp); |
176 | spin_unlock_bh(&hslot->lock); | 189 | |
190 | snum = first; | ||
191 | /* | ||
192 | * Iterate on all possible values of snum for this hash. | ||
193 | * Using steps of an odd multiple of UDP_HTABLE_SIZE | ||
194 | * give us randomization and full range coverage. | ||
195 | */ | ||
177 | do { | 196 | do { |
178 | snum = snum + rand; | 197 | if (low <= snum && snum <= high && |
179 | } while (snum < low || snum > high); | 198 | !test_bit(snum / UDP_HTABLE_SIZE, bitmap)) |
180 | if (snum == first) | 199 | goto found; |
181 | goto fail; | 200 | snum += rand; |
201 | } while (snum != first); | ||
202 | spin_unlock_bh(&hslot->lock); | ||
182 | } | 203 | } |
204 | goto fail; | ||
183 | } else { | 205 | } else { |
184 | hslot = &udptable->hash[udp_hashfn(net, snum)]; | 206 | hslot = &udptable->hash[udp_hashfn(net, snum)]; |
185 | spin_lock_bh(&hslot->lock); | 207 | spin_lock_bh(&hslot->lock); |
186 | if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) | 208 | if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp)) |
187 | goto fail_unlock; | 209 | goto fail_unlock; |
188 | } | 210 | } |
211 | found: | ||
189 | inet_sk(sk)->num = snum; | 212 | inet_sk(sk)->num = snum; |
190 | sk->sk_hash = snum; | 213 | sk->sk_hash = snum; |
191 | if (sk_unhashed(sk)) { | 214 | if (sk_unhashed(sk)) { |