aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/udp.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@sunset.davemloft.net>2007-06-05 18:18:43 -0400
committerDavid S. Miller <davem@sunset.davemloft.net>2007-06-07 16:40:50 -0400
commitdf2bc459a3ad71f8b44c358bf7169acf9caf4acd (patch)
tree143206b16d59c723be3c2cf2375a33b2a9306280 /net/ipv4/udp.c
parent3c0d2f3780fc94746c4842e965bd2570e2119bb6 (diff)
[UDP]: Revert 2-pass hashing changes.
This reverts changesets: 6aaf47fa48d3c44280810b1b470261d340e4ed87 b7b5f487ab39bc10ed0694af35651a03d9cb97ff de34ed91c4ffa4727964a832c46e624dd1495cf5 fc038410b4b1643766f8033f4940bcdb1dace633 There are still some correctness issues recently discovered which do not have a known fix that doesn't involve doing a full hash table scan on port bind. So revert for now. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r--net/ipv4/udp.c241
1 files changed, 68 insertions, 173 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5da703e699da..facb7e29304e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -114,36 +114,14 @@ DEFINE_RWLOCK(udp_hash_lock);
114 114
115static int udp_port_rover; 115static int udp_port_rover;
116 116
117/* 117static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[])
118 * Note about this hash function :
119 * Typical use is probably daddr = 0, only dport is going to vary hash
120 */
121static inline unsigned int udp_hash_port(__u16 port)
122{
123 return port;
124}
125
126static inline int __udp_lib_port_inuse(unsigned int hash, int port,
127 const struct sock *this_sk,
128 struct hlist_head udptable[],
129 const struct udp_get_port_ops *ops)
130{ 118{
131 struct sock *sk; 119 struct sock *sk;
132 struct hlist_node *node; 120 struct hlist_node *node;
133 struct inet_sock *inet;
134 121
135 sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) { 122 sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
136 if (sk->sk_hash != hash) 123 if (sk->sk_hash == num)
137 continue;
138 inet = inet_sk(sk);
139 if (inet->num != port)
140 continue;
141 if (this_sk) {
142 if (ops->saddr_cmp(sk, this_sk))
143 return 1;
144 } else if (ops->saddr_any(sk))
145 return 1; 124 return 1;
146 }
147 return 0; 125 return 0;
148} 126}
149 127
@@ -154,16 +132,16 @@ static inline int __udp_lib_port_inuse(unsigned int hash, int port,
154 * @snum: port number to look up 132 * @snum: port number to look up
155 * @udptable: hash list table, must be of UDP_HTABLE_SIZE 133 * @udptable: hash list table, must be of UDP_HTABLE_SIZE
156 * @port_rover: pointer to record of last unallocated port 134 * @port_rover: pointer to record of last unallocated port
157 * @ops: AF-dependent address operations 135 * @saddr_comp: AF-dependent comparison of bound local IP addresses
158 */ 136 */
159int __udp_lib_get_port(struct sock *sk, unsigned short snum, 137int __udp_lib_get_port(struct sock *sk, unsigned short snum,
160 struct hlist_head udptable[], int *port_rover, 138 struct hlist_head udptable[], int *port_rover,
161 const struct udp_get_port_ops *ops) 139 int (*saddr_comp)(const struct sock *sk1,
140 const struct sock *sk2 ) )
162{ 141{
163 struct hlist_node *node; 142 struct hlist_node *node;
164 struct hlist_head *head; 143 struct hlist_head *head;
165 struct sock *sk2; 144 struct sock *sk2;
166 unsigned int hash;
167 int error = 1; 145 int error = 1;
168 146
169 write_lock_bh(&udp_hash_lock); 147 write_lock_bh(&udp_hash_lock);
@@ -178,8 +156,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
178 for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { 156 for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
179 int size; 157 int size;
180 158
181 hash = ops->hash_port_and_rcv_saddr(result, sk); 159 head = &udptable[result & (UDP_HTABLE_SIZE - 1)];
182 head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
183 if (hlist_empty(head)) { 160 if (hlist_empty(head)) {
184 if (result > sysctl_local_port_range[1]) 161 if (result > sysctl_local_port_range[1])
185 result = sysctl_local_port_range[0] + 162 result = sysctl_local_port_range[0] +
@@ -204,16 +181,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
204 result = sysctl_local_port_range[0] 181 result = sysctl_local_port_range[0]
205 + ((result - sysctl_local_port_range[0]) & 182 + ((result - sysctl_local_port_range[0]) &
206 (UDP_HTABLE_SIZE - 1)); 183 (UDP_HTABLE_SIZE - 1));
207 hash = udp_hash_port(result); 184 if (! __udp_lib_lport_inuse(result, udptable))
208 if (__udp_lib_port_inuse(hash, result,
209 NULL, udptable, ops))
210 continue;
211 if (ops->saddr_any(sk))
212 break;
213
214 hash = ops->hash_port_and_rcv_saddr(result, sk);
215 if (! __udp_lib_port_inuse(hash, result,
216 sk, udptable, ops))
217 break; 185 break;
218 } 186 }
219 if (i >= (1 << 16) / UDP_HTABLE_SIZE) 187 if (i >= (1 << 16) / UDP_HTABLE_SIZE)
@@ -221,40 +189,21 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
221gotit: 189gotit:
222 *port_rover = snum = result; 190 *port_rover = snum = result;
223 } else { 191 } else {
224 hash = udp_hash_port(snum); 192 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
225 head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
226 193
227 sk_for_each(sk2, node, head) 194 sk_for_each(sk2, node, head)
228 if (sk2->sk_hash == hash && 195 if (sk2->sk_hash == snum &&
229 sk2 != sk && 196 sk2 != sk &&
230 inet_sk(sk2)->num == snum && 197 (!sk2->sk_reuse || !sk->sk_reuse) &&
231 (!sk2->sk_reuse || !sk->sk_reuse) && 198 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
232 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || 199 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
233 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 200 (*saddr_comp)(sk, sk2) )
234 ops->saddr_cmp(sk, sk2))
235 goto fail; 201 goto fail;
236
237 if (!ops->saddr_any(sk)) {
238 hash = ops->hash_port_and_rcv_saddr(snum, sk);
239 head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
240
241 sk_for_each(sk2, node, head)
242 if (sk2->sk_hash == hash &&
243 sk2 != sk &&
244 inet_sk(sk2)->num == snum &&
245 (!sk2->sk_reuse || !sk->sk_reuse) &&
246 (!sk2->sk_bound_dev_if ||
247 !sk->sk_bound_dev_if ||
248 sk2->sk_bound_dev_if ==
249 sk->sk_bound_dev_if) &&
250 ops->saddr_cmp(sk, sk2))
251 goto fail;
252 }
253 } 202 }
254 inet_sk(sk)->num = snum; 203 inet_sk(sk)->num = snum;
255 sk->sk_hash = hash; 204 sk->sk_hash = snum;
256 if (sk_unhashed(sk)) { 205 if (sk_unhashed(sk)) {
257 head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; 206 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
258 sk_add_node(sk, head); 207 sk_add_node(sk, head);
259 sock_prot_inc_use(sk->sk_prot); 208 sock_prot_inc_use(sk->sk_prot);
260 } 209 }
@@ -265,12 +214,12 @@ fail:
265} 214}
266 215
267int udp_get_port(struct sock *sk, unsigned short snum, 216int udp_get_port(struct sock *sk, unsigned short snum,
268 const struct udp_get_port_ops *ops) 217 int (*scmp)(const struct sock *, const struct sock *))
269{ 218{
270 return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, ops); 219 return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
271} 220}
272 221
273static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) 222int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
274{ 223{
275 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 224 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
276 225
@@ -279,33 +228,9 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
279 inet1->rcv_saddr == inet2->rcv_saddr )); 228 inet1->rcv_saddr == inet2->rcv_saddr ));
280} 229}
281 230
282static int ipv4_rcv_saddr_any(const struct sock *sk)
283{
284 return !inet_sk(sk)->rcv_saddr;
285}
286
287static inline unsigned int ipv4_hash_port_and_addr(__u16 port, __be32 addr)
288{
289 addr ^= addr >> 16;
290 addr ^= addr >> 8;
291 return port ^ addr;
292}
293
294static unsigned int ipv4_hash_port_and_rcv_saddr(__u16 port,
295 const struct sock *sk)
296{
297 return ipv4_hash_port_and_addr(port, inet_sk(sk)->rcv_saddr);
298}
299
300const struct udp_get_port_ops udp_ipv4_ops = {
301 .saddr_cmp = ipv4_rcv_saddr_equal,
302 .saddr_any = ipv4_rcv_saddr_any,
303 .hash_port_and_rcv_saddr = ipv4_hash_port_and_rcv_saddr,
304};
305
306static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) 231static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
307{ 232{
308 return udp_get_port(sk, snum, &udp_ipv4_ops); 233 return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
309} 234}
310 235
311/* UDP is nearly always wildcards out the wazoo, it makes no sense to try 236/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
@@ -317,77 +242,63 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
317{ 242{
318 struct sock *sk, *result = NULL; 243 struct sock *sk, *result = NULL;
319 struct hlist_node *node; 244 struct hlist_node *node;
320 unsigned int hash, hashwild; 245 unsigned short hnum = ntohs(dport);
321 int score, best = -1, hport = ntohs(dport); 246 int badness = -1;
322
323 hash = ipv4_hash_port_and_addr(hport, daddr);
324 hashwild = udp_hash_port(hport);
325 247
326 read_lock(&udp_hash_lock); 248 read_lock(&udp_hash_lock);
327 249 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
328lookup:
329
330 sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
331 struct inet_sock *inet = inet_sk(sk); 250 struct inet_sock *inet = inet_sk(sk);
332 251
333 if (sk->sk_hash != hash || ipv6_only_sock(sk) || 252 if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
334 inet->num != hport) 253 int score = (sk->sk_family == PF_INET ? 1 : 0);
335 continue; 254 if (inet->rcv_saddr) {
336 255 if (inet->rcv_saddr != daddr)
337 score = (sk->sk_family == PF_INET ? 1 : 0); 256 continue;
338 if (inet->rcv_saddr) { 257 score+=2;
339 if (inet->rcv_saddr != daddr) 258 }
340 continue; 259 if (inet->daddr) {
341 score+=2; 260 if (inet->daddr != saddr)
342 } 261 continue;
343 if (inet->daddr) { 262 score+=2;
344 if (inet->daddr != saddr) 263 }
345 continue; 264 if (inet->dport) {
346 score+=2; 265 if (inet->dport != sport)
347 } 266 continue;
348 if (inet->dport) { 267 score+=2;
349 if (inet->dport != sport) 268 }
350 continue; 269 if (sk->sk_bound_dev_if) {
351 score+=2; 270 if (sk->sk_bound_dev_if != dif)
352 } 271 continue;
353 if (sk->sk_bound_dev_if) { 272 score+=2;
354 if (sk->sk_bound_dev_if != dif) 273 }
355 continue; 274 if (score == 9) {
356 score+=2; 275 result = sk;
357 } 276 break;
358 if (score == 9) { 277 } else if (score > badness) {
359 result = sk; 278 result = sk;
360 goto found; 279 badness = score;
361 } else if (score > best) { 280 }
362 result = sk;
363 best = score;
364 } 281 }
365 } 282 }
366
367 if (hash != hashwild) {
368 hash = hashwild;
369 goto lookup;
370 }
371found:
372 if (result) 283 if (result)
373 sock_hold(result); 284 sock_hold(result);
374 read_unlock(&udp_hash_lock); 285 read_unlock(&udp_hash_lock);
375 return result; 286 return result;
376} 287}
377 288
378static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned int hnum, 289static inline struct sock *udp_v4_mcast_next(struct sock *sk,
379 int hport, __be32 loc_addr, 290 __be16 loc_port, __be32 loc_addr,
380 __be16 rmt_port, __be32 rmt_addr, 291 __be16 rmt_port, __be32 rmt_addr,
381 int dif) 292 int dif)
382{ 293{
383 struct hlist_node *node; 294 struct hlist_node *node;
384 struct sock *s = sk; 295 struct sock *s = sk;
296 unsigned short hnum = ntohs(loc_port);
385 297
386 sk_for_each_from(s, node) { 298 sk_for_each_from(s, node) {
387 struct inet_sock *inet = inet_sk(s); 299 struct inet_sock *inet = inet_sk(s);
388 300
389 if (s->sk_hash != hnum || 301 if (s->sk_hash != hnum ||
390 inet->num != hport ||
391 (inet->daddr && inet->daddr != rmt_addr) || 302 (inet->daddr && inet->daddr != rmt_addr) ||
392 (inet->dport != rmt_port && inet->dport) || 303 (inet->dport != rmt_port && inet->dport) ||
393 (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || 304 (inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
@@ -1221,45 +1132,29 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
1221 __be32 saddr, __be32 daddr, 1132 __be32 saddr, __be32 daddr,
1222 struct hlist_head udptable[]) 1133 struct hlist_head udptable[])
1223{ 1134{
1224 struct sock *sk, *skw, *sknext; 1135 struct sock *sk;
1225 int dif; 1136 int dif;
1226 int hport = ntohs(uh->dest);
1227 unsigned int hash = ipv4_hash_port_and_addr(hport, daddr);
1228 unsigned int hashwild = udp_hash_port(hport);
1229
1230 dif = skb->dev->ifindex;
1231 1137
1232 read_lock(&udp_hash_lock); 1138 read_lock(&udp_hash_lock);
1233 1139 sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
1234 sk = sk_head(&udptable[hash & (UDP_HTABLE_SIZE - 1)]); 1140 dif = skb->dev->ifindex;
1235 skw = sk_head(&udptable[hashwild & (UDP_HTABLE_SIZE - 1)]); 1141 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
1236
1237 sk = udp_v4_mcast_next(sk, hash, hport, daddr, uh->source, saddr, dif);
1238 if (!sk) {
1239 hash = hashwild;
1240 sk = udp_v4_mcast_next(skw, hash, hport, daddr, uh->source,
1241 saddr, dif);
1242 }
1243 if (sk) { 1142 if (sk) {
1143 struct sock *sknext = NULL;
1144
1244 do { 1145 do {
1245 struct sk_buff *skb1 = skb; 1146 struct sk_buff *skb1 = skb;
1246 sknext = udp_v4_mcast_next(sk_next(sk), hash, hport, 1147
1247 daddr, uh->source, saddr, dif); 1148 sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
1248 if (!sknext && hash != hashwild) { 1149 uh->source, saddr, dif);
1249 hash = hashwild;
1250 sknext = udp_v4_mcast_next(skw, hash, hport,
1251 daddr, uh->source, saddr, dif);
1252 }
1253 if (sknext) 1150 if (sknext)
1254 skb1 = skb_clone(skb, GFP_ATOMIC); 1151 skb1 = skb_clone(skb, GFP_ATOMIC);
1255 1152
1256 if (skb1) { 1153 if (skb1) {
1257 int ret = udp_queue_rcv_skb(sk, skb1); 1154 int ret = udp_queue_rcv_skb(sk, skb1);
1258 if (ret > 0) 1155 if (ret > 0)
1259 /* 1156 /* we should probably re-process instead
1260 * we should probably re-process 1157 * of dropping packets here. */
1261 * instead of dropping packets here.
1262 */
1263 kfree_skb(skb1); 1158 kfree_skb(skb1);
1264 } 1159 }
1265 sk = sknext; 1160 sk = sknext;
@@ -1346,7 +1241,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1346 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); 1241 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
1347 1242
1348 sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, 1243 sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
1349 skb->dev->ifindex, udptable); 1244 skb->dev->ifindex, udptable );
1350 1245
1351 if (sk != NULL) { 1246 if (sk != NULL) {
1352 int ret = udp_queue_rcv_skb(sk, skb); 1247 int ret = udp_queue_rcv_skb(sk, skb);