Diffstat (limited to 'net/ipv4')
-rw-r--r--   net/ipv4/ip_input.c    |  14
-rw-r--r--   net/ipv4/ip_output.c   |   6
-rw-r--r--   net/ipv4/tcp.c         |  14
-rw-r--r--   net/ipv4/tcp_input.c   |  30
-rw-r--r--   net/ipv4/tcp_output.c  |   8
-rw-r--r--   net/ipv4/udp.c         | 171
6 files changed, 162 insertions, 81 deletions
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 324e7e0fdb2a..97069399d864 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -329,6 +329,7 @@ drop:
 static inline int ip_rcv_finish(struct sk_buff *skb)
 {
 	const struct iphdr *iph = ip_hdr(skb);
+	struct rtable *rt;
 
 	/*
 	 * Initialise the virtual path cache for the packet. It describes
@@ -340,6 +341,8 @@ static inline int ip_rcv_finish(struct sk_buff *skb)
 		if (unlikely(err)) {
 			if (err == -EHOSTUNREACH)
 				IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+			else if (err == -ENETUNREACH)
+				IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
 			goto drop;
 		}
 	}
@@ -358,6 +361,12 @@ static inline int ip_rcv_finish(struct sk_buff *skb)
 	if (iph->ihl > 5 && ip_rcv_options(skb))
 		goto drop;
 
+	rt = (struct rtable*)skb->dst;
+	if (rt->rt_type == RTN_MULTICAST)
+		IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS);
+	else if (rt->rt_type == RTN_BROADCAST)
+		IP_INC_STATS_BH(IPSTATS_MIB_INBCASTPKTS);
+
 	return dst_input(skb);
 
 drop:
@@ -414,7 +423,10 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 		goto inhdr_error;
 
 	len = ntohs(iph->tot_len);
-	if (skb->len < len || len < (iph->ihl*4))
+	if (skb->len < len) {
+		IP_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
+		goto drop;
+	} else if (len < (iph->ihl*4))
 		goto inhdr_error;
 
 	/* Our transport medium may have padded the buffer out. Now we know it
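The counters touched in these hunks (InNoRoutes, InTruncatedPkts, InMcastPkts, InBcastPkts, plus the output-side equivalents below) belong to the extended IP MIB, which kernels of this vintage export in the two "IpExt:" lines of /proc/net/netstat. A minimal user-space sketch for dumping them, assuming that two-line name/value layout (the exact field list varies by kernel version):

/* Dump the IpExt extended IP counters from /proc/net/netstat. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/net/netstat", "r");
	char names[1024], values[1024];
	char *n, *v, *np, *vp;

	if (!f) {
		perror("/proc/net/netstat");
		return 1;
	}
	while (fgets(names, sizeof(names), f)) {
		if (strncmp(names, "IpExt:", 6) != 0)
			continue;
		if (!fgets(values, sizeof(values), f))
			break;
		/* first IpExt: line carries names, the second the values */
		n = strtok_r(names + 6, " \n", &np);
		v = strtok_r(values + 6, " \n", &vp);
		while (n && v) {
			printf("%s = %s\n", n, v);
			n = strtok_r(NULL, " \n", &np);
			v = strtok_r(NULL, " \n", &vp);
		}
		break;
	}
	fclose(f);
	return 0;
}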
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 534650cad3a8..d6427d918512 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -160,9 +160,15 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
 static inline int ip_finish_output2(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
+	struct rtable *rt = (struct rtable *)dst;
 	struct net_device *dev = dst->dev;
 	int hh_len = LL_RESERVED_SPACE(dev);
 
+	if (rt->rt_type == RTN_MULTICAST)
+		IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+	else if (rt->rt_type == RTN_BROADCAST)
+		IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS);
+
 	/* Be paranoid, rather than too clever. */
 	if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
 		struct sk_buff *skb2;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2cf9a898ce50..d6e488668171 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1573,14 +1573,12 @@ void tcp_close(struct sock *sk, long timeout)
 
 	sk_stream_mem_reclaim(sk);
 
-	/* As outlined in draft-ietf-tcpimpl-prob-03.txt, section
-	 * 3.10, we send a RST here because data was lost. To
-	 * witness the awful effects of the old behavior of always
-	 * doing a FIN, run an older 2.1.x kernel or 2.0.x, start
-	 * a bulk GET in an FTP client, suspend the process, wait
-	 * for the client to advertise a zero window, then kill -9
-	 * the FTP client, wheee... Note: timeout is always zero
-	 * in such a case.
+	/* As outlined in RFC 2525, section 2.17, we send a RST here because
+	 * data was lost. To witness the awful effects of the old behavior of
+	 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
+	 * GET in an FTP client, suspend the process, wait for the client to
+	 * advertise a zero window, then kill -9 the FTP client, wheee...
+	 * Note: timeout is always zero in such a case.
 	 */
 	if (data_was_unread) {
 		/* Unread data was tossed, zap the connection. */
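The behavior the rewritten comment describes is easy to observe from user space: calling close() on a TCP socket that still has unread receive data makes the kernel send a RST rather than a FIN, exactly as RFC 2525, section 2.17 recommends. A small demonstration sketch; the port number and the sleep are arbitrary choices, and error handling is omitted:

/* Connect with e.g. netcat, send a line, and the close() below resets
 * the connection instead of half-closing it, because the queued data
 * was never read. */
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int srv = socket(AF_INET, SOCK_STREAM, 0), one = 1, conn;
	struct sockaddr_in addr;

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(5555);	/* arbitrary test port */
	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

	setsockopt(srv, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
	bind(srv, (struct sockaddr *)&addr, sizeof(addr));
	listen(srv, 1);
	conn = accept(srv, NULL, NULL);

	sleep(5);	/* let the peer queue some data we never read */
	close(conn);	/* unread data -> kernel emits RST, not FIN */
	close(srv);
	return 0;
}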
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 051f0f815f17..7641b2761a14 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1265,20 +1265,15 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	return flag;
 }
 
-/* F-RTO can only be used if these conditions are satisfied:
- *  - there must be some unsent new data
- *  - the advertised window should allow sending it
- *  - TCP has never retransmitted anything other than head (SACK enhanced
- *    variant from Appendix B of RFC4138 is more robust here)
+/* F-RTO can only be used if TCP has never retransmitted anything other than
+ * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
  */
 int tcp_use_frto(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
-	if (!sysctl_tcp_frto || !tcp_send_head(sk) ||
-	    after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
-		  tp->snd_una + tp->snd_wnd))
+	if (!sysctl_tcp_frto)
 		return 0;
 
 	if (IsSackFrto())
@@ -2642,7 +2637,9 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag)
  * algorithm is not part of the F-RTO detection algorithm
  * given in RFC4138 but can be selected separately).
  * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss
- * and TCP falls back to conventional RTO recovery.
+ * and TCP falls back to conventional RTO recovery. F-RTO allows overriding
+ * of Nagle, this is done using frto_counter states 2 and 3, when a new data
+ * segment of any size sent during F-RTO, state 2 is upgraded to 3.
  *
  * Rationale: if the RTO was spurious, new ACKs should arrive from the
  * original window even after we transmit two new data segments.
@@ -2671,7 +2668,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 	inet_csk(sk)->icsk_retransmits = 0;
 
 	if (!before(tp->snd_una, tp->frto_highmark)) {
-		tcp_enter_frto_loss(sk, tp->frto_counter + 1, flag);
+		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
 		return 1;
 	}
 
@@ -2697,7 +2694,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 		return 1;
 	}
 
-	if ((tp->frto_counter == 2) &&
+	if ((tp->frto_counter >= 2) &&
 	    (!(flag&FLAG_FORWARD_PROGRESS) ||
 	     ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) {
 		/* RFC4138 shortcoming (see comment above) */
@@ -2710,10 +2707,19 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 	}
 
 	if (tp->frto_counter == 1) {
+		/* Sending of the next skb must be allowed or no FRTO */
+		if (!tcp_send_head(sk) ||
+		    after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
+			  tp->snd_una + tp->snd_wnd)) {
+			tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3),
+					    flag);
+			return 1;
+		}
+
 		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
 		tp->frto_counter = 2;
 		return 1;
-	} else /* frto_counter == 2 */ {
+	} else {
 		switch (sysctl_tcp_frto_response) {
 		case 2:
 			tcp_undo_spur_to_response(sk, flag);
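These hunks turn frto_counter into a small state machine: 1 after the timeout, 2 once the first ACK arrives and the advertised window allows sending a new probe segment, and 3 (per the new comment) once new data has actually gone out during F-RTO. A toy model of that bookkeeping, with invented helper and flag names, purely to make the transitions explicit; the real decisions live in tcp_process_frto() and tcp_enter_frto_loss():

#include <stdio.h>

/*
 *   1: RTO expired, waiting for the first ACK of the original data
 *   2: that ACK arrived and a new, never-retransmitted probe segment may
 *      be sent (tcp_nagle_test() bypasses Nagle in this state)
 *   3: new data has actually been sent during F-RTO
 */
enum frto_counter { FRTO_FIRST_ACK = 1, FRTO_PROBE = 2, FRTO_DATA_SENT = 3 };

/* Returns 1 when the model falls back to conventional RTO recovery. */
static int frto_model_ack(enum frto_counter *counter,
			  int probe_fits_window,   /* send_head within snd_wnd */
			  int forward_progress,    /* ACK advanced snd_una */
			  int non_original_sacked) /* SACK hit retransmitted data */
{
	if (*counter >= FRTO_PROBE &&
	    (!forward_progress || non_original_sacked))
		return 1;	/* RFC4138 shortcoming: treat the RTO as real loss */

	if (*counter == FRTO_FIRST_ACK) {
		if (!probe_fits_window)
			return 1;	/* cannot send the probe, give up on F-RTO */
		*counter = FRTO_PROBE;	/* the kernel also sets cwnd = in_flight + 2 */
		return 0;
	}

	/* Counter 2 or 3 with forward progress: the RTO was spurious and the
	 * response selected by sysctl_tcp_frto_response is applied. */
	return 0;
}

int main(void)
{
	enum frto_counter c = FRTO_FIRST_ACK;
	int fb;

	fb = frto_model_ack(&c, 1, 1, 0);	/* probe allowed */
	printf("after 1st ACK: fallback=%d counter=%d\n", fb, c);
	fb = frto_model_ack(&c, 1, 1, 0);	/* original window still advancing */
	printf("after 2nd ACK: fallback=%d (spurious RTO)\n", fb);
	return 0;
}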
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e70a6840cb64..0faacf9c419d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1035,8 +1035,10 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
 	if (nonagle & TCP_NAGLE_PUSH)
 		return 1;
 
-	/* Don't use the nagle rule for urgent data (or for the final FIN). */
-	if (tp->urg_mode ||
+	/* Don't use the nagle rule for urgent data (or for the final FIN).
+	 * Nagle can be ignored during F-RTO too (see RFC4138).
+	 */
+	if (tp->urg_mode || (tp->frto_counter == 2) ||
 	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
 		return 1;
 
@@ -2035,7 +2037,7 @@ void tcp_send_fin(struct sock *sk)
 /* We get here when a process closes a file descriptor (either due to
  * an explicit close() or as a byproduct of exit()'ing) and there
  * was unread data in the receive queue. This behavior is recommended
- * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM
+ * by RFC 2525, section 2.17. -DaveM
  */
 void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 {
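The extra (tp->frto_counter == 2) test exempts the F-RTO probe segment from Nagle: right after the timeout, previously unsent data has to go out even if it is smaller than an MSS, otherwise the algorithm could not tell a spurious RTO from a genuine loss. A simplified sketch of the resulting decision, not the kernel's tcp_nagle_test() itself (corking, TSO deferral and the Minshall variant are omitted):

#include <stdio.h>

static int nagle_allows_send(int full_sized_segment, int fin_or_urgent,
			     int nonagle_push, int frto_probe_pending,
			     int unacked_in_flight)
{
	if (nonagle_push || fin_or_urgent || full_sized_segment)
		return 1;
	if (frto_probe_pending)		/* frto_counter == 2: let the probe out */
		return 1;
	return !unacked_in_flight;	/* classic Nagle: one small segment at a time */
}

int main(void)
{
	/* A sub-MSS segment with data in flight is normally held back ... */
	printf("normal: %d\n", nagle_allows_send(0, 0, 0, 0, 1));
	/* ... but is released while F-RTO is waiting for its probe segment. */
	printf("F-RTO:  %d\n", nagle_allows_send(0, 0, 0, 1, 1));
	return 0;
}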
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cec0f2cc49b7..144970704c2c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -114,14 +114,33 @@ DEFINE_RWLOCK(udp_hash_lock);
 
 static int udp_port_rover;
 
-static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[])
+/*
+ * Note about this hash function :
+ * Typical use is probably daddr = 0, only dport is going to vary hash
+ */
+static inline unsigned int hash_port_and_addr(__u16 port, __be32 addr)
+{
+	addr ^= addr >> 16;
+	addr ^= addr >> 8;
+	return port ^ addr;
+}
+
+static inline int __udp_lib_port_inuse(unsigned int hash, int port,
+	__be32 daddr, struct hlist_head udptable[])
 {
 	struct sock *sk;
 	struct hlist_node *node;
+	struct inet_sock *inet;
 
-	sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
-		if (sk->sk_hash == num)
+	sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
+		if (sk->sk_hash != hash)
+			continue;
+		inet = inet_sk(sk);
+		if (inet->num != port)
+			continue;
+		if (inet->rcv_saddr == daddr)
 			return 1;
+	}
 	return 0;
 }
 
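The replacement hash keys the UDP table on local port and bound address together instead of the port alone. A small stand-alone sketch of hash_port_and_addr(), assuming the usual 128-entry UDP_HTABLE_SIZE and a few sample ports and addresses, to show how the two inputs combine into a slot index:

#include <stdio.h>
#include <stdint.h>

#define UDP_HTABLE_SIZE 128	/* assumed table size, as in this kernel */

/* Same mixing as the hash_port_and_addr() introduced above: fold the
 * address down and xor it with the (host-order) local port. */
static unsigned int hash_port_and_addr(uint16_t port, uint32_t addr)
{
	addr ^= addr >> 16;
	addr ^= addr >> 8;
	return port ^ addr;
}

int main(void)
{
	/* Sample values only: 0 is the wildcard address used for unbound
	 * sockets, the others are arbitrary 32-bit address patterns. */
	uint32_t addrs[] = { 0x00000000, 0x0a000001, 0x0a000002 };
	uint16_t ports[] = { 53, 5353, 32768 };
	unsigned int i, j;

	for (i = 0; i < 3; i++)
		for (j = 0; j < 3; j++)
			printf("port %5u addr 0x%08x -> slot %3u\n",
			       ports[j], addrs[i],
			       hash_port_and_addr(ports[j], addrs[i]) &
					(UDP_HTABLE_SIZE - 1));
	return 0;
}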
@@ -142,6 +161,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 	struct hlist_node *node;
 	struct hlist_head *head;
 	struct sock *sk2;
+	unsigned int hash;
 	int error = 1;
 
 	write_lock_bh(&udp_hash_lock);
@@ -156,7 +176,9 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
 			int size;
 
-			head = &udptable[result & (UDP_HTABLE_SIZE - 1)];
+			hash = hash_port_and_addr(result,
+						  inet_sk(sk)->rcv_saddr);
+			head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
 			if (hlist_empty(head)) {
 				if (result > sysctl_local_port_range[1])
 					result = sysctl_local_port_range[0] +
@@ -181,7 +203,10 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 			result = sysctl_local_port_range[0]
 				+ ((result - sysctl_local_port_range[0]) &
 				   (UDP_HTABLE_SIZE - 1));
-			if (! __udp_lib_lport_inuse(result, udptable))
+			hash = hash_port_and_addr(result,
+						  inet_sk(sk)->rcv_saddr);
+			if (! __udp_lib_port_inuse(hash, result,
+						   inet_sk(sk)->rcv_saddr, udptable))
 				break;
 		}
 		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
@@ -189,11 +214,13 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 gotit:
 		*port_rover = snum = result;
 	} else {
-		head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+		hash = hash_port_and_addr(snum, inet_sk(sk)->rcv_saddr);
+		head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
 
 		sk_for_each(sk2, node, head)
-			if (sk2->sk_hash == snum &&
+			if (sk2->sk_hash == hash &&
 			    sk2 != sk &&
+			    inet_sk(sk2)->num == snum &&
 			    (!sk2->sk_reuse || !sk->sk_reuse) &&
 			    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
 			     || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
@@ -201,9 +228,9 @@ gotit:
 				goto fail;
 	}
 	inet_sk(sk)->num = snum;
-	sk->sk_hash = snum;
+	sk->sk_hash = hash;
 	if (sk_unhashed(sk)) {
-		head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+		head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
 		sk_add_node(sk, head);
 		sock_prot_inc_use(sk->sk_prot);
 	}
@@ -242,63 +269,78 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
 {
 	struct sock *sk, *result = NULL;
 	struct hlist_node *node;
-	unsigned short hnum = ntohs(dport);
-	int badness = -1;
+	unsigned int hash, hashwild;
+	int score, best = -1;
+
+	hash = hash_port_and_addr(ntohs(dport), daddr);
+	hashwild = hash_port_and_addr(ntohs(dport), 0);
 
 	read_lock(&udp_hash_lock);
-	sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
+
+lookup:
+
+	sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
 		struct inet_sock *inet = inet_sk(sk);
 
-		if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
-			int score = (sk->sk_family == PF_INET ? 1 : 0);
-			if (inet->rcv_saddr) {
-				if (inet->rcv_saddr != daddr)
-					continue;
-				score+=2;
-			}
-			if (inet->daddr) {
-				if (inet->daddr != saddr)
-					continue;
-				score+=2;
-			}
-			if (inet->dport) {
-				if (inet->dport != sport)
-					continue;
-				score+=2;
-			}
-			if (sk->sk_bound_dev_if) {
-				if (sk->sk_bound_dev_if != dif)
-					continue;
-				score+=2;
-			}
-			if (score == 9) {
-				result = sk;
-				break;
-			} else if (score > badness) {
-				result = sk;
-				badness = score;
-			}
+		if (sk->sk_hash != hash || ipv6_only_sock(sk) ||
+		    inet->num != dport)
+			continue;
+
+		score = (sk->sk_family == PF_INET ? 1 : 0);
+		if (inet->rcv_saddr) {
+			if (inet->rcv_saddr != daddr)
+				continue;
+			score+=2;
+		}
+		if (inet->daddr) {
+			if (inet->daddr != saddr)
+				continue;
+			score+=2;
+		}
+		if (inet->dport) {
+			if (inet->dport != sport)
+				continue;
+			score+=2;
+		}
+		if (sk->sk_bound_dev_if) {
+			if (sk->sk_bound_dev_if != dif)
+				continue;
+			score+=2;
+		}
+		if (score == 9) {
+			result = sk;
+			goto found;
+		} else if (score > best) {
+			result = sk;
+			best = score;
 		}
 	}
+
+	if (hash != hashwild) {
+		hash = hashwild;
+		goto lookup;
+	}
+found:
 	if (result)
 		sock_hold(result);
 	read_unlock(&udp_hash_lock);
 	return result;
 }
 
-static inline struct sock *udp_v4_mcast_next(struct sock *sk,
-					     __be16 loc_port, __be32 loc_addr,
-					     __be16 rmt_port, __be32 rmt_addr,
-					     int dif)
+static inline struct sock *udp_v4_mcast_next(
+		struct sock *sk,
+		unsigned int hnum, __be16 loc_port, __be32 loc_addr,
+		__be16 rmt_port, __be32 rmt_addr,
+		int dif)
 {
 	struct hlist_node *node;
 	struct sock *s = sk;
-	unsigned short hnum = ntohs(loc_port);
 
 	sk_for_each_from(s, node) {
 		struct inet_sock *inet = inet_sk(s);
 
 		if (s->sk_hash != hnum ||
+		    inet->num != loc_port ||
 		    (inet->daddr && inet->daddr != rmt_addr) ||
 		    (inet->dport != rmt_port && inet->dport) ||
 		    (inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
@@ -1129,29 +1171,44 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
 				    __be32 saddr, __be32 daddr,
 				    struct hlist_head udptable[])
 {
-	struct sock *sk;
+	struct sock *sk, *skw, *sknext;
 	int dif;
+	unsigned int hash = hash_port_and_addr(ntohs(uh->dest), daddr);
+	unsigned int hashwild = hash_port_and_addr(ntohs(uh->dest), 0);
 
-	read_lock(&udp_hash_lock);
-	sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
 	dif = skb->dev->ifindex;
-	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
-	if (sk) {
-		struct sock *sknext = NULL;
 
+	read_lock(&udp_hash_lock);
+
+	sk = sk_head(&udptable[hash & (UDP_HTABLE_SIZE - 1)]);
+	skw = sk_head(&udptable[hashwild & (UDP_HTABLE_SIZE - 1)]);
+
+	sk = udp_v4_mcast_next(sk, hash, uh->dest, daddr, uh->source, saddr, dif);
+	if (!sk) {
+		hash = hashwild;
+		sk = udp_v4_mcast_next(skw, hash, uh->dest, daddr, uh->source,
+			saddr, dif);
+	}
+	if (sk) {
 		do {
 			struct sk_buff *skb1 = skb;
-
-			sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
-						   uh->source, saddr, dif);
+			sknext = udp_v4_mcast_next(sk_next(sk), hash, uh->dest,
+				daddr, uh->source, saddr, dif);
+			if (!sknext && hash != hashwild) {
+				hash = hashwild;
+				sknext = udp_v4_mcast_next(skw, hash, uh->dest,
+					daddr, uh->source, saddr, dif);
+			}
 			if (sknext)
 				skb1 = skb_clone(skb, GFP_ATOMIC);
 
 			if (skb1) {
 				int ret = udp_queue_rcv_skb(sk, skb1);
 				if (ret > 0)
-					/* we should probably re-process instead
-					 * of dropping packets here. */
+					/*
+					 * we should probably re-process
+					 * instead of dropping packets here.
+					 */
 					kfree_skb(skb1);
 			}
 			sk = sknext;