diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/devinet.c | 16 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 5 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 34 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 28 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 7 | ||||
-rw-r--r-- | net/ipv4/raw.c | 24 | ||||
-rw-r--r-- | net/ipv4/route.c | 8 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 59 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 4 | ||||
-rw-r--r-- | net/ipv4/udp.c | 77 |
10 files changed, 177 insertions, 85 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e92f1fd28aa5..5df2f6a0b0f0 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -1077,12 +1077,16 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, | |||
1077 | ip_mc_up(in_dev); | 1077 | ip_mc_up(in_dev); |
1078 | /* fall through */ | 1078 | /* fall through */ |
1079 | case NETDEV_CHANGEADDR: | 1079 | case NETDEV_CHANGEADDR: |
1080 | if (IN_DEV_ARP_NOTIFY(in_dev)) | 1080 | /* Send gratuitous ARP to notify of link change */ |
1081 | arp_send(ARPOP_REQUEST, ETH_P_ARP, | 1081 | if (IN_DEV_ARP_NOTIFY(in_dev)) { |
1082 | in_dev->ifa_list->ifa_address, | 1082 | struct in_ifaddr *ifa = in_dev->ifa_list; |
1083 | dev, | 1083 | |
1084 | in_dev->ifa_list->ifa_address, | 1084 | if (ifa) |
1085 | NULL, dev->dev_addr, NULL); | 1085 | arp_send(ARPOP_REQUEST, ETH_P_ARP, |
1086 | ifa->ifa_address, dev, | ||
1087 | ifa->ifa_address, NULL, | ||
1088 | dev->dev_addr, NULL); | ||
1089 | } | ||
1086 | break; | 1090 | break; |
1087 | case NETDEV_DOWN: | 1091 | case NETDEV_DOWN: |
1088 | ip_mc_down(in_dev); | 1092 | ip_mc_down(in_dev); |
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index e2f950592566..aa00398be80e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -229,14 +229,17 @@ unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, | |||
229 | */ | 229 | */ |
230 | 230 | ||
231 | int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | 231 | int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, |
232 | struct net_device *dev, __be32 *spec_dst, u32 *itag) | 232 | struct net_device *dev, __be32 *spec_dst, |
233 | u32 *itag, u32 mark) | ||
233 | { | 234 | { |
234 | struct in_device *in_dev; | 235 | struct in_device *in_dev; |
235 | struct flowi fl = { .nl_u = { .ip4_u = | 236 | struct flowi fl = { .nl_u = { .ip4_u = |
236 | { .daddr = src, | 237 | { .daddr = src, |
237 | .saddr = dst, | 238 | .saddr = dst, |
238 | .tos = tos } }, | 239 | .tos = tos } }, |
240 | .mark = mark, | ||
239 | .iif = oif }; | 241 | .iif = oif }; |
242 | |||
240 | struct fib_result res; | 243 | struct fib_result res; |
241 | int no_addr, rpf; | 244 | int no_addr, rpf; |
242 | int ret; | 245 | int ret; |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 4351ca2cf0b8..537731b3bcb3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -446,6 +446,28 @@ extern int sysctl_tcp_synack_retries; | |||
446 | 446 | ||
447 | EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); | 447 | EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); |
448 | 448 | ||
449 | /* Decide when to expire the request and when to resend SYN-ACK */ | ||
450 | static inline void syn_ack_recalc(struct request_sock *req, const int thresh, | ||
451 | const int max_retries, | ||
452 | const u8 rskq_defer_accept, | ||
453 | int *expire, int *resend) | ||
454 | { | ||
455 | if (!rskq_defer_accept) { | ||
456 | *expire = req->retrans >= thresh; | ||
457 | *resend = 1; | ||
458 | return; | ||
459 | } | ||
460 | *expire = req->retrans >= thresh && | ||
461 | (!inet_rsk(req)->acked || req->retrans >= max_retries); | ||
462 | /* | ||
463 | * Do not resend while waiting for data after ACK, | ||
464 | * start to resend on end of deferring period to give | ||
465 | * last chance for data or ACK to create established socket. | ||
466 | */ | ||
467 | *resend = !inet_rsk(req)->acked || | ||
468 | req->retrans >= rskq_defer_accept - 1; | ||
469 | } | ||
470 | |||
449 | void inet_csk_reqsk_queue_prune(struct sock *parent, | 471 | void inet_csk_reqsk_queue_prune(struct sock *parent, |
450 | const unsigned long interval, | 472 | const unsigned long interval, |
451 | const unsigned long timeout, | 473 | const unsigned long timeout, |
@@ -501,9 +523,15 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, | |||
501 | reqp=&lopt->syn_table[i]; | 523 | reqp=&lopt->syn_table[i]; |
502 | while ((req = *reqp) != NULL) { | 524 | while ((req = *reqp) != NULL) { |
503 | if (time_after_eq(now, req->expires)) { | 525 | if (time_after_eq(now, req->expires)) { |
504 | if ((req->retrans < thresh || | 526 | int expire = 0, resend = 0; |
505 | (inet_rsk(req)->acked && req->retrans < max_retries)) | 527 | |
506 | && !req->rsk_ops->rtx_syn_ack(parent, req)) { | 528 | syn_ack_recalc(req, thresh, max_retries, |
529 | queue->rskq_defer_accept, | ||
530 | &expire, &resend); | ||
531 | if (!expire && | ||
532 | (!resend || | ||
533 | !req->rsk_ops->rtx_syn_ack(parent, req) || | ||
534 | inet_rsk(req)->acked)) { | ||
507 | unsigned long timeo; | 535 | unsigned long timeo; |
508 | 536 | ||
509 | if (req->retrans++ == 0) | 537 | if (req->retrans++ == 0) |
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 41ada9904d31..143333852624 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -1464,7 +1464,7 @@ static void ipgre_tap_setup(struct net_device *dev) | |||
1464 | 1464 | ||
1465 | ether_setup(dev); | 1465 | ether_setup(dev); |
1466 | 1466 | ||
1467 | dev->netdev_ops = &ipgre_netdev_ops; | 1467 | dev->netdev_ops = &ipgre_tap_netdev_ops; |
1468 | dev->destructor = free_netdev; | 1468 | dev->destructor = free_netdev; |
1469 | 1469 | ||
1470 | dev->iflink = 0; | 1470 | dev->iflink = 0; |
@@ -1525,25 +1525,29 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], | |||
1525 | if (t->dev != dev) | 1525 | if (t->dev != dev) |
1526 | return -EEXIST; | 1526 | return -EEXIST; |
1527 | } else { | 1527 | } else { |
1528 | unsigned nflags = 0; | ||
1529 | |||
1530 | t = nt; | 1528 | t = nt; |
1531 | 1529 | ||
1532 | if (ipv4_is_multicast(p.iph.daddr)) | 1530 | if (dev->type != ARPHRD_ETHER) { |
1533 | nflags = IFF_BROADCAST; | 1531 | unsigned nflags = 0; |
1534 | else if (p.iph.daddr) | ||
1535 | nflags = IFF_POINTOPOINT; | ||
1536 | 1532 | ||
1537 | if ((dev->flags ^ nflags) & | 1533 | if (ipv4_is_multicast(p.iph.daddr)) |
1538 | (IFF_POINTOPOINT | IFF_BROADCAST)) | 1534 | nflags = IFF_BROADCAST; |
1539 | return -EINVAL; | 1535 | else if (p.iph.daddr) |
1536 | nflags = IFF_POINTOPOINT; | ||
1537 | |||
1538 | if ((dev->flags ^ nflags) & | ||
1539 | (IFF_POINTOPOINT | IFF_BROADCAST)) | ||
1540 | return -EINVAL; | ||
1541 | } | ||
1540 | 1542 | ||
1541 | ipgre_tunnel_unlink(ign, t); | 1543 | ipgre_tunnel_unlink(ign, t); |
1542 | t->parms.iph.saddr = p.iph.saddr; | 1544 | t->parms.iph.saddr = p.iph.saddr; |
1543 | t->parms.iph.daddr = p.iph.daddr; | 1545 | t->parms.iph.daddr = p.iph.daddr; |
1544 | t->parms.i_key = p.i_key; | 1546 | t->parms.i_key = p.i_key; |
1545 | memcpy(dev->dev_addr, &p.iph.saddr, 4); | 1547 | if (dev->type != ARPHRD_ETHER) { |
1546 | memcpy(dev->broadcast, &p.iph.daddr, 4); | 1548 | memcpy(dev->dev_addr, &p.iph.saddr, 4); |
1549 | memcpy(dev->broadcast, &p.iph.daddr, 4); | ||
1550 | } | ||
1547 | ipgre_tunnel_link(ign, t); | 1551 | ipgre_tunnel_link(ign, t); |
1548 | netdev_state_change(dev); | 1552 | netdev_state_change(dev); |
1549 | } | 1553 | } |
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 0c0b6e363a20..e982b5c1ee17 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -634,17 +634,16 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
634 | break; | 634 | break; |
635 | } | 635 | } |
636 | dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr); | 636 | dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr); |
637 | if (dev) { | 637 | if (dev) |
638 | mreq.imr_ifindex = dev->ifindex; | 638 | mreq.imr_ifindex = dev->ifindex; |
639 | dev_put(dev); | ||
640 | } | ||
641 | } else | 639 | } else |
642 | dev = __dev_get_by_index(sock_net(sk), mreq.imr_ifindex); | 640 | dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex); |
643 | 641 | ||
644 | 642 | ||
645 | err = -EADDRNOTAVAIL; | 643 | err = -EADDRNOTAVAIL; |
646 | if (!dev) | 644 | if (!dev) |
647 | break; | 645 | break; |
646 | dev_put(dev); | ||
648 | 647 | ||
649 | err = -EINVAL; | 648 | err = -EINVAL; |
650 | if (sk->sk_bound_dev_if && | 649 | if (sk->sk_bound_dev_if && |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 757c9171e7c2..ab996f9c0fe0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -352,13 +352,24 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | |||
352 | skb->ip_summed = CHECKSUM_NONE; | 352 | skb->ip_summed = CHECKSUM_NONE; |
353 | 353 | ||
354 | skb->transport_header = skb->network_header; | 354 | skb->transport_header = skb->network_header; |
355 | err = memcpy_fromiovecend((void *)iph, from, 0, length); | 355 | err = -EFAULT; |
356 | if (err) | 356 | if (memcpy_fromiovecend((void *)iph, from, 0, length)) |
357 | goto error_fault; | 357 | goto error_free; |
358 | 358 | ||
359 | /* We don't modify invalid header */ | ||
360 | iphlen = iph->ihl * 4; | 359 | iphlen = iph->ihl * 4; |
361 | if (iphlen >= sizeof(*iph) && iphlen <= length) { | 360 | |
361 | /* | ||
362 | * We don't want to modify the ip header, but we do need to | ||
363 | * be sure that it won't cause problems later along the network | ||
364 | * stack. Specifically we want to make sure that iph->ihl is a | ||
365 | * sane value. If ihl points beyond the length of the buffer passed | ||
366 | * in, reject the frame as invalid | ||
367 | */ | ||
368 | err = -EINVAL; | ||
369 | if (iphlen > length) | ||
370 | goto error_free; | ||
371 | |||
372 | if (iphlen >= sizeof(*iph)) { | ||
362 | if (!iph->saddr) | 373 | if (!iph->saddr) |
363 | iph->saddr = rt->rt_src; | 374 | iph->saddr = rt->rt_src; |
364 | iph->check = 0; | 375 | iph->check = 0; |
@@ -381,8 +392,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | |||
381 | out: | 392 | out: |
382 | return 0; | 393 | return 0; |
383 | 394 | ||
384 | error_fault: | 395 | error_free: |
385 | err = -EFAULT; | ||
386 | kfree_skb(skb); | 396 | kfree_skb(skb); |
387 | error: | 397 | error: |
388 | IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); | 398 | IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index bb4199252026..5b1050a5d874 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -1854,7 +1854,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1854 | goto e_inval; | 1854 | goto e_inval; |
1855 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); | 1855 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); |
1856 | } else if (fib_validate_source(saddr, 0, tos, 0, | 1856 | } else if (fib_validate_source(saddr, 0, tos, 0, |
1857 | dev, &spec_dst, &itag) < 0) | 1857 | dev, &spec_dst, &itag, 0) < 0) |
1858 | goto e_inval; | 1858 | goto e_inval; |
1859 | 1859 | ||
1860 | rth = dst_alloc(&ipv4_dst_ops); | 1860 | rth = dst_alloc(&ipv4_dst_ops); |
@@ -1967,7 +1967,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1967 | 1967 | ||
1968 | 1968 | ||
1969 | err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), | 1969 | err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), |
1970 | in_dev->dev, &spec_dst, &itag); | 1970 | in_dev->dev, &spec_dst, &itag, skb->mark); |
1971 | if (err < 0) { | 1971 | if (err < 0) { |
1972 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, | 1972 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, |
1973 | saddr); | 1973 | saddr); |
@@ -2141,7 +2141,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2141 | int result; | 2141 | int result; |
2142 | result = fib_validate_source(saddr, daddr, tos, | 2142 | result = fib_validate_source(saddr, daddr, tos, |
2143 | net->loopback_dev->ifindex, | 2143 | net->loopback_dev->ifindex, |
2144 | dev, &spec_dst, &itag); | 2144 | dev, &spec_dst, &itag, skb->mark); |
2145 | if (result < 0) | 2145 | if (result < 0) |
2146 | goto martian_source; | 2146 | goto martian_source; |
2147 | if (result) | 2147 | if (result) |
@@ -2170,7 +2170,7 @@ brd_input: | |||
2170 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); | 2170 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); |
2171 | else { | 2171 | else { |
2172 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, | 2172 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, |
2173 | &itag); | 2173 | &itag, skb->mark); |
2174 | if (err < 0) | 2174 | if (err < 0) |
2175 | goto martian_source; | 2175 | goto martian_source; |
2176 | if (err) | 2176 | if (err) |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 64d0af675823..98440ad82558 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -326,6 +326,43 @@ void tcp_enter_memory_pressure(struct sock *sk) | |||
326 | 326 | ||
327 | EXPORT_SYMBOL(tcp_enter_memory_pressure); | 327 | EXPORT_SYMBOL(tcp_enter_memory_pressure); |
328 | 328 | ||
329 | /* Convert seconds to retransmits based on initial and max timeout */ | ||
330 | static u8 secs_to_retrans(int seconds, int timeout, int rto_max) | ||
331 | { | ||
332 | u8 res = 0; | ||
333 | |||
334 | if (seconds > 0) { | ||
335 | int period = timeout; | ||
336 | |||
337 | res = 1; | ||
338 | while (seconds > period && res < 255) { | ||
339 | res++; | ||
340 | timeout <<= 1; | ||
341 | if (timeout > rto_max) | ||
342 | timeout = rto_max; | ||
343 | period += timeout; | ||
344 | } | ||
345 | } | ||
346 | return res; | ||
347 | } | ||
348 | |||
349 | /* Convert retransmits to seconds based on initial and max timeout */ | ||
350 | static int retrans_to_secs(u8 retrans, int timeout, int rto_max) | ||
351 | { | ||
352 | int period = 0; | ||
353 | |||
354 | if (retrans > 0) { | ||
355 | period = timeout; | ||
356 | while (--retrans) { | ||
357 | timeout <<= 1; | ||
358 | if (timeout > rto_max) | ||
359 | timeout = rto_max; | ||
360 | period += timeout; | ||
361 | } | ||
362 | } | ||
363 | return period; | ||
364 | } | ||
365 | |||
329 | /* | 366 | /* |
330 | * Wait for a TCP event. | 367 | * Wait for a TCP event. |
331 | * | 368 | * |
@@ -1405,7 +1442,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1405 | goto found_ok_skb; | 1442 | goto found_ok_skb; |
1406 | if (tcp_hdr(skb)->fin) | 1443 | if (tcp_hdr(skb)->fin) |
1407 | goto found_fin_ok; | 1444 | goto found_fin_ok; |
1408 | WARN_ON(!(flags & MSG_PEEK)); | 1445 | WARN(!(flags & MSG_PEEK), KERN_INFO "recvmsg bug 2: " |
1446 | "copied %X seq %X\n", *seq, | ||
1447 | TCP_SKB_CB(skb)->seq); | ||
1409 | } | 1448 | } |
1410 | 1449 | ||
1411 | /* Well, if we have backlog, try to process it now yet. */ | 1450 | /* Well, if we have backlog, try to process it now yet. */ |
@@ -2163,16 +2202,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2163 | break; | 2202 | break; |
2164 | 2203 | ||
2165 | case TCP_DEFER_ACCEPT: | 2204 | case TCP_DEFER_ACCEPT: |
2166 | icsk->icsk_accept_queue.rskq_defer_accept = 0; | 2205 | /* Translate value in seconds to number of retransmits */ |
2167 | if (val > 0) { | 2206 | icsk->icsk_accept_queue.rskq_defer_accept = |
2168 | /* Translate value in seconds to number of | 2207 | secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, |
2169 | * retransmits */ | 2208 | TCP_RTO_MAX / HZ); |
2170 | while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && | ||
2171 | val > ((TCP_TIMEOUT_INIT / HZ) << | ||
2172 | icsk->icsk_accept_queue.rskq_defer_accept)) | ||
2173 | icsk->icsk_accept_queue.rskq_defer_accept++; | ||
2174 | icsk->icsk_accept_queue.rskq_defer_accept++; | ||
2175 | } | ||
2176 | break; | 2209 | break; |
2177 | 2210 | ||
2178 | case TCP_WINDOW_CLAMP: | 2211 | case TCP_WINDOW_CLAMP: |
@@ -2353,8 +2386,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2353 | val = (val ? : sysctl_tcp_fin_timeout) / HZ; | 2386 | val = (val ? : sysctl_tcp_fin_timeout) / HZ; |
2354 | break; | 2387 | break; |
2355 | case TCP_DEFER_ACCEPT: | 2388 | case TCP_DEFER_ACCEPT: |
2356 | val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 : | 2389 | val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, |
2357 | ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1)); | 2390 | TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ); |
2358 | break; | 2391 | break; |
2359 | case TCP_WINDOW_CLAMP: | 2392 | case TCP_WINDOW_CLAMP: |
2360 | val = tp->window_clamp; | 2393 | val = tp->window_clamp; |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 624c3c9b3c2b..4c03598ed924 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -641,8 +641,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
641 | if (!(flg & TCP_FLAG_ACK)) | 641 | if (!(flg & TCP_FLAG_ACK)) |
642 | return NULL; | 642 | return NULL; |
643 | 643 | ||
644 | /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ | 644 | /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ |
645 | if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && | 645 | if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && |
646 | TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { | 646 | TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { |
647 | inet_rsk(req)->acked = 1; | 647 | inet_rsk(req)->acked = 1; |
648 | return NULL; | 648 | return NULL; |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6ec6a8a4a224..0fa9f70e4b19 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -841,6 +841,42 @@ out: | |||
841 | return ret; | 841 | return ret; |
842 | } | 842 | } |
843 | 843 | ||
844 | |||
845 | /** | ||
846 | * first_packet_length - return length of first packet in receive queue | ||
847 | * @sk: socket | ||
848 | * | ||
849 | * Drops all bad checksum frames, until a valid one is found. | ||
850 | * Returns the length of found skb, or 0 if none is found. | ||
851 | */ | ||
852 | static unsigned int first_packet_length(struct sock *sk) | ||
853 | { | ||
854 | struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; | ||
855 | struct sk_buff *skb; | ||
856 | unsigned int res; | ||
857 | |||
858 | __skb_queue_head_init(&list_kill); | ||
859 | |||
860 | spin_lock_bh(&rcvq->lock); | ||
861 | while ((skb = skb_peek(rcvq)) != NULL && | ||
862 | udp_lib_checksum_complete(skb)) { | ||
863 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, | ||
864 | IS_UDPLITE(sk)); | ||
865 | __skb_unlink(skb, rcvq); | ||
866 | __skb_queue_tail(&list_kill, skb); | ||
867 | } | ||
868 | res = skb ? skb->len : 0; | ||
869 | spin_unlock_bh(&rcvq->lock); | ||
870 | |||
871 | if (!skb_queue_empty(&list_kill)) { | ||
872 | lock_sock(sk); | ||
873 | __skb_queue_purge(&list_kill); | ||
874 | sk_mem_reclaim_partial(sk); | ||
875 | release_sock(sk); | ||
876 | } | ||
877 | return res; | ||
878 | } | ||
879 | |||
844 | /* | 880 | /* |
845 | * IOCTL requests applicable to the UDP protocol | 881 | * IOCTL requests applicable to the UDP protocol |
846 | */ | 882 | */ |
@@ -857,21 +893,16 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
857 | 893 | ||
858 | case SIOCINQ: | 894 | case SIOCINQ: |
859 | { | 895 | { |
860 | struct sk_buff *skb; | 896 | unsigned int amount = first_packet_length(sk); |
861 | unsigned long amount; | ||
862 | 897 | ||
863 | amount = 0; | 898 | if (amount) |
864 | spin_lock_bh(&sk->sk_receive_queue.lock); | ||
865 | skb = skb_peek(&sk->sk_receive_queue); | ||
866 | if (skb != NULL) { | ||
867 | /* | 899 | /* |
868 | * We will only return the amount | 900 | * We will only return the amount |
869 | * of this packet since that is all | 901 | * of this packet since that is all |
870 | * that will be read. | 902 | * that will be read. |
871 | */ | 903 | */ |
872 | amount = skb->len - sizeof(struct udphdr); | 904 | amount -= sizeof(struct udphdr); |
873 | } | 905 | |
874 | spin_unlock_bh(&sk->sk_receive_queue.lock); | ||
875 | return put_user(amount, (int __user *)arg); | 906 | return put_user(amount, (int __user *)arg); |
876 | } | 907 | } |
877 | 908 | ||
@@ -968,9 +999,7 @@ try_again: | |||
968 | err = ulen; | 999 | err = ulen; |
969 | 1000 | ||
970 | out_free: | 1001 | out_free: |
971 | lock_sock(sk); | 1002 | skb_free_datagram_locked(sk, skb); |
972 | skb_free_datagram(sk, skb); | ||
973 | release_sock(sk); | ||
974 | out: | 1003 | out: |
975 | return err; | 1004 | return err; |
976 | 1005 | ||
@@ -1540,29 +1569,11 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
1540 | { | 1569 | { |
1541 | unsigned int mask = datagram_poll(file, sock, wait); | 1570 | unsigned int mask = datagram_poll(file, sock, wait); |
1542 | struct sock *sk = sock->sk; | 1571 | struct sock *sk = sock->sk; |
1543 | int is_lite = IS_UDPLITE(sk); | ||
1544 | 1572 | ||
1545 | /* Check for false positives due to checksum errors */ | 1573 | /* Check for false positives due to checksum errors */ |
1546 | if ((mask & POLLRDNORM) && | 1574 | if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && |
1547 | !(file->f_flags & O_NONBLOCK) && | 1575 | !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk)) |
1548 | !(sk->sk_shutdown & RCV_SHUTDOWN)) { | 1576 | mask &= ~(POLLIN | POLLRDNORM); |
1549 | struct sk_buff_head *rcvq = &sk->sk_receive_queue; | ||
1550 | struct sk_buff *skb; | ||
1551 | |||
1552 | spin_lock_bh(&rcvq->lock); | ||
1553 | while ((skb = skb_peek(rcvq)) != NULL && | ||
1554 | udp_lib_checksum_complete(skb)) { | ||
1555 | UDP_INC_STATS_BH(sock_net(sk), | ||
1556 | UDP_MIB_INERRORS, is_lite); | ||
1557 | __skb_unlink(skb, rcvq); | ||
1558 | kfree_skb(skb); | ||
1559 | } | ||
1560 | spin_unlock_bh(&rcvq->lock); | ||
1561 | |||
1562 | /* nothing to see, move along */ | ||
1563 | if (skb == NULL) | ||
1564 | mask &= ~(POLLIN | POLLRDNORM); | ||
1565 | } | ||
1566 | 1577 | ||
1567 | return mask; | 1578 | return mask; |
1568 | 1579 | ||