Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/devinet.c               16
-rw-r--r--  net/ipv4/inet_connection_sock.c  34
-rw-r--r--  net/ipv4/ip_sockglue.c            7
-rw-r--r--  net/ipv4/tcp.c                   59
-rw-r--r--  net/ipv4/tcp_minisocks.c          4
-rw-r--r--  net/ipv4/udp.c                   73
6 files changed, 135 insertions, 58 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e92f1fd28aa5..5df2f6a0b0f0 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1077,12 +1077,16 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 		ip_mc_up(in_dev);
 		/* fall through */
 	case NETDEV_CHANGEADDR:
-		if (IN_DEV_ARP_NOTIFY(in_dev))
-			arp_send(ARPOP_REQUEST, ETH_P_ARP,
-				 in_dev->ifa_list->ifa_address,
-				 dev,
-				 in_dev->ifa_list->ifa_address,
-				 NULL, dev->dev_addr, NULL);
+		/* Send gratuitous ARP to notify of link change */
+		if (IN_DEV_ARP_NOTIFY(in_dev)) {
+			struct in_ifaddr *ifa = in_dev->ifa_list;
+
+			if (ifa)
+				arp_send(ARPOP_REQUEST, ETH_P_ARP,
+					 ifa->ifa_address, dev,
+					 ifa->ifa_address, NULL,
+					 dev->dev_addr, NULL);
+		}
 		break;
 	case NETDEV_DOWN:
 		ip_mc_down(in_dev);
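
Note: the old code dereferenced in_dev->ifa_list unconditionally, but on NETDEV_CHANGEADDR a device may carry no IPv4 address yet, so the new block only sends the gratuitous ARP when an address actually exists. IN_DEV_ARP_NOTIFY() tests the per-device arp_notify sysctl. A minimal userspace sketch of enabling that knob, assuming an interface named eth0:

#include <stdio.h>

int main(void)
{
	/* arp_notify is exposed under /proc/sys/net/ipv4/conf/<dev>/;
	 * eth0 here is an assumption, substitute your interface */
	FILE *f = fopen("/proc/sys/net/ipv4/conf/eth0/arp_notify", "w");

	if (!f) {
		perror("arp_notify");
		return 1;
	}
	fputs("1\n", f);
	return fclose(f) ? 1 : 0;
}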
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4351ca2cf0b8..537731b3bcb3 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -446,6 +446,28 @@ extern int sysctl_tcp_synack_retries;
 
 EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
 
+/* Decide when to expire the request and when to resend SYN-ACK */
+static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
+				  const int max_retries,
+				  const u8 rskq_defer_accept,
+				  int *expire, int *resend)
+{
+	if (!rskq_defer_accept) {
+		*expire = req->retrans >= thresh;
+		*resend = 1;
+		return;
+	}
+	*expire = req->retrans >= thresh &&
+		  (!inet_rsk(req)->acked || req->retrans >= max_retries);
+	/*
+	 * Do not resend while waiting for data after ACK,
+	 * start to resend on end of deferring period to give
+	 * last chance for data or ACK to create established socket.
+	 */
+	*resend = !inet_rsk(req)->acked ||
+		  req->retrans >= rskq_defer_accept - 1;
+}
+
 void inet_csk_reqsk_queue_prune(struct sock *parent,
 				const unsigned long interval,
 				const unsigned long timeout,
@@ -501,9 +523,15 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 		reqp=&lopt->syn_table[i];
 		while ((req = *reqp) != NULL) {
 			if (time_after_eq(now, req->expires)) {
-				if ((req->retrans < thresh ||
-				     (inet_rsk(req)->acked && req->retrans < max_retries))
-				    && !req->rsk_ops->rtx_syn_ack(parent, req)) {
+				int expire = 0, resend = 0;
+
+				syn_ack_recalc(req, thresh, max_retries,
+					       queue->rskq_defer_accept,
+					       &expire, &resend);
+				if (!expire &&
+				    (!resend ||
+				     !req->rsk_ops->rtx_syn_ack(parent, req) ||
+				     inet_rsk(req)->acked)) {
 					unsigned long timeo;
 
 					if (req->retrans++ == 0)
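
The new helper centralizes the expire/resend decision that the pruning loop previously open-coded: without TCP_DEFER_ACCEPT, a request expires once req->retrans reaches thresh and SYN-ACKs are always resent; with it, an ACKed request is kept alive without resends until the deferring period ends, then gets a last SYN-ACK before max_retries expires it. A userspace sketch of the same decision table, with a stub struct standing in for request_sock and inet_rsk():

#include <stdio.h>

/* stand-in for the fields syn_ack_recalc() reads */
struct req_stub {
	int retrans;	/* SYN-ACK retransmits so far */
	int acked;	/* bare ACK already seen? */
};

static void recalc(const struct req_stub *req, int thresh, int max_retries,
		   unsigned char defer, int *expire, int *resend)
{
	if (!defer) {
		*expire = req->retrans >= thresh;
		*resend = 1;
		return;
	}
	*expire = req->retrans >= thresh &&
		  (!req->acked || req->retrans >= max_retries);
	*resend = !req->acked || req->retrans >= defer - 1;
}

int main(void)
{
	struct req_stub req = { .retrans = 0, .acked = 1 };
	int expire, resend;

	/* an ACKed request with defer_accept = 4: no resends until
	 * retrans reaches 3, expiry once retrans reaches max_retries */
	for (req.retrans = 0; req.retrans < 8; req.retrans++) {
		recalc(&req, 5, 7, 4, &expire, &resend);
		printf("retrans=%d expire=%d resend=%d\n",
		       req.retrans, expire, resend);
	}
	return 0;
}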
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 0c0b6e363a20..e982b5c1ee17 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -634,17 +634,16 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 				break;
 			}
 			dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
-			if (dev) {
+			if (dev)
 				mreq.imr_ifindex = dev->ifindex;
-				dev_put(dev);
-			}
 		} else
-			dev = __dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
+			dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
 
 
 		err = -EADDRNOTAVAIL;
 		if (!dev)
 			break;
+		dev_put(dev);
 
 		err = -EINVAL;
 		if (sk->sk_bound_dev_if &&
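
The point of this hunk is reference-count symmetry: ip_dev_find() and dev_get_by_index() both return a device with a reference held (unlike the __dev_get_by_index() the old code used), so both branches now yield a held reference that a single dev_put() after the NULL check releases. A toy sketch of the pattern with a stub refcount, not the kernel API:

#include <assert.h>

struct dev_stub { int refcnt; };

/* both lookup paths hand back a held reference */
static struct dev_stub *lookup_hold(struct dev_stub *d)
{
	if (d)
		d->refcnt++;
	return d;
}

int main(void)
{
	struct dev_stub eth = { .refcnt = 1 };
	struct dev_stub *d = lookup_hold(&eth);

	if (!d)
		return 1;	/* nothing held, nothing to put */
	d->refcnt--;		/* single release point, as in the hunk */
	assert(eth.refcnt == 1);
	return 0;
}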
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 64d0af675823..90b2e0649bfb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -326,6 +326,43 @@ void tcp_enter_memory_pressure(struct sock *sk)
 
 EXPORT_SYMBOL(tcp_enter_memory_pressure);
 
+/* Convert seconds to retransmits based on initial and max timeout */
+static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
+{
+	u8 res = 0;
+
+	if (seconds > 0) {
+		int period = timeout;
+
+		res = 1;
+		while (seconds > period && res < 255) {
+			res++;
+			timeout <<= 1;
+			if (timeout > rto_max)
+				timeout = rto_max;
+			period += timeout;
+		}
+	}
+	return res;
+}
+
+/* Convert retransmits to seconds based on initial and max timeout */
+static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
+{
+	int period = 0;
+
+	if (retrans > 0) {
+		period = timeout;
+		while (--retrans) {
+			timeout <<= 1;
+			if (timeout > rto_max)
+				timeout = rto_max;
+			period += timeout;
+		}
+	}
+	return period;
+}
+
 /*
  * Wait for a TCP event.
  *
@@ -1405,7 +1442,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			goto found_ok_skb;
 		if (tcp_hdr(skb)->fin)
 			goto found_fin_ok;
-		WARN_ON(!(flags & MSG_PEEK));
+		if (WARN_ON(!(flags & MSG_PEEK)))
+			printk(KERN_INFO "recvmsg bug 2: copied %X "
+			       "seq %X\n", *seq, TCP_SKB_CB(skb)->seq);
 	}
 
 	/* Well, if we have backlog, try to process it now yet. */
@@ -2163,16 +2202,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_DEFER_ACCEPT:
-		icsk->icsk_accept_queue.rskq_defer_accept = 0;
-		if (val > 0) {
-			/* Translate value in seconds to number of
-			 * retransmits */
-			while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
-			       val > ((TCP_TIMEOUT_INIT / HZ) <<
-				      icsk->icsk_accept_queue.rskq_defer_accept))
-				icsk->icsk_accept_queue.rskq_defer_accept++;
-			icsk->icsk_accept_queue.rskq_defer_accept++;
-		}
+		/* Translate value in seconds to number of retransmits */
+		icsk->icsk_accept_queue.rskq_defer_accept =
+			secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+					TCP_RTO_MAX / HZ);
 		break;
 
 	case TCP_WINDOW_CLAMP:
@@ -2353,8 +2386,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		val = (val ? : sysctl_tcp_fin_timeout) / HZ;
 		break;
 	case TCP_DEFER_ACCEPT:
-		val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
-		      ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
+		val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
+				      TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
 		break;
 	case TCP_WINDOW_CLAMP:
 		val = tp->window_clamp;
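
TCP_DEFER_ACCEPT now round-trips through the two converters above instead of the open-coded shift loop, which also silently capped the count at 32 retransmits. A standalone round-trip check of the converters, assuming TCP_TIMEOUT_INIT/HZ == 3 and TCP_RTO_MAX/HZ == 120 (the values in kernels of this vintage):

#include <stdio.h>

typedef unsigned char u8;

/* copies of the converters added in this hunk */
static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
{
	u8 res = 0;

	if (seconds > 0) {
		int period = timeout;

		res = 1;
		while (seconds > period && res < 255) {
			res++;
			timeout <<= 1;
			if (timeout > rto_max)
				timeout = rto_max;
			period += timeout;
		}
	}
	return res;
}

static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
{
	int period = 0;

	if (retrans > 0) {
		period = timeout;
		while (--retrans) {
			timeout <<= 1;
			if (timeout > rto_max)
				timeout = rto_max;
			period += timeout;
		}
	}
	return period;
}

int main(void)
{
	int secs;

	/* e.g. 30s maps to 4 retransmits, which reads back as 45s
	 * (3 + 6 + 12 + 24), the end of the 4th retransmit period */
	for (secs = 1; secs <= 60; secs += 10)
		printf("%2ds -> %u retransmits -> %ds\n", secs,
		       secs_to_retrans(secs, 3, 120),
		       retrans_to_secs(secs_to_retrans(secs, 3, 120), 3, 120));
	return 0;
}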
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 624c3c9b3c2b..4c03598ed924 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -641,8 +641,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (!(flg & TCP_FLAG_ACK))
 		return NULL;
 
-	/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
-	if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+	/* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
+	if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
 	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
 		inet_rsk(req)->acked = 1;
 		return NULL;
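
With this change a bare ACK is dropped only while req->retrans is still below the defer period, rather than whenever rskq_defer_accept is nonzero, so a deferred connection that never sends data is eventually established instead of hanging until it is retired. From userspace the knob is unchanged; a minimal sketch of arming it on a listening socket, error handling trimmed to the essentials:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <stdio.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int secs = 10;	/* defer accept for roughly ten seconds */

	if (fd < 0 || setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT,
				 &secs, sizeof(secs)) < 0) {
		perror("TCP_DEFER_ACCEPT");
		return 1;
	}
	return 0;
}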
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6ec6a8a4a224..d0d436d6216c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -841,6 +841,42 @@ out:
 	return ret;
 }
 
+
+/**
+ * first_packet_length - return length of first packet in receive queue
+ * @sk: socket
+ *
+ * Drops all bad checksum frames, until a valid one is found.
+ * Returns the length of found skb, or 0 if none is found.
+ */
+static unsigned int first_packet_length(struct sock *sk)
+{
+	struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
+	struct sk_buff *skb;
+	unsigned int res;
+
+	__skb_queue_head_init(&list_kill);
+
+	spin_lock_bh(&rcvq->lock);
+	while ((skb = skb_peek(rcvq)) != NULL &&
+	       udp_lib_checksum_complete(skb)) {
+		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
+				 IS_UDPLITE(sk));
+		__skb_unlink(skb, rcvq);
+		__skb_queue_tail(&list_kill, skb);
+	}
+	res = skb ? skb->len : 0;
+	spin_unlock_bh(&rcvq->lock);
+
+	if (!skb_queue_empty(&list_kill)) {
+		lock_sock(sk);
+		__skb_queue_purge(&list_kill);
+		sk_mem_reclaim_partial(sk);
+		release_sock(sk);
+	}
+	return res;
+}
+
 /*
  * IOCTL requests applicable to the UDP protocol
  */
@@ -857,21 +893,16 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 
 	case SIOCINQ:
 	{
-		struct sk_buff *skb;
-		unsigned long amount;
+		unsigned int amount = first_packet_length(sk);
 
-		amount = 0;
-		spin_lock_bh(&sk->sk_receive_queue.lock);
-		skb = skb_peek(&sk->sk_receive_queue);
-		if (skb != NULL) {
+		if (amount)
 			/*
 			 * We will only return the amount
 			 * of this packet since that is all
 			 * that will be read.
 			 */
-			amount = skb->len - sizeof(struct udphdr);
-		}
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
+			amount -= sizeof(struct udphdr);
+
 		return put_user(amount, (int __user *)arg);
 	}
 
@@ -1540,29 +1571,11 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
 	unsigned int mask = datagram_poll(file, sock, wait);
 	struct sock *sk = sock->sk;
-	int is_lite = IS_UDPLITE(sk);
 
 	/* Check for false positives due to checksum errors */
-	if ((mask & POLLRDNORM) &&
-	    !(file->f_flags & O_NONBLOCK) &&
-	    !(sk->sk_shutdown & RCV_SHUTDOWN)) {
-		struct sk_buff_head *rcvq = &sk->sk_receive_queue;
-		struct sk_buff *skb;
-
-		spin_lock_bh(&rcvq->lock);
-		while ((skb = skb_peek(rcvq)) != NULL &&
-		       udp_lib_checksum_complete(skb)) {
-			UDP_INC_STATS_BH(sock_net(sk),
-					UDP_MIB_INERRORS, is_lite);
-			__skb_unlink(skb, rcvq);
-			kfree_skb(skb);
-		}
-		spin_unlock_bh(&rcvq->lock);
-
-		/* nothing to see, move along */
-		if (skb == NULL)
-			mask &= ~(POLLIN | POLLRDNORM);
-	}
+	if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
+	    !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk))
+		mask &= ~(POLLIN | POLLRDNORM);
 
 	return mask;
 
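
first_packet_length() now backs both SIOCINQ and udp_poll(), so bad-checksum datagrams are purged in one place; unlike the old poll path, they are collected on a kill list and freed outside the receive-queue lock, letting sk_mem_reclaim_partial() run under the socket lock. A minimal sketch of the userspace view via SIOCINQ (equivalent to FIONREAD on Linux):

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/sockios.h>
#include <stdio.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int pending = 0;

	/* reports the payload length of the first valid queued datagram,
	 * after any bad-checksum frames have been dropped */
	if (fd < 0 || ioctl(fd, SIOCINQ, &pending) < 0) {
		perror("SIOCINQ");
		return 1;
	}
	printf("next datagram payload: %d bytes\n", pending);
	return 0;
}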