Diffstat (limited to 'net')

 net/core/datagram.c                           |  21
 net/core/dev.c                                |  12
 net/core/netpoll.c                            |  18
 net/decnet/af_decnet.c                        |  14
 net/ipv4/icmp.c                               |   6
 net/ipv4/igmp.c                               |  19
 net/ipv4/ip_gre.c                             |  15
 net/ipv4/netfilter/ip_conntrack_proto_icmp.c  |  11
 net/ipv4/sysctl_net_ipv4.c                    |   8
 net/ipv4/tcp.c                                |   3
 net/ipv4/tcp_bic.c                            |  12
 net/ipv4/tcp_cong.c                           |  40
 net/ipv4/tcp_highspeed.c                      |  11
 net/ipv4/tcp_htcp.c                           |  13
 net/ipv4/tcp_hybla.c                          |   6
 net/ipv4/tcp_input.c                          | 288
 net/ipv4/tcp_ipv4.c                           |  28
 net/ipv4/tcp_minisocks.c                      |   7
 net/ipv4/tcp_output.c                         |  61
 net/ipv4/tcp_scalable.c                       |  14
 net/ipv4/tcp_timer.c                          |   4
 net/ipv4/tcp_vegas.c                          |  42
 net/ipv4/udp.c                                |   7
 net/ipv6/icmp.c                               |  21
 net/ipv6/raw.c                                |  42
 net/ipv6/tcp_ipv6.c                           |  20
 net/ipv6/udp.c                                |  25
 net/rxrpc/transport.c                         |  15
 net/sunrpc/socklib.c                          |   5
 net/sunrpc/svcsock.c                          |   9
 30 files changed, 466 insertions(+), 331 deletions(-)
diff --git a/net/core/datagram.c b/net/core/datagram.c
index d219435d086c..1bcfef51ac58 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -350,6 +350,20 @@ fault:
 	return -EFAULT;
 }
 
+unsigned int __skb_checksum_complete(struct sk_buff *skb)
+{
+	unsigned int sum;
+
+	sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+	if (likely(!sum)) {
+		if (unlikely(skb->ip_summed == CHECKSUM_HW))
+			netdev_rx_csum_fault(skb->dev);
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+	return sum;
+}
+EXPORT_SYMBOL(__skb_checksum_complete);
+
 /**
  *	skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec.
  *	@skb: skbuff
@@ -363,7 +377,7 @@ fault:
  *	-EFAULT - fault during copy. Beware, in this case iovec
  *		can be modified!
  */
-int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb,
+int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
 				     int hlen, struct iovec *iov)
 {
 	unsigned int csum;
@@ -376,8 +390,7 @@ int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb,
 		iov++;
 
 	if (iov->iov_len < chunk) {
-		if ((unsigned short)csum_fold(skb_checksum(skb, 0, chunk + hlen,
-							   skb->csum)))
+		if (__skb_checksum_complete(skb))
 			goto csum_error;
 		if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
 			goto fault;
@@ -388,6 +401,8 @@ int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb,
 			goto fault;
 		if ((unsigned short)csum_fold(csum))
 			goto csum_error;
+		if (unlikely(skb->ip_summed == CHECKSUM_HW))
+			netdev_rx_csum_fault(skb->dev);
 		iov->iov_len -= chunk;
 		iov->iov_base += chunk;
 	}
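The new helper returns the folded checksum, so zero means the packet verified; on success it also upgrades skb->ip_summed to CHECKSUM_UNNECESSARY so later layers skip the work, and a nonzero device-supplied sum is reported as a driver fault. Callers that want to short-circuit already-verified packets presumably go through an inline wrapper on the include/linux/skbuff.h side of this change, which the net/-limited diffstat does not show; a reconstruction of its likely shape, not the committed code:

	/* Assumed wrapper (outside this net/ diff): skip the fold entirely
	 * when the skb is already marked verified. */
	static inline unsigned int skb_checksum_complete(struct sk_buff *skb)
	{
		return skb->ip_summed != CHECKSUM_UNNECESSARY &&
			__skb_checksum_complete(skb);
	}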
diff --git a/net/core/dev.c b/net/core/dev.c
index 8d1541595277..0b48e294aafe 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1108,6 +1108,18 @@ out:
 	return ret;
 }
 
+/* Take action when hardware reception checksum errors are detected. */
+#ifdef CONFIG_BUG
+void netdev_rx_csum_fault(struct net_device *dev)
+{
+	if (net_ratelimit()) {
+		printk(KERN_ERR "%s: hw csum failure.\n", dev->name);
+		dump_stack();
+	}
+}
+EXPORT_SYMBOL(netdev_rx_csum_fault);
+#endif
+
 #ifdef CONFIG_HIGHMEM
 /* Actually, we should eliminate this check as soon as we know, that:
  * 1. IOMMU is present and allows to map all the memory.
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 802fe11efad0..49424a42a2c0 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -101,16 +101,20 @@ void netpoll_queue(struct sk_buff *skb)
 static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
 			unsigned short ulen, u32 saddr, u32 daddr)
 {
-	if (uh->check == 0)
+	unsigned int psum;
+
+	if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY)
 		return 0;
 
-	if (skb->ip_summed == CHECKSUM_HW)
-		return csum_tcpudp_magic(
-			saddr, daddr, ulen, IPPROTO_UDP, skb->csum);
+	psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
+
+	if (skb->ip_summed == CHECKSUM_HW &&
+	    !(u16)csum_fold(csum_add(psum, skb->csum)))
+		return 0;
 
-	skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
+	skb->csum = psum;
 
-	return csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+	return __skb_checksum_complete(skb);
 }
 
 /*
@@ -489,7 +493,7 @@ int __netpoll_rx(struct sk_buff *skb)
 
 	if (ulen != len)
 		goto out;
-	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr) < 0)
+	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
 		goto out;
 	if (np->local_ip && np->local_ip != ntohl(iph->daddr))
 		goto out;
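The rewritten checksum_udp() leans on the end-around-carry property of the Internet checksum: ones-complement-adding the pseudo-header sum (psum) to the payload sum the NIC left in skb->csum folds to zero exactly when the UDP checksum is valid. A tiny userspace illustration of that fold test, with made-up data words rather than a real packet:

	#include <stdint.h>
	#include <stdio.h>

	/* Ones-complement add: wrap the carry back into the low bits. */
	static uint32_t csum_add32(uint32_t a, uint32_t b)
	{
		a += b;
		return a + (a < b);
	}

	/* Fold a 32-bit running sum to 16 bits and complement it. */
	static uint16_t csum_fold16(uint32_t sum)
	{
		sum = (sum & 0xffff) + (sum >> 16);
		sum = (sum & 0xffff) + (sum >> 16);
		return (uint16_t)~sum;
	}

	int main(void)
	{
		uint32_t data = csum_add32(0x1234, 0xabcd); /* two data words */
		uint16_t check = csum_fold16(data);         /* transmitted checksum */
		uint32_t verify = csum_add32(data, check);  /* receiver re-adds it */

		/* A valid packet folds to zero. */
		printf("fold over data+checksum: 0x%04x\n",
		       (unsigned)csum_fold16(verify));      /* prints 0x0000 */
		return 0;
	}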
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 3f25cadccddd..f89e55f814d9 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1664,17 +1664,15 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock,
 		goto out;
 	}
 
-	rv = dn_check_state(sk, NULL, 0, &timeo, flags);
-	if (rv)
-		goto out;
-
 	if (sk->sk_shutdown & RCV_SHUTDOWN) {
-		if (!(flags & MSG_NOSIGNAL))
-			send_sig(SIGPIPE, current, 0);
-		rv = -EPIPE;
+		rv = 0;
 		goto out;
 	}
 
+	rv = dn_check_state(sk, NULL, 0, &timeo, flags);
+	if (rv)
+		goto out;
+
 	if (flags & ~(MSG_PEEK|MSG_OOB|MSG_WAITALL|MSG_DONTWAIT|MSG_NOSIGNAL)) {
 		rv = -EOPNOTSUPP;
 		goto out;
@@ -1928,6 +1926,8 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	if (sk->sk_shutdown & SEND_SHUTDOWN) {
 		err = -EPIPE;
+		if (!(flags & MSG_NOSIGNAL))
+			send_sig(SIGPIPE, current, 0);
 		goto out_err;
 	}
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 175e093ec564..e3eceecd0496 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -934,11 +934,11 @@ int icmp_rcv(struct sk_buff *skb)
 	case CHECKSUM_HW:
 		if (!(u16)csum_fold(skb->csum))
 			break;
-		LIMIT_NETDEBUG(KERN_DEBUG "icmp v4 hw csum failure\n");
+		/* fall through */
 	case CHECKSUM_NONE:
-		if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))
+		skb->csum = 0;
+		if (__skb_checksum_complete(skb))
 			goto error;
-	default:;
 	}
 
 	if (!pskb_pull(skb, sizeof(struct icmphdr)))
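This switch is the receive-side pattern the series converts protocols to, and it recurs nearly verbatim in the igmp.c, ip_gre.c and ip_conntrack_proto_icmp.c hunks below: trust a zero fold of the device-computed sum, otherwise zero skb->csum and let __skb_checksum_complete() recompute over the whole packet (reporting the driver if the hardware sum was the lie). A minimal standalone sketch of the idiom; my_proto_csum_ok() is a hypothetical caller, not a function added by this diff, and CHECKSUM_HW is the 2.6.14-era spelling of what later kernels call CHECKSUM_COMPLETE:

	static int my_proto_csum_ok(struct sk_buff *skb)
	{
		switch (skb->ip_summed) {
		case CHECKSUM_HW:
			if (!(u16)csum_fold(skb->csum))
				break;		/* device-computed sum is good */
			/* fall through: device sum failed, reverify in software */
		case CHECKSUM_NONE:
			skb->csum = 0;		/* no partial sum to build on */
			if (__skb_checksum_complete(skb))
				return 0;	/* genuinely corrupt packet */
		}
		return 1;			/* includes CHECKSUM_UNNECESSARY */
	}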
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c6247fc84060..c04607b49212 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -872,11 +872,18 @@ int igmp_rcv(struct sk_buff *skb)
 		return 0;
 	}
 
-	if (!pskb_may_pull(skb, sizeof(struct igmphdr)) ||
-	    (u16)csum_fold(skb_checksum(skb, 0, len, 0))) {
-		in_dev_put(in_dev);
-		kfree_skb(skb);
-		return 0;
+	if (!pskb_may_pull(skb, sizeof(struct igmphdr)))
+		goto drop;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_HW:
+		if (!(u16)csum_fold(skb->csum))
+			break;
+		/* fall through */
+	case CHECKSUM_NONE:
+		skb->csum = 0;
+		if (__skb_checksum_complete(skb))
+			goto drop;
 	}
 
 	ih = skb->h.igmph;
@@ -906,6 +913,8 @@ int igmp_rcv(struct sk_buff *skb)
 	default:
 		NETDEBUG(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type);
 	}
+
+drop:
 	in_dev_put(in_dev);
 	kfree_skb(skb);
 	return 0;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 896ce3f8f53a..4e9c74b54b15 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -577,15 +577,16 @@ static int ipgre_rcv(struct sk_buff *skb)
 		goto drop_nolock;
 
 	if (flags&GRE_CSUM) {
-		if (skb->ip_summed == CHECKSUM_HW) {
+		switch (skb->ip_summed) {
+		case CHECKSUM_HW:
 			csum = (u16)csum_fold(skb->csum);
-			if (csum)
-				skb->ip_summed = CHECKSUM_NONE;
-		}
-		if (skb->ip_summed == CHECKSUM_NONE) {
-			skb->csum = skb_checksum(skb, 0, skb->len, 0);
+			if (!csum)
+				break;
+			/* fall through */
+		case CHECKSUM_NONE:
+			skb->csum = 0;
+			csum = __skb_checksum_complete(skb);
 			skb->ip_summed = CHECKSUM_HW;
-			csum = (u16)csum_fold(skb->csum);
 		}
 		offset += 4;
 	}
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 5198f3a1e2cd..e4d6b268e8c4 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -13,6 +13,7 @@
 #include <linux/in.h>
 #include <linux/icmp.h>
 #include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <net/ip.h>
 #include <net/checksum.h>
 #include <linux/netfilter.h>
@@ -230,19 +231,15 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
 	case CHECKSUM_HW:
 		if (!(u16)csum_fold(skb->csum))
 			break;
-		if (LOG_INVALID(IPPROTO_ICMP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				      "ip_ct_icmp: bad HW ICMP checksum ");
-		return -NF_ACCEPT;
+		/* fall through */
 	case CHECKSUM_NONE:
-		if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) {
+		skb->csum = 0;
+		if (__skb_checksum_complete(skb)) {
 			if (LOG_INVALID(IPPROTO_ICMP))
 				nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 					      "ip_ct_icmp: bad ICMP checksum ");
 			return -NF_ACCEPT;
 		}
-	default:
-		break;
 	}
 
 checksum_skipped:
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 652685623519..01444a02b48b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
 		.proc_handler	= &proc_tcp_congestion_control,
 		.strategy	= &sysctl_tcp_congestion_control,
 	},
+	{
+		.ctl_name	= NET_TCP_ABC,
+		.procname	= "tcp_abc",
+		.data		= &sysctl_tcp_abc,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 
 	{ .ctl_name = 0 }
 };
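This entry surfaces sysctl_tcp_abc (declared in the tcp_input.c hunk below) as /proc/sys/net/ipv4/tcp_abc, writable by root and parsed as a plain integer by proc_dointvec. A small userspace sketch of flipping it, assuming procfs is mounted in the usual place; equivalent to `sysctl -w net.ipv4.tcp_abc=1`:

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/net/ipv4/tcp_abc", "w");

		if (!f) {
			perror("tcp_abc");
			return 1;
		}
		fprintf(f, "1\n");	/* proc_dointvec parses a plain integer */
		fclose(f);
		return 0;
	}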
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 72b7c22e1ea5..9ac7a4f46bd8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1640,7 +1640,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	} else if (tcp_need_reset(old_state) ||
 		   (tp->snd_nxt != tp->write_seq &&
 		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
-		/* The last check adjusts for discrepance of Linux wrt. RFC
+		/* The last check adjusts for discrepancy of Linux wrt. RFC
 		 * states
 		 */
 		tcp_send_active_reset(sk, gfp_any());
@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->packets_out = 0;
 	tp->snd_ssthresh = 0x7fffffff;
 	tp->snd_cwnd_cnt = 0;
+	tp->bytes_acked = 0;
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tcp_clear_retrans(tp);
 	inet_csk_delack_init(sk);
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index ae35e0609047..1d0cd86621b1 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -217,17 +217,15 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack,
 
 	bictcp_low_utilization(sk, data_acked);
 
-	if (in_flight < tp->snd_cwnd)
+	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-	if (tp->snd_cwnd <= tp->snd_ssthresh) {
-		/* In "safe" area, increase. */
-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-			tp->snd_cwnd++;
-	} else {
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+	else {
 		bictcp_update(ca, tp->snd_cwnd);
 
 		/* In dangerous area, increase slowly.
 		 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
 		 */
 		if (tp->snd_cwnd_cnt >= ca->cnt) {
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index bbf2d6624e89..c7cc62c8dc12 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -186,24 +186,32 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (in_flight < tp->snd_cwnd)
+	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-	if (tp->snd_cwnd <= tp->snd_ssthresh) {
-		/* In "safe" area, increase. */
-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-			tp->snd_cwnd++;
-	} else {
-		/* In dangerous area, increase slowly.
-		 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
-		 */
-		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
-			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-				tp->snd_cwnd++;
-			tp->snd_cwnd_cnt = 0;
-		} else
-			tp->snd_cwnd_cnt++;
-	}
+	/* In "safe" area, increase. */
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+
+	/* In dangerous area, increase slowly. */
+	else if (sysctl_tcp_abc) {
+		/* RFC3465: Apppriate Byte Count
+		 * increase once for each full cwnd acked
+		 */
+		if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
+			tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
+		}
+	} else {
+		/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
+		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
+			tp->snd_cwnd_cnt = 0;
+		} else
+			tp->snd_cwnd_cnt++;
+	}
 }
 EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
 
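With Appropriate Byte Counting enabled, Reno grows cwnd only after a full window's worth of bytes has been cumulatively ACKed, which makes growth robust against ACK division and delayed ACKs; the packet-counting snd_cwnd_cnt branch is kept for when the knob is switched off. A toy userspace model of the byte-counting branch above, with illustrative numbers only (not kernel code): at an MSS of 1460 bytes and a cwnd of 10 segments, the window grows by one segment only once 14600 bytes have been ACKed.

	#include <stdio.h>

	int main(void)
	{
		unsigned int mss = 1460, cwnd = 10;
		unsigned int bytes_acked = 0;
		unsigned int i;

		/* One full window of ACKed bytes grows cwnd by one segment. */
		for (i = 0; i < 12; i++) {		/* 12 ACKs of one MSS each */
			bytes_acked += mss;
			if (bytes_acked >= cwnd * mss) {
				bytes_acked -= cwnd * mss;
				cwnd++;
			}
		}
		printf("cwnd after 12 full-MSS ACKs: %u\n", cwnd);	/* 11 */
		return 0;
	}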
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 6acc04bde080..82b3c189bd7d 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -111,18 +111,17 @@ static void hstcp_init(struct sock *sk)
 }
 
 static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
-			     u32 in_flight, int good)
+			     u32 in_flight, u32 pkts_acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct hstcp *ca = inet_csk_ca(sk);
 
-	if (in_flight < tp->snd_cwnd)
+	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-	if (tp->snd_cwnd <= tp->snd_ssthresh) {
-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-			tp->snd_cwnd++;
-	} else {
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+	else {
 		/* Update AIMD parameters */
 		if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
 			while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index e47b37984e95..3284cfb993e6 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -207,14 +207,13 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
 
-	if (in_flight < tp->snd_cwnd)
+	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-	if (tp->snd_cwnd <= tp->snd_ssthresh) {
-		/* In "safe" area, increase. */
-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-			tp->snd_cwnd++;
-	} else {
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+	else {
+
 		measure_rtt(sk);
 
 		/* keep track of number of round-trip times since last backoff event */
@@ -224,7 +223,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 			htcp_alpha_update(ca);
 		}
 
-		/* In dangerous area, increase slowly. 
+		/* In dangerous area, increase slowly.
 		 * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
 		 */
 		if ((tp->snd_cwnd_cnt++ * ca->alpha)>>7 >= tp->snd_cwnd) {
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 77add63623df..40dbb3877510 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -100,12 +100,12 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 		ca->minrtt = tp->srtt;
 	}
 
+	if (!tcp_is_cwnd_limited(sk, in_flight))
+		return;
+
 	if (!ca->hybla_en)
 		return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag);
 
-	if (in_flight < tp->snd_cwnd)
-		return;
-
 	if (ca->rho == 0)
 		hybla_recalc_param(sk);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3e98b57578dc..40a26b7157b4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -42,7 +42,7 @@
  *		Andi Kleen	:	Moved open_request checking here
  *					and process RSTs for open_requests.
  *		Andi Kleen	:	Better prune_queue, and other fixes.
- *		Andrey Savochkin:	Fix RTT measurements in the presnce of
+ *		Andrey Savochkin:	Fix RTT measurements in the presence of
  *					timestamps.
  *		Andrey Savochkin:	Check sequence numbers correctly when
  *					removing SACKs due to in sequence incoming
@@ -89,6 +89,7 @@ int sysctl_tcp_frto;
 int sysctl_tcp_nometrics_save;
 
 int sysctl_tcp_moderate_rcvbuf = 1;
+int sysctl_tcp_abc = 1;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
@@ -223,7 +224,7 @@ static void tcp_fixup_sndbuf(struct sock *sk)
 * of receiver window. Check #2.
 *
 * The scheme does not work when sender sends good segments opening
- * window and then starts to feed us spagetti. But it should work
+ * window and then starts to feed us spaghetti. But it should work
 * in common situations. Otherwise, we have to rely on queue collapsing.
 */
 
@@ -233,7 +234,7 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
 {
 	/* Optimize this! */
 	int truesize = tcp_win_from_space(skb->truesize)/2;
-	int window = tcp_full_space(sk)/2;
+	int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
 
 	while (tp->rcv_ssthresh <= window) {
 		if (truesize <= skb->len)
@@ -277,7 +278,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 	int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
 
 	/* Try to select rcvbuf so that 4 mss-sized segments
-	 * will fit to window and correspoding skbs will fit to our rcvbuf.
+	 * will fit to window and corresponding skbs will fit to our rcvbuf.
 	 * (was 3; 4 is minimum to allow fast retransmit to work.)
 	 */
 	while (tcp_win_from_space(rcvmem) < tp->advmss)
@@ -286,7 +287,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 		sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
 }
 
-/* 4. Try to fixup all. It is made iimediately after connection enters
+/* 4. Try to fixup all. It is made immediately after connection enters
 * established state.
 */
 static void tcp_init_buffer_space(struct sock *sk)
@@ -326,37 +327,18 @@ static void tcp_init_buffer_space(struct sock *sk)
 static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct sk_buff *skb;
-	unsigned int app_win = tp->rcv_nxt - tp->copied_seq;
-	int ofo_win = 0;
 
 	icsk->icsk_ack.quick = 0;
 
-	skb_queue_walk(&tp->out_of_order_queue, skb) {
-		ofo_win += skb->len;
-	}
-
-	/* If overcommit is due to out of order segments,
-	 * do not clamp window. Try to expand rcvbuf instead.
-	 */
-	if (ofo_win) {
-		if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
-		    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
-		    !tcp_memory_pressure &&
-		    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
-			sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
-					    sysctl_tcp_rmem[2]);
+	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
+	    !tcp_memory_pressure &&
+	    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
+				    sysctl_tcp_rmem[2]);
 	}
-	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
-		app_win += ofo_win;
-		if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf)
-			app_win >>= 1;
-		if (app_win > icsk->icsk_ack.rcv_mss)
-			app_win -= icsk->icsk_ack.rcv_mss;
-		app_win = max(app_win, 2U*tp->advmss);
-
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
 		tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
-	}
 }
 
 /* Receiver "autotuning" code.
@@ -385,8 +367,8 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
	 * are stalled on filesystem I/O.
	 *
	 * Also, since we are only going for a minimum in the
-	 * non-timestamp case, we do not smoothe things out
-	 * else with timestamps disabled convergance takes too
+	 * non-timestamp case, we do not smoother things out
+	 * else with timestamps disabled convergence takes too
	 * long.
	 */
 	if (!win_dep) {
@@ -395,7 +377,7 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 		} else if (m < new_sample)
 			new_sample = m << 3;
 	} else {
-		/* No previous mesaure. */
+		/* No previous measure. */
 		new_sample = m << 3;
 	}
 
@@ -524,7 +506,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
 		if (icsk->icsk_ack.ato > icsk->icsk_rto)
 			icsk->icsk_ack.ato = icsk->icsk_rto;
 	} else if (m > icsk->icsk_rto) {
-		/* Too long gap. Apparently sender falled to
+		/* Too long gap. Apparently sender failed to
		 * restart window, so that we send ACKs quickly.
		 */
 		tcp_incr_quickack(sk);
@@ -548,10 +530,9 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
 * To save cycles in the RFC 1323 implementation it was better to break
 * it up into three procedures. -- erics
 */
-static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
+static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	const struct inet_connection_sock *icsk = inet_csk(sk);
 	long m = mrtt; /* RTT */
 
	/* The following amusing code comes from Jacobson's
@@ -565,7 +546,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
	 *
	 * Funny. This algorithm seems to be very broken.
	 * These formulae increase RTO, when it should be decreased, increase
-	 * too slowly, when it should be incresed fastly, decrease too fastly
+	 * too slowly, when it should be increased fastly, decrease too fastly
	 * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
	 * does not matter how to _calculate_ it. Seems, it was trap
	 * that VJ failed to avoid. 8)
@@ -610,9 +591,6 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
 			tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
 		tp->rtt_seq = tp->snd_nxt;
 	}
-
-	if (icsk->icsk_ca_ops->rtt_sample)
-		icsk->icsk_ca_ops->rtt_sample(sk, *usrtt);
 }
 
 /* Calculate rto without backoff. This is the second half of Van Jacobson's
@@ -629,14 +607,14 @@ static inline void tcp_set_rto(struct sock *sk)
	 * at least by solaris and freebsd. "Erratic ACKs" has _nothing_
	 * to do with delayed acks, because at cwnd>2 true delack timeout
	 * is invisible. Actually, Linux-2.4 also generates erratic
-	 * ACKs in some curcumstances.
+	 * ACKs in some circumstances.
	 */
 	inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar;
 
	/* 2. Fixups made earlier cannot be right.
	 *    If we do not estimate RTO correctly without them,
	 *    all the algo is pure shit and should be replaced
-	 *    with correct one. It is exaclty, which we pretend to do.
+	 *    with correct one. It is exactly, which we pretend to do.
	 */
 }
 
@@ -794,7 +772,7 @@ static void tcp_init_metrics(struct sock *sk)
 * to make it more realistic.
 *
 * A bit of theory. RTT is time passed after "normal" sized packet
- * is sent until it is ACKed. In normal curcumstances sending small
+ * is sent until it is ACKed. In normal circumstances sending small
 * packets force peer to delay ACKs and calculation is correct too.
 * The algorithm is adaptive and, provided we follow specs, it
 * NEVER underestimate RTT. BUT! If peer tries to make some clever
@@ -919,18 +897,32 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	int prior_fackets;
 	u32 lost_retrans = 0;
 	int flag = 0;
+	int dup_sack = 0;
 	int i;
 
 	if (!tp->sacked_out)
 		tp->fackets_out = 0;
 	prior_fackets = tp->fackets_out;
 
-	for (i=0; i<num_sacks; i++, sp++) {
-		struct sk_buff *skb;
-		__u32 start_seq = ntohl(sp->start_seq);
-		__u32 end_seq = ntohl(sp->end_seq);
-		int fack_count = 0;
-		int dup_sack = 0;
+	/* SACK fastpath:
+	 * if the only SACK change is the increase of the end_seq of
+	 * the first block then only apply that SACK block
+	 * and use retrans queue hinting otherwise slowpath */
+	flag = 1;
+	for (i = 0; i< num_sacks; i++) {
+		__u32 start_seq = ntohl(sp[i].start_seq);
+		__u32 end_seq = ntohl(sp[i].end_seq);
+
+		if (i == 0){
+			if (tp->recv_sack_cache[i].start_seq != start_seq)
+				flag = 0;
+		} else {
+			if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
+			    (tp->recv_sack_cache[i].end_seq != end_seq))
+				flag = 0;
+		}
+		tp->recv_sack_cache[i].start_seq = start_seq;
+		tp->recv_sack_cache[i].end_seq = end_seq;
 
 		/* Check for D-SACK. */
 		if (i == 0) {
@@ -962,15 +954,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			if (before(ack, prior_snd_una - tp->max_window))
 				return 0;
 		}
+	}
+
+	if (flag)
+		num_sacks = 1;
+	else {
+		int j;
+		tp->fastpath_skb_hint = NULL;
+
+		/* order SACK blocks to allow in order walk of the retrans queue */
+		for (i = num_sacks-1; i > 0; i--) {
+			for (j = 0; j < i; j++){
+				if (after(ntohl(sp[j].start_seq),
+					  ntohl(sp[j+1].start_seq))){
+					sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq);
+					sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq);
+					sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq);
+					sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq);
+				}
+
+			}
+		}
+	}
+
+	/* clear flag as used for different purpose in following code */
+	flag = 0;
+
+	for (i=0; i<num_sacks; i++, sp++) {
+		struct sk_buff *skb;
+		__u32 start_seq = ntohl(sp->start_seq);
+		__u32 end_seq = ntohl(sp->end_seq);
+		int fack_count;
+
+		/* Use SACK fastpath hint if valid */
+		if (tp->fastpath_skb_hint) {
+			skb = tp->fastpath_skb_hint;
+			fack_count = tp->fastpath_cnt_hint;
+		} else {
+			skb = sk->sk_write_queue.next;
+			fack_count = 0;
+		}
 
 		/* Event "B" in the comment above. */
 		if (after(end_seq, tp->high_seq))
 			flag |= FLAG_DATA_LOST;
 
-		sk_stream_for_retrans_queue(skb, sk) {
+		sk_stream_for_retrans_queue_from(skb, sk) {
 			int in_sack, pcount;
 			u8 sacked;
 
+			tp->fastpath_skb_hint = skb;
+			tp->fastpath_cnt_hint = fack_count;
+
			/* The retransmission queue is always in order, so
			 * we can short-circuit the walk early.
			 */
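The fastpath caches the previous ACK's SACK vector in recv_sack_cache: if only the first block's end_seq grew, the walk resumes from fastpath_skb_hint instead of rescanning the whole retransmit queue, and otherwise the blocks are bubble-sorted so the queue can be walked in order. A toy illustration of the cache comparison, standalone userspace code rather than kernel code:

	#include <stdio.h>

	struct sack_block { unsigned int start_seq, end_seq; };

	/* The new SACK vector qualifies for the fastpath if block 0 kept its
	 * start_seq (its end_seq may grow) and every other block is unchanged. */
	static int sack_fastpath(const struct sack_block *cache,
				 const struct sack_block *sp, int num_sacks)
	{
		int i;

		for (i = 0; i < num_sacks; i++) {
			if (i == 0) {
				if (cache[i].start_seq != sp[i].start_seq)
					return 0;
			} else if (cache[i].start_seq != sp[i].start_seq ||
				   cache[i].end_seq != sp[i].end_seq) {
				return 0;
			}
		}
		return 1;
	}

	int main(void)
	{
		struct sack_block cache[2] = { {100, 200}, {300, 400} };
		struct sack_block now[2]   = { {100, 250}, {300, 400} };

		/* Only block 0's end_seq advanced, so the walk may resume from
		 * the hinted skb instead of rescanning the whole queue. */
		printf("fastpath: %d\n", sack_fastpath(cache, now, 2)); /* 1 */
		return 0;
	}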
@@ -1045,6 +1080,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 				TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
 				tp->lost_out -= tcp_skb_pcount(skb);
 				tp->retrans_out -= tcp_skb_pcount(skb);
+
+				/* clear lost hint */
+				tp->retransmit_skb_hint = NULL;
 			}
 		} else {
 			/* New sack for not retransmitted frame,
@@ -1057,6 +1095,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			if (sacked & TCPCB_LOST) {
 				TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 				tp->lost_out -= tcp_skb_pcount(skb);
+
+				/* clear lost hint */
+				tp->retransmit_skb_hint = NULL;
 			}
 		}
 
@@ -1080,6 +1121,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		    (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 			tp->retrans_out -= tcp_skb_pcount(skb);
+			tp->retransmit_skb_hint = NULL;
 		}
 	}
 }
@@ -1107,6 +1149,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 			tp->retrans_out -= tcp_skb_pcount(skb);
 
+			/* clear lost hint */
+			tp->retransmit_skb_hint = NULL;
+
 			if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
 				tp->lost_out += tcp_skb_pcount(skb);
 				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -1214,6 +1259,8 @@ static void tcp_enter_frto_loss(struct sock *sk)
 	tcp_set_ca_state(sk, TCP_CA_Loss);
 	tp->high_seq = tp->frto_highmark;
 	TCP_ECN_queue_cwr(tp);
+
+	clear_all_retrans_hints(tp);
 }
 
 void tcp_clear_retrans(struct tcp_sock *tp)
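clear_all_retrans_hints() is called above and at several points below whenever the retransmit queue or its tag bits change wholesale, since a stale cached skb pointer would otherwise dangle. Its definition lives on the include/net/tcp.h side of the series, outside this net/-limited diffstat; a reconstruction of its likely shape, with the field set inferred from the hints used in this file rather than copied from the committed code:

	static inline void clear_all_retrans_hints(struct tcp_sock *tp)
	{
		/* Drop every cached queue position at once; the next walk
		 * falls back to scanning from sk_write_queue.next. */
		tp->lost_skb_hint = NULL;
		tp->scoreboard_skb_hint = NULL;
		tp->retransmit_skb_hint = NULL;
		tp->fastpath_skb_hint = NULL;
	}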
@@ -1251,6 +1298,7 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
1251 | tp->snd_cwnd_cnt = 0; | 1298 | tp->snd_cwnd_cnt = 0; |
1252 | tp->snd_cwnd_stamp = tcp_time_stamp; | 1299 | tp->snd_cwnd_stamp = tcp_time_stamp; |
1253 | 1300 | ||
1301 | tp->bytes_acked = 0; | ||
1254 | tcp_clear_retrans(tp); | 1302 | tcp_clear_retrans(tp); |
1255 | 1303 | ||
1256 | /* Push undo marker, if it was plain RTO and nothing | 1304 | /* Push undo marker, if it was plain RTO and nothing |
@@ -1279,6 +1327,8 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
1279 | tcp_set_ca_state(sk, TCP_CA_Loss); | 1327 | tcp_set_ca_state(sk, TCP_CA_Loss); |
1280 | tp->high_seq = tp->snd_nxt; | 1328 | tp->high_seq = tp->snd_nxt; |
1281 | TCP_ECN_queue_cwr(tp); | 1329 | TCP_ECN_queue_cwr(tp); |
1330 | |||
1331 | clear_all_retrans_hints(tp); | ||
1282 | } | 1332 | } |
1283 | 1333 | ||
1284 | static int tcp_check_sack_reneging(struct sock *sk) | 1334 | static int tcp_check_sack_reneging(struct sock *sk) |
@@ -1503,17 +1553,37 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp, | |||
1503 | int packets, u32 high_seq) | 1553 | int packets, u32 high_seq) |
1504 | { | 1554 | { |
1505 | struct sk_buff *skb; | 1555 | struct sk_buff *skb; |
1506 | int cnt = packets; | 1556 | int cnt; |
1507 | 1557 | ||
1508 | BUG_TRAP(cnt <= tp->packets_out); | 1558 | BUG_TRAP(packets <= tp->packets_out); |
1559 | if (tp->lost_skb_hint) { | ||
1560 | skb = tp->lost_skb_hint; | ||
1561 | cnt = tp->lost_cnt_hint; | ||
1562 | } else { | ||
1563 | skb = sk->sk_write_queue.next; | ||
1564 | cnt = 0; | ||
1565 | } | ||
1509 | 1566 | ||
1510 | sk_stream_for_retrans_queue(skb, sk) { | 1567 | sk_stream_for_retrans_queue_from(skb, sk) { |
1511 | cnt -= tcp_skb_pcount(skb); | 1568 | /* TODO: do this better */ |
1512 | if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) | 1569 | /* this is not the most efficient way to do this... */ |
1570 | tp->lost_skb_hint = skb; | ||
1571 | tp->lost_cnt_hint = cnt; | ||
1572 | cnt += tcp_skb_pcount(skb); | ||
1573 | if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq)) | ||
1513 | break; | 1574 | break; |
1514 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { | 1575 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { |
1515 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1576 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
1516 | tp->lost_out += tcp_skb_pcount(skb); | 1577 | tp->lost_out += tcp_skb_pcount(skb); |
1578 | |||
1579 | /* clear xmit_retransmit_queue hints | ||
1580 | * if this is beyond hint */ | ||
1581 | if(tp->retransmit_skb_hint != NULL && | ||
1582 | before(TCP_SKB_CB(skb)->seq, | ||
1583 | TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) { | ||
1584 | |||
1585 | tp->retransmit_skb_hint = NULL; | ||
1586 | } | ||
1517 | } | 1587 | } |
1518 | } | 1588 | } |
1519 | tcp_sync_left_out(tp); | 1589 | tcp_sync_left_out(tp); |
@@ -1540,13 +1610,28 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) | |||
1540 | if (tcp_head_timedout(sk, tp)) { | 1610 | if (tcp_head_timedout(sk, tp)) { |
1541 | struct sk_buff *skb; | 1611 | struct sk_buff *skb; |
1542 | 1612 | ||
1543 | sk_stream_for_retrans_queue(skb, sk) { | 1613 | skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint |
1544 | if (tcp_skb_timedout(sk, skb) && | 1614 | : sk->sk_write_queue.next; |
1545 | !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { | 1615 | |
1616 | sk_stream_for_retrans_queue_from(skb, sk) { | ||
1617 | if (!tcp_skb_timedout(sk, skb)) | ||
1618 | break; | ||
1619 | |||
1620 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { | ||
1546 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1621 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
1547 | tp->lost_out += tcp_skb_pcount(skb); | 1622 | tp->lost_out += tcp_skb_pcount(skb); |
1623 | |||
1624 | /* clear xmit_retrans hint */ | ||
1625 | if (tp->retransmit_skb_hint && | ||
1626 | before(TCP_SKB_CB(skb)->seq, | ||
1627 | TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) | ||
1628 | |||
1629 | tp->retransmit_skb_hint = NULL; | ||
1548 | } | 1630 | } |
1549 | } | 1631 | } |
1632 | |||
1633 | tp->scoreboard_skb_hint = skb; | ||
1634 | |||
1550 | tcp_sync_left_out(tp); | 1635 | tcp_sync_left_out(tp); |
1551 | } | 1636 | } |
1552 | } | 1637 | } |
@@ -1626,6 +1711,10 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) | |||
1626 | } | 1711 | } |
1627 | tcp_moderate_cwnd(tp); | 1712 | tcp_moderate_cwnd(tp); |
1628 | tp->snd_cwnd_stamp = tcp_time_stamp; | 1713 | tp->snd_cwnd_stamp = tcp_time_stamp; |
1714 | |||
1715 | /* There is something screwy going on with the retrans hints after | ||
1716 | an undo */ | ||
1717 | clear_all_retrans_hints(tp); | ||
1629 | } | 1718 | } |
1630 | 1719 | ||
1631 | static inline int tcp_may_undo(struct tcp_sock *tp) | 1720 | static inline int tcp_may_undo(struct tcp_sock *tp) |
@@ -1709,6 +1798,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) | |||
1709 | sk_stream_for_retrans_queue(skb, sk) { | 1798 | sk_stream_for_retrans_queue(skb, sk) { |
1710 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; | 1799 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; |
1711 | } | 1800 | } |
1801 | |||
1802 | clear_all_retrans_hints(tp); | ||
1803 | |||
1712 | DBGUNDO(sk, tp, "partial loss"); | 1804 | DBGUNDO(sk, tp, "partial loss"); |
1713 | tp->lost_out = 0; | 1805 | tp->lost_out = 0; |
1714 | tp->left_out = tp->sacked_out; | 1806 | tp->left_out = tp->sacked_out; |
@@ -1908,6 +2000,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
1908 | TCP_ECN_queue_cwr(tp); | 2000 | TCP_ECN_queue_cwr(tp); |
1909 | } | 2001 | } |
1910 | 2002 | ||
2003 | tp->bytes_acked = 0; | ||
1911 | tp->snd_cwnd_cnt = 0; | 2004 | tp->snd_cwnd_cnt = 0; |
1912 | tcp_set_ca_state(sk, TCP_CA_Recovery); | 2005 | tcp_set_ca_state(sk, TCP_CA_Recovery); |
1913 | } | 2006 | } |
@@ -1919,9 +2012,9 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
1919 | } | 2012 | } |
1920 | 2013 | ||
1921 | /* Read draft-ietf-tcplw-high-performance before mucking | 2014 | /* Read draft-ietf-tcplw-high-performance before mucking |
1922 | * with this code. (Superceeds RFC1323) | 2015 | * with this code. (Supersedes RFC1323) |
1923 | */ | 2016 | */ |
1924 | static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) | 2017 | static void tcp_ack_saw_tstamp(struct sock *sk, int flag) |
1925 | { | 2018 | { |
1926 | /* RTTM Rule: A TSecr value received in a segment is used to | 2019 | /* RTTM Rule: A TSecr value received in a segment is used to |
1927 | * update the averaged RTT measurement only if the segment | 2020 | * update the averaged RTT measurement only if the segment |
@@ -1932,7 +2025,7 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) | |||
1932 | * 1998/04/10 Andrey V. Savochkin <saw@msu.ru> | 2025 | * 1998/04/10 Andrey V. Savochkin <saw@msu.ru> |
1933 | * | 2026 | * |
1934 | * Changed: reset backoff as soon as we see the first valid sample. | 2027 | * Changed: reset backoff as soon as we see the first valid sample. |
1935 | * If we do not, we get strongly overstimated rto. With timestamps | 2028 | * If we do not, we get strongly overestimated rto. With timestamps |
1936 | * samples are accepted even from very old segments: f.e., when rtt=1 | 2029 | * samples are accepted even from very old segments: f.e., when rtt=1 |
1937 | * increases to 8, we retransmit 5 times and after 8 seconds delayed | 2030 | * increases to 8, we retransmit 5 times and after 8 seconds delayed |
1938 | * answer arrives rto becomes 120 seconds! If at least one of segments | 2031 | * answer arrives rto becomes 120 seconds! If at least one of segments |
@@ -1940,13 +2033,13 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) | |||
1940 | */ | 2033 | */ |
1941 | struct tcp_sock *tp = tcp_sk(sk); | 2034 | struct tcp_sock *tp = tcp_sk(sk); |
1942 | const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; | 2035 | const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; |
1943 | tcp_rtt_estimator(sk, seq_rtt, usrtt); | 2036 | tcp_rtt_estimator(sk, seq_rtt); |
1944 | tcp_set_rto(sk); | 2037 | tcp_set_rto(sk); |
1945 | inet_csk(sk)->icsk_backoff = 0; | 2038 | inet_csk(sk)->icsk_backoff = 0; |
1946 | tcp_bound_rto(sk); | 2039 | tcp_bound_rto(sk); |
1947 | } | 2040 | } |
1948 | 2041 | ||
1949 | static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag) | 2042 | static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag) |
1950 | { | 2043 | { |
1951 | /* We don't have a timestamp. Can only use | 2044 | /* We don't have a timestamp. Can only use |
1952 | * packets that are not retransmitted to determine | 2045 | * packets that are not retransmitted to determine |
@@ -1960,21 +2053,21 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag | |||
1960 | if (flag & FLAG_RETRANS_DATA_ACKED) | 2053 | if (flag & FLAG_RETRANS_DATA_ACKED) |
1961 | return; | 2054 | return; |
1962 | 2055 | ||
1963 | tcp_rtt_estimator(sk, seq_rtt, usrtt); | 2056 | tcp_rtt_estimator(sk, seq_rtt); |
1964 | tcp_set_rto(sk); | 2057 | tcp_set_rto(sk); |
1965 | inet_csk(sk)->icsk_backoff = 0; | 2058 | inet_csk(sk)->icsk_backoff = 0; |
1966 | tcp_bound_rto(sk); | 2059 | tcp_bound_rto(sk); |
1967 | } | 2060 | } |
1968 | 2061 | ||
1969 | static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, | 2062 | static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, |
1970 | const s32 seq_rtt, u32 *usrtt) | 2063 | const s32 seq_rtt) |
1971 | { | 2064 | { |
1972 | const struct tcp_sock *tp = tcp_sk(sk); | 2065 | const struct tcp_sock *tp = tcp_sk(sk); |
1973 | /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ | 2066 | /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ |
1974 | if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) | 2067 | if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) |
1975 | tcp_ack_saw_tstamp(sk, usrtt, flag); | 2068 | tcp_ack_saw_tstamp(sk, flag); |
1976 | else if (seq_rtt >= 0) | 2069 | else if (seq_rtt >= 0) |
1977 | tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag); | 2070 | tcp_ack_no_tstamp(sk, seq_rtt, flag); |
1978 | } | 2071 | } |
1979 | 2072 | ||
1980 | static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, | 2073 | static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, |
@@ -2054,20 +2147,27 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, | |||
2054 | return acked; | 2147 | return acked; |
2055 | } | 2148 | } |
2056 | 2149 | ||
2150 | static inline u32 tcp_usrtt(const struct sk_buff *skb) | ||
2151 | { | ||
2152 | struct timeval tv, now; | ||
2153 | |||
2154 | do_gettimeofday(&now); | ||
2155 | skb_get_timestamp(skb, &tv); | ||
2156 | return (now.tv_sec - tv.tv_sec) * 1000000 + (now.tv_usec - tv.tv_usec); | ||
2157 | } | ||
2057 | 2158 | ||
2058 | /* Remove acknowledged frames from the retransmission queue. */ | 2159 | /* Remove acknowledged frames from the retransmission queue. */ |
2059 | static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt) | 2160 | static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) |
2060 | { | 2161 | { |
2061 | struct tcp_sock *tp = tcp_sk(sk); | 2162 | struct tcp_sock *tp = tcp_sk(sk); |
2163 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
2062 | struct sk_buff *skb; | 2164 | struct sk_buff *skb; |
2063 | __u32 now = tcp_time_stamp; | 2165 | __u32 now = tcp_time_stamp; |
2064 | int acked = 0; | 2166 | int acked = 0; |
2065 | __s32 seq_rtt = -1; | 2167 | __s32 seq_rtt = -1; |
2066 | struct timeval usnow; | ||
2067 | u32 pkts_acked = 0; | 2168 | u32 pkts_acked = 0; |
2068 | 2169 | void (*rtt_sample)(struct sock *sk, u32 usrtt) | |
2069 | if (seq_usrtt) | 2170 | = icsk->icsk_ca_ops->rtt_sample; |
2070 | do_gettimeofday(&usnow); | ||
2071 | 2171 | ||
2072 | while ((skb = skb_peek(&sk->sk_write_queue)) && | 2172 | while ((skb = skb_peek(&sk->sk_write_queue)) && |
2073 | skb != sk->sk_send_head) { | 2173 | skb != sk->sk_send_head) { |
@@ -2107,16 +2207,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt | |||
2107 | tp->retrans_out -= tcp_skb_pcount(skb); | 2207 | tp->retrans_out -= tcp_skb_pcount(skb); |
2108 | acked |= FLAG_RETRANS_DATA_ACKED; | 2208 | acked |= FLAG_RETRANS_DATA_ACKED; |
2109 | seq_rtt = -1; | 2209 | seq_rtt = -1; |
2110 | } else if (seq_rtt < 0) | 2210 | } else if (seq_rtt < 0) { |
2111 | seq_rtt = now - scb->when; | 2211 | seq_rtt = now - scb->when; |
2112 | if (seq_usrtt) { | 2212 | if (rtt_sample) |
2113 | struct timeval tv; | 2213 | (*rtt_sample)(sk, tcp_usrtt(skb)); |
2114 | |||
2115 | skb_get_timestamp(skb, &tv); | ||
2116 | *seq_usrtt = (usnow.tv_sec - tv.tv_sec) * 1000000 | ||
2117 | + (usnow.tv_usec - tv.tv_usec); | ||
2118 | } | 2214 | } |
2119 | |||
2120 | if (sacked & TCPCB_SACKED_ACKED) | 2215 | if (sacked & TCPCB_SACKED_ACKED) |
2121 | tp->sacked_out -= tcp_skb_pcount(skb); | 2216 | tp->sacked_out -= tcp_skb_pcount(skb); |
2122 | if (sacked & TCPCB_LOST) | 2217 | if (sacked & TCPCB_LOST) |
@@ -2126,17 +2221,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt | |||
2126 | !before(scb->end_seq, tp->snd_up)) | 2221 | !before(scb->end_seq, tp->snd_up)) |
2127 | tp->urg_mode = 0; | 2222 | tp->urg_mode = 0; |
2128 | } | 2223 | } |
2129 | } else if (seq_rtt < 0) | 2224 | } else if (seq_rtt < 0) { |
2130 | seq_rtt = now - scb->when; | 2225 | seq_rtt = now - scb->when; |
2226 | if (rtt_sample) | ||
2227 | (*rtt_sample)(sk, tcp_usrtt(skb)); | ||
2228 | } | ||
2131 | tcp_dec_pcount_approx(&tp->fackets_out, skb); | 2229 | tcp_dec_pcount_approx(&tp->fackets_out, skb); |
2132 | tcp_packets_out_dec(tp, skb); | 2230 | tcp_packets_out_dec(tp, skb); |
2133 | __skb_unlink(skb, &sk->sk_write_queue); | 2231 | __skb_unlink(skb, &sk->sk_write_queue); |
2134 | sk_stream_free_skb(sk, skb); | 2232 | sk_stream_free_skb(sk, skb); |
2233 | clear_all_retrans_hints(tp); | ||
2135 | } | 2234 | } |
2136 | 2235 | ||
2137 | if (acked&FLAG_ACKED) { | 2236 | if (acked&FLAG_ACKED) { |
2138 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2237 | tcp_ack_update_rtt(sk, acked, seq_rtt); |
2139 | tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt); | ||
2140 | tcp_ack_packets_out(sk, tp); | 2238 | tcp_ack_packets_out(sk, tp); |
2141 | 2239 | ||
2142 | if (icsk->icsk_ca_ops->pkts_acked) | 2240 | if (icsk->icsk_ca_ops->pkts_acked) |
@@ -2284,7 +2382,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una) | |||
2284 | } | 2382 | } |
2285 | 2383 | ||
2286 | /* F-RTO affects on two new ACKs following RTO. | 2384 | /* F-RTO affects on two new ACKs following RTO. |
2287 | * At latest on third ACK the TCP behavor is back to normal. | 2385 | * At latest on third ACK the TCP behavior is back to normal. |
2288 | */ | 2386 | */ |
2289 | tp->frto_counter = (tp->frto_counter + 1) % 3; | 2387 | tp->frto_counter = (tp->frto_counter + 1) % 3; |
2290 | } | 2388 | } |
@@ -2299,7 +2397,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
2299 | u32 ack = TCP_SKB_CB(skb)->ack_seq; | 2397 | u32 ack = TCP_SKB_CB(skb)->ack_seq; |
2300 | u32 prior_in_flight; | 2398 | u32 prior_in_flight; |
2301 | s32 seq_rtt; | 2399 | s32 seq_rtt; |
2302 | s32 seq_usrtt = 0; | ||
2303 | int prior_packets; | 2400 | int prior_packets; |
2304 | 2401 | ||
2305 | /* If the ack is newer than sent or older than previous acks | 2402 | /* If the ack is newer than sent or older than previous acks |
@@ -2311,6 +2408,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
2311 | if (before(ack, prior_snd_una)) | 2408 | if (before(ack, prior_snd_una)) |
2312 | goto old_ack; | 2409 | goto old_ack; |
2313 | 2410 | ||
2411 | if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR) | ||
2412 | tp->bytes_acked += ack - prior_snd_una; | ||
2413 | |||
2314 | if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { | 2414 | if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { |
2315 | /* Window is constant, pure forward advance. | 2415 | /* Window is constant, pure forward advance. |
2316 | * No more checks are required. | 2416 | * No more checks are required. |
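
[Editor's note: the two added lines implement Appropriate Byte Counting (RFC 3465): while icsk_ca_state is below TCP_CA_CWR, each ACK credits the newly acknowledged byte span to tp->bytes_acked, which slow start can later spend in full-MSS units so that delayed ACKs no longer halve the window's growth rate. A runnable sketch under that assumption, with illustrative names:]

    /* Sketch: byte-counted slow start, one cwnd increment per full
     * MSS of acknowledged data rather than one per ACK. Simplified
     * stand-in, not the kernel's tcp_slow_start(). */
    #include <stdio.h>

    struct conn { unsigned cwnd, mss, bytes_acked; };

    static void slow_start_abc(struct conn *c, unsigned acked_bytes)
    {
        c->bytes_acked += acked_bytes;      /* as in the hunk above */
        while (c->bytes_acked >= c->mss) {  /* spend in MSS units */
            c->bytes_acked -= c->mss;
            c->cwnd++;
        }
    }

    int main(void)
    {
        struct conn c = { .cwnd = 2, .mss = 1460, .bytes_acked = 0 };
        slow_start_abc(&c, 2920);           /* delayed ACK covering 2 MSS */
        printf("cwnd=%u leftover=%u\n", c.cwnd, c.bytes_acked); /* 4, 0 */
        return 0;
    }
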
@@ -2352,14 +2452,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
2352 | prior_in_flight = tcp_packets_in_flight(tp); | 2452 | prior_in_flight = tcp_packets_in_flight(tp); |
2353 | 2453 | ||
2354 | /* See if we can take anything off of the retransmit queue. */ | 2454 | /* See if we can take anything off of the retransmit queue. */ |
2355 | flag |= tcp_clean_rtx_queue(sk, &seq_rtt, | 2455 | flag |= tcp_clean_rtx_queue(sk, &seq_rtt); |
2356 | icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL); | ||
2357 | 2456 | ||
2358 | if (tp->frto_counter) | 2457 | if (tp->frto_counter) |
2359 | tcp_process_frto(sk, prior_snd_una); | 2458 | tcp_process_frto(sk, prior_snd_una); |
2360 | 2459 | ||
2361 | if (tcp_ack_is_dubious(sk, flag)) { | 2460 | if (tcp_ack_is_dubious(sk, flag)) { |
2362 | /* Advanve CWND, if state allows this. */ | 2461 | /* Advance CWND, if state allows this. */ |
2363 | if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) | 2462 | if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) |
2364 | tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); | 2463 | tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); |
2365 | tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); | 2464 | tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); |
@@ -3148,7 +3247,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
3148 | { | 3247 | { |
3149 | struct sk_buff *skb; | 3248 | struct sk_buff *skb; |
3150 | 3249 | ||
3151 | /* First, check that queue is collapsable and find | 3250 | /* First, check that queue is collapsible and find |
3152 | * the point where collapsing can be useful. */ | 3251 | * the point where collapsing can be useful. */ |
3153 | for (skb = head; skb != tail; ) { | 3252 | for (skb = head; skb != tail; ) { |
3154 | /* No new bits? It is possible on ofo queue. */ | 3253 | /* No new bits? It is possible on ofo queue. */ |
@@ -3456,7 +3555,7 @@ static __inline__ void tcp_ack_snd_check(struct sock *sk) | |||
3456 | 3555 | ||
3457 | /* | 3556 | /* |
3458 | * This routine is only called when we have urgent data | 3557 | * This routine is only called when we have urgent data |
3459 | * signalled. Its the 'slow' part of tcp_urg. It could be | 3558 | * signaled. Its the 'slow' part of tcp_urg. It could be |
3460 | * moved inline now as tcp_urg is only called from one | 3559 | * moved inline now as tcp_urg is only called from one |
3461 | * place. We handle URGent data wrong. We have to - as | 3560 | * place. We handle URGent data wrong. We have to - as |
3462 | * BSD still doesn't use the correction from RFC961. | 3561 | * BSD still doesn't use the correction from RFC961. |
@@ -3501,7 +3600,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th) | |||
3501 | * urgent. To do this requires some care. We cannot just ignore | 3600 | * urgent. To do this requires some care. We cannot just ignore |
3502 | * tp->copied_seq since we would read the last urgent byte again | 3601 | * tp->copied_seq since we would read the last urgent byte again |
3503 | * as data, nor can we alter copied_seq until this data arrives | 3602 | * as data, nor can we alter copied_seq until this data arrives |
3504 | * or we break the sematics of SIOCATMARK (and thus sockatmark()) | 3603 | * or we break the semantics of SIOCATMARK (and thus sockatmark()) |
3505 | * | 3604 | * |
3506 | * NOTE. Double Dutch. Rendering to plain English: author of comment | 3605 | * NOTE. Double Dutch. Rendering to plain English: author of comment |
3507 | * above did something sort of send("A", MSG_OOB); send("B", MSG_OOB); | 3606 | * above did something sort of send("A", MSG_OOB); send("B", MSG_OOB); |
@@ -3646,7 +3745,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
3646 | tp->rx_opt.saw_tstamp = 0; | 3745 | tp->rx_opt.saw_tstamp = 0; |
3647 | 3746 | ||
3648 | /* pred_flags is 0xS?10 << 16 + snd_wnd | 3747 | /* pred_flags is 0xS?10 << 16 + snd_wnd |
3649 | * if header_predition is to be made | 3748 | * if header_prediction is to be made |
3650 | * 'S' will always be tp->tcp_header_len >> 2 | 3749 | * 'S' will always be tp->tcp_header_len >> 2 |
3651 | * '?' will be 0 for the fast path, otherwise pred_flags is 0 to | 3750 | * '?' will be 0 for the fast path, otherwise pred_flags is 0 to |
3652 | * turn it off (when there are holes in the receive | 3751 | * turn it off (when there are holes in the receive |
@@ -4242,7 +4341,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
4242 | */ | 4341 | */ |
4243 | if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && | 4342 | if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && |
4244 | !tp->srtt) | 4343 | !tp->srtt) |
4245 | tcp_ack_saw_tstamp(sk, NULL, 0); | 4344 | tcp_ack_saw_tstamp(sk, 0); |
4246 | 4345 | ||
4247 | if (tp->rx_opt.tstamp_ok) | 4346 | if (tp->rx_opt.tstamp_ok) |
4248 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; | 4347 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; |
@@ -4372,6 +4471,7 @@ discard: | |||
4372 | 4471 | ||
4373 | EXPORT_SYMBOL(sysctl_tcp_ecn); | 4472 | EXPORT_SYMBOL(sysctl_tcp_ecn); |
4374 | EXPORT_SYMBOL(sysctl_tcp_reordering); | 4473 | EXPORT_SYMBOL(sysctl_tcp_reordering); |
4474 | EXPORT_SYMBOL(sysctl_tcp_abc); | ||
4375 | EXPORT_SYMBOL(tcp_parse_options); | 4475 | EXPORT_SYMBOL(tcp_parse_options); |
4376 | EXPORT_SYMBOL(tcp_rcv_established); | 4476 | EXPORT_SYMBOL(tcp_rcv_established); |
4377 | EXPORT_SYMBOL(tcp_rcv_state_process); | 4477 | EXPORT_SYMBOL(tcp_rcv_state_process); |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 634dabb558fd..4d5021e1929b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -39,7 +39,7 @@ | |||
39 | * request_sock handling and moved | 39 | * request_sock handling and moved |
40 | * most of it into the af independent code. | 40 | * most of it into the af independent code. |
41 | * Added tail drop and some other bugfixes. | 41 | * Added tail drop and some other bugfixes. |
42 | * Added new listen sematics. | 42 | * Added new listen semantics. |
43 | * Mike McLagan : Routing by source | 43 | * Mike McLagan : Routing by source |
44 | * Juan Jose Ciarlante: ip_dynaddr bits | 44 | * Juan Jose Ciarlante: ip_dynaddr bits |
45 | * Andi Kleen: various fixes. | 45 | * Andi Kleen: various fixes. |
@@ -1110,24 +1110,18 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
1110 | static int tcp_v4_checksum_init(struct sk_buff *skb) | 1110 | static int tcp_v4_checksum_init(struct sk_buff *skb) |
1111 | { | 1111 | { |
1112 | if (skb->ip_summed == CHECKSUM_HW) { | 1112 | if (skb->ip_summed == CHECKSUM_HW) { |
1113 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
1114 | if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, | 1113 | if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, |
1115 | skb->nh.iph->daddr, skb->csum)) | 1114 | skb->nh.iph->daddr, skb->csum)) { |
1115 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
1116 | return 0; | 1116 | return 0; |
1117 | 1117 | } | |
1118 | LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v4 csum failed\n"); | ||
1119 | skb->ip_summed = CHECKSUM_NONE; | ||
1120 | } | 1118 | } |
1119 | |||
1120 | skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr, | ||
1121 | skb->len, IPPROTO_TCP, 0); | ||
1122 | |||
1121 | if (skb->len <= 76) { | 1123 | if (skb->len <= 76) { |
1122 | if (tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, | 1124 | return __skb_checksum_complete(skb); |
1123 | skb->nh.iph->daddr, | ||
1124 | skb_checksum(skb, 0, skb->len, 0))) | ||
1125 | return -1; | ||
1126 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
1127 | } else { | ||
1128 | skb->csum = ~tcp_v4_check(skb->h.th, skb->len, | ||
1129 | skb->nh.iph->saddr, | ||
1130 | skb->nh.iph->daddr, 0); | ||
1131 | } | 1125 | } |
1132 | return 0; | 1126 | return 0; |
1133 | } | 1127 | } |
@@ -1216,10 +1210,10 @@ int tcp_v4_rcv(struct sk_buff *skb) | |||
1216 | 1210 | ||
1217 | /* An explanation is required here, I think. | 1211 | /* An explanation is required here, I think. |
1218 | * Packet length and doff are validated by header prediction, | 1212 | * Packet length and doff are validated by header prediction, |
1219 | * provided case of th->doff==0 is elimineted. | 1213 | * provided case of th->doff==0 is eliminated. |
1220 | * So, we defer the checks. */ | 1214 | * So, we defer the checks. */ |
1221 | if ((skb->ip_summed != CHECKSUM_UNNECESSARY && | 1215 | if ((skb->ip_summed != CHECKSUM_UNNECESSARY && |
1222 | tcp_v4_checksum_init(skb) < 0)) | 1216 | tcp_v4_checksum_init(skb))) |
1223 | goto bad_packet; | 1217 | goto bad_packet; |
1224 | 1218 | ||
1225 | th = skb->h.th; | 1219 | th = skb->h.th; |
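
[Editor's note: tcp_v4_checksum_init() now marks CHECKSUM_UNNECESSARY only after the hardware sum actually verifies, hands short packets (len <= 76) to __skb_checksum_complete() for a full software check, and propagates that helper's non-zero folded sum on failure, which is why the call site can use a plain truth test instead of "< 0". A self-contained model of the fold-and-verify step; csum_fold()/checksum_complete() here are stand-ins, not the kernel routines:]

    /* Sketch: fold a 32-bit one's-complement sum to 16 bits and treat
     * a zero fold as "checksum good". */
    #include <stdint.h>
    #include <stdio.h>

    static uint16_t csum_fold(uint32_t sum)
    {
        sum = (sum & 0xffff) + (sum >> 16);   /* fold carries twice */
        sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
    }

    /* Returns 0 when the packet verifies, the non-zero folded sum
     * otherwise, so callers can test plain truth. */
    static unsigned checksum_complete(uint32_t running_sum, int *summed_ok)
    {
        uint16_t sum = csum_fold(running_sum);
        if (sum == 0)
            *summed_ok = 1;                   /* CHECKSUM_UNNECESSARY */
        return sum;
    }

    int main(void)
    {
        int ok = 0;
        /* 0xffff0000 folds to 0xffff, and ~0xffff == 0: verifies. */
        printf("bad=%u ok=%d\n", checksum_complete(0xffff0000u, &ok), ok);
        return 0;
    }
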
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b1a63b2c6b4a..1b66a2ac4321 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -158,7 +158,7 @@ kill_with_rst: | |||
158 | /* I am shamed, but failed to make it more elegant. | 158 | /* I am shamed, but failed to make it more elegant. |
159 | * Yes, it is direct reference to IP, which is impossible | 159 | * Yes, it is direct reference to IP, which is impossible |
160 | * to generalize to IPv6. Taking into account that IPv6 | 160 | * to generalize to IPv6. Taking into account that IPv6 |
161 | * do not undertsnad recycling in any case, it not | 161 | * do not understand recycling in any case, it not |
162 | * a big problem in practice. --ANK */ | 162 | * a big problem in practice. --ANK */ |
163 | if (tw->tw_family == AF_INET && | 163 | if (tw->tw_family == AF_INET && |
164 | tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && | 164 | tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && |
@@ -194,7 +194,7 @@ kill_with_rst: | |||
194 | /* In window segment, it may be only reset or bare ack. */ | 194 | /* In window segment, it may be only reset or bare ack. */ |
195 | 195 | ||
196 | if (th->rst) { | 196 | if (th->rst) { |
197 | /* This is TIME_WAIT assasination, in two flavors. | 197 | /* This is TIME_WAIT assassination, in two flavors. |
198 | * Oh well... nobody has a sufficient solution to this | 198 | * Oh well... nobody has a sufficient solution to this |
199 | * protocol bug yet. | 199 | * protocol bug yet. |
200 | */ | 200 | */ |
@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
380 | */ | 380 | */ |
381 | newtp->snd_cwnd = 2; | 381 | newtp->snd_cwnd = 2; |
382 | newtp->snd_cwnd_cnt = 0; | 382 | newtp->snd_cwnd_cnt = 0; |
383 | newtp->bytes_acked = 0; | ||
383 | 384 | ||
384 | newtp->frto_counter = 0; | 385 | newtp->frto_counter = 0; |
385 | newtp->frto_highmark = 0; | 386 | newtp->frto_highmark = 0; |
@@ -550,7 +551,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, | |||
550 | 551 | ||
551 | /* RFC793 page 36: "If the connection is in any non-synchronized state ... | 552 | /* RFC793 page 36: "If the connection is in any non-synchronized state ... |
552 | * and the incoming segment acknowledges something not yet | 553 | * and the incoming segment acknowledges something not yet |
553 | * sent (the segment carries an unaccaptable ACK) ... | 554 | * sent (the segment carries an unacceptable ACK) ... |
554 | * a reset is sent." | 555 | * a reset is sent." |
555 | * | 556 | * |
556 | * Invalid ACK: reset will be sent by listening socket | 557 | * Invalid ACK: reset will be sent by listening socket |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b907456a79f4..029c70dfb585 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -436,6 +436,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss | |||
436 | u16 flags; | 436 | u16 flags; |
437 | 437 | ||
438 | BUG_ON(len > skb->len); | 438 | BUG_ON(len > skb->len); |
439 | |||
440 | clear_all_retrans_hints(tp); | ||
439 | nsize = skb_headlen(skb) - len; | 441 | nsize = skb_headlen(skb) - len; |
440 | if (nsize < 0) | 442 | if (nsize < 0) |
441 | nsize = 0; | 443 | nsize = 0; |
@@ -599,7 +601,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | |||
599 | for TCP options, but includes only bare TCP header. | 601 | for TCP options, but includes only bare TCP header. |
600 | 602 | ||
601 | tp->rx_opt.mss_clamp is mss negotiated at connection setup. | 603 | tp->rx_opt.mss_clamp is mss negotiated at connection setup. |
602 | It is minumum of user_mss and mss received with SYN. | 604 | It is minimum of user_mss and mss received with SYN. |
603 | It also does not include TCP options. | 605 | It also does not include TCP options. |
604 | 606 | ||
605 | tp->pmtu_cookie is last pmtu, seen by this function. | 607 | tp->pmtu_cookie is last pmtu, seen by this function. |
@@ -1171,7 +1173,7 @@ u32 __tcp_select_window(struct sock *sk) | |||
1171 | { | 1173 | { |
1172 | struct inet_connection_sock *icsk = inet_csk(sk); | 1174 | struct inet_connection_sock *icsk = inet_csk(sk); |
1173 | struct tcp_sock *tp = tcp_sk(sk); | 1175 | struct tcp_sock *tp = tcp_sk(sk); |
1174 | /* MSS for the peer's data. Previous verions used mss_clamp | 1176 | /* MSS for the peer's data. Previous versions used mss_clamp |
1175 | * here. I don't know if the value based on our guesses | 1177 | * here. I don't know if the value based on our guesses |
1176 | * of peer's MSS is better for the performance. It's more correct | 1178 | * of peer's MSS is better for the performance. It's more correct |
1177 | * but may be worse for the performance because of rcv_mss | 1179 | * but may be worse for the performance because of rcv_mss |
@@ -1260,7 +1262,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m | |||
1260 | BUG_ON(tcp_skb_pcount(skb) != 1 || | 1262 | BUG_ON(tcp_skb_pcount(skb) != 1 || |
1261 | tcp_skb_pcount(next_skb) != 1); | 1263 | tcp_skb_pcount(next_skb) != 1); |
1262 | 1264 | ||
1263 | /* Ok. We will be able to collapse the packet. */ | 1265 | /* changing transmit queue under us so clear hints */ |
1266 | clear_all_retrans_hints(tp); | ||
1267 | |||
1268 | /* Ok. We will be able to collapse the packet. */ | ||
1264 | __skb_unlink(next_skb, &sk->sk_write_queue); | 1269 | __skb_unlink(next_skb, &sk->sk_write_queue); |
1265 | 1270 | ||
1266 | memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); | 1271 | memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); |
@@ -1330,6 +1335,8 @@ void tcp_simple_retransmit(struct sock *sk) | |||
1330 | } | 1335 | } |
1331 | } | 1336 | } |
1332 | 1337 | ||
1338 | clear_all_retrans_hints(tp); | ||
1339 | |||
1333 | if (!lost) | 1340 | if (!lost) |
1334 | return; | 1341 | return; |
1335 | 1342 | ||
@@ -1361,7 +1368,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1361 | int err; | 1368 | int err; |
1362 | 1369 | ||
1363 | /* Do not sent more than we queued. 1/4 is reserved for possible | 1370 | /* Do not sent more than we queued. 1/4 is reserved for possible |
1364 | * copying overhead: frgagmentation, tunneling, mangling etc. | 1371 | * copying overhead: fragmentation, tunneling, mangling etc. |
1365 | */ | 1372 | */ |
1366 | if (atomic_read(&sk->sk_wmem_alloc) > | 1373 | if (atomic_read(&sk->sk_wmem_alloc) > |
1367 | min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) | 1374 | min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) |
@@ -1468,13 +1475,25 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
1468 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1475 | const struct inet_connection_sock *icsk = inet_csk(sk); |
1469 | struct tcp_sock *tp = tcp_sk(sk); | 1476 | struct tcp_sock *tp = tcp_sk(sk); |
1470 | struct sk_buff *skb; | 1477 | struct sk_buff *skb; |
1471 | int packet_cnt = tp->lost_out; | 1478 | int packet_cnt; |
1479 | |||
1480 | if (tp->retransmit_skb_hint) { | ||
1481 | skb = tp->retransmit_skb_hint; | ||
1482 | packet_cnt = tp->retransmit_cnt_hint; | ||
1483 | }else{ | ||
1484 | skb = sk->sk_write_queue.next; | ||
1485 | packet_cnt = 0; | ||
1486 | } | ||
1472 | 1487 | ||
1473 | /* First pass: retransmit lost packets. */ | 1488 | /* First pass: retransmit lost packets. */ |
1474 | if (packet_cnt) { | 1489 | if (tp->lost_out) { |
1475 | sk_stream_for_retrans_queue(skb, sk) { | 1490 | sk_stream_for_retrans_queue_from(skb, sk) { |
1476 | __u8 sacked = TCP_SKB_CB(skb)->sacked; | 1491 | __u8 sacked = TCP_SKB_CB(skb)->sacked; |
1477 | 1492 | ||
1493 | /* we could do better than to assign each time */ | ||
1494 | tp->retransmit_skb_hint = skb; | ||
1495 | tp->retransmit_cnt_hint = packet_cnt; | ||
1496 | |||
1478 | /* Assume this retransmit will generate | 1497 | /* Assume this retransmit will generate |
1479 | * only one packet for congestion window | 1498 | * only one packet for congestion window |
1480 | * calculation purposes. This works because | 1499 | * calculation purposes. This works because |
@@ -1485,10 +1504,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
1485 | if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) | 1504 | if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) |
1486 | return; | 1505 | return; |
1487 | 1506 | ||
1488 | if (sacked&TCPCB_LOST) { | 1507 | if (sacked & TCPCB_LOST) { |
1489 | if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { | 1508 | if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { |
1490 | if (tcp_retransmit_skb(sk, skb)) | 1509 | if (tcp_retransmit_skb(sk, skb)) { |
1510 | tp->retransmit_skb_hint = NULL; | ||
1491 | return; | 1511 | return; |
1512 | } | ||
1492 | if (icsk->icsk_ca_state != TCP_CA_Loss) | 1513 | if (icsk->icsk_ca_state != TCP_CA_Loss) |
1493 | NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); | 1514 | NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); |
1494 | else | 1515 | else |
@@ -1501,8 +1522,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
1501 | TCP_RTO_MAX); | 1522 | TCP_RTO_MAX); |
1502 | } | 1523 | } |
1503 | 1524 | ||
1504 | packet_cnt -= tcp_skb_pcount(skb); | 1525 | packet_cnt += tcp_skb_pcount(skb); |
1505 | if (packet_cnt <= 0) | 1526 | if (packet_cnt >= tp->lost_out) |
1506 | break; | 1527 | break; |
1507 | } | 1528 | } |
1508 | } | 1529 | } |
@@ -1528,9 +1549,18 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
1528 | if (tcp_may_send_now(sk, tp)) | 1549 | if (tcp_may_send_now(sk, tp)) |
1529 | return; | 1550 | return; |
1530 | 1551 | ||
1531 | packet_cnt = 0; | 1552 | if (tp->forward_skb_hint) { |
1553 | skb = tp->forward_skb_hint; | ||
1554 | packet_cnt = tp->forward_cnt_hint; | ||
1555 | } else{ | ||
1556 | skb = sk->sk_write_queue.next; | ||
1557 | packet_cnt = 0; | ||
1558 | } | ||
1559 | |||
1560 | sk_stream_for_retrans_queue_from(skb, sk) { | ||
1561 | tp->forward_cnt_hint = packet_cnt; | ||
1562 | tp->forward_skb_hint = skb; | ||
1532 | 1563 | ||
1533 | sk_stream_for_retrans_queue(skb, sk) { | ||
1534 | /* Similar to the retransmit loop above we | 1564 | /* Similar to the retransmit loop above we |
1535 | * can pretend that the retransmitted SKB | 1565 | * can pretend that the retransmitted SKB |
1536 | * we send out here will be composed of one | 1566 | * we send out here will be composed of one |
@@ -1547,8 +1577,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
1547 | continue; | 1577 | continue; |
1548 | 1578 | ||
1549 | /* Ok, retransmit it. */ | 1579 | /* Ok, retransmit it. */ |
1550 | if (tcp_retransmit_skb(sk, skb)) | 1580 | if (tcp_retransmit_skb(sk, skb)) { |
1581 | tp->forward_skb_hint = NULL; | ||
1551 | break; | 1582 | break; |
1583 | } | ||
1552 | 1584 | ||
1553 | if (skb == skb_peek(&sk->sk_write_queue)) | 1585 | if (skb == skb_peek(&sk->sk_write_queue)) |
1554 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 1586 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
@@ -2058,3 +2090,4 @@ EXPORT_SYMBOL(tcp_connect); | |||
2058 | EXPORT_SYMBOL(tcp_make_synack); | 2090 | EXPORT_SYMBOL(tcp_make_synack); |
2059 | EXPORT_SYMBOL(tcp_simple_retransmit); | 2091 | EXPORT_SYMBOL(tcp_simple_retransmit); |
2060 | EXPORT_SYMBOL(tcp_sync_mss); | 2092 | EXPORT_SYMBOL(tcp_sync_mss); |
2093 | EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor); | ||
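
[Editor's note: the tcp_output.c hunks above cache where the last retransmit scan stopped (retransmit_skb_hint / forward_skb_hint plus a running packet count) so tcp_xmit_retransmit_queue() need not rescan the write queue from the head on every ACK, and they invalidate the hints via clear_all_retrans_hints() whenever the queue is restructured (fragmenting, collapsing, simple retransmit). A generic user-space sketch of that scan-hint pattern, with stand-in types:]

    /* Sketch: remember where the previous list scan stopped, resume
     * there next time, and drop the hint when the list changes shape. */
    #include <stdio.h>

    struct node { int id; struct node *next; };

    struct queue {
        struct node *head;
        struct node *hint;    /* where the previous scan stopped */
        int hint_cnt;         /* packets counted up to the hint */
    };

    static void scan_from_hint(struct queue *q)
    {
        struct node *n = q->hint ? q->hint : q->head;
        int cnt = q->hint ? q->hint_cnt : 0;

        for (; n; n = n->next) {
            q->hint = n;      /* keep the hint current as we go */
            q->hint_cnt = cnt++;
            printf("visit %d (cnt=%d)\n", n->id, q->hint_cnt);
        }
    }

    static void queue_changed(struct queue *q)
    {
        q->hint = NULL;       /* clear_all_retrans_hints() analogue */
        q->hint_cnt = 0;
    }

    int main(void)
    {
        struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        struct queue q = { &a, NULL, 0 };

        scan_from_hint(&q);   /* full pass, hint parks on node 3 */
        queue_changed(&q);    /* e.g. a segment was fragmented */
        scan_from_hint(&q);   /* must start over from the head */
        return 0;
    }
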
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 327770bf5522..26d7486ee501 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c | |||
@@ -20,20 +20,20 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt, | |||
20 | u32 in_flight, int flag) | 20 | u32 in_flight, int flag) |
21 | { | 21 | { |
22 | struct tcp_sock *tp = tcp_sk(sk); | 22 | struct tcp_sock *tp = tcp_sk(sk); |
23 | if (in_flight < tp->snd_cwnd) | 23 | |
24 | if (!tcp_is_cwnd_limited(sk, in_flight)) | ||
24 | return; | 25 | return; |
25 | 26 | ||
26 | if (tp->snd_cwnd <= tp->snd_ssthresh) { | 27 | if (tp->snd_cwnd <= tp->snd_ssthresh) |
27 | tp->snd_cwnd++; | 28 | tcp_slow_start(tp); |
28 | } else { | 29 | else { |
29 | tp->snd_cwnd_cnt++; | 30 | tp->snd_cwnd_cnt++; |
30 | if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){ | 31 | if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){ |
31 | tp->snd_cwnd++; | 32 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) |
33 | tp->snd_cwnd++; | ||
32 | tp->snd_cwnd_cnt = 0; | 34 | tp->snd_cwnd_cnt = 0; |
33 | } | 35 | } |
34 | } | 36 | } |
35 | tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp); | ||
36 | tp->snd_cwnd_stamp = tcp_time_stamp; | ||
37 | } | 37 | } |
38 | 38 | ||
39 | static u32 tcp_scalable_ssthresh(struct sock *sk) | 39 | static u32 tcp_scalable_ssthresh(struct sock *sk) |
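
[Editor's note: tcp_scalable_cong_avoid() is rebuilt on the shared helpers: return early unless the connection is actually cwnd-limited, use the common tcp_slow_start() below ssthresh, and clamp growth against snd_cwnd_clamp inline instead of with a trailing min_t(). A simplified model of a cong_avoid in that shape; the helper bodies and the AI-count value are stand-ins:]

    #include <stdio.h>

    struct tp { unsigned cwnd, ssthresh, cwnd_cnt, clamp; };

    static int is_cwnd_limited(const struct tp *tp, unsigned in_flight)
    {
        return in_flight >= tp->cwnd;  /* only grow when cwnd binds */
    }

    static void slow_start(struct tp *tp)
    {
        if (tp->cwnd < tp->clamp)
            tp->cwnd++;
    }

    static void scalable_like_cong_avoid(struct tp *tp, unsigned in_flight)
    {
        const unsigned ai_cnt = 50;    /* TCP_SCALABLE_AI_CNT stand-in */

        if (!is_cwnd_limited(tp, in_flight))
            return;
        if (tp->cwnd <= tp->ssthresh) {
            slow_start(tp);
        } else if (++tp->cwnd_cnt > (tp->cwnd < ai_cnt ? tp->cwnd : ai_cnt)) {
            if (tp->cwnd < tp->clamp)  /* clamp applied inline */
                tp->cwnd++;
            tp->cwnd_cnt = 0;
        }
    }

    int main(void)
    {
        struct tp tp = { .cwnd = 10, .ssthresh = 20,
                         .cwnd_cnt = 0, .clamp = 100 };
        scalable_like_cong_avoid(&tp, 10); /* limited: cwnd -> 11 */
        scalable_like_cong_avoid(&tp, 4);  /* not limited: no change */
        printf("cwnd=%u\n", tp.cwnd);
        return 0;
    }
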
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 415ee47ac1c5..e1880959614a 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -58,7 +58,7 @@ static void tcp_write_err(struct sock *sk) | |||
58 | * to prevent DoS attacks. It is called when a retransmission timeout | 58 | * to prevent DoS attacks. It is called when a retransmission timeout |
59 | * or zero probe timeout occurs on orphaned socket. | 59 | * or zero probe timeout occurs on orphaned socket. |
60 | * | 60 | * |
61 | * Criterium is still not confirmed experimentally and may change. | 61 | * Criteria is still not confirmed experimentally and may change. |
62 | * We kill the socket, if: | 62 | * We kill the socket, if: |
63 | * 1. If number of orphaned sockets exceeds an administratively configured | 63 | * 1. If number of orphaned sockets exceeds an administratively configured |
64 | * limit. | 64 | * limit. |
@@ -132,7 +132,7 @@ static int tcp_write_timeout(struct sock *sk) | |||
132 | hole detection. :-( | 132 | hole detection. :-( |
133 | 133 | ||
134 | It is place to make it. It is not made. I do not want | 134 | It is place to make it. It is not made. I do not want |
135 | to make it. It is disguisting. It does not work in any | 135 | to make it. It is disgusting. It does not work in any |
136 | case. Let me to cite the same draft, which requires for | 136 | case. Let me to cite the same draft, which requires for |
137 | us to implement this: | 137 | us to implement this: |
138 | 138 | ||
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 93c5f92070f9..b7d296a8ac6d 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c | |||
@@ -236,8 +236,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, | |||
236 | /* We don't have enough RTT samples to do the Vegas | 236 | /* We don't have enough RTT samples to do the Vegas |
237 | * calculation, so we'll behave like Reno. | 237 | * calculation, so we'll behave like Reno. |
238 | */ | 238 | */ |
239 | if (tp->snd_cwnd > tp->snd_ssthresh) | 239 | tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); |
240 | tp->snd_cwnd++; | ||
241 | } else { | 240 | } else { |
242 | u32 rtt, target_cwnd, diff; | 241 | u32 rtt, target_cwnd, diff; |
243 | 242 | ||
@@ -275,7 +274,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, | |||
275 | */ | 274 | */ |
276 | diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd; | 275 | diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd; |
277 | 276 | ||
278 | if (tp->snd_cwnd < tp->snd_ssthresh) { | 277 | if (tp->snd_cwnd <= tp->snd_ssthresh) { |
279 | /* Slow start. */ | 278 | /* Slow start. */ |
280 | if (diff > gamma) { | 279 | if (diff > gamma) { |
281 | /* Going too fast. Time to slow down | 280 | /* Going too fast. Time to slow down |
@@ -295,6 +294,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, | |||
295 | V_PARAM_SHIFT)+1); | 294 | V_PARAM_SHIFT)+1); |
296 | 295 | ||
297 | } | 296 | } |
297 | tcp_slow_start(tp); | ||
298 | } else { | 298 | } else { |
299 | /* Congestion avoidance. */ | 299 | /* Congestion avoidance. */ |
300 | u32 next_snd_cwnd; | 300 | u32 next_snd_cwnd; |
@@ -327,37 +327,17 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, | |||
327 | else if (next_snd_cwnd < tp->snd_cwnd) | 327 | else if (next_snd_cwnd < tp->snd_cwnd) |
328 | tp->snd_cwnd--; | 328 | tp->snd_cwnd--; |
329 | } | 329 | } |
330 | } | ||
331 | 330 | ||
332 | /* Wipe the slate clean for the next RTT. */ | 331 | if (tp->snd_cwnd < 2) |
333 | vegas->cntRTT = 0; | 332 | tp->snd_cwnd = 2; |
334 | vegas->minRTT = 0x7fffffff; | 333 | else if (tp->snd_cwnd > tp->snd_cwnd_clamp) |
334 | tp->snd_cwnd = tp->snd_cwnd_clamp; | ||
335 | } | ||
335 | } | 336 | } |
336 | 337 | ||
337 | /* The following code is executed for every ack we receive, | 338 | /* Wipe the slate clean for the next RTT. */ |
338 | * except for conditions checked in should_advance_cwnd() | 339 | vegas->cntRTT = 0; |
339 | * before the call to tcp_cong_avoid(). Mainly this means that | 340 | vegas->minRTT = 0x7fffffff; |
340 | * we only execute this code if the ack actually acked some | ||
341 | * data. | ||
342 | */ | ||
343 | |||
344 | /* If we are in slow start, increase our cwnd in response to this ACK. | ||
345 | * (If we are not in slow start then we are in congestion avoidance, | ||
346 | * and adjust our congestion window only once per RTT. See the code | ||
347 | * above.) | ||
348 | */ | ||
349 | if (tp->snd_cwnd <= tp->snd_ssthresh) | ||
350 | tp->snd_cwnd++; | ||
351 | |||
352 | /* to keep cwnd from growing without bound */ | ||
353 | tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp); | ||
354 | |||
355 | /* Make sure that we are never so timid as to reduce our cwnd below | ||
356 | * 2 MSS. | ||
357 | * | ||
358 | * Going below 2 MSS would risk huge delayed ACKs from our receiver. | ||
359 | */ | ||
360 | tp->snd_cwnd = max(tp->snd_cwnd, 2U); | ||
361 | } | 341 | } |
362 | 342 | ||
363 | /* Extract info for Tcp socket info provided via netlink. */ | 343 | /* Extract info for Tcp socket info provided via netlink. */ |
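
[Editor's note: Vegas's old per-ACK tail is folded into the once-per-RTT branch above; Reno handling is delegated to tcp_reno_cong_avoid() when RTT samples are scarce, tcp_slow_start() covers the sub-ssthresh case, and the window ends clamped to [2, snd_cwnd_clamp], so it can neither starve delayed ACKs nor grow without bound. A trivial sketch of that final clamp, names illustrative:]

    #include <stdio.h>

    /* Never below 2 segments (huge delayed ACKs otherwise), never
     * above the administrative clamp. */
    static unsigned clamp_cwnd(unsigned cwnd, unsigned cwnd_clamp)
    {
        if (cwnd < 2)
            return 2;
        if (cwnd > cwnd_clamp)
            return cwnd_clamp;
        return cwnd;
    }

    int main(void)
    {
        printf("%u %u %u\n",
               clamp_cwnd(1, 100),     /* -> 2   */
               clamp_cwnd(50, 100),    /* -> 50  */
               clamp_cwnd(500, 100));  /* -> 100 */
        return 0;
    }
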
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e0bd1013cb0d..2422a5f7195d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -761,7 +761,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
761 | 761 | ||
762 | static __inline__ int __udp_checksum_complete(struct sk_buff *skb) | 762 | static __inline__ int __udp_checksum_complete(struct sk_buff *skb) |
763 | { | 763 | { |
764 | return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); | 764 | return __skb_checksum_complete(skb); |
765 | } | 765 | } |
766 | 766 | ||
767 | static __inline__ int udp_checksum_complete(struct sk_buff *skb) | 767 | static __inline__ int udp_checksum_complete(struct sk_buff *skb) |
@@ -1100,11 +1100,8 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, | |||
1100 | if (uh->check == 0) { | 1100 | if (uh->check == 0) { |
1101 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1101 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1102 | } else if (skb->ip_summed == CHECKSUM_HW) { | 1102 | } else if (skb->ip_summed == CHECKSUM_HW) { |
1103 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
1104 | if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) | 1103 | if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) |
1105 | return 0; | 1104 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1106 | LIMIT_NETDEBUG(KERN_DEBUG "udp v4 hw csum failure.\n"); | ||
1107 | skb->ip_summed = CHECKSUM_NONE; | ||
1108 | } | 1105 | } |
1109 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) | 1106 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) |
1110 | skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); | 1107 | skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); |
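
[Editor's note: this and the remaining receive-path conversions (ipv6/raw.c, ipv6/udp.c, rxrpc, sunrpc below) all funnel software verification through skb_checksum_complete() / __skb_checksum_complete(), which caches a successful result in ip_summed so later consumers skip re-verification. A small model of the verify-lazily-then-drop call-site pattern, with stand-in types:]

    #include <stdio.h>

    enum summed { SUMMED_NONE, SUMMED_HW, SUMMED_UNNECESSARY };

    struct pkt { enum summed ip_summed; int wire_sum_ok; };

    /* Model of skb_checksum_complete(): no-op when already verified,
     * otherwise do the full software check and cache the result. */
    static int checksum_complete(struct pkt *p)
    {
        if (p->ip_summed == SUMMED_UNNECESSARY)
            return 0;
        if (p->wire_sum_ok) {
            p->ip_summed = SUMMED_UNNECESSARY;
            return 0;
        }
        return 1;                    /* non-zero folded sum: bad */
    }

    static int queue_rcv(struct pkt *p)
    {
        if (checksum_complete(p)) {
            printf("drop: bad checksum\n"); /* kfree_skb() analogue */
            return 0;
        }
        printf("queued\n");
        return 1;
    }

    int main(void)
    {
        struct pkt good = { SUMMED_NONE, 1 }, bad = { SUMMED_NONE, 0 };
        queue_rcv(&good);
        queue_rcv(&bad);
        return 0;
    }
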
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 23e540365a14..1bdf0fb8bf8a 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c | |||
@@ -585,17 +585,16 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) | |||
585 | daddr = &skb->nh.ipv6h->daddr; | 585 | daddr = &skb->nh.ipv6h->daddr; |
586 | 586 | ||
587 | /* Perform checksum. */ | 587 | /* Perform checksum. */ |
588 | if (skb->ip_summed == CHECKSUM_HW) { | 588 | switch (skb->ip_summed) { |
589 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 589 | case CHECKSUM_HW: |
590 | if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, | 590 | if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, |
591 | skb->csum)) { | 591 | skb->csum)) |
592 | LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 hw checksum failed\n"); | 592 | break; |
593 | skb->ip_summed = CHECKSUM_NONE; | 593 | /* fall through */ |
594 | } | 594 | case CHECKSUM_NONE: |
595 | } | 595 | skb->csum = ~csum_ipv6_magic(saddr, daddr, skb->len, |
596 | if (skb->ip_summed == CHECKSUM_NONE) { | 596 | IPPROTO_ICMPV6, 0); |
597 | if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, | 597 | if (__skb_checksum_complete(skb)) { |
598 | skb_checksum(skb, 0, skb->len, 0))) { | ||
599 | LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", | 598 | LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", |
600 | NIP6(*saddr), NIP6(*daddr)); | 599 | NIP6(*saddr), NIP6(*daddr)); |
601 | goto discard_it; | 600 | goto discard_it; |
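
[Editor's note: icmpv6_rcv() replaces the nested ifs with a switch over ip_summed: a hardware sum that verifies breaks out early, a failing one falls through to the CHECKSUM_NONE arm, which seeds the pseudo-header sum and lets __skb_checksum_complete() run the full check. A compact sketch of that fall-through control flow; the helpers are illustrative:]

    #include <stdio.h>

    enum summed { SUMMED_NONE, SUMMED_HW };

    /* hw_ok: whether the device-supplied sum verifies against the
     * pseudo-header; sw_ok: result of the full software checksum. */
    static int verify(enum summed state, int hw_ok, int sw_ok)
    {
        switch (state) {
        case SUMMED_HW:
            if (hw_ok)
                break;             /* verified cheaply, done */
            /* fall through */
        case SUMMED_NONE:
            if (!sw_ok)
                return -1;         /* discard_it analogue */
            break;
        }
        return 0;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               verify(SUMMED_HW, 1, 0),  /* hw good: 0, sw never consulted */
               verify(SUMMED_HW, 0, 1),  /* falls through, sw good: 0 */
               verify(SUMMED_HW, 0, 0)); /* falls through, sw bad: -1 */
        return 0;
    }
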
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 651c79b41eeb..8e9628f1c4c5 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c | |||
@@ -298,13 +298,10 @@ void rawv6_err(struct sock *sk, struct sk_buff *skb, | |||
298 | static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) | 298 | static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) |
299 | { | 299 | { |
300 | if ((raw6_sk(sk)->checksum || sk->sk_filter) && | 300 | if ((raw6_sk(sk)->checksum || sk->sk_filter) && |
301 | skb->ip_summed != CHECKSUM_UNNECESSARY) { | 301 | skb_checksum_complete(skb)) { |
302 | if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { | 302 | /* FIXME: increment a raw6 drops counter here */ |
303 | /* FIXME: increment a raw6 drops counter here */ | 303 | kfree_skb(skb); |
304 | kfree_skb(skb); | 304 | return 0; |
305 | return 0; | ||
306 | } | ||
307 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
308 | } | 305 | } |
309 | 306 | ||
310 | /* Charge it to the socket. */ | 307 | /* Charge it to the socket. */ |
@@ -337,32 +334,25 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) | |||
337 | if (!rp->checksum) | 334 | if (!rp->checksum) |
338 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 335 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
339 | 336 | ||
340 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) { | 337 | if (skb->ip_summed == CHECKSUM_HW) { |
341 | if (skb->ip_summed == CHECKSUM_HW) { | 338 | skb_postpull_rcsum(skb, skb->nh.raw, |
342 | skb_postpull_rcsum(skb, skb->nh.raw, | 339 | skb->h.raw - skb->nh.raw); |
343 | skb->h.raw - skb->nh.raw); | 340 | if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr, |
341 | &skb->nh.ipv6h->daddr, | ||
342 | skb->len, inet->num, skb->csum)) | ||
344 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 343 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
345 | if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, | ||
346 | &skb->nh.ipv6h->daddr, | ||
347 | skb->len, inet->num, skb->csum)) { | ||
348 | LIMIT_NETDEBUG(KERN_DEBUG "raw v6 hw csum failure.\n"); | ||
349 | skb->ip_summed = CHECKSUM_NONE; | ||
350 | } | ||
351 | } | ||
352 | if (skb->ip_summed == CHECKSUM_NONE) | ||
353 | skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr, | ||
354 | &skb->nh.ipv6h->daddr, | ||
355 | skb->len, inet->num, 0); | ||
356 | } | 344 | } |
345 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) | ||
346 | skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr, | ||
347 | &skb->nh.ipv6h->daddr, | ||
348 | skb->len, inet->num, 0); | ||
357 | 349 | ||
358 | if (inet->hdrincl) { | 350 | if (inet->hdrincl) { |
359 | if (skb->ip_summed != CHECKSUM_UNNECESSARY && | 351 | if (skb_checksum_complete(skb)) { |
360 | (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { | ||
361 | /* FIXME: increment a raw6 drops counter here */ | 352 | /* FIXME: increment a raw6 drops counter here */ |
362 | kfree_skb(skb); | 353 | kfree_skb(skb); |
363 | return 0; | 354 | return 0; |
364 | } | 355 | } |
365 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
366 | } | 356 | } |
367 | 357 | ||
368 | rawv6_rcv_skb(sk, skb); | 358 | rawv6_rcv_skb(sk, skb); |
@@ -407,7 +397,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, | |||
407 | if (skb->ip_summed==CHECKSUM_UNNECESSARY) { | 397 | if (skb->ip_summed==CHECKSUM_UNNECESSARY) { |
408 | err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); | 398 | err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); |
409 | } else if (msg->msg_flags&MSG_TRUNC) { | 399 | } else if (msg->msg_flags&MSG_TRUNC) { |
410 | if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) | 400 | if (__skb_checksum_complete(skb)) |
411 | goto csum_copy_err; | 401 | goto csum_copy_err; |
412 | err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); | 402 | err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); |
413 | } else { | 403 | } else { |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d746d3b27efb..62c0e5bd931c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -1401,20 +1401,18 @@ out: | |||
1401 | static int tcp_v6_checksum_init(struct sk_buff *skb) | 1401 | static int tcp_v6_checksum_init(struct sk_buff *skb) |
1402 | { | 1402 | { |
1403 | if (skb->ip_summed == CHECKSUM_HW) { | 1403 | if (skb->ip_summed == CHECKSUM_HW) { |
1404 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
1405 | if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, | 1404 | if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, |
1406 | &skb->nh.ipv6h->daddr,skb->csum)) | 1405 | &skb->nh.ipv6h->daddr,skb->csum)) { |
1406 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
1407 | return 0; | 1407 | return 0; |
1408 | LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n"); | 1408 | } |
1409 | } | 1409 | } |
1410 | |||
1411 | skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, | ||
1412 | &skb->nh.ipv6h->daddr, 0); | ||
1413 | |||
1410 | if (skb->len <= 76) { | 1414 | if (skb->len <= 76) { |
1411 | if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, | 1415 | return __skb_checksum_complete(skb); |
1412 | &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0))) | ||
1413 | return -1; | ||
1414 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
1415 | } else { | ||
1416 | skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, | ||
1417 | &skb->nh.ipv6h->daddr,0); | ||
1418 | } | 1416 | } |
1419 | return 0; | 1417 | return 0; |
1420 | } | 1418 | } |
@@ -1575,7 +1573,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) | |||
1575 | goto discard_it; | 1573 | goto discard_it; |
1576 | 1574 | ||
1577 | if ((skb->ip_summed != CHECKSUM_UNNECESSARY && | 1575 | if ((skb->ip_summed != CHECKSUM_UNNECESSARY && |
1578 | tcp_v6_checksum_init(skb) < 0)) | 1576 | tcp_v6_checksum_init(skb))) |
1579 | goto bad_packet; | 1577 | goto bad_packet; |
1580 | 1578 | ||
1581 | th = skb->h.th; | 1579 | th = skb->h.th; |
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index bf9519341fd3..e671153b47b2 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c | |||
@@ -248,7 +248,7 @@ try_again: | |||
248 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, | 248 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, |
249 | copied); | 249 | copied); |
250 | } else if (msg->msg_flags&MSG_TRUNC) { | 250 | } else if (msg->msg_flags&MSG_TRUNC) { |
251 | if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) | 251 | if (__skb_checksum_complete(skb)) |
252 | goto csum_copy_err; | 252 | goto csum_copy_err; |
253 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, | 253 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, |
254 | copied); | 254 | copied); |
@@ -363,13 +363,10 @@ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | |||
363 | return -1; | 363 | return -1; |
364 | } | 364 | } |
365 | 365 | ||
366 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) { | 366 | if (skb_checksum_complete(skb)) { |
367 | if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { | 367 | UDP6_INC_STATS_BH(UDP_MIB_INERRORS); |
368 | UDP6_INC_STATS_BH(UDP_MIB_INERRORS); | 368 | kfree_skb(skb); |
369 | kfree_skb(skb); | 369 | return 0; |
370 | return 0; | ||
371 | } | ||
372 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
373 | } | 370 | } |
374 | 371 | ||
375 | if (sock_queue_rcv_skb(sk,skb)<0) { | 372 | if (sock_queue_rcv_skb(sk,skb)<0) { |
@@ -491,13 +488,10 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) | |||
491 | uh = skb->h.uh; | 488 | uh = skb->h.uh; |
492 | } | 489 | } |
493 | 490 | ||
494 | if (skb->ip_summed==CHECKSUM_HW) { | 491 | if (skb->ip_summed == CHECKSUM_HW && |
492 | !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) | ||
495 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 493 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
496 | if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) { | 494 | |
497 | LIMIT_NETDEBUG(KERN_DEBUG "udp v6 hw csum failure.\n"); | ||
498 | skb->ip_summed = CHECKSUM_NONE; | ||
499 | } | ||
500 | } | ||
501 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) | 495 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) |
502 | skb->csum = ~csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, 0); | 496 | skb->csum = ~csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, 0); |
503 | 497 | ||
@@ -521,8 +515,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) | |||
521 | if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) | 515 | if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) |
522 | goto discard; | 516 | goto discard; |
523 | 517 | ||
524 | if (skb->ip_summed != CHECKSUM_UNNECESSARY && | 518 | if (skb_checksum_complete(skb)) |
525 | (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) | ||
526 | goto discard; | 519 | goto discard; |
527 | UDP6_INC_STATS_BH(UDP_MIB_NOPORTS); | 520 | UDP6_INC_STATS_BH(UDP_MIB_NOPORTS); |
528 | 521 | ||
diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c index 122c086ee2db..dbe6105e83a5 100644 --- a/net/rxrpc/transport.c +++ b/net/rxrpc/transport.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/in.h> | 23 | #include <linux/in.h> |
24 | #include <linux/in6.h> | 24 | #include <linux/in6.h> |
25 | #include <linux/icmp.h> | 25 | #include <linux/icmp.h> |
26 | #include <linux/skbuff.h> | ||
26 | #include <net/sock.h> | 27 | #include <net/sock.h> |
27 | #include <net/ip.h> | 28 | #include <net/ip.h> |
28 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 29 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) |
@@ -475,15 +476,11 @@ void rxrpc_trans_receive_packet(struct rxrpc_transport *trans) | |||
475 | 476 | ||
476 | /* we'll probably need to checksum it (didn't call | 477 | /* we'll probably need to checksum it (didn't call |
477 | * sock_recvmsg) */ | 478 | * sock_recvmsg) */ |
478 | if (pkt->ip_summed != CHECKSUM_UNNECESSARY) { | 479 | if (skb_checksum_complete(pkt)) { |
479 | if ((unsigned short) | 480 | kfree_skb(pkt); |
480 | csum_fold(skb_checksum(pkt, 0, pkt->len, | 481 | rxrpc_krxiod_queue_transport(trans); |
481 | pkt->csum))) { | 482 | _leave(" CSUM failed"); |
482 | kfree_skb(pkt); | 483 | return; |
483 | rxrpc_krxiod_queue_transport(trans); | ||
484 | _leave(" CSUM failed"); | ||
485 | return; | ||
486 | } | ||
487 | } | 484 | } |
488 | 485 | ||
489 | addr = pkt->nh.iph->saddr; | 486 | addr = pkt->nh.iph->saddr; |
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 8f97e90f36c8..eb330d4f66d6 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c | |||
@@ -6,6 +6,9 @@ | |||
6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> | 6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/compiler.h> | ||
10 | #include <linux/netdevice.h> | ||
11 | #include <linux/skbuff.h> | ||
9 | #include <linux/types.h> | 12 | #include <linux/types.h> |
10 | #include <linux/pagemap.h> | 13 | #include <linux/pagemap.h> |
11 | #include <linux/udp.h> | 14 | #include <linux/udp.h> |
@@ -165,6 +168,8 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) | |||
165 | return -1; | 168 | return -1; |
166 | if ((unsigned short)csum_fold(desc.csum)) | 169 | if ((unsigned short)csum_fold(desc.csum)) |
167 | return -1; | 170 | return -1; |
171 | if (unlikely(skb->ip_summed == CHECKSUM_HW)) | ||
172 | netdev_rx_csum_fault(skb->dev); | ||
168 | return 0; | 173 | return 0; |
169 | no_checksum: | 174 | no_checksum: |
170 | if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) | 175 | if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) |
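
[Editor's note: csum_partial_copy_to_xdr() now calls netdev_rx_csum_fault() when the software fold succeeds even though the device had supplied a hardware checksum (CHECKSUM_HW) that failed to verify, i.e., the receive offload produced a wrong value; the unlikely() annotation reflects that this flags broken hardware rather than a hot path. A sketch of that report-on-contradiction diagnostic, with illustrative names:]

    #include <stdio.h>

    struct dev { const char *name; };

    static void rx_csum_fault(const struct dev *d)
    {
        fprintf(stderr, "%s: hw csum failure\n", d ? d->name : "<null>");
    }

    /* hw_summed: device provided a checksum that did not verify.
     * sw_ok:     result of the full software verification. */
    static int finish_checksum(const struct dev *d, int hw_summed, int sw_ok)
    {
        if (!sw_ok)
            return -1;        /* packet really is corrupt: drop */
        if (hw_summed)        /* unlikely(): offload disagreed */
            rx_csum_fault(d); /* software says good, hardware lied */
        return 0;             /* deliver the packet */
    }

    int main(void)
    {
        struct dev eth0 = { "eth0" };
        finish_checksum(&eth0, 1, 1);  /* reports eth0's broken offload */
        finish_checksum(&eth0, 0, 0);  /* plain corruption, no report */
        return 0;
    }
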
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index f16e7cdd6150..e50e7cf43737 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
@@ -623,12 +623,9 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
623 | /* we can use it in-place */ | 623 | /* we can use it in-place */ |
624 | rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); | 624 | rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); |
625 | rqstp->rq_arg.head[0].iov_len = len; | 625 | rqstp->rq_arg.head[0].iov_len = len; |
626 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) { | 626 | if (skb_checksum_complete(skb)) { |
627 | if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { | 627 | skb_free_datagram(svsk->sk_sk, skb); |
628 | skb_free_datagram(svsk->sk_sk, skb); | 628 | return 0; |
629 | return 0; | ||
630 | } | ||
631 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
632 | } | 629 | } |
633 | rqstp->rq_skbuff = skb; | 630 | rqstp->rq_skbuff = skb; |
634 | } | 631 | } |