Diffstat (limited to 'net')
-rw-r--r--  net/core/datagram.c                           |  21
-rw-r--r--  net/core/dev.c                                |  12
-rw-r--r--  net/core/netpoll.c                            |  18
-rw-r--r--  net/decnet/af_decnet.c                        |  14
-rw-r--r--  net/ipv4/icmp.c                               |   6
-rw-r--r--  net/ipv4/igmp.c                               |  19
-rw-r--r--  net/ipv4/ip_gre.c                             |  15
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_icmp.c |  11
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c                    |   8
-rw-r--r--  net/ipv4/tcp.c                                |   3
-rw-r--r--  net/ipv4/tcp_bic.c                            |  12
-rw-r--r--  net/ipv4/tcp_cong.c                           |  40
-rw-r--r--  net/ipv4/tcp_highspeed.c                      |  11
-rw-r--r--  net/ipv4/tcp_htcp.c                           |  13
-rw-r--r--  net/ipv4/tcp_hybla.c                          |   6
-rw-r--r--  net/ipv4/tcp_input.c                          | 288
-rw-r--r--  net/ipv4/tcp_ipv4.c                           |  28
-rw-r--r--  net/ipv4/tcp_minisocks.c                      |   7
-rw-r--r--  net/ipv4/tcp_output.c                         |  61
-rw-r--r--  net/ipv4/tcp_scalable.c                       |  14
-rw-r--r--  net/ipv4/tcp_timer.c                          |   4
-rw-r--r--  net/ipv4/tcp_vegas.c                          |  42
-rw-r--r--  net/ipv4/udp.c                                |   7
-rw-r--r--  net/ipv6/icmp.c                               |  21
-rw-r--r--  net/ipv6/raw.c                                |  42
-rw-r--r--  net/ipv6/tcp_ipv6.c                           |  20
-rw-r--r--  net/ipv6/udp.c                                |  25
-rw-r--r--  net/rxrpc/transport.c                         |  15
-rw-r--r--  net/sunrpc/socklib.c                          |   5
-rw-r--r--  net/sunrpc/svcsock.c                          |   9
30 files changed, 466 insertions, 331 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index d219435d086c..1bcfef51ac58 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -350,6 +350,20 @@ fault:
350 return -EFAULT; 350 return -EFAULT;
351} 351}
352 352
353unsigned int __skb_checksum_complete(struct sk_buff *skb)
354{
355 unsigned int sum;
356
357 sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
358 if (likely(!sum)) {
359 if (unlikely(skb->ip_summed == CHECKSUM_HW))
360 netdev_rx_csum_fault(skb->dev);
361 skb->ip_summed = CHECKSUM_UNNECESSARY;
362 }
363 return sum;
364}
365EXPORT_SYMBOL(__skb_checksum_complete);
366
353/** 367/**
354 * skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec. 368 * skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec.
355 * @skb: skbuff 369 * @skb: skbuff
@@ -363,7 +377,7 @@ fault:
363 * -EFAULT - fault during copy. Beware, in this case iovec 377 * -EFAULT - fault during copy. Beware, in this case iovec
364 * can be modified! 378 * can be modified!
365 */ 379 */
366int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, 380int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
367 int hlen, struct iovec *iov) 381 int hlen, struct iovec *iov)
368{ 382{
369 unsigned int csum; 383 unsigned int csum;
@@ -376,8 +390,7 @@ int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb,
376 iov++; 390 iov++;
377 391
378 if (iov->iov_len < chunk) { 392 if (iov->iov_len < chunk) {
379 if ((unsigned short)csum_fold(skb_checksum(skb, 0, chunk + hlen, 393 if (__skb_checksum_complete(skb))
380 skb->csum)))
381 goto csum_error; 394 goto csum_error;
382 if (skb_copy_datagram_iovec(skb, hlen, iov, chunk)) 395 if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
383 goto fault; 396 goto fault;
@@ -388,6 +401,8 @@ int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb,
388 goto fault; 401 goto fault;
389 if ((unsigned short)csum_fold(csum)) 402 if ((unsigned short)csum_fold(csum))
390 goto csum_error; 403 goto csum_error;
404 if (unlikely(skb->ip_summed == CHECKSUM_HW))
405 netdev_rx_csum_fault(skb->dev);
391 iov->iov_len -= chunk; 406 iov->iov_len -= chunk;
392 iov->iov_base += chunk; 407 iov->iov_base += chunk;
393 } 408 }
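
The new __skb_checksum_complete() above amounts to folding the 32-bit ones'-complement sum of the whole packet down to 16 bits and treating a folded result of zero as "checksum verified". A minimal userspace sketch of that arithmetic, not kernel code; the helper names are illustrative only:

    #include <stdint.h>
    #include <stddef.h>

    /* Fold a 32-bit ones'-complement accumulator to 16 bits and invert, as
     * csum_fold() does; a result of 0 means the checksum verified. */
    static uint16_t fold16(uint32_t sum)
    {
            sum = (sum & 0xffff) + (sum >> 16);
            sum = (sum & 0xffff) + (sum >> 16);
            return (uint16_t)~sum;
    }

    /* Buffer-based stand-in for the skb walk: ones'-complement sum of the
     * packet bytes (the kernel version also folds in the pseudo-header sum
     * that callers stash in skb->csum). */
    static uint16_t checksum_complete(const uint8_t *data, size_t len)
    {
            uint32_t sum = 0;
            size_t i;

            for (i = 0; i + 1 < len; i += 2)
                    sum += ((uint32_t)data[i] << 8) | data[i + 1];
            if (len & 1)
                    sum += (uint32_t)data[len - 1] << 8;

            return fold16(sum);
    }
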
diff --git a/net/core/dev.c b/net/core/dev.c
index 8d1541595277..0b48e294aafe 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1108,6 +1108,18 @@ out:
1108 return ret; 1108 return ret;
1109} 1109}
1110 1110
1111/* Take action when hardware reception checksum errors are detected. */
1112#ifdef CONFIG_BUG
1113void netdev_rx_csum_fault(struct net_device *dev)
1114{
1115 if (net_ratelimit()) {
1116 printk(KERN_ERR "%s: hw csum failure.\n", dev->name);
1117 dump_stack();
1118 }
1119}
1120EXPORT_SYMBOL(netdev_rx_csum_fault);
1121#endif
1122
1111#ifdef CONFIG_HIGHMEM 1123#ifdef CONFIG_HIGHMEM
1112/* Actually, we should eliminate this check as soon as we know, that: 1124/* Actually, we should eliminate this check as soon as we know, that:
1113 * 1. IOMMU is present and allows to map all the memory. 1125 * 1. IOMMU is present and allows to map all the memory.
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 802fe11efad0..49424a42a2c0 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -101,16 +101,20 @@ void netpoll_queue(struct sk_buff *skb)
101static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, 101static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
102 unsigned short ulen, u32 saddr, u32 daddr) 102 unsigned short ulen, u32 saddr, u32 daddr)
103{ 103{
104 if (uh->check == 0) 104 unsigned int psum;
105
106 if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY)
105 return 0; 107 return 0;
106 108
107 if (skb->ip_summed == CHECKSUM_HW) 109 psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
108 return csum_tcpudp_magic( 110
109 saddr, daddr, ulen, IPPROTO_UDP, skb->csum); 111 if (skb->ip_summed == CHECKSUM_HW &&
112 !(u16)csum_fold(csum_add(psum, skb->csum)))
113 return 0;
110 114
111 skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); 115 skb->csum = psum;
112 116
113 return csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); 117 return __skb_checksum_complete(skb);
114} 118}
115 119
116/* 120/*
@@ -489,7 +493,7 @@ int __netpoll_rx(struct sk_buff *skb)
489 493
490 if (ulen != len) 494 if (ulen != len)
491 goto out; 495 goto out;
492 if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr) < 0) 496 if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
493 goto out; 497 goto out;
494 if (np->local_ip && np->local_ip != ntohl(iph->daddr)) 498 if (np->local_ip && np->local_ip != ntohl(iph->daddr))
495 goto out; 499 goto out;
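
checksum_udp() above now computes the UDP pseudo-header sum once (the csum_tcpudp_nofold() call) and reuses it both to validate a hardware-provided sum and to seed the full software verification. A stand-alone sketch of that pseudo-header sum for IPv4 with host-order inputs; illustrative, not the kernel helper:

    #include <stdint.h>

    /* Unfolded ones'-complement sum over the IPv4 pseudo-header (source and
     * destination address, protocol, UDP length); the payload sum and the
     * final fold are added later. */
    static uint32_t udp_pseudo_sum(uint32_t saddr, uint32_t daddr, uint16_t ulen)
    {
            uint32_t sum = 0;

            sum += saddr >> 16;
            sum += saddr & 0xffff;
            sum += daddr >> 16;
            sum += daddr & 0xffff;
            sum += 17;              /* IPPROTO_UDP */
            sum += ulen;
            return sum;
    }
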
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 3f25cadccddd..f89e55f814d9 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1664,17 +1664,15 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock,
1664 goto out; 1664 goto out;
1665 } 1665 }
1666 1666
1667 rv = dn_check_state(sk, NULL, 0, &timeo, flags);
1668 if (rv)
1669 goto out;
1670
1671 if (sk->sk_shutdown & RCV_SHUTDOWN) { 1667 if (sk->sk_shutdown & RCV_SHUTDOWN) {
1672 if (!(flags & MSG_NOSIGNAL)) 1668 rv = 0;
1673 send_sig(SIGPIPE, current, 0);
1674 rv = -EPIPE;
1675 goto out; 1669 goto out;
1676 } 1670 }
1677 1671
1672 rv = dn_check_state(sk, NULL, 0, &timeo, flags);
1673 if (rv)
1674 goto out;
1675
1678 if (flags & ~(MSG_PEEK|MSG_OOB|MSG_WAITALL|MSG_DONTWAIT|MSG_NOSIGNAL)) { 1676 if (flags & ~(MSG_PEEK|MSG_OOB|MSG_WAITALL|MSG_DONTWAIT|MSG_NOSIGNAL)) {
1679 rv = -EOPNOTSUPP; 1677 rv = -EOPNOTSUPP;
1680 goto out; 1678 goto out;
@@ -1928,6 +1926,8 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
1928 1926
1929 if (sk->sk_shutdown & SEND_SHUTDOWN) { 1927 if (sk->sk_shutdown & SEND_SHUTDOWN) {
1930 err = -EPIPE; 1928 err = -EPIPE;
1929 if (!(flags & MSG_NOSIGNAL))
1930 send_sig(SIGPIPE, current, 0);
1931 goto out_err; 1931 goto out_err;
1932 } 1932 }
1933 1933
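
The two hunks above move the SIGPIPE behaviour from the receive path to the send path, matching the usual socket semantics: reading a half-closed socket simply returns 0, while writing to one raises SIGPIPE and fails with EPIPE unless MSG_NOSIGNAL is passed. A small userspace sketch of the sender side, written against the generic socket API rather than anything DECnet-specific:

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <errno.h>
    #include <stdio.h>

    /* Write without risking SIGPIPE; the caller sees EPIPE instead when the
     * send direction has been shut down. */
    static ssize_t send_nosigpipe(int fd, const void *buf, size_t len)
    {
            ssize_t n = send(fd, buf, len, MSG_NOSIGNAL);

            if (n < 0 && errno == EPIPE)
                    fprintf(stderr, "send direction shut down (EPIPE)\n");
            return n;
    }
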
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 175e093ec564..e3eceecd0496 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -934,11 +934,11 @@ int icmp_rcv(struct sk_buff *skb)
934 case CHECKSUM_HW: 934 case CHECKSUM_HW:
935 if (!(u16)csum_fold(skb->csum)) 935 if (!(u16)csum_fold(skb->csum))
936 break; 936 break;
937 LIMIT_NETDEBUG(KERN_DEBUG "icmp v4 hw csum failure\n"); 937 /* fall through */
938 case CHECKSUM_NONE: 938 case CHECKSUM_NONE:
939 if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) 939 skb->csum = 0;
940 if (__skb_checksum_complete(skb))
940 goto error; 941 goto error;
941 default:;
942 } 942 }
943 943
944 if (!pskb_pull(skb, sizeof(struct icmphdr))) 944 if (!pskb_pull(skb, sizeof(struct icmphdr)))
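
The switch above is the receive-path checksum pattern this series converges on; the same shape appears below in igmp.c, ip_gre.c and the ICMP conntrack helper. Accept a hardware sum that folds to zero, otherwise fall through and verify the packet in software via __skb_checksum_complete(). A stand-alone model of that control flow; the names mirror the 2.6 kernel but this is an illustration, not kernel code:

    #include <stdint.h>

    enum { CHECKSUM_NONE, CHECKSUM_HW, CHECKSUM_UNNECESSARY };

    struct rx_pkt {
            int ip_summed;
            uint32_t csum;          /* unfolded hardware-provided sum */
    };

    static uint16_t fold16(uint32_t sum)
    {
            sum = (sum & 0xffff) + (sum >> 16);
            sum = (sum & 0xffff) + (sum >> 16);
            return (uint16_t)~sum;
    }

    /* Returns non-zero if the packet must be dropped; sw_verify() stands in
     * for __skb_checksum_complete() and returns 0 on success. */
    static int rx_checksum_bad(struct rx_pkt *p,
                               uint16_t (*sw_verify)(struct rx_pkt *))
    {
            switch (p->ip_summed) {
            case CHECKSUM_HW:
                    if (fold16(p->csum) == 0)
                            return 0;       /* hardware already verified it */
                    /* fall through */
            case CHECKSUM_NONE:
                    p->csum = 0;
                    return sw_verify(p) != 0;
            }
            return 0;                       /* CHECKSUM_UNNECESSARY */
    }
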
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c6247fc84060..c04607b49212 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -872,11 +872,18 @@ int igmp_rcv(struct sk_buff *skb)
872 return 0; 872 return 0;
873 } 873 }
874 874
875 if (!pskb_may_pull(skb, sizeof(struct igmphdr)) || 875 if (!pskb_may_pull(skb, sizeof(struct igmphdr)))
876 (u16)csum_fold(skb_checksum(skb, 0, len, 0))) { 876 goto drop;
877 in_dev_put(in_dev); 877
878 kfree_skb(skb); 878 switch (skb->ip_summed) {
879 return 0; 879 case CHECKSUM_HW:
880 if (!(u16)csum_fold(skb->csum))
881 break;
882 /* fall through */
883 case CHECKSUM_NONE:
884 skb->csum = 0;
885 if (__skb_checksum_complete(skb))
886 goto drop;
880 } 887 }
881 888
882 ih = skb->h.igmph; 889 ih = skb->h.igmph;
@@ -906,6 +913,8 @@ int igmp_rcv(struct sk_buff *skb)
906 default: 913 default:
907 NETDEBUG(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type); 914 NETDEBUG(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type);
908 } 915 }
916
917drop:
909 in_dev_put(in_dev); 918 in_dev_put(in_dev);
910 kfree_skb(skb); 919 kfree_skb(skb);
911 return 0; 920 return 0;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 896ce3f8f53a..4e9c74b54b15 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -577,15 +577,16 @@ static int ipgre_rcv(struct sk_buff *skb)
577 goto drop_nolock; 577 goto drop_nolock;
578 578
579 if (flags&GRE_CSUM) { 579 if (flags&GRE_CSUM) {
580 if (skb->ip_summed == CHECKSUM_HW) { 580 switch (skb->ip_summed) {
581 case CHECKSUM_HW:
581 csum = (u16)csum_fold(skb->csum); 582 csum = (u16)csum_fold(skb->csum);
582 if (csum) 583 if (!csum)
583 skb->ip_summed = CHECKSUM_NONE; 584 break;
584 } 585 /* fall through */
585 if (skb->ip_summed == CHECKSUM_NONE) { 586 case CHECKSUM_NONE:
586 skb->csum = skb_checksum(skb, 0, skb->len, 0); 587 skb->csum = 0;
588 csum = __skb_checksum_complete(skb);
587 skb->ip_summed = CHECKSUM_HW; 589 skb->ip_summed = CHECKSUM_HW;
588 csum = (u16)csum_fold(skb->csum);
589 } 590 }
590 offset += 4; 591 offset += 4;
591 } 592 }
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 5198f3a1e2cd..e4d6b268e8c4 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -13,6 +13,7 @@
13#include <linux/in.h> 13#include <linux/in.h>
14#include <linux/icmp.h> 14#include <linux/icmp.h>
15#include <linux/seq_file.h> 15#include <linux/seq_file.h>
16#include <linux/skbuff.h>
16#include <net/ip.h> 17#include <net/ip.h>
17#include <net/checksum.h> 18#include <net/checksum.h>
18#include <linux/netfilter.h> 19#include <linux/netfilter.h>
@@ -230,19 +231,15 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
230 case CHECKSUM_HW: 231 case CHECKSUM_HW:
231 if (!(u16)csum_fold(skb->csum)) 232 if (!(u16)csum_fold(skb->csum))
232 break; 233 break;
233 if (LOG_INVALID(IPPROTO_ICMP)) 234 /* fall through */
234 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
235 "ip_ct_icmp: bad HW ICMP checksum ");
236 return -NF_ACCEPT;
237 case CHECKSUM_NONE: 235 case CHECKSUM_NONE:
238 if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) { 236 skb->csum = 0;
237 if (__skb_checksum_complete(skb)) {
239 if (LOG_INVALID(IPPROTO_ICMP)) 238 if (LOG_INVALID(IPPROTO_ICMP))
240 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, 239 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
241 "ip_ct_icmp: bad ICMP checksum "); 240 "ip_ct_icmp: bad ICMP checksum ");
242 return -NF_ACCEPT; 241 return -NF_ACCEPT;
243 } 242 }
244 default:
245 break;
246 } 243 }
247 244
248checksum_skipped: 245checksum_skipped:
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 652685623519..01444a02b48b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
645 .proc_handler = &proc_tcp_congestion_control, 645 .proc_handler = &proc_tcp_congestion_control,
646 .strategy = &sysctl_tcp_congestion_control, 646 .strategy = &sysctl_tcp_congestion_control,
647 }, 647 },
648 {
649 .ctl_name = NET_TCP_ABC,
650 .procname = "tcp_abc",
651 .data = &sysctl_tcp_abc,
652 .maxlen = sizeof(int),
653 .mode = 0644,
654 .proc_handler = &proc_dointvec,
655 },
648 656
649 { .ctl_name = 0 } 657 { .ctl_name = 0 }
650}; 658};
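
The new table entry above exposes sysctl_tcp_abc to userspace. Given the procname and its place in ipv4_table, the knob should surface as /proc/sys/net/ipv4/tcp_abc (the path is inferred from the entry, not stated in the patch); a trivial reader:

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/net/ipv4/tcp_abc", "r");
            int val;

            if (!f || fscanf(f, "%d", &val) != 1) {
                    perror("tcp_abc");
                    return 1;
            }
            fclose(f);
            printf("net.ipv4.tcp_abc = %d\n", val);
            return 0;
    }
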
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 72b7c22e1ea5..9ac7a4f46bd8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1640,7 +1640,7 @@ int tcp_disconnect(struct sock *sk, int flags)
1640 } else if (tcp_need_reset(old_state) || 1640 } else if (tcp_need_reset(old_state) ||
1641 (tp->snd_nxt != tp->write_seq && 1641 (tp->snd_nxt != tp->write_seq &&
1642 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { 1642 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
1643 /* The last check adjusts for discrepance of Linux wrt. RFC 1643 /* The last check adjusts for discrepancy of Linux wrt. RFC
1644 * states 1644 * states
1645 */ 1645 */
1646 tcp_send_active_reset(sk, gfp_any()); 1646 tcp_send_active_reset(sk, gfp_any());
@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags)
1669 tp->packets_out = 0; 1669 tp->packets_out = 0;
1670 tp->snd_ssthresh = 0x7fffffff; 1670 tp->snd_ssthresh = 0x7fffffff;
1671 tp->snd_cwnd_cnt = 0; 1671 tp->snd_cwnd_cnt = 0;
1672 tp->bytes_acked = 0;
1672 tcp_set_ca_state(sk, TCP_CA_Open); 1673 tcp_set_ca_state(sk, TCP_CA_Open);
1673 tcp_clear_retrans(tp); 1674 tcp_clear_retrans(tp);
1674 inet_csk_delack_init(sk); 1675 inet_csk_delack_init(sk);
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index ae35e0609047..1d0cd86621b1 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -217,17 +217,15 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack,
217 217
218 bictcp_low_utilization(sk, data_acked); 218 bictcp_low_utilization(sk, data_acked);
219 219
220 if (in_flight < tp->snd_cwnd) 220 if (!tcp_is_cwnd_limited(sk, in_flight))
221 return; 221 return;
222 222
223 if (tp->snd_cwnd <= tp->snd_ssthresh) { 223 if (tp->snd_cwnd <= tp->snd_ssthresh)
224 /* In "safe" area, increase. */ 224 tcp_slow_start(tp);
225 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 225 else {
226 tp->snd_cwnd++;
227 } else {
228 bictcp_update(ca, tp->snd_cwnd); 226 bictcp_update(ca, tp->snd_cwnd);
229 227
230 /* In dangerous area, increase slowly. 228 /* In dangerous area, increase slowly.
231 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd 229 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
232 */ 230 */
233 if (tp->snd_cwnd_cnt >= ca->cnt) { 231 if (tp->snd_cwnd_cnt >= ca->cnt) {
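
This hunk, and the matching ones in tcp_cong.c, tcp_highspeed.c, tcp_htcp.c and tcp_hybla.c below, replaces each module's open-coded "in_flight < snd_cwnd" test and slow-start increment with the shared tcp_is_cwnd_limited() and tcp_slow_start() helpers. A conceptual stand-in for what those helpers do in the simplest case; the field names follow struct tcp_sock, but this is a sketch, not the kernel implementation:

    #include <stdint.h>

    struct cc_state {
            uint32_t snd_cwnd;        /* congestion window, in segments */
            uint32_t snd_cwnd_clamp;  /* hard upper bound */
    };

    /* Growing cwnd only makes sense when the sender is actually filling it. */
    static int cwnd_limited(const struct cc_state *tp, uint32_t in_flight)
    {
            return in_flight >= tp->snd_cwnd;
    }

    /* Classic slow start: one extra segment of window per ACK, capped. */
    static void slow_start(struct cc_state *tp)
    {
            if (tp->snd_cwnd < tp->snd_cwnd_clamp)
                    tp->snd_cwnd++;
    }
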
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index bbf2d6624e89..c7cc62c8dc12 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -186,24 +186,32 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
186{ 186{
187 struct tcp_sock *tp = tcp_sk(sk); 187 struct tcp_sock *tp = tcp_sk(sk);
188 188
189 if (in_flight < tp->snd_cwnd) 189 if (!tcp_is_cwnd_limited(sk, in_flight))
190 return; 190 return;
191 191
192 if (tp->snd_cwnd <= tp->snd_ssthresh) { 192 /* In "safe" area, increase. */
193 /* In "safe" area, increase. */ 193 if (tp->snd_cwnd <= tp->snd_ssthresh)
194 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 194 tcp_slow_start(tp);
195 tp->snd_cwnd++; 195
196 } else { 196 /* In dangerous area, increase slowly. */
197 /* In dangerous area, increase slowly. 197 else if (sysctl_tcp_abc) {
198 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd 198 /* RFC3465: Apppriate Byte Count
199 */ 199 * increase once for each full cwnd acked
200 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { 200 */
201 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 201 if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
202 tp->snd_cwnd++; 202 tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
203 tp->snd_cwnd_cnt = 0; 203 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
204 } else 204 tp->snd_cwnd++;
205 tp->snd_cwnd_cnt++; 205 }
206 } 206 } else {
207 /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
208 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
209 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
210 tp->snd_cwnd++;
211 tp->snd_cwnd_cnt = 0;
212 } else
213 tp->snd_cwnd_cnt++;
214 }
207} 215}
208EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); 216EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
209 217
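
The sysctl_tcp_abc branch added to tcp_reno_cong_avoid() above implements RFC 3465 Appropriate Byte Counting: during congestion avoidance the window grows by one segment only once a full window's worth of bytes has been acknowledged (tp->bytes_acked is accumulated per ACK in tcp_ack(), as the tcp_input.c hunks further down show). A stand-alone model of that accounting:

    #include <stdint.h>

    struct abc_state {
            uint32_t snd_cwnd;       /* congestion window, in segments */
            uint32_t snd_cwnd_clamp; /* hard upper bound */
            uint32_t mss_cache;      /* bytes per segment */
            uint32_t bytes_acked;    /* accumulated from incoming ACKs */
    };

    /* Congestion-avoidance step with Appropriate Byte Counting: grow the
     * window by one segment only after a full cwnd worth of bytes is ACKed. */
    static void abc_cong_avoid(struct abc_state *tp, uint32_t acked_bytes)
    {
            tp->bytes_acked += acked_bytes;

            if (tp->bytes_acked >= tp->snd_cwnd * tp->mss_cache) {
                    tp->bytes_acked -= tp->snd_cwnd * tp->mss_cache;
                    if (tp->snd_cwnd < tp->snd_cwnd_clamp)
                            tp->snd_cwnd++;
            }
    }

Other hunks in this series reset bytes_acked on loss, on entering recovery and on disconnect, so stale credit never inflates the window after an episode of congestion.
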
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 6acc04bde080..82b3c189bd7d 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -111,18 +111,17 @@ static void hstcp_init(struct sock *sk)
111} 111}
112 112
113static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, 113static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
114 u32 in_flight, int good) 114 u32 in_flight, u32 pkts_acked)
115{ 115{
116 struct tcp_sock *tp = tcp_sk(sk); 116 struct tcp_sock *tp = tcp_sk(sk);
117 struct hstcp *ca = inet_csk_ca(sk); 117 struct hstcp *ca = inet_csk_ca(sk);
118 118
119 if (in_flight < tp->snd_cwnd) 119 if (!tcp_is_cwnd_limited(sk, in_flight))
120 return; 120 return;
121 121
122 if (tp->snd_cwnd <= tp->snd_ssthresh) { 122 if (tp->snd_cwnd <= tp->snd_ssthresh)
123 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 123 tcp_slow_start(tp);
124 tp->snd_cwnd++; 124 else {
125 } else {
126 /* Update AIMD parameters */ 125 /* Update AIMD parameters */
127 if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) { 126 if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
128 while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd && 127 while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index e47b37984e95..3284cfb993e6 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -207,14 +207,13 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
207 struct tcp_sock *tp = tcp_sk(sk); 207 struct tcp_sock *tp = tcp_sk(sk);
208 struct htcp *ca = inet_csk_ca(sk); 208 struct htcp *ca = inet_csk_ca(sk);
209 209
210 if (in_flight < tp->snd_cwnd) 210 if (!tcp_is_cwnd_limited(sk, in_flight))
211 return; 211 return;
212 212
213 if (tp->snd_cwnd <= tp->snd_ssthresh) { 213 if (tp->snd_cwnd <= tp->snd_ssthresh)
214 /* In "safe" area, increase. */ 214 tcp_slow_start(tp);
215 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 215 else {
216 tp->snd_cwnd++; 216
217 } else {
218 measure_rtt(sk); 217 measure_rtt(sk);
219 218
220 /* keep track of number of round-trip times since last backoff event */ 219 /* keep track of number of round-trip times since last backoff event */
@@ -224,7 +223,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
224 htcp_alpha_update(ca); 223 htcp_alpha_update(ca);
225 } 224 }
226 225
227 /* In dangerous area, increase slowly. 226 /* In dangerous area, increase slowly.
228 * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd 227 * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
229 */ 228 */
230 if ((tp->snd_cwnd_cnt++ * ca->alpha)>>7 >= tp->snd_cwnd) { 229 if ((tp->snd_cwnd_cnt++ * ca->alpha)>>7 >= tp->snd_cwnd) {
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 77add63623df..40dbb3877510 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -100,12 +100,12 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
100 ca->minrtt = tp->srtt; 100 ca->minrtt = tp->srtt;
101 } 101 }
102 102
103 if (!tcp_is_cwnd_limited(sk, in_flight))
104 return;
105
103 if (!ca->hybla_en) 106 if (!ca->hybla_en)
104 return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); 107 return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag);
105 108
106 if (in_flight < tp->snd_cwnd)
107 return;
108
109 if (ca->rho == 0) 109 if (ca->rho == 0)
110 hybla_recalc_param(sk); 110 hybla_recalc_param(sk);
111 111
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3e98b57578dc..40a26b7157b4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -42,7 +42,7 @@
42 * Andi Kleen : Moved open_request checking here 42 * Andi Kleen : Moved open_request checking here
43 * and process RSTs for open_requests. 43 * and process RSTs for open_requests.
44 * Andi Kleen : Better prune_queue, and other fixes. 44 * Andi Kleen : Better prune_queue, and other fixes.
45 * Andrey Savochkin: Fix RTT measurements in the presnce of 45 * Andrey Savochkin: Fix RTT measurements in the presence of
46 * timestamps. 46 * timestamps.
47 * Andrey Savochkin: Check sequence numbers correctly when 47 * Andrey Savochkin: Check sequence numbers correctly when
48 * removing SACKs due to in sequence incoming 48 * removing SACKs due to in sequence incoming
@@ -89,6 +89,7 @@ int sysctl_tcp_frto;
89int sysctl_tcp_nometrics_save; 89int sysctl_tcp_nometrics_save;
90 90
91int sysctl_tcp_moderate_rcvbuf = 1; 91int sysctl_tcp_moderate_rcvbuf = 1;
92int sysctl_tcp_abc = 1;
92 93
93#define FLAG_DATA 0x01 /* Incoming frame contained data. */ 94#define FLAG_DATA 0x01 /* Incoming frame contained data. */
94#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ 95#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -223,7 +224,7 @@ static void tcp_fixup_sndbuf(struct sock *sk)
223 * of receiver window. Check #2. 224 * of receiver window. Check #2.
224 * 225 *
225 * The scheme does not work when sender sends good segments opening 226 * The scheme does not work when sender sends good segments opening
226 * window and then starts to feed us spagetti. But it should work 227 * window and then starts to feed us spaghetti. But it should work
227 * in common situations. Otherwise, we have to rely on queue collapsing. 228 * in common situations. Otherwise, we have to rely on queue collapsing.
228 */ 229 */
229 230
@@ -233,7 +234,7 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
233{ 234{
234 /* Optimize this! */ 235 /* Optimize this! */
235 int truesize = tcp_win_from_space(skb->truesize)/2; 236 int truesize = tcp_win_from_space(skb->truesize)/2;
236 int window = tcp_full_space(sk)/2; 237 int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
237 238
238 while (tp->rcv_ssthresh <= window) { 239 while (tp->rcv_ssthresh <= window) {
239 if (truesize <= skb->len) 240 if (truesize <= skb->len)
@@ -277,7 +278,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
277 int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); 278 int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
278 279
279 /* Try to select rcvbuf so that 4 mss-sized segments 280 /* Try to select rcvbuf so that 4 mss-sized segments
280 * will fit to window and correspoding skbs will fit to our rcvbuf. 281 * will fit to window and corresponding skbs will fit to our rcvbuf.
281 * (was 3; 4 is minimum to allow fast retransmit to work.) 282 * (was 3; 4 is minimum to allow fast retransmit to work.)
282 */ 283 */
283 while (tcp_win_from_space(rcvmem) < tp->advmss) 284 while (tcp_win_from_space(rcvmem) < tp->advmss)
@@ -286,7 +287,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
286 sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]); 287 sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
287} 288}
288 289
289/* 4. Try to fixup all. It is made iimediately after connection enters 290/* 4. Try to fixup all. It is made immediately after connection enters
290 * established state. 291 * established state.
291 */ 292 */
292static void tcp_init_buffer_space(struct sock *sk) 293static void tcp_init_buffer_space(struct sock *sk)
@@ -326,37 +327,18 @@ static void tcp_init_buffer_space(struct sock *sk)
326static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) 327static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
327{ 328{
328 struct inet_connection_sock *icsk = inet_csk(sk); 329 struct inet_connection_sock *icsk = inet_csk(sk);
329 struct sk_buff *skb;
330 unsigned int app_win = tp->rcv_nxt - tp->copied_seq;
331 int ofo_win = 0;
332 330
333 icsk->icsk_ack.quick = 0; 331 icsk->icsk_ack.quick = 0;
334 332
335 skb_queue_walk(&tp->out_of_order_queue, skb) { 333 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
336 ofo_win += skb->len; 334 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
337 } 335 !tcp_memory_pressure &&
338 336 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
339 /* If overcommit is due to out of order segments, 337 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
340 * do not clamp window. Try to expand rcvbuf instead. 338 sysctl_tcp_rmem[2]);
341 */
342 if (ofo_win) {
343 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
344 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
345 !tcp_memory_pressure &&
346 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
347 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
348 sysctl_tcp_rmem[2]);
349 } 339 }
350 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) { 340 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
351 app_win += ofo_win;
352 if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf)
353 app_win >>= 1;
354 if (app_win > icsk->icsk_ack.rcv_mss)
355 app_win -= icsk->icsk_ack.rcv_mss;
356 app_win = max(app_win, 2U*tp->advmss);
357
358 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); 341 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
359 }
360} 342}
361 343
362/* Receiver "autotuning" code. 344/* Receiver "autotuning" code.
@@ -385,8 +367,8 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
385 * are stalled on filesystem I/O. 367 * are stalled on filesystem I/O.
386 * 368 *
387 * Also, since we are only going for a minimum in the 369 * Also, since we are only going for a minimum in the
388 * non-timestamp case, we do not smoothe things out 370 * non-timestamp case, we do not smoother things out
389 * else with timestamps disabled convergance takes too 371 * else with timestamps disabled convergence takes too
390 * long. 372 * long.
391 */ 373 */
392 if (!win_dep) { 374 if (!win_dep) {
@@ -395,7 +377,7 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
395 } else if (m < new_sample) 377 } else if (m < new_sample)
396 new_sample = m << 3; 378 new_sample = m << 3;
397 } else { 379 } else {
398 /* No previous mesaure. */ 380 /* No previous measure. */
399 new_sample = m << 3; 381 new_sample = m << 3;
400 } 382 }
401 383
@@ -524,7 +506,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
524 if (icsk->icsk_ack.ato > icsk->icsk_rto) 506 if (icsk->icsk_ack.ato > icsk->icsk_rto)
525 icsk->icsk_ack.ato = icsk->icsk_rto; 507 icsk->icsk_ack.ato = icsk->icsk_rto;
526 } else if (m > icsk->icsk_rto) { 508 } else if (m > icsk->icsk_rto) {
527 /* Too long gap. Apparently sender falled to 509 /* Too long gap. Apparently sender failed to
528 * restart window, so that we send ACKs quickly. 510 * restart window, so that we send ACKs quickly.
529 */ 511 */
530 tcp_incr_quickack(sk); 512 tcp_incr_quickack(sk);
@@ -548,10 +530,9 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
548 * To save cycles in the RFC 1323 implementation it was better to break 530 * To save cycles in the RFC 1323 implementation it was better to break
549 * it up into three procedures. -- erics 531 * it up into three procedures. -- erics
550 */ 532 */
551static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt) 533static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
552{ 534{
553 struct tcp_sock *tp = tcp_sk(sk); 535 struct tcp_sock *tp = tcp_sk(sk);
554 const struct inet_connection_sock *icsk = inet_csk(sk);
555 long m = mrtt; /* RTT */ 536 long m = mrtt; /* RTT */
556 537
557 /* The following amusing code comes from Jacobson's 538 /* The following amusing code comes from Jacobson's
@@ -565,7 +546,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
565 * 546 *
566 * Funny. This algorithm seems to be very broken. 547 * Funny. This algorithm seems to be very broken.
567 * These formulae increase RTO, when it should be decreased, increase 548 * These formulae increase RTO, when it should be decreased, increase
568 * too slowly, when it should be incresed fastly, decrease too fastly 549 * too slowly, when it should be increased fastly, decrease too fastly
569 * etc. I guess in BSD RTO takes ONE value, so that it is absolutely 550 * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
570 * does not matter how to _calculate_ it. Seems, it was trap 551 * does not matter how to _calculate_ it. Seems, it was trap
571 * that VJ failed to avoid. 8) 552 * that VJ failed to avoid. 8)
@@ -610,9 +591,6 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
610 tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); 591 tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
611 tp->rtt_seq = tp->snd_nxt; 592 tp->rtt_seq = tp->snd_nxt;
612 } 593 }
613
614 if (icsk->icsk_ca_ops->rtt_sample)
615 icsk->icsk_ca_ops->rtt_sample(sk, *usrtt);
616} 594}
617 595
618/* Calculate rto without backoff. This is the second half of Van Jacobson's 596/* Calculate rto without backoff. This is the second half of Van Jacobson's
@@ -629,14 +607,14 @@ static inline void tcp_set_rto(struct sock *sk)
629 * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ 607 * at least by solaris and freebsd. "Erratic ACKs" has _nothing_
630 * to do with delayed acks, because at cwnd>2 true delack timeout 608 * to do with delayed acks, because at cwnd>2 true delack timeout
631 * is invisible. Actually, Linux-2.4 also generates erratic 609 * is invisible. Actually, Linux-2.4 also generates erratic
632 * ACKs in some curcumstances. 610 * ACKs in some circumstances.
633 */ 611 */
634 inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar; 612 inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar;
635 613
636 /* 2. Fixups made earlier cannot be right. 614 /* 2. Fixups made earlier cannot be right.
637 * If we do not estimate RTO correctly without them, 615 * If we do not estimate RTO correctly without them,
638 * all the algo is pure shit and should be replaced 616 * all the algo is pure shit and should be replaced
639 * with correct one. It is exaclty, which we pretend to do. 617 * with correct one. It is exactly, which we pretend to do.
640 */ 618 */
641} 619}
642 620
@@ -794,7 +772,7 @@ static void tcp_init_metrics(struct sock *sk)
794 * to make it more realistic. 772 * to make it more realistic.
795 * 773 *
796 * A bit of theory. RTT is time passed after "normal" sized packet 774 * A bit of theory. RTT is time passed after "normal" sized packet
797 * is sent until it is ACKed. In normal curcumstances sending small 775 * is sent until it is ACKed. In normal circumstances sending small
798 * packets force peer to delay ACKs and calculation is correct too. 776 * packets force peer to delay ACKs and calculation is correct too.
799 * The algorithm is adaptive and, provided we follow specs, it 777 * The algorithm is adaptive and, provided we follow specs, it
800 * NEVER underestimate RTT. BUT! If peer tries to make some clever 778 * NEVER underestimate RTT. BUT! If peer tries to make some clever
@@ -919,18 +897,32 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
919 int prior_fackets; 897 int prior_fackets;
920 u32 lost_retrans = 0; 898 u32 lost_retrans = 0;
921 int flag = 0; 899 int flag = 0;
900 int dup_sack = 0;
922 int i; 901 int i;
923 902
924 if (!tp->sacked_out) 903 if (!tp->sacked_out)
925 tp->fackets_out = 0; 904 tp->fackets_out = 0;
926 prior_fackets = tp->fackets_out; 905 prior_fackets = tp->fackets_out;
927 906
928 for (i=0; i<num_sacks; i++, sp++) { 907 /* SACK fastpath:
929 struct sk_buff *skb; 908 * if the only SACK change is the increase of the end_seq of
930 __u32 start_seq = ntohl(sp->start_seq); 909 * the first block then only apply that SACK block
931 __u32 end_seq = ntohl(sp->end_seq); 910 * and use retrans queue hinting otherwise slowpath */
932 int fack_count = 0; 911 flag = 1;
933 int dup_sack = 0; 912 for (i = 0; i< num_sacks; i++) {
913 __u32 start_seq = ntohl(sp[i].start_seq);
914 __u32 end_seq = ntohl(sp[i].end_seq);
915
916 if (i == 0){
917 if (tp->recv_sack_cache[i].start_seq != start_seq)
918 flag = 0;
919 } else {
920 if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
921 (tp->recv_sack_cache[i].end_seq != end_seq))
922 flag = 0;
923 }
924 tp->recv_sack_cache[i].start_seq = start_seq;
925 tp->recv_sack_cache[i].end_seq = end_seq;
934 926
935 /* Check for D-SACK. */ 927 /* Check for D-SACK. */
936 if (i == 0) { 928 if (i == 0) {
@@ -962,15 +954,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
962 if (before(ack, prior_snd_una - tp->max_window)) 954 if (before(ack, prior_snd_una - tp->max_window))
963 return 0; 955 return 0;
964 } 956 }
957 }
958
959 if (flag)
960 num_sacks = 1;
961 else {
962 int j;
963 tp->fastpath_skb_hint = NULL;
964
965 /* order SACK blocks to allow in order walk of the retrans queue */
966 for (i = num_sacks-1; i > 0; i--) {
967 for (j = 0; j < i; j++){
968 if (after(ntohl(sp[j].start_seq),
969 ntohl(sp[j+1].start_seq))){
970 sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq);
971 sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq);
972 sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq);
973 sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq);
974 }
975
976 }
977 }
978 }
979
980 /* clear flag as used for different purpose in following code */
981 flag = 0;
982
983 for (i=0; i<num_sacks; i++, sp++) {
984 struct sk_buff *skb;
985 __u32 start_seq = ntohl(sp->start_seq);
986 __u32 end_seq = ntohl(sp->end_seq);
987 int fack_count;
988
989 /* Use SACK fastpath hint if valid */
990 if (tp->fastpath_skb_hint) {
991 skb = tp->fastpath_skb_hint;
992 fack_count = tp->fastpath_cnt_hint;
993 } else {
994 skb = sk->sk_write_queue.next;
995 fack_count = 0;
996 }
965 997
966 /* Event "B" in the comment above. */ 998 /* Event "B" in the comment above. */
967 if (after(end_seq, tp->high_seq)) 999 if (after(end_seq, tp->high_seq))
968 flag |= FLAG_DATA_LOST; 1000 flag |= FLAG_DATA_LOST;
969 1001
970 sk_stream_for_retrans_queue(skb, sk) { 1002 sk_stream_for_retrans_queue_from(skb, sk) {
971 int in_sack, pcount; 1003 int in_sack, pcount;
972 u8 sacked; 1004 u8 sacked;
973 1005
1006 tp->fastpath_skb_hint = skb;
1007 tp->fastpath_cnt_hint = fack_count;
1008
974 /* The retransmission queue is always in order, so 1009 /* The retransmission queue is always in order, so
975 * we can short-circuit the walk early. 1010 * we can short-circuit the walk early.
976 */ 1011 */
@@ -1045,6 +1080,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1045 TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); 1080 TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1046 tp->lost_out -= tcp_skb_pcount(skb); 1081 tp->lost_out -= tcp_skb_pcount(skb);
1047 tp->retrans_out -= tcp_skb_pcount(skb); 1082 tp->retrans_out -= tcp_skb_pcount(skb);
1083
1084 /* clear lost hint */
1085 tp->retransmit_skb_hint = NULL;
1048 } 1086 }
1049 } else { 1087 } else {
1050 /* New sack for not retransmitted frame, 1088 /* New sack for not retransmitted frame,
@@ -1057,6 +1095,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1057 if (sacked & TCPCB_LOST) { 1095 if (sacked & TCPCB_LOST) {
1058 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; 1096 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1059 tp->lost_out -= tcp_skb_pcount(skb); 1097 tp->lost_out -= tcp_skb_pcount(skb);
1098
1099 /* clear lost hint */
1100 tp->retransmit_skb_hint = NULL;
1060 } 1101 }
1061 } 1102 }
1062 1103
@@ -1080,6 +1121,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1080 (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { 1121 (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
1081 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1122 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1082 tp->retrans_out -= tcp_skb_pcount(skb); 1123 tp->retrans_out -= tcp_skb_pcount(skb);
1124 tp->retransmit_skb_hint = NULL;
1083 } 1125 }
1084 } 1126 }
1085 } 1127 }
@@ -1107,6 +1149,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1107 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1149 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1108 tp->retrans_out -= tcp_skb_pcount(skb); 1150 tp->retrans_out -= tcp_skb_pcount(skb);
1109 1151
1152 /* clear lost hint */
1153 tp->retransmit_skb_hint = NULL;
1154
1110 if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { 1155 if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
1111 tp->lost_out += tcp_skb_pcount(skb); 1156 tp->lost_out += tcp_skb_pcount(skb);
1112 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1157 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -1214,6 +1259,8 @@ static void tcp_enter_frto_loss(struct sock *sk)
1214 tcp_set_ca_state(sk, TCP_CA_Loss); 1259 tcp_set_ca_state(sk, TCP_CA_Loss);
1215 tp->high_seq = tp->frto_highmark; 1260 tp->high_seq = tp->frto_highmark;
1216 TCP_ECN_queue_cwr(tp); 1261 TCP_ECN_queue_cwr(tp);
1262
1263 clear_all_retrans_hints(tp);
1217} 1264}
1218 1265
1219void tcp_clear_retrans(struct tcp_sock *tp) 1266void tcp_clear_retrans(struct tcp_sock *tp)
@@ -1251,6 +1298,7 @@ void tcp_enter_loss(struct sock *sk, int how)
1251 tp->snd_cwnd_cnt = 0; 1298 tp->snd_cwnd_cnt = 0;
1252 tp->snd_cwnd_stamp = tcp_time_stamp; 1299 tp->snd_cwnd_stamp = tcp_time_stamp;
1253 1300
1301 tp->bytes_acked = 0;
1254 tcp_clear_retrans(tp); 1302 tcp_clear_retrans(tp);
1255 1303
1256 /* Push undo marker, if it was plain RTO and nothing 1304 /* Push undo marker, if it was plain RTO and nothing
@@ -1279,6 +1327,8 @@ void tcp_enter_loss(struct sock *sk, int how)
1279 tcp_set_ca_state(sk, TCP_CA_Loss); 1327 tcp_set_ca_state(sk, TCP_CA_Loss);
1280 tp->high_seq = tp->snd_nxt; 1328 tp->high_seq = tp->snd_nxt;
1281 TCP_ECN_queue_cwr(tp); 1329 TCP_ECN_queue_cwr(tp);
1330
1331 clear_all_retrans_hints(tp);
1282} 1332}
1283 1333
1284static int tcp_check_sack_reneging(struct sock *sk) 1334static int tcp_check_sack_reneging(struct sock *sk)
@@ -1503,17 +1553,37 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
1503 int packets, u32 high_seq) 1553 int packets, u32 high_seq)
1504{ 1554{
1505 struct sk_buff *skb; 1555 struct sk_buff *skb;
1506 int cnt = packets; 1556 int cnt;
1507 1557
1508 BUG_TRAP(cnt <= tp->packets_out); 1558 BUG_TRAP(packets <= tp->packets_out);
1559 if (tp->lost_skb_hint) {
1560 skb = tp->lost_skb_hint;
1561 cnt = tp->lost_cnt_hint;
1562 } else {
1563 skb = sk->sk_write_queue.next;
1564 cnt = 0;
1565 }
1509 1566
1510 sk_stream_for_retrans_queue(skb, sk) { 1567 sk_stream_for_retrans_queue_from(skb, sk) {
1511 cnt -= tcp_skb_pcount(skb); 1568 /* TODO: do this better */
1512 if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) 1569 /* this is not the most efficient way to do this... */
1570 tp->lost_skb_hint = skb;
1571 tp->lost_cnt_hint = cnt;
1572 cnt += tcp_skb_pcount(skb);
1573 if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq))
1513 break; 1574 break;
1514 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { 1575 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
1515 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1576 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1516 tp->lost_out += tcp_skb_pcount(skb); 1577 tp->lost_out += tcp_skb_pcount(skb);
1578
1579 /* clear xmit_retransmit_queue hints
1580 * if this is beyond hint */
1581 if(tp->retransmit_skb_hint != NULL &&
1582 before(TCP_SKB_CB(skb)->seq,
1583 TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) {
1584
1585 tp->retransmit_skb_hint = NULL;
1586 }
1517 } 1587 }
1518 } 1588 }
1519 tcp_sync_left_out(tp); 1589 tcp_sync_left_out(tp);
@@ -1540,13 +1610,28 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
1540 if (tcp_head_timedout(sk, tp)) { 1610 if (tcp_head_timedout(sk, tp)) {
1541 struct sk_buff *skb; 1611 struct sk_buff *skb;
1542 1612
1543 sk_stream_for_retrans_queue(skb, sk) { 1613 skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
1544 if (tcp_skb_timedout(sk, skb) && 1614 : sk->sk_write_queue.next;
1545 !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { 1615
1616 sk_stream_for_retrans_queue_from(skb, sk) {
1617 if (!tcp_skb_timedout(sk, skb))
1618 break;
1619
1620 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
1546 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1621 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1547 tp->lost_out += tcp_skb_pcount(skb); 1622 tp->lost_out += tcp_skb_pcount(skb);
1623
1624 /* clear xmit_retrans hint */
1625 if (tp->retransmit_skb_hint &&
1626 before(TCP_SKB_CB(skb)->seq,
1627 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
1628
1629 tp->retransmit_skb_hint = NULL;
1548 } 1630 }
1549 } 1631 }
1632
1633 tp->scoreboard_skb_hint = skb;
1634
1550 tcp_sync_left_out(tp); 1635 tcp_sync_left_out(tp);
1551 } 1636 }
1552} 1637}
@@ -1626,6 +1711,10 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
1626 } 1711 }
1627 tcp_moderate_cwnd(tp); 1712 tcp_moderate_cwnd(tp);
1628 tp->snd_cwnd_stamp = tcp_time_stamp; 1713 tp->snd_cwnd_stamp = tcp_time_stamp;
1714
1715 /* There is something screwy going on with the retrans hints after
1716 an undo */
1717 clear_all_retrans_hints(tp);
1629} 1718}
1630 1719
1631static inline int tcp_may_undo(struct tcp_sock *tp) 1720static inline int tcp_may_undo(struct tcp_sock *tp)
@@ -1709,6 +1798,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
1709 sk_stream_for_retrans_queue(skb, sk) { 1798 sk_stream_for_retrans_queue(skb, sk) {
1710 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; 1799 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1711 } 1800 }
1801
1802 clear_all_retrans_hints(tp);
1803
1712 DBGUNDO(sk, tp, "partial loss"); 1804 DBGUNDO(sk, tp, "partial loss");
1713 tp->lost_out = 0; 1805 tp->lost_out = 0;
1714 tp->left_out = tp->sacked_out; 1806 tp->left_out = tp->sacked_out;
@@ -1908,6 +2000,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
1908 TCP_ECN_queue_cwr(tp); 2000 TCP_ECN_queue_cwr(tp);
1909 } 2001 }
1910 2002
2003 tp->bytes_acked = 0;
1911 tp->snd_cwnd_cnt = 0; 2004 tp->snd_cwnd_cnt = 0;
1912 tcp_set_ca_state(sk, TCP_CA_Recovery); 2005 tcp_set_ca_state(sk, TCP_CA_Recovery);
1913 } 2006 }
@@ -1919,9 +2012,9 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
1919} 2012}
1920 2013
1921/* Read draft-ietf-tcplw-high-performance before mucking 2014/* Read draft-ietf-tcplw-high-performance before mucking
1922 * with this code. (Superceeds RFC1323) 2015 * with this code. (Supersedes RFC1323)
1923 */ 2016 */
1924static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) 2017static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
1925{ 2018{
1926 /* RTTM Rule: A TSecr value received in a segment is used to 2019 /* RTTM Rule: A TSecr value received in a segment is used to
1927 * update the averaged RTT measurement only if the segment 2020 * update the averaged RTT measurement only if the segment
@@ -1932,7 +2025,7 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
1932 * 1998/04/10 Andrey V. Savochkin <saw@msu.ru> 2025 * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
1933 * 2026 *
1934 * Changed: reset backoff as soon as we see the first valid sample. 2027 * Changed: reset backoff as soon as we see the first valid sample.
1935 * If we do not, we get strongly overstimated rto. With timestamps 2028 * If we do not, we get strongly overestimated rto. With timestamps
1936 * samples are accepted even from very old segments: f.e., when rtt=1 2029 * samples are accepted even from very old segments: f.e., when rtt=1
1937 * increases to 8, we retransmit 5 times and after 8 seconds delayed 2030 * increases to 8, we retransmit 5 times and after 8 seconds delayed
1938 * answer arrives rto becomes 120 seconds! If at least one of segments 2031 * answer arrives rto becomes 120 seconds! If at least one of segments
@@ -1940,13 +2033,13 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
1940 */ 2033 */
1941 struct tcp_sock *tp = tcp_sk(sk); 2034 struct tcp_sock *tp = tcp_sk(sk);
1942 const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; 2035 const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
1943 tcp_rtt_estimator(sk, seq_rtt, usrtt); 2036 tcp_rtt_estimator(sk, seq_rtt);
1944 tcp_set_rto(sk); 2037 tcp_set_rto(sk);
1945 inet_csk(sk)->icsk_backoff = 0; 2038 inet_csk(sk)->icsk_backoff = 0;
1946 tcp_bound_rto(sk); 2039 tcp_bound_rto(sk);
1947} 2040}
1948 2041
1949static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag) 2042static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
1950{ 2043{
1951 /* We don't have a timestamp. Can only use 2044 /* We don't have a timestamp. Can only use
1952 * packets that are not retransmitted to determine 2045 * packets that are not retransmitted to determine
@@ -1960,21 +2053,21 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag
1960 if (flag & FLAG_RETRANS_DATA_ACKED) 2053 if (flag & FLAG_RETRANS_DATA_ACKED)
1961 return; 2054 return;
1962 2055
1963 tcp_rtt_estimator(sk, seq_rtt, usrtt); 2056 tcp_rtt_estimator(sk, seq_rtt);
1964 tcp_set_rto(sk); 2057 tcp_set_rto(sk);
1965 inet_csk(sk)->icsk_backoff = 0; 2058 inet_csk(sk)->icsk_backoff = 0;
1966 tcp_bound_rto(sk); 2059 tcp_bound_rto(sk);
1967} 2060}
1968 2061
1969static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, 2062static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
1970 const s32 seq_rtt, u32 *usrtt) 2063 const s32 seq_rtt)
1971{ 2064{
1972 const struct tcp_sock *tp = tcp_sk(sk); 2065 const struct tcp_sock *tp = tcp_sk(sk);
1973 /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ 2066 /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
1974 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) 2067 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
1975 tcp_ack_saw_tstamp(sk, usrtt, flag); 2068 tcp_ack_saw_tstamp(sk, flag);
1976 else if (seq_rtt >= 0) 2069 else if (seq_rtt >= 0)
1977 tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag); 2070 tcp_ack_no_tstamp(sk, seq_rtt, flag);
1978} 2071}
1979 2072
1980static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, 2073static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
@@ -2054,20 +2147,27 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
2054 return acked; 2147 return acked;
2055} 2148}
2056 2149
2150static inline u32 tcp_usrtt(const struct sk_buff *skb)
2151{
2152 struct timeval tv, now;
2153
2154 do_gettimeofday(&now);
2155 skb_get_timestamp(skb, &tv);
2156 return (now.tv_sec - tv.tv_sec) * 1000000 + (now.tv_usec - tv.tv_usec);
2157}
2057 2158
2058/* Remove acknowledged frames from the retransmission queue. */ 2159/* Remove acknowledged frames from the retransmission queue. */
2059static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt) 2160static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
2060{ 2161{
2061 struct tcp_sock *tp = tcp_sk(sk); 2162 struct tcp_sock *tp = tcp_sk(sk);
2163 const struct inet_connection_sock *icsk = inet_csk(sk);
2062 struct sk_buff *skb; 2164 struct sk_buff *skb;
2063 __u32 now = tcp_time_stamp; 2165 __u32 now = tcp_time_stamp;
2064 int acked = 0; 2166 int acked = 0;
2065 __s32 seq_rtt = -1; 2167 __s32 seq_rtt = -1;
2066 struct timeval usnow;
2067 u32 pkts_acked = 0; 2168 u32 pkts_acked = 0;
2068 2169 void (*rtt_sample)(struct sock *sk, u32 usrtt)
2069 if (seq_usrtt) 2170 = icsk->icsk_ca_ops->rtt_sample;
2070 do_gettimeofday(&usnow);
2071 2171
2072 while ((skb = skb_peek(&sk->sk_write_queue)) && 2172 while ((skb = skb_peek(&sk->sk_write_queue)) &&
2073 skb != sk->sk_send_head) { 2173 skb != sk->sk_send_head) {
@@ -2107,16 +2207,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
2107 tp->retrans_out -= tcp_skb_pcount(skb); 2207 tp->retrans_out -= tcp_skb_pcount(skb);
2108 acked |= FLAG_RETRANS_DATA_ACKED; 2208 acked |= FLAG_RETRANS_DATA_ACKED;
2109 seq_rtt = -1; 2209 seq_rtt = -1;
2110 } else if (seq_rtt < 0) 2210 } else if (seq_rtt < 0) {
2111 seq_rtt = now - scb->when; 2211 seq_rtt = now - scb->when;
2112 if (seq_usrtt) { 2212 if (rtt_sample)
2113 struct timeval tv; 2213 (*rtt_sample)(sk, tcp_usrtt(skb));
2114
2115 skb_get_timestamp(skb, &tv);
2116 *seq_usrtt = (usnow.tv_sec - tv.tv_sec) * 1000000
2117 + (usnow.tv_usec - tv.tv_usec);
2118 } 2214 }
2119
2120 if (sacked & TCPCB_SACKED_ACKED) 2215 if (sacked & TCPCB_SACKED_ACKED)
2121 tp->sacked_out -= tcp_skb_pcount(skb); 2216 tp->sacked_out -= tcp_skb_pcount(skb);
2122 if (sacked & TCPCB_LOST) 2217 if (sacked & TCPCB_LOST)
@@ -2126,17 +2221,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
2126 !before(scb->end_seq, tp->snd_up)) 2221 !before(scb->end_seq, tp->snd_up))
2127 tp->urg_mode = 0; 2222 tp->urg_mode = 0;
2128 } 2223 }
2129 } else if (seq_rtt < 0) 2224 } else if (seq_rtt < 0) {
2130 seq_rtt = now - scb->when; 2225 seq_rtt = now - scb->when;
2226 if (rtt_sample)
2227 (*rtt_sample)(sk, tcp_usrtt(skb));
2228 }
2131 tcp_dec_pcount_approx(&tp->fackets_out, skb); 2229 tcp_dec_pcount_approx(&tp->fackets_out, skb);
2132 tcp_packets_out_dec(tp, skb); 2230 tcp_packets_out_dec(tp, skb);
2133 __skb_unlink(skb, &sk->sk_write_queue); 2231 __skb_unlink(skb, &sk->sk_write_queue);
2134 sk_stream_free_skb(sk, skb); 2232 sk_stream_free_skb(sk, skb);
2233 clear_all_retrans_hints(tp);
2135 } 2234 }
2136 2235
2137 if (acked&FLAG_ACKED) { 2236 if (acked&FLAG_ACKED) {
2138 const struct inet_connection_sock *icsk = inet_csk(sk); 2237 tcp_ack_update_rtt(sk, acked, seq_rtt);
2139 tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt);
2140 tcp_ack_packets_out(sk, tp); 2238 tcp_ack_packets_out(sk, tp);
2141 2239
2142 if (icsk->icsk_ca_ops->pkts_acked) 2240 if (icsk->icsk_ca_ops->pkts_acked)
@@ -2284,7 +2382,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
2284 } 2382 }
2285 2383
2286 /* F-RTO affects on two new ACKs following RTO. 2384 /* F-RTO affects on two new ACKs following RTO.
2287 * At latest on third ACK the TCP behavor is back to normal. 2385 * At latest on third ACK the TCP behavior is back to normal.
2288 */ 2386 */
2289 tp->frto_counter = (tp->frto_counter + 1) % 3; 2387 tp->frto_counter = (tp->frto_counter + 1) % 3;
2290} 2388}
@@ -2299,7 +2397,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
2299 u32 ack = TCP_SKB_CB(skb)->ack_seq; 2397 u32 ack = TCP_SKB_CB(skb)->ack_seq;
2300 u32 prior_in_flight; 2398 u32 prior_in_flight;
2301 s32 seq_rtt; 2399 s32 seq_rtt;
2302 s32 seq_usrtt = 0;
2303 int prior_packets; 2400 int prior_packets;
2304 2401
2305 /* If the ack is newer than sent or older than previous acks 2402 /* If the ack is newer than sent or older than previous acks
@@ -2311,6 +2408,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
2311 if (before(ack, prior_snd_una)) 2408 if (before(ack, prior_snd_una))
2312 goto old_ack; 2409 goto old_ack;
2313 2410
2411 if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
2412 tp->bytes_acked += ack - prior_snd_una;
2413
2314 if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { 2414 if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
2315 /* Window is constant, pure forward advance. 2415 /* Window is constant, pure forward advance.
2316 * No more checks are required. 2416 * No more checks are required.
@@ -2352,14 +2452,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
2352 prior_in_flight = tcp_packets_in_flight(tp); 2452 prior_in_flight = tcp_packets_in_flight(tp);
2353 2453
2354 /* See if we can take anything off of the retransmit queue. */ 2454 /* See if we can take anything off of the retransmit queue. */
2355 flag |= tcp_clean_rtx_queue(sk, &seq_rtt, 2455 flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
2356 icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL);
2357 2456
2358 if (tp->frto_counter) 2457 if (tp->frto_counter)
2359 tcp_process_frto(sk, prior_snd_una); 2458 tcp_process_frto(sk, prior_snd_una);
2360 2459
2361 if (tcp_ack_is_dubious(sk, flag)) { 2460 if (tcp_ack_is_dubious(sk, flag)) {
2362 /* Advanve CWND, if state allows this. */ 2461 /* Advance CWND, if state allows this. */
2363 if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) 2462 if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
2364 tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); 2463 tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0);
2365 tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); 2464 tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
@@ -3148,7 +3247,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
3148{ 3247{
3149 struct sk_buff *skb; 3248 struct sk_buff *skb;
3150 3249
3151 /* First, check that queue is collapsable and find 3250 /* First, check that queue is collapsible and find
3152 * the point where collapsing can be useful. */ 3251 * the point where collapsing can be useful. */
3153 for (skb = head; skb != tail; ) { 3252 for (skb = head; skb != tail; ) {
3154 /* No new bits? It is possible on ofo queue. */ 3253 /* No new bits? It is possible on ofo queue. */
@@ -3456,7 +3555,7 @@ static __inline__ void tcp_ack_snd_check(struct sock *sk)
3456 3555
3457/* 3556/*
3458 * This routine is only called when we have urgent data 3557 * This routine is only called when we have urgent data
3459 * signalled. Its the 'slow' part of tcp_urg. It could be 3558 * signaled. Its the 'slow' part of tcp_urg. It could be
3460 * moved inline now as tcp_urg is only called from one 3559 * moved inline now as tcp_urg is only called from one
3461 * place. We handle URGent data wrong. We have to - as 3560 * place. We handle URGent data wrong. We have to - as
3462 * BSD still doesn't use the correction from RFC961. 3561 * BSD still doesn't use the correction from RFC961.
@@ -3501,7 +3600,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
3501 * urgent. To do this requires some care. We cannot just ignore 3600 * urgent. To do this requires some care. We cannot just ignore
3502 * tp->copied_seq since we would read the last urgent byte again 3601 * tp->copied_seq since we would read the last urgent byte again
3503 * as data, nor can we alter copied_seq until this data arrives 3602 * as data, nor can we alter copied_seq until this data arrives
3504 * or we break the sematics of SIOCATMARK (and thus sockatmark()) 3603 * or we break the semantics of SIOCATMARK (and thus sockatmark())
3505 * 3604 *
3506 * NOTE. Double Dutch. Rendering to plain English: author of comment 3605 * NOTE. Double Dutch. Rendering to plain English: author of comment
3507 * above did something sort of send("A", MSG_OOB); send("B", MSG_OOB); 3606 * above did something sort of send("A", MSG_OOB); send("B", MSG_OOB);
@@ -3646,7 +3745,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
3646 tp->rx_opt.saw_tstamp = 0; 3745 tp->rx_opt.saw_tstamp = 0;
3647 3746
3648 /* pred_flags is 0xS?10 << 16 + snd_wnd 3747 /* pred_flags is 0xS?10 << 16 + snd_wnd
3649 * if header_predition is to be made 3748 * if header_prediction is to be made
3650 * 'S' will always be tp->tcp_header_len >> 2 3749 * 'S' will always be tp->tcp_header_len >> 2
3651 * '?' will be 0 for the fast path, otherwise pred_flags is 0 to 3750 * '?' will be 0 for the fast path, otherwise pred_flags is 0 to
3652 * turn it off (when there are holes in the receive 3751 * turn it off (when there are holes in the receive
@@ -4242,7 +4341,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4242 */ 4341 */
4243 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && 4342 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
4244 !tp->srtt) 4343 !tp->srtt)
4245 tcp_ack_saw_tstamp(sk, NULL, 0); 4344 tcp_ack_saw_tstamp(sk, 0);
4246 4345
4247 if (tp->rx_opt.tstamp_ok) 4346 if (tp->rx_opt.tstamp_ok)
4248 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; 4347 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
@@ -4372,6 +4471,7 @@ discard:
4372 4471
4373EXPORT_SYMBOL(sysctl_tcp_ecn); 4472EXPORT_SYMBOL(sysctl_tcp_ecn);
4374EXPORT_SYMBOL(sysctl_tcp_reordering); 4473EXPORT_SYMBOL(sysctl_tcp_reordering);
4474EXPORT_SYMBOL(sysctl_tcp_abc);
4375EXPORT_SYMBOL(tcp_parse_options); 4475EXPORT_SYMBOL(tcp_parse_options);
4376EXPORT_SYMBOL(tcp_rcv_established); 4476EXPORT_SYMBOL(tcp_rcv_established);
4377EXPORT_SYMBOL(tcp_rcv_state_process); 4477EXPORT_SYMBOL(tcp_rcv_state_process);
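
The largest change above is the SACK processing rework: incoming SACK blocks are compared against tp->recv_sack_cache, and when only the first block has grown the walk restarts from the cached fastpath_skb_hint; otherwise the blocks are put in ascending start_seq order so the retransmit queue can be walked once, front to back, with the various *_skb_hint pointers invalidated whenever the queue is edited. A stand-alone sketch of the wrap-safe ordering step, modelled on the sort in tcp_sacktag_write_queue() but not the kernel code itself:

    #include <stdint.h>

    struct sack_block {
            uint32_t start_seq, end_seq;
    };

    /* True if seq1 is after seq2 modulo 2^32, the same trick as the kernel's
     * after() macro. */
    static int seq_after(uint32_t seq1, uint32_t seq2)
    {
            return (int32_t)(seq1 - seq2) > 0;
    }

    /* Order SACK blocks by ascending start_seq so the retransmit queue can
     * be walked once from the fastpath hint onwards. */
    static void order_sack_blocks(struct sack_block *sp, int num)
    {
            int i, j;

            for (i = num - 1; i > 0; i--)
                    for (j = 0; j < i; j++)
                            if (seq_after(sp[j].start_seq, sp[j + 1].start_seq)) {
                                    struct sack_block tmp = sp[j];

                                    sp[j] = sp[j + 1];
                                    sp[j + 1] = tmp;
                            }
    }
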
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 634dabb558fd..4d5021e1929b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -39,7 +39,7 @@
39 * request_sock handling and moved 39 * request_sock handling and moved
40 * most of it into the af independent code. 40 * most of it into the af independent code.
41 * Added tail drop and some other bugfixes. 41 * Added tail drop and some other bugfixes.
42 * Added new listen sematics. 42 * Added new listen semantics.
43 * Mike McLagan : Routing by source 43 * Mike McLagan : Routing by source
44 * Juan Jose Ciarlante: ip_dynaddr bits 44 * Juan Jose Ciarlante: ip_dynaddr bits
45 * Andi Kleen: various fixes. 45 * Andi Kleen: various fixes.
@@ -1110,24 +1110,18 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1110static int tcp_v4_checksum_init(struct sk_buff *skb) 1110static int tcp_v4_checksum_init(struct sk_buff *skb)
1111{ 1111{
1112 if (skb->ip_summed == CHECKSUM_HW) { 1112 if (skb->ip_summed == CHECKSUM_HW) {
1113 skb->ip_summed = CHECKSUM_UNNECESSARY;
1114 if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, 1113 if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
1115 skb->nh.iph->daddr, skb->csum)) 1114 skb->nh.iph->daddr, skb->csum)) {
1115 skb->ip_summed = CHECKSUM_UNNECESSARY;
1116 return 0; 1116 return 0;
1117 1117 }
1118 LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v4 csum failed\n");
1119 skb->ip_summed = CHECKSUM_NONE;
1120 } 1118 }
1119
1120 skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
1121 skb->len, IPPROTO_TCP, 0);
1122
1121 if (skb->len <= 76) { 1123 if (skb->len <= 76) {
1122 if (tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, 1124 return __skb_checksum_complete(skb);
1123 skb->nh.iph->daddr,
1124 skb_checksum(skb, 0, skb->len, 0)))
1125 return -1;
1126 skb->ip_summed = CHECKSUM_UNNECESSARY;
1127 } else {
1128 skb->csum = ~tcp_v4_check(skb->h.th, skb->len,
1129 skb->nh.iph->saddr,
1130 skb->nh.iph->daddr, 0);
1131 } 1125 }
1132 return 0; 1126 return 0;
1133} 1127}
@@ -1216,10 +1210,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
1216 1210
1217 /* An explanation is required here, I think. 1211 /* An explanation is required here, I think.
1218 * Packet length and doff are validated by header prediction, 1212 * Packet length and doff are validated by header prediction,
1219 * provided case of th->doff==0 is elimineted. 1213 * provided case of th->doff==0 is eliminated.
1220 * So, we defer the checks. */ 1214 * So, we defer the checks. */
1221 if ((skb->ip_summed != CHECKSUM_UNNECESSARY && 1215 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1222 tcp_v4_checksum_init(skb) < 0)) 1216 tcp_v4_checksum_init(skb)))
1223 goto bad_packet; 1217 goto bad_packet;
1224 1218
1225 th = skb->h.th; 1219 th = skb->h.th;
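The tcp_v4_checksum_init() rewrite above leans on a property of the Internet checksum: summing the pseudo-header and the TCP segment, including the transmitted checksum field, and folding the result to 16 bits yields zero exactly when the packet is intact. That is why a zero return from tcp_v4_check() means "verified", and why the deferred path only seeds skb->csum with the pseudo-header sum. A minimal standalone model of that fold follows; fold16() and accumulate() are illustrative stand-ins, not the kernel's csum_fold()/skb_checksum().

#include <stdint.h>
#include <stdio.h>

/* Fold a 32-bit partial one's-complement sum to 16 bits and complement it,
 * mirroring what csum_fold() does (illustrative model, not kernel code). */
static uint16_t fold16(uint32_t sum)
{
        sum = (sum & 0xffff) + (sum >> 16);     /* fold carries back in */
        sum = (sum & 0xffff) + (sum >> 16);     /* at most one carry left */
        return (uint16_t)~sum;
}

/* One's-complement accumulate over 16-bit words (toy skb_checksum()). */
static uint32_t accumulate(const uint16_t *w, int n, uint32_t sum)
{
        while (n--)
                sum += *w++;
        return sum;
}

int main(void)
{
        uint16_t pkt[4] = { 0x4500, 0x0030, 0x1234, 0 };

        pkt[3] = fold16(accumulate(pkt, 3, 0));         /* store the checksum */
        /* Verification sums everything, checksum field included, and folds
         * to zero on success - the same "!csum_fold(...)" test the diff uses. */
        printf("verify -> %u\n", (unsigned)fold16(accumulate(pkt, 4, 0)));
        return 0;
}

For segments of 76 bytes or fewer the patched code verifies eagerly via __skb_checksum_complete(); larger segments keep only the seeded pseudo-header sum and are checked later, when the payload is touched anyway.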
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b1a63b2c6b4a..1b66a2ac4321 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -158,7 +158,7 @@ kill_with_rst:
158 /* I am shamed, but failed to make it more elegant. 158 /* I am shamed, but failed to make it more elegant.
159 * Yes, it is direct reference to IP, which is impossible 159 * Yes, it is direct reference to IP, which is impossible
160 * to generalize to IPv6. Taking into account that IPv6 160 * to generalize to IPv6. Taking into account that IPv6
161 * do not undertsnad recycling in any case, it not 161 * do not understand recycling in any case, it not
162 * a big problem in practice. --ANK */ 162 * a big problem in practice. --ANK */
163 if (tw->tw_family == AF_INET && 163 if (tw->tw_family == AF_INET &&
164 tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && 164 tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
@@ -194,7 +194,7 @@ kill_with_rst:
194 /* In window segment, it may be only reset or bare ack. */ 194 /* In window segment, it may be only reset or bare ack. */
195 195
196 if (th->rst) { 196 if (th->rst) {
197 /* This is TIME_WAIT assasination, in two flavors. 197 /* This is TIME_WAIT assassination, in two flavors.
198 * Oh well... nobody has a sufficient solution to this 198 * Oh well... nobody has a sufficient solution to this
199 * protocol bug yet. 199 * protocol bug yet.
200 */ 200 */
@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
380 */ 380 */
381 newtp->snd_cwnd = 2; 381 newtp->snd_cwnd = 2;
382 newtp->snd_cwnd_cnt = 0; 382 newtp->snd_cwnd_cnt = 0;
383 newtp->bytes_acked = 0;
383 384
384 newtp->frto_counter = 0; 385 newtp->frto_counter = 0;
385 newtp->frto_highmark = 0; 386 newtp->frto_highmark = 0;
@@ -550,7 +551,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
550 551
551 /* RFC793 page 36: "If the connection is in any non-synchronized state ... 552 /* RFC793 page 36: "If the connection is in any non-synchronized state ...
552 * and the incoming segment acknowledges something not yet 553 * and the incoming segment acknowledges something not yet
553 * sent (the segment carries an unaccaptable ACK) ... 554 * sent (the segment carries an unacceptable ACK) ...
554 * a reset is sent." 555 * a reset is sent."
555 * 556 *
556 * Invalid ACK: reset will be sent by listening socket 557 * Invalid ACK: reset will be sent by listening socket
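The newtp->bytes_acked = 0 line above initialises the counter behind the sysctl_tcp_abc export earlier in this patch: Appropriate Byte Counting (RFC 3465) grows the congestion window per MSS of acknowledged data rather than per ACK. A compilable toy of that accounting, using made-up names rather than the kernel's tcp_sock fields:

#include <stdio.h>

/* Appropriate Byte Counting in miniature: cwnd grows only once a full MSS
 * worth of data has been acknowledged, so ACKs covering tiny segments can
 * no longer inflate the window.  Plain model, not the kernel's logic. */
struct cc {
        unsigned mss, cwnd, bytes_acked;
};

static void abc_ack(struct cc *c, unsigned acked_bytes)
{
        c->bytes_acked += acked_bytes;
        while (c->bytes_acked >= c->mss) {
                c->bytes_acked -= c->mss;
                c->cwnd++;                      /* one MSS of credit consumed */
        }
}

int main(void)
{
        struct cc c = { 1460, 10, 0 };

        for (int i = 0; i < 10; i++)
                abc_ack(&c, 100);               /* ten 100-byte ACKs: no growth */
        printf("cwnd = %u\n", c.cwnd);          /* still 10 */
        abc_ack(&c, 1000);                      /* crosses one MSS: cwnd -> 11 */
        printf("cwnd = %u\n", c.cwnd);
        return 0;
}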
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b907456a79f4..029c70dfb585 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -436,6 +436,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
436 u16 flags; 436 u16 flags;
437 437
438 BUG_ON(len > skb->len); 438 BUG_ON(len > skb->len);
439
440 clear_all_retrans_hints(tp);
439 nsize = skb_headlen(skb) - len; 441 nsize = skb_headlen(skb) - len;
440 if (nsize < 0) 442 if (nsize < 0)
441 nsize = 0; 443 nsize = 0;
@@ -599,7 +601,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
599 for TCP options, but includes only bare TCP header. 601 for TCP options, but includes only bare TCP header.
600 602
601 tp->rx_opt.mss_clamp is mss negotiated at connection setup. 603 tp->rx_opt.mss_clamp is mss negotiated at connection setup.
602 It is minumum of user_mss and mss received with SYN. 604 It is minimum of user_mss and mss received with SYN.
603 It also does not include TCP options. 605 It also does not include TCP options.
604 606
605 tp->pmtu_cookie is last pmtu, seen by this function. 607 tp->pmtu_cookie is last pmtu, seen by this function.
@@ -1171,7 +1173,7 @@ u32 __tcp_select_window(struct sock *sk)
1171{ 1173{
1172 struct inet_connection_sock *icsk = inet_csk(sk); 1174 struct inet_connection_sock *icsk = inet_csk(sk);
1173 struct tcp_sock *tp = tcp_sk(sk); 1175 struct tcp_sock *tp = tcp_sk(sk);
1174 /* MSS for the peer's data. Previous verions used mss_clamp 1176 /* MSS for the peer's data. Previous versions used mss_clamp
1175 * here. I don't know if the value based on our guesses 1177 * here. I don't know if the value based on our guesses
1176 * of peer's MSS is better for the performance. It's more correct 1178 * of peer's MSS is better for the performance. It's more correct
1177 * but may be worse for the performance because of rcv_mss 1179 * but may be worse for the performance because of rcv_mss
@@ -1260,7 +1262,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
1260 BUG_ON(tcp_skb_pcount(skb) != 1 || 1262 BUG_ON(tcp_skb_pcount(skb) != 1 ||
1261 tcp_skb_pcount(next_skb) != 1); 1263 tcp_skb_pcount(next_skb) != 1);
1262 1264
1263 /* Ok. We will be able to collapse the packet. */ 1265 /* changing transmit queue under us so clear hints */
1266 clear_all_retrans_hints(tp);
1267
1268 /* Ok. We will be able to collapse the packet. */
1264 __skb_unlink(next_skb, &sk->sk_write_queue); 1269 __skb_unlink(next_skb, &sk->sk_write_queue);
1265 1270
1266 memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); 1271 memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
@@ -1330,6 +1335,8 @@ void tcp_simple_retransmit(struct sock *sk)
1330 } 1335 }
1331 } 1336 }
1332 1337
1338 clear_all_retrans_hints(tp);
1339
1333 if (!lost) 1340 if (!lost)
1334 return; 1341 return;
1335 1342
@@ -1361,7 +1368,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1361 int err; 1368 int err;
1362 1369
1363 /* Do not sent more than we queued. 1/4 is reserved for possible 1370 /* Do not sent more than we queued. 1/4 is reserved for possible
1364 * copying overhead: frgagmentation, tunneling, mangling etc. 1371 * copying overhead: fragmentation, tunneling, mangling etc.
1365 */ 1372 */
1366 if (atomic_read(&sk->sk_wmem_alloc) > 1373 if (atomic_read(&sk->sk_wmem_alloc) >
1367 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) 1374 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
@@ -1468,13 +1475,25 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1468 const struct inet_connection_sock *icsk = inet_csk(sk); 1475 const struct inet_connection_sock *icsk = inet_csk(sk);
1469 struct tcp_sock *tp = tcp_sk(sk); 1476 struct tcp_sock *tp = tcp_sk(sk);
1470 struct sk_buff *skb; 1477 struct sk_buff *skb;
1471 int packet_cnt = tp->lost_out; 1478 int packet_cnt;
1479
1480 if (tp->retransmit_skb_hint) {
1481 skb = tp->retransmit_skb_hint;
1482 packet_cnt = tp->retransmit_cnt_hint;
1483 }else{
1484 skb = sk->sk_write_queue.next;
1485 packet_cnt = 0;
1486 }
1472 1487
1473 /* First pass: retransmit lost packets. */ 1488 /* First pass: retransmit lost packets. */
1474 if (packet_cnt) { 1489 if (tp->lost_out) {
1475 sk_stream_for_retrans_queue(skb, sk) { 1490 sk_stream_for_retrans_queue_from(skb, sk) {
1476 __u8 sacked = TCP_SKB_CB(skb)->sacked; 1491 __u8 sacked = TCP_SKB_CB(skb)->sacked;
1477 1492
1493 /* we could do better than to assign each time */
1494 tp->retransmit_skb_hint = skb;
1495 tp->retransmit_cnt_hint = packet_cnt;
1496
1478 /* Assume this retransmit will generate 1497 /* Assume this retransmit will generate
1479 * only one packet for congestion window 1498 * only one packet for congestion window
1480 * calculation purposes. This works because 1499 * calculation purposes. This works because
@@ -1485,10 +1504,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1485 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) 1504 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
1486 return; 1505 return;
1487 1506
1488 if (sacked&TCPCB_LOST) { 1507 if (sacked & TCPCB_LOST) {
1489 if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { 1508 if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
1490 if (tcp_retransmit_skb(sk, skb)) 1509 if (tcp_retransmit_skb(sk, skb)) {
1510 tp->retransmit_skb_hint = NULL;
1491 return; 1511 return;
1512 }
1492 if (icsk->icsk_ca_state != TCP_CA_Loss) 1513 if (icsk->icsk_ca_state != TCP_CA_Loss)
1493 NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); 1514 NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
1494 else 1515 else
@@ -1501,8 +1522,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1501 TCP_RTO_MAX); 1522 TCP_RTO_MAX);
1502 } 1523 }
1503 1524
1504 packet_cnt -= tcp_skb_pcount(skb); 1525 packet_cnt += tcp_skb_pcount(skb);
1505 if (packet_cnt <= 0) 1526 if (packet_cnt >= tp->lost_out)
1506 break; 1527 break;
1507 } 1528 }
1508 } 1529 }
@@ -1528,9 +1549,18 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1528 if (tcp_may_send_now(sk, tp)) 1549 if (tcp_may_send_now(sk, tp))
1529 return; 1550 return;
1530 1551
1531 packet_cnt = 0; 1552 if (tp->forward_skb_hint) {
1553 skb = tp->forward_skb_hint;
1554 packet_cnt = tp->forward_cnt_hint;
1555 } else{
1556 skb = sk->sk_write_queue.next;
1557 packet_cnt = 0;
1558 }
1559
1560 sk_stream_for_retrans_queue_from(skb, sk) {
1561 tp->forward_cnt_hint = packet_cnt;
1562 tp->forward_skb_hint = skb;
1532 1563
1533 sk_stream_for_retrans_queue(skb, sk) {
1534 /* Similar to the retransmit loop above we 1564 /* Similar to the retransmit loop above we
1535 * can pretend that the retransmitted SKB 1565 * can pretend that the retransmitted SKB
1536 * we send out here will be composed of one 1566 * we send out here will be composed of one
@@ -1547,8 +1577,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1547 continue; 1577 continue;
1548 1578
1549 /* Ok, retransmit it. */ 1579 /* Ok, retransmit it. */
1550 if (tcp_retransmit_skb(sk, skb)) 1580 if (tcp_retransmit_skb(sk, skb)) {
1581 tp->forward_skb_hint = NULL;
1551 break; 1582 break;
1583 }
1552 1584
1553 if (skb == skb_peek(&sk->sk_write_queue)) 1585 if (skb == skb_peek(&sk->sk_write_queue))
1554 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 1586 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
@@ -2058,3 +2090,4 @@ EXPORT_SYMBOL(tcp_connect);
2058EXPORT_SYMBOL(tcp_make_synack); 2090EXPORT_SYMBOL(tcp_make_synack);
2059EXPORT_SYMBOL(tcp_simple_retransmit); 2091EXPORT_SYMBOL(tcp_simple_retransmit);
2060EXPORT_SYMBOL(tcp_sync_mss); 2092EXPORT_SYMBOL(tcp_sync_mss);
2093EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor);
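The tcp_output.c changes introduce per-socket scan hints: tcp_xmit_retransmit_queue() used to walk the whole write queue from the head on every call, and now resumes from retransmit_skb_hint/forward_skb_hint together with the matching packet counts, while clear_all_retrans_hints() invalidates them whenever the queue is reshaped by tcp_fragment(), segment collapsing or tcp_simple_retransmit(). A rough userspace sketch of the resume-from-hint idea over a linked list; the names are illustrative, not kernel API.

#include <stddef.h>
#include <stdio.h>

struct seg {
        int seq;
        int lost;
        struct seg *next;
};

struct queue {
        struct seg *head;
        struct seg *scan_hint;          /* where the last scan stopped */
        int scan_cnt_hint;              /* work already accounted for  */
};

/* Drop the cached position whenever the queue is restructured. */
static void clear_hints(struct queue *q)
{
        q->scan_hint = NULL;
        q->scan_cnt_hint = 0;
}

/* Resume the lost-segment scan from the hint instead of the head. */
static void scan_lost(struct queue *q, int budget)
{
        struct seg *s = q->scan_hint ? q->scan_hint : q->head;
        int cnt = q->scan_hint ? q->scan_cnt_hint : 0;

        for (; s && cnt < budget; s = s->next) {
                q->scan_hint = s;               /* remember how far we got */
                q->scan_cnt_hint = cnt;
                if (s->lost) {
                        printf("retransmit seq %d\n", s->seq);
                        s->lost = 0;            /* handled; a resumed scan skips it */
                        cnt++;
                }
        }
}

int main(void)
{
        struct seg c = { 3, 1, NULL }, b = { 2, 0, &c }, a = { 1, 1, &b };
        struct queue q = { &a, NULL, 0 };

        scan_lost(&q, 1);       /* handles seq 1, parks the hint there   */
        scan_lost(&q, 2);       /* resumes near seq 1, reaches seq 3     */
        clear_hints(&q);        /* queue about to change shape           */
        return 0;
}

As in the patch, the hint is only a starting point for the next pass; correctness still comes from the per-segment state, which is why dropping the hint on failure or on any queue rewrite is always safe.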
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 327770bf5522..26d7486ee501 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -20,20 +20,20 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
20 u32 in_flight, int flag) 20 u32 in_flight, int flag)
21{ 21{
22 struct tcp_sock *tp = tcp_sk(sk); 22 struct tcp_sock *tp = tcp_sk(sk);
23 if (in_flight < tp->snd_cwnd) 23
24 if (!tcp_is_cwnd_limited(sk, in_flight))
24 return; 25 return;
25 26
26 if (tp->snd_cwnd <= tp->snd_ssthresh) { 27 if (tp->snd_cwnd <= tp->snd_ssthresh)
27 tp->snd_cwnd++; 28 tcp_slow_start(tp);
28 } else { 29 else {
29 tp->snd_cwnd_cnt++; 30 tp->snd_cwnd_cnt++;
30 if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){ 31 if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){
31 tp->snd_cwnd++; 32 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
33 tp->snd_cwnd++;
32 tp->snd_cwnd_cnt = 0; 34 tp->snd_cwnd_cnt = 0;
33 } 35 }
34 } 36 }
35 tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
36 tp->snd_cwnd_stamp = tcp_time_stamp;
37} 37}
38 38
39static u32 tcp_scalable_ssthresh(struct sock *sk) 39static u32 tcp_scalable_ssthresh(struct sock *sk)
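With slow start handed off to tcp_slow_start() and the clamp folded into the increment, the Scalable TCP additive path above amounts to: grow cwnd by one segment every min(cwnd, TCP_SCALABLE_AI_CNT) acknowledgements, never past snd_cwnd_clamp. A standalone sketch of just that arithmetic, with a stripped-down state struct standing in for tcp_sock:

#include <stdio.h>

#define SCALABLE_AI_CNT 50U     /* mirrors TCP_SCALABLE_AI_CNT */

struct cc {
        unsigned cwnd, cwnd_cnt, ssthresh, clamp;
};

/* Congestion-avoidance step per acknowledged segment. */
static void scalable_ca(struct cc *c)
{
        if (c->cwnd <= c->ssthresh) {
                c->cwnd++;                      /* stand-in for tcp_slow_start() */
                return;
        }
        if (++c->cwnd_cnt > (c->cwnd < SCALABLE_AI_CNT ? c->cwnd : SCALABLE_AI_CNT)) {
                if (c->cwnd < c->clamp)         /* clamp checked before growing */
                        c->cwnd++;
                c->cwnd_cnt = 0;
        }
}

int main(void)
{
        struct cc c = { 100, 0, 64, 1000 };

        for (int i = 0; i < 500; i++)
                scalable_ca(&c);
        printf("cwnd after 500 acks: %u\n", c.cwnd);    /* grows ~1 per 50 acks */
        return 0;
}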
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 415ee47ac1c5..e1880959614a 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -58,7 +58,7 @@ static void tcp_write_err(struct sock *sk)
58 * to prevent DoS attacks. It is called when a retransmission timeout 58 * to prevent DoS attacks. It is called when a retransmission timeout
59 * or zero probe timeout occurs on orphaned socket. 59 * or zero probe timeout occurs on orphaned socket.
60 * 60 *
61 * Criterium is still not confirmed experimentally and may change. 61 * Criteria is still not confirmed experimentally and may change.
62 * We kill the socket, if: 62 * We kill the socket, if:
63 * 1. If number of orphaned sockets exceeds an administratively configured 63 * 1. If number of orphaned sockets exceeds an administratively configured
64 * limit. 64 * limit.
@@ -132,7 +132,7 @@ static int tcp_write_timeout(struct sock *sk)
132 hole detection. :-( 132 hole detection. :-(
133 133
134 It is place to make it. It is not made. I do not want 134 It is place to make it. It is not made. I do not want
135 to make it. It is disguisting. It does not work in any 135 to make it. It is disgusting. It does not work in any
136 case. Let me to cite the same draft, which requires for 136 case. Let me to cite the same draft, which requires for
137 us to implement this: 137 us to implement this:
138 138
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 93c5f92070f9..b7d296a8ac6d 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -236,8 +236,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
236 /* We don't have enough RTT samples to do the Vegas 236 /* We don't have enough RTT samples to do the Vegas
237 * calculation, so we'll behave like Reno. 237 * calculation, so we'll behave like Reno.
238 */ 238 */
239 if (tp->snd_cwnd > tp->snd_ssthresh) 239 tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag);
240 tp->snd_cwnd++;
241 } else { 240 } else {
242 u32 rtt, target_cwnd, diff; 241 u32 rtt, target_cwnd, diff;
243 242
@@ -275,7 +274,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
275 */ 274 */
276 diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd; 275 diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;
277 276
278 if (tp->snd_cwnd < tp->snd_ssthresh) { 277 if (tp->snd_cwnd <= tp->snd_ssthresh) {
279 /* Slow start. */ 278 /* Slow start. */
280 if (diff > gamma) { 279 if (diff > gamma) {
281 /* Going too fast. Time to slow down 280 /* Going too fast. Time to slow down
@@ -295,6 +294,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
295 V_PARAM_SHIFT)+1); 294 V_PARAM_SHIFT)+1);
296 295
297 } 296 }
297 tcp_slow_start(tp);
298 } else { 298 } else {
299 /* Congestion avoidance. */ 299 /* Congestion avoidance. */
300 u32 next_snd_cwnd; 300 u32 next_snd_cwnd;
@@ -327,37 +327,17 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
327 else if (next_snd_cwnd < tp->snd_cwnd) 327 else if (next_snd_cwnd < tp->snd_cwnd)
328 tp->snd_cwnd--; 328 tp->snd_cwnd--;
329 } 329 }
330 }
331 330
332 /* Wipe the slate clean for the next RTT. */ 331 if (tp->snd_cwnd < 2)
333 vegas->cntRTT = 0; 332 tp->snd_cwnd = 2;
334 vegas->minRTT = 0x7fffffff; 333 else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
334 tp->snd_cwnd = tp->snd_cwnd_clamp;
335 }
335 } 336 }
336 337
337 /* The following code is executed for every ack we receive, 338 /* Wipe the slate clean for the next RTT. */
338 * except for conditions checked in should_advance_cwnd() 339 vegas->cntRTT = 0;
339 * before the call to tcp_cong_avoid(). Mainly this means that 340 vegas->minRTT = 0x7fffffff;
340 * we only execute this code if the ack actually acked some
341 * data.
342 */
343
344 /* If we are in slow start, increase our cwnd in response to this ACK.
345 * (If we are not in slow start then we are in congestion avoidance,
346 * and adjust our congestion window only once per RTT. See the code
347 * above.)
348 */
349 if (tp->snd_cwnd <= tp->snd_ssthresh)
350 tp->snd_cwnd++;
351
352 /* to keep cwnd from growing without bound */
353 tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
354
355 /* Make sure that we are never so timid as to reduce our cwnd below
356 * 2 MSS.
357 *
358 * Going below 2 MSS would risk huge delayed ACKs from our receiver.
359 */
360 tp->snd_cwnd = max(tp->snd_cwnd, 2U);
361} 341}
362 342
363/* Extract info for Tcp socket info provided via netlink. */ 343/* Extract info for Tcp socket info provided via netlink. */
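The Vegas rewrite drops the duplicated per-ACK growth at the tail of tcp_vegas_cong_avoid(): Reno behaviour is delegated to tcp_reno_cong_avoid() when there are too few RTT samples, slow start to tcp_slow_start(), and the once-per-RTT adjustment compares the fixed-point difference diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd against alpha/beta before clamping cwnd to [2, snd_cwnd_clamp]. Below is a small fixed-point sketch of the diff computation and the final clamp, assuming a shift of 1 as used by the Vegas constants; the helper names are made up.

#include <stdio.h>

#define V_PARAM_SHIFT 1U        /* assumed fixed-point shift, as in tcp_vegas.c */

/* Expected-vs-actual window difference in V_PARAM_SHIFT fixed point:
 * target is what cwnd "should" be if every segment saw base_rtt.
 * Standalone illustration, not the kernel's exact arithmetic. */
static unsigned vegas_diff(unsigned cwnd, unsigned base_rtt, unsigned rtt)
{
        unsigned target = ((cwnd * base_rtt) << V_PARAM_SHIFT) / rtt;

        return (cwnd << V_PARAM_SHIFT) - target;
}

/* Final clamp from the patched tail of tcp_vegas_cong_avoid(). */
static unsigned clamp_cwnd(unsigned cwnd, unsigned clamp)
{
        if (cwnd < 2)
                return 2;       /* never starve the receiver of ACK fodder */
        return cwnd > clamp ? clamp : cwnd;
}

int main(void)
{
        /* 20 segments in flight, 100 ms base RTT, 125 ms measured RTT:
         * roughly 4 "extra" segments queued in the network (scaled by <<1). */
        printf("diff = %u\n", vegas_diff(20, 100, 125));
        printf("cwnd = %u\n", clamp_cwnd(1, 64));
        return 0;
}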
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e0bd1013cb0d..2422a5f7195d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -761,7 +761,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
761 761
762static __inline__ int __udp_checksum_complete(struct sk_buff *skb) 762static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
763{ 763{
764 return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); 764 return __skb_checksum_complete(skb);
765} 765}
766 766
767static __inline__ int udp_checksum_complete(struct sk_buff *skb) 767static __inline__ int udp_checksum_complete(struct sk_buff *skb)
@@ -1100,11 +1100,8 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
1100 if (uh->check == 0) { 1100 if (uh->check == 0) {
1101 skb->ip_summed = CHECKSUM_UNNECESSARY; 1101 skb->ip_summed = CHECKSUM_UNNECESSARY;
1102 } else if (skb->ip_summed == CHECKSUM_HW) { 1102 } else if (skb->ip_summed == CHECKSUM_HW) {
1103 skb->ip_summed = CHECKSUM_UNNECESSARY;
1104 if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) 1103 if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
1105 return 0; 1104 skb->ip_summed = CHECKSUM_UNNECESSARY;
1106 LIMIT_NETDEBUG(KERN_DEBUG "udp v4 hw csum failure.\n");
1107 skb->ip_summed = CHECKSUM_NONE;
1108 } 1105 }
1109 if (skb->ip_summed != CHECKSUM_UNNECESSARY) 1106 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
1110 skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); 1107 skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
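udp_checksum_init() now follows the same deferred pattern as the TCP paths: a hardware-verified sum promotes the packet to CHECKSUM_UNNECESSARY, anything else merely seeds skb->csum with the pseudo-header so skb_checksum_complete() can finish verification at delivery time. A toy state model of that two-phase check; the enum and helpers are illustrative, not the kernel's.

#include <stdio.h>

/* Deferred-verification pattern the patch converges on: hardware-verified
 * packets are marked up front, everything else is left for a software
 * checksum at the point of delivery.  Names are placeholders. */
enum summed { NONE, HW, UNNECESSARY };

struct pkt {
        enum summed ip_summed;
        int hw_sum_ok;          /* what udp_check()/csum_ipv6_magic() would say */
        int payload_ok;         /* what a full software checksum would say */
};

static void checksum_init(struct pkt *p)
{
        if (p->ip_summed == HW && p->hw_sum_ok)
                p->ip_summed = UNNECESSARY;     /* verified, nothing more to do */
        /* otherwise leave state alone; verification is deferred */
}

static int checksum_complete(struct pkt *p)
{
        if (p->ip_summed == UNNECESSARY)
                return 0;                       /* already known-good */
        return p->payload_ok ? 0 : -1;          /* software fallback */
}

int main(void)
{
        struct pkt good = { HW, 1, 1 }, bad = { NONE, 0, 0 };

        checksum_init(&good);
        checksum_init(&bad);
        printf("good: %d, bad: %d\n", checksum_complete(&good),
               checksum_complete(&bad));
        return 0;
}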
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 23e540365a14..1bdf0fb8bf8a 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -585,17 +585,16 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
585 daddr = &skb->nh.ipv6h->daddr; 585 daddr = &skb->nh.ipv6h->daddr;
586 586
587 /* Perform checksum. */ 587 /* Perform checksum. */
588 if (skb->ip_summed == CHECKSUM_HW) { 588 switch (skb->ip_summed) {
589 skb->ip_summed = CHECKSUM_UNNECESSARY; 589 case CHECKSUM_HW:
590 if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 590 if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
591 skb->csum)) { 591 skb->csum))
592 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 hw checksum failed\n"); 592 break;
593 skb->ip_summed = CHECKSUM_NONE; 593 /* fall through */
594 } 594 case CHECKSUM_NONE:
595 } 595 skb->csum = ~csum_ipv6_magic(saddr, daddr, skb->len,
596 if (skb->ip_summed == CHECKSUM_NONE) { 596 IPPROTO_ICMPV6, 0);
597 if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 597 if (__skb_checksum_complete(skb)) {
598 skb_checksum(skb, 0, skb->len, 0))) {
599 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", 598 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
600 NIP6(*saddr), NIP6(*daddr)); 599 NIP6(*saddr), NIP6(*daddr));
601 goto discard_it; 600 goto discard_it;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 651c79b41eeb..8e9628f1c4c5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -298,13 +298,10 @@ void rawv6_err(struct sock *sk, struct sk_buff *skb,
298static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) 298static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
299{ 299{
300 if ((raw6_sk(sk)->checksum || sk->sk_filter) && 300 if ((raw6_sk(sk)->checksum || sk->sk_filter) &&
301 skb->ip_summed != CHECKSUM_UNNECESSARY) { 301 skb_checksum_complete(skb)) {
302 if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { 302 /* FIXME: increment a raw6 drops counter here */
303 /* FIXME: increment a raw6 drops counter here */ 303 kfree_skb(skb);
304 kfree_skb(skb); 304 return 0;
305 return 0;
306 }
307 skb->ip_summed = CHECKSUM_UNNECESSARY;
308 } 305 }
309 306
310 /* Charge it to the socket. */ 307 /* Charge it to the socket. */
@@ -337,32 +334,25 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
337 if (!rp->checksum) 334 if (!rp->checksum)
338 skb->ip_summed = CHECKSUM_UNNECESSARY; 335 skb->ip_summed = CHECKSUM_UNNECESSARY;
339 336
340 if (skb->ip_summed != CHECKSUM_UNNECESSARY) { 337 if (skb->ip_summed == CHECKSUM_HW) {
341 if (skb->ip_summed == CHECKSUM_HW) { 338 skb_postpull_rcsum(skb, skb->nh.raw,
342 skb_postpull_rcsum(skb, skb->nh.raw, 339 skb->h.raw - skb->nh.raw);
343 skb->h.raw - skb->nh.raw); 340 if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr,
341 &skb->nh.ipv6h->daddr,
342 skb->len, inet->num, skb->csum))
344 skb->ip_summed = CHECKSUM_UNNECESSARY; 343 skb->ip_summed = CHECKSUM_UNNECESSARY;
345 if (csum_ipv6_magic(&skb->nh.ipv6h->saddr,
346 &skb->nh.ipv6h->daddr,
347 skb->len, inet->num, skb->csum)) {
348 LIMIT_NETDEBUG(KERN_DEBUG "raw v6 hw csum failure.\n");
349 skb->ip_summed = CHECKSUM_NONE;
350 }
351 }
352 if (skb->ip_summed == CHECKSUM_NONE)
353 skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr,
354 &skb->nh.ipv6h->daddr,
355 skb->len, inet->num, 0);
356 } 344 }
345 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
346 skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr,
347 &skb->nh.ipv6h->daddr,
348 skb->len, inet->num, 0);
357 349
358 if (inet->hdrincl) { 350 if (inet->hdrincl) {
359 if (skb->ip_summed != CHECKSUM_UNNECESSARY && 351 if (skb_checksum_complete(skb)) {
360 (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
361 /* FIXME: increment a raw6 drops counter here */ 352 /* FIXME: increment a raw6 drops counter here */
362 kfree_skb(skb); 353 kfree_skb(skb);
363 return 0; 354 return 0;
364 } 355 }
365 skb->ip_summed = CHECKSUM_UNNECESSARY;
366 } 356 }
367 357
368 rawv6_rcv_skb(sk, skb); 358 rawv6_rcv_skb(sk, skb);
@@ -407,7 +397,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
407 if (skb->ip_summed==CHECKSUM_UNNECESSARY) { 397 if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
408 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 398 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
409 } else if (msg->msg_flags&MSG_TRUNC) { 399 } else if (msg->msg_flags&MSG_TRUNC) {
410 if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) 400 if (__skb_checksum_complete(skb))
411 goto csum_copy_err; 401 goto csum_copy_err;
412 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 402 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
413 } else { 403 } else {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d746d3b27efb..62c0e5bd931c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1401,20 +1401,18 @@ out:
1401static int tcp_v6_checksum_init(struct sk_buff *skb) 1401static int tcp_v6_checksum_init(struct sk_buff *skb)
1402{ 1402{
1403 if (skb->ip_summed == CHECKSUM_HW) { 1403 if (skb->ip_summed == CHECKSUM_HW) {
1404 skb->ip_summed = CHECKSUM_UNNECESSARY;
1405 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, 1404 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1406 &skb->nh.ipv6h->daddr,skb->csum)) 1405 &skb->nh.ipv6h->daddr,skb->csum)) {
1406 skb->ip_summed = CHECKSUM_UNNECESSARY;
1407 return 0; 1407 return 0;
1408 LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n"); 1408 }
1409 } 1409 }
1410
1411 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1412 &skb->nh.ipv6h->daddr, 0);
1413
1410 if (skb->len <= 76) { 1414 if (skb->len <= 76) {
1411 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, 1415 return __skb_checksum_complete(skb);
1412 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1413 return -1;
1414 skb->ip_summed = CHECKSUM_UNNECESSARY;
1415 } else {
1416 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1417 &skb->nh.ipv6h->daddr,0);
1418 } 1416 }
1419 return 0; 1417 return 0;
1420} 1418}
@@ -1575,7 +1573,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1575 goto discard_it; 1573 goto discard_it;
1576 1574
1577 if ((skb->ip_summed != CHECKSUM_UNNECESSARY && 1575 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1578 tcp_v6_checksum_init(skb) < 0)) 1576 tcp_v6_checksum_init(skb)))
1579 goto bad_packet; 1577 goto bad_packet;
1580 1578
1581 th = skb->h.th; 1579 th = skb->h.th;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index bf9519341fd3..e671153b47b2 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -248,7 +248,7 @@ try_again:
248 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, 248 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
249 copied); 249 copied);
250 } else if (msg->msg_flags&MSG_TRUNC) { 250 } else if (msg->msg_flags&MSG_TRUNC) {
251 if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) 251 if (__skb_checksum_complete(skb))
252 goto csum_copy_err; 252 goto csum_copy_err;
253 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, 253 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
254 copied); 254 copied);
@@ -363,13 +363,10 @@ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
363 return -1; 363 return -1;
364 } 364 }
365 365
366 if (skb->ip_summed != CHECKSUM_UNNECESSARY) { 366 if (skb_checksum_complete(skb)) {
367 if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { 367 UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
368 UDP6_INC_STATS_BH(UDP_MIB_INERRORS); 368 kfree_skb(skb);
369 kfree_skb(skb); 369 return 0;
370 return 0;
371 }
372 skb->ip_summed = CHECKSUM_UNNECESSARY;
373 } 370 }
374 371
375 if (sock_queue_rcv_skb(sk,skb)<0) { 372 if (sock_queue_rcv_skb(sk,skb)<0) {
@@ -491,13 +488,10 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
491 uh = skb->h.uh; 488 uh = skb->h.uh;
492 } 489 }
493 490
494 if (skb->ip_summed==CHECKSUM_HW) { 491 if (skb->ip_summed == CHECKSUM_HW &&
492 !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum))
495 skb->ip_summed = CHECKSUM_UNNECESSARY; 493 skb->ip_summed = CHECKSUM_UNNECESSARY;
496 if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) { 494
497 LIMIT_NETDEBUG(KERN_DEBUG "udp v6 hw csum failure.\n");
498 skb->ip_summed = CHECKSUM_NONE;
499 }
500 }
501 if (skb->ip_summed != CHECKSUM_UNNECESSARY) 495 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
502 skb->csum = ~csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, 0); 496 skb->csum = ~csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, 0);
503 497
@@ -521,8 +515,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
521 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 515 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
522 goto discard; 516 goto discard;
523 517
524 if (skb->ip_summed != CHECKSUM_UNNECESSARY && 518 if (skb_checksum_complete(skb))
525 (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)))
526 goto discard; 519 goto discard;
527 UDP6_INC_STATS_BH(UDP_MIB_NOPORTS); 520 UDP6_INC_STATS_BH(UDP_MIB_NOPORTS);
528 521
diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c
index 122c086ee2db..dbe6105e83a5 100644
--- a/net/rxrpc/transport.c
+++ b/net/rxrpc/transport.c
@@ -23,6 +23,7 @@
23#include <linux/in.h> 23#include <linux/in.h>
24#include <linux/in6.h> 24#include <linux/in6.h>
25#include <linux/icmp.h> 25#include <linux/icmp.h>
26#include <linux/skbuff.h>
26#include <net/sock.h> 27#include <net/sock.h>
27#include <net/ip.h> 28#include <net/ip.h>
28#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 29#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -475,15 +476,11 @@ void rxrpc_trans_receive_packet(struct rxrpc_transport *trans)
475 476
476 /* we'll probably need to checksum it (didn't call 477 /* we'll probably need to checksum it (didn't call
477 * sock_recvmsg) */ 478 * sock_recvmsg) */
478 if (pkt->ip_summed != CHECKSUM_UNNECESSARY) { 479 if (skb_checksum_complete(pkt)) {
479 if ((unsigned short) 480 kfree_skb(pkt);
480 csum_fold(skb_checksum(pkt, 0, pkt->len, 481 rxrpc_krxiod_queue_transport(trans);
481 pkt->csum))) { 482 _leave(" CSUM failed");
482 kfree_skb(pkt); 483 return;
483 rxrpc_krxiod_queue_transport(trans);
484 _leave(" CSUM failed");
485 return;
486 }
487 } 484 }
488 485
489 addr = pkt->nh.iph->saddr; 486 addr = pkt->nh.iph->saddr;
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 8f97e90f36c8..eb330d4f66d6 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -6,6 +6,9 @@
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
7 */ 7 */
8 8
9#include <linux/compiler.h>
10#include <linux/netdevice.h>
11#include <linux/skbuff.h>
9#include <linux/types.h> 12#include <linux/types.h>
10#include <linux/pagemap.h> 13#include <linux/pagemap.h>
11#include <linux/udp.h> 14#include <linux/udp.h>
@@ -165,6 +168,8 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
165 return -1; 168 return -1;
166 if ((unsigned short)csum_fold(desc.csum)) 169 if ((unsigned short)csum_fold(desc.csum))
167 return -1; 170 return -1;
171 if (unlikely(skb->ip_summed == CHECKSUM_HW))
172 netdev_rx_csum_fault(skb->dev);
168 return 0; 173 return 0;
169no_checksum: 174no_checksum:
170 if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) 175 if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0)
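The socklib.c hunk applies the same rule as __skb_checksum_complete() in net/core/datagram.c: whenever software recomputes a checksum that fails while the device had claimed CHECKSUM_HW, netdev_rx_csum_fault() is called so broken offload engines show up in the logs instead of silently degrading throughput. A compact sketch of that report-on-disagreement rule, with placeholder struct fields and names:

#include <stdio.h>

struct pkt {
        int hw_claimed;         /* NIC said the checksum was good */
        int sw_ok;              /* software recompute result      */
        const char *dev;
};

/* Return 0 if the packet verifies; report the device once if it vouched
 * for a sum that software rejects (stand-in for netdev_rx_csum_fault()). */
static int checksum_and_report(const struct pkt *p)
{
        if (p->sw_ok)
                return 0;
        if (p->hw_claimed)
                fprintf(stderr, "%s: hw csum failure\n", p->dev);
        return -1;
}

int main(void)
{
        struct pkt lying_nic = { 1, 0, "eth0" };
        struct pkt plain_bad = { 0, 0, "eth1" };

        printf("%d %d\n", checksum_and_report(&lying_nic),
               checksum_and_report(&plain_bad));
        return 0;
}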
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index f16e7cdd6150..e50e7cf43737 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -623,12 +623,9 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
623 /* we can use it in-place */ 623 /* we can use it in-place */
624 rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); 624 rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr);
625 rqstp->rq_arg.head[0].iov_len = len; 625 rqstp->rq_arg.head[0].iov_len = len;
626 if (skb->ip_summed != CHECKSUM_UNNECESSARY) { 626 if (skb_checksum_complete(skb)) {
627 if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { 627 skb_free_datagram(svsk->sk_sk, skb);
628 skb_free_datagram(svsk->sk_sk, skb); 628 return 0;
629 return 0;
630 }
631 skb->ip_summed = CHECKSUM_UNNECESSARY;
632 } 629 }
633 rqstp->rq_skbuff = skb; 630 rqstp->rq_skbuff = skb;
634 } 631 }