author		Linus Torvalds <torvalds@linux-foundation.org>	2012-01-24 18:51:40 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-01-24 18:51:40 -0500
commit		701b259f446be2f3625fb852bceb93afe76e206d (patch)
tree		93f15bcd00bd59c38b4e59fed9af7ddf6b06c8b3 /net/ipv4
parent		d2346963bfcbb9a8ee783ca3c3b3bdd7448ec9d5 (diff)
parent		efc3dbc37412c027e363736b4f4c74ee5e8ecffc (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Davem says:

1) Fix JIT code generation on x86-64 for divide by zero, from Eric Dumazet.
2) tg3 header length computation correction from Eric Dumazet.
3) More build and reference counting fixes for socket memory cgroup code from Glauber Costa.
4) module.h snuck back into a core header after all the hard work we did to remove that, from Paul Gortmaker and Jesper Dangaard Brouer.
5) Fix PHY naming regression and add some new PCI IDs in stmmac, from Alessandro Rubini.
6) Netlink message generation fix in new team driver, should only advertise the entries that changed during events, from Jiri Pirko.
7) SRIOV VF registration and unregistration fixes, and also add a missing PCI ID, from Roopa Prabhu.
8) Fix infinite loop in tx queue flush code of brcmsmac, from Stanislaw Gruszka.
9) ftgmac100/ftmac100 build fix, missing interrupt.h include.
10) Memory leak fix in net/hyperv do_set_multicast() handling, from Wei Yongjun.
11) Off by one fix in netem packet scheduler, from Vijay Subramanian.
12) TCP loss detection fix from Yuchung Cheng.
13) TCP reset packet MD5 calculation uses wrong address, fix from Shawn Lu.
14) skge carrier assertion and DMA mapping fixes from Stephen Hemminger.
15) Congestion recovery undo performed at the wrong spot in BIC and CUBIC congestion control modules, fix from Neal Cardwell.
16) Ethtool ETHTOOL_GSSET_INFO is unnecessarily restrictive, from Michał Mirosław.
17) Fix triggerable race in ipv6 sysctl handling, from Francesco Ruggeri.
18) Statistics bug fixes in mlx4 from Eugenia Emantayev.
19) rds locking bug fix during info dumps, from yours truly.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (67 commits)
  rds: Make rds_sock_lock BH rather than IRQ safe.
  netprio_cgroup.h: dont include module.h from other includes
  net: flow_dissector.c missing include linux/export.h
  team: send only changed options/ports via netlink
  net/hyperv: fix possible memory leak in do_set_multicast()
  drivers/net: dsa/mv88e6xxx.c files need linux/module.h
  stmmac: added PCI identifiers
  llc: Fix race condition in llc_ui_recvmsg
  stmmac: fix phy naming inconsistency
  dsa: Add reporting of silicon revision for Marvell 88E6123/88E6161/88E6165 switches.
  tg3: fix ipv6 header length computation
  skge: add byte queue limit support
  mv643xx_eth: Add Rx Discard and Rx Overrun statistics
  bnx2x: fix compilation error with SOE in fw_dump
  bnx2x: handle CHIP_REVISION during init_one
  bnx2x: allow user to change ring size in ISCSI SD mode
  bnx2x: fix Big-Endianess in ethtool -t
  bnx2x: fixed ethtool statistics for MF modes
  bnx2x: credit-leakage fixup on vlan_mac_del_all
  macvlan: fix a possible use after free
  ...
Diffstat (limited to 'net/ipv4')
-rw-r--r--	net/ipv4/proc.c		 1
-rw-r--r--	net/ipv4/tcp_bic.c	11
-rw-r--r--	net/ipv4/tcp_cubic.c	10
-rw-r--r--	net/ipv4/tcp_input.c	41
-rw-r--r--	net/ipv4/tcp_ipv4.c	 2
5 files changed, 29 insertions, 36 deletions
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 3569d8ecaeac..6afc807ee2ad 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -216,7 +216,6 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPPartialUndo", LINUX_MIB_TCPPARTIALUNDO),
 	SNMP_MIB_ITEM("TCPDSACKUndo", LINUX_MIB_TCPDSACKUNDO),
 	SNMP_MIB_ITEM("TCPLossUndo", LINUX_MIB_TCPLOSSUNDO),
-	SNMP_MIB_ITEM("TCPLoss", LINUX_MIB_TCPLOSS),
 	SNMP_MIB_ITEM("TCPLostRetransmit", LINUX_MIB_TCPLOSTRETRANSMIT),
 	SNMP_MIB_ITEM("TCPRenoFailures", LINUX_MIB_TCPRENOFAILURES),
 	SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES),
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 6187eb4d1dcf..f45e1c242440 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -63,7 +63,6 @@ static inline void bictcp_reset(struct bictcp *ca)
 {
 	ca->cnt = 0;
 	ca->last_max_cwnd = 0;
-	ca->loss_cwnd = 0;
 	ca->last_cwnd = 0;
 	ca->last_time = 0;
 	ca->epoch_start = 0;
@@ -72,7 +71,11 @@ static inline void bictcp_reset(struct bictcp *ca)
 
 static void bictcp_init(struct sock *sk)
 {
-	bictcp_reset(inet_csk_ca(sk));
+	struct bictcp *ca = inet_csk_ca(sk);
+
+	bictcp_reset(ca);
+	ca->loss_cwnd = 0;
+
 	if (initial_ssthresh)
 		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
 }
@@ -127,7 +130,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 	}
 
 	/* if in slow start or link utilization is very low */
-	if (ca->loss_cwnd == 0) {
+	if (ca->last_max_cwnd == 0) {
 		if (ca->cnt > 20) /* increase cwnd 5% per RTT */
 			ca->cnt = 20;
 	}
@@ -185,7 +188,7 @@ static u32 bictcp_undo_cwnd(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct bictcp *ca = inet_csk_ca(sk);
-	return max(tp->snd_cwnd, ca->last_max_cwnd);
+	return max(tp->snd_cwnd, ca->loss_cwnd);
 }
 
 static void bictcp_state(struct sock *sk, u8 new_state)
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index f376b05cca81..a9077f441cb2 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -107,7 +107,6 @@ static inline void bictcp_reset(struct bictcp *ca)
 {
 	ca->cnt = 0;
 	ca->last_max_cwnd = 0;
-	ca->loss_cwnd = 0;
 	ca->last_cwnd = 0;
 	ca->last_time = 0;
 	ca->bic_origin_point = 0;
@@ -142,7 +141,10 @@ static inline void bictcp_hystart_reset(struct sock *sk)
 
 static void bictcp_init(struct sock *sk)
 {
-	bictcp_reset(inet_csk_ca(sk));
+	struct bictcp *ca = inet_csk_ca(sk);
+
+	bictcp_reset(ca);
+	ca->loss_cwnd = 0;
 
 	if (hystart)
 		bictcp_hystart_reset(sk);
@@ -275,7 +277,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 	 * The initial growth of cubic function may be too conservative
 	 * when the available bandwidth is still unknown.
 	 */
-	if (ca->loss_cwnd == 0 && ca->cnt > 20)
+	if (ca->last_max_cwnd == 0 && ca->cnt > 20)
 		ca->cnt = 20;	/* increase cwnd 5% per RTT */
 
 	/* TCP Friendly */
@@ -342,7 +344,7 @@ static u32 bictcp_undo_cwnd(struct sock *sk)
 {
 	struct bictcp *ca = inet_csk_ca(sk);
 
-	return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd);
+	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
 }
 
 static void bictcp_state(struct sock *sk, u8 new_state)
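
The BIC and CUBIC hunks above keep ca->loss_cwnd across bictcp_reset() so that undoing a spurious loss can return to the window recorded at loss time, and they key the "slow start / low utilization" heuristic on last_max_cwnd instead. For illustration only, a minimal userspace sketch of that undo behaviour with hypothetical names (not the kernel's), assuming only that the window is saved when loss is signalled:

/* Userspace sketch only -- hypothetical names, not kernel code. */
#include <stdio.h>

struct ca_sketch {
	unsigned int loss_cwnd;		/* cwnd remembered at the last loss */
};

/* Loss detected: remember the window, then apply some reduction
 * (the real modules use their own beta; the factor here is illustrative). */
static unsigned int on_loss(struct ca_sketch *ca, unsigned int cwnd)
{
	ca->loss_cwnd = cwnd;
	return cwnd / 2;
}

/* Undo after a loss turns out to be spurious: never return less than the
 * window held when the loss was (wrongly) signalled. */
static unsigned int undo_cwnd(const struct ca_sketch *ca, unsigned int cwnd)
{
	return cwnd > ca->loss_cwnd ? cwnd : ca->loss_cwnd;
}

int main(void)
{
	struct ca_sketch ca = { 0 };
	unsigned int cwnd = on_loss(&ca, 100);	/* cwnd reduced to 50 */

	printf("undo restores %u\n", undo_cwnd(&ca, cwnd));	/* prints 100 */
	return 0;
}
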
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2877c3e09587..976034f82320 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -105,7 +105,6 @@ int sysctl_tcp_abc __read_mostly;
 #define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN. */
 #define FLAG_DATA_SACKED	0x20 /* New SACK. */
 #define FLAG_ECE		0x40 /* ECE in this ACK */
-#define FLAG_DATA_LOST		0x80 /* SACK detected data lossage. */
 #define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
 #define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO */
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
@@ -1040,13 +1039,11 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
  * These 6 states form finite state machine, controlled by the following events:
  * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
  * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
- * 3. Loss detection event of one of three flavors:
+ * 3. Loss detection event of two flavors:
  *	A. Scoreboard estimator decided the packet is lost.
  *	   A'. Reno "three dupacks" marks head of queue lost.
- *	   A''. Its FACK modfication, head until snd.fack is lost.
- *	B. SACK arrives sacking data transmitted after never retransmitted
- *	   hole was sent out.
- *	C. SACK arrives sacking SND.NXT at the moment, when the
+ *	   A''. Its FACK modification, head until snd.fack is lost.
+ *	B. SACK arrives sacking SND.NXT at the moment, when the
  *	   segment was retransmitted.
  * 4. D-SACK added new rule: D-SACK changes any tag to S.
  *
@@ -1153,7 +1150,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
 }
 
 /* Check for lost retransmit. This superb idea is borrowed from "ratehalving".
- * Event "C". Later note: FACK people cheated me again 8), we have to account
+ * Event "B". Later note: FACK people cheated me again 8), we have to account
  * for reordering! Ugly, but should help.
  *
  * Search retransmitted skbs from write_queue that were sent when snd_nxt was
@@ -1844,10 +1841,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 		if (found_dup_sack && ((i + 1) == first_sack_index))
 			next_dup = &sp[i + 1];
 
-		/* Event "B" in the comment above. */
-		if (after(end_seq, tp->high_seq))
-			state.flag |= FLAG_DATA_LOST;
-
 		/* Skip too early cached blocks */
 		while (tcp_sack_cache_ok(tp, cache) &&
 		       !before(start_seq, cache->end_seq))
@@ -2515,8 +2508,11 @@ static void tcp_timeout_skbs(struct sock *sk)
 	tcp_verify_left_out(tp);
 }
 
-/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
- * is against sacked "cnt", otherwise it's against facked "cnt"
+/* Detect loss in event "A" above by marking head of queue up as lost.
+ * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
+ * are considered lost. For RFC3517 SACK, a segment is considered lost if it
+ * has at least tp->reordering SACKed seqments above it; "packets" refers to
+ * the maximum SACKed segments to pass before reaching this limit.
  */
 static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 {
@@ -2525,6 +2521,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 	int cnt, oldcnt;
 	int err;
 	unsigned int mss;
+	/* Use SACK to deduce losses of new sequences sent during recovery */
+	const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
 
 	WARN_ON(packets > tp->packets_out);
 	if (tp->lost_skb_hint) {
@@ -2546,7 +2544,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 		tp->lost_skb_hint = skb;
 		tp->lost_cnt_hint = cnt;
 
-		if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+		if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
 			break;
 
 		oldcnt = cnt;
@@ -3033,19 +3031,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 	if (tcp_check_sack_reneging(sk, flag))
 		return;
 
-	/* C. Process data loss notification, provided it is valid. */
-	if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
-	    before(tp->snd_una, tp->high_seq) &&
-	    icsk->icsk_ca_state != TCP_CA_Open &&
-	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
-	}
-
-	/* D. Check consistency of the current state. */
+	/* C. Check consistency of the current state. */
 	tcp_verify_left_out(tp);
 
-	/* E. Check state exit conditions. State can be terminated
+	/* D. Check state exit conditions. State can be terminated
 	 * when high_seq is ACKed. */
 	if (icsk->icsk_ca_state == TCP_CA_Open) {
 		WARN_ON(tp->retrans_out != 0);
@@ -3077,7 +3066,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 		}
 	}
 
-	/* F. Process state. */
+	/* E. Process state. */
 	switch (icsk->icsk_ca_state) {
 	case TCP_CA_Recovery:
 		if (!(flag & FLAG_SND_UNA_ADVANCED)) {
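
The tcp_input.c changes above replace the FLAG_DATA_LOST heuristic with a bound applied directly in tcp_mark_head_lost(): SACK senders may deduce losses up to snd_nxt (data sent during recovery), non-SACK senders only up to high_seq. A small userspace sketch of that bound, with hypothetical names and the same wrap-safe sequence comparison idea as the kernel's after():

/* Userspace sketch only -- hypothetical names, not kernel code. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Wrap-safe "seq1 comes after seq2", analogous to the kernel's after(). */
static bool seq_after(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) > 0;
}

/* A segment ending at end_seq may still be marked lost if it does not
 * extend past loss_high: snd_nxt for SACK senders, high_seq otherwise. */
static bool may_mark_lost(bool is_sack, uint32_t end_seq,
			  uint32_t snd_nxt, uint32_t high_seq)
{
	uint32_t loss_high = is_sack ? snd_nxt : high_seq;

	return !seq_after(end_seq, loss_high);
}

int main(void)
{
	/* Data sent during recovery (beyond high_seq) is eligible only
	 * when SACK is in use: prints "sack: 1, reno: 0". */
	printf("sack: %d, reno: %d\n",
	       may_mark_lost(true, 2000, 3000, 1000),
	       may_mark_lost(false, 2000, 3000, 1000));
	return 0;
}
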
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1eb4ad57670e..337ba4cca052 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -631,7 +631,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	arg.iov[0].iov_len = sizeof(rep.th);
 
 #ifdef CONFIG_TCP_MD5SIG
-	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
+	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
 	if (key) {
 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 				   (TCPOPT_NOP << 16) |
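
The tcp_ipv4.c change above makes the RST signing key lookup use the incoming segment's source address: the reset is sent back to the sender of the offending segment, so that sender is the peer whose MD5 key must sign it. A tiny sketch with made-up names (not the kernel's structures) of which header field identifies that peer:

/* Userspace sketch only -- hypothetical names, not kernel code. */
#include <stdint.h>
#include <stdio.h>

struct iphdr_sketch {
	uint32_t saddr;		/* remote peer that sent the segment */
	uint32_t daddr;		/* our local address it was sent to */
};

/* The RST is addressed back to saddr, so the per-peer MD5 key lookup
 * must be keyed on saddr, not daddr. */
static uint32_t md5_peer_for_reset(const struct iphdr_sketch *iph)
{
	return iph->saddr;
}

int main(void)
{
	struct iphdr_sketch iph = { .saddr = 0x0a000001, .daddr = 0x0a000002 };

	printf("sign RST with key of peer 0x%08x\n", md5_peer_for_reset(&iph));
	return 0;
}
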