From 4c90d3b30334833450ccbb02f452d4972a3c3c3f Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 26 Feb 2012 10:06:19 +0000 Subject: tcp: fix false reordering signal in tcp_shifted_skb When tcp_shifted_skb() shifts bytes from the skb that is currently pointed to by 'highest_sack' then the increment of TCP_SKB_CB(skb)->seq implicitly advances tcp_highest_sack_seq(). This implicit advancement, combined with the recent fix to pass the correct SACKed range into tcp_sacktag_one(), caused tcp_sacktag_one() to think that the newly SACKed range was before the tcp_highest_sack_seq(), leading to a call to tcp_update_reordering() with a degree of reordering matching the size of the newly SACKed range (typically just 1 packet, which is a NOP, but potentially larger). This commit fixes this by simply calling tcp_sacktag_one() before the TCP_SKB_CB(skb)->seq advancement that can advance our notion of the highest SACKed sequence. Correspondingly, we can simplify the code a little now that tcp_shifted_skb() should update the lost_cnt_hint in all cases where skb == tp->lost_skb_hint. Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 53c8ce4046b2..ee42d42b2f45 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1403,8 +1403,16 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, BUG_ON(!pcount); - /* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). */ - if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint)) + /* Adjust counters and hints for the newly sacked sequence + * range but discard the return value since prev is already + * marked. We must tag the range first because the seq + * advancement below implicitly advances + * tcp_highest_sack_seq() when skb is highest_sack. + */ + tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, + start_seq, end_seq, dup_sack, pcount); + + if (skb == tp->lost_skb_hint) tp->lost_cnt_hint += pcount; TCP_SKB_CB(prev)->end_seq += shifted; @@ -1430,12 +1438,6 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, skb_shinfo(skb)->gso_type = 0; } - /* Adjust counters and hints for the newly sacked sequence range but - * discard the return value since prev is already marked. - */ - tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, - start_seq, end_seq, dup_sack, pcount); - /* Difference in this won't matter, both ACKed by the same cumul. ACK */ TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); -- cgit v1.2.2 From c0638c247f559e1a16ee79e54df14bca2cb679ea Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 2 Mar 2012 21:36:51 +0000 Subject: tcp: don't fragment SACKed skbs in tcp_mark_head_lost() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In tcp_mark_head_lost() we should not attempt to fragment a SACKed skb to mark the first portion as lost. This is for two primary reasons: (1) tcp_shifted_skb() coalesces adjacent regions of SACKed skbs. When doing this, it preserves the sum of their packet counts in order to reflect the real-world dynamics on the wire. But given that skbs can have remainders that do not align to MSS boundaries, this packet count preservation means that for SACKed skbs there is not necessarily a direct linear relationship between tcp_skb_pcount(skb) and skb->len. Thus tcp_mark_head_lost()'s previous attempts to fragment off and mark as lost a prefix of length (packets - oldcnt)*mss from SACKed skbs were leading to occasional failures of the WARN_ON(len > skb->len) in tcp_fragment() (which used to be a BUG_ON(); see the recent "crash in tcp_fragment" thread on netdev). (2) there is no real point in fragmenting off part of a SACKed skb and calling tcp_skb_mark_lost() on it, since tcp_skb_mark_lost() is a NOP for SACKed skbs. Signed-off-by: Neal Cardwell Acked-by: Ilpo Järvinen Acked-by: Yuchung Cheng Acked-by: Nandita Dukkipati Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ee42d42b2f45..d9b83d198c3d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2569,6 +2569,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) if (cnt > packets) { if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || + (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || (oldcnt >= packets)) break; -- cgit v1.2.2