aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author Neal Cardwell <ncardwell@google.com> 2016-01-25 17:01:53 -0500
committer David S. Miller <davem@davemloft.net> 2016-01-28 19:02:48 -0500
commit d88270eef4b56bd7973841dd1fed387ccfa83709 (patch)
tree 9be6ebbcc863d352917a0c26582e850ec4917674 /net
parent 8282f27449bf15548cb82c77b6e04ee0ab827bdc (diff)
tcp: fix tcp_mark_head_lost to check skb len before fragmenting
This commit fixes a corner case in tcp_mark_head_lost() which was causing the WARN_ON(len > skb->len) in tcp_fragment() to fire. tcp_mark_head_lost() was assuming that if a packet has tcp_skb_pcount(skb) of N, then it's safe to fragment off a prefix of M*mss bytes, for any M < N. But with the tricky way TCP pcounts are maintained, this is not always true. For example, suppose the sender sends 4 1-byte packets and the last 3 packets are SACKed. It will merge the last 3 packets in the write queue into an skb with pcount = 3 and len = 3 bytes. If another recovery happens after a SACK reneging event, tcp_mark_head_lost() may attempt to split the skb assuming it has more than 2*MSS bytes. An skb whose pcount is not proportional to its length sounds very counterintuitive, but as the commit description for the related commit c0638c247f55 ("tcp: don't fragment SACKed skbs in tcp_mark_head_lost()") notes, this is because tcp_shifted_skb() coalesces adjacent regions of SACKed skbs, and when doing this it preserves the sum of their packet counts in order to reflect the real-world dynamics on the wire. The c0638c247f55 commit tried to avoid problems by not fragmenting SACKed skbs, since SACKed skbs are where the non-proportionality between pcount and skb->len/mss is known to be possible. However, that commit did not handle the case where during a reneging event one of these weird SACKed skbs becomes an un-SACKed skb, which tcp_mark_head_lost() can then try to fragment. The fix is to simply mark the entire skb lost when this happens. This makes the recovery slightly more aggressive in such corner cases before we detect reordering. But once we detect reordering this code path is bypassed because FACK is disabled. Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/ipv4/tcp_input.c 10
1 files changed, 5 insertions, 5 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0003d409fec5..d2ad4337b63d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2164,8 +2164,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2164{ 2164{
2165 struct tcp_sock *tp = tcp_sk(sk); 2165 struct tcp_sock *tp = tcp_sk(sk);
2166 struct sk_buff *skb; 2166 struct sk_buff *skb;
2167 int cnt, oldcnt; 2167 int cnt, oldcnt, lost;
2168 int err;
2169 unsigned int mss; 2168 unsigned int mss;
2170 /* Use SACK to deduce losses of new sequences sent during recovery */ 2169 /* Use SACK to deduce losses of new sequences sent during recovery */
2171 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; 2170 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
@@ -2205,9 +2204,10 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2205 break; 2204 break;
2206 2205
2207 mss = tcp_skb_mss(skb); 2206 mss = tcp_skb_mss(skb);
2208 err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, 2207 /* If needed, chop off the prefix to mark as lost. */
2209 mss, GFP_ATOMIC); 2208 lost = (packets - oldcnt) * mss;
2210 if (err < 0) 2209 if (lost < skb->len &&
2210 tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
2211 break; 2211 break;
2212 cnt = packets; 2212 cnt = packets;
2213 } 2213 }