author	Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>	2008-04-08 01:33:07 -0400
committer	David S. Miller <davem@davemloft.net>	2008-04-08 01:33:07 -0400
commit	882bebaaca4bb1484078d44ef011f918c0e1e14e (patch)
tree	d6542dd9fc3d941ecc5f418b66ea09e91f47b71f
parent	c137f3dda04b0aee1bc6889cdc69185f53df8a82 (diff)
[TCP]: tcp_simple_retransmit can cause S+L
This fixes Bugzilla #10384.

tcp_simple_retransmit increments L without any check whatsoever against
overflowing S+L when Reno is in use.

The simplest scenario I can currently think of is rather complex in practice
(there might be some more straightforward cases though). I.e., if MSS is
reduced during MTU probing, it may end up marking everything lost, and if some
duplicate ACKs arrived prior to that, sacked_out will be non-zero as well,
leading to S+L > packets_out; tcp_clean_rtx_queue on the next cumulative ACK
or tcp_fastretrans_alert on the next duplicate ACK will fix the S counter.

A more straightforward (but questionable) solution would be to just call
tcp_reset_reno_sack() in tcp_simple_retransmit, but that would negatively
impact the probe's retransmission, i.e., the retransmissions would not occur
if some duplicate ACKs had arrived.

So I had to add reno sacked_out resetting to the CA_Loss state when the first
cumulative ACK arrives (this stale sacked_out might actually be the
explanation for the reports of left_out overflows in kernels prior to 2.6.23
and the S+L overflow reports of 2.6.24). However, this alone won't be enough
to fix kernels before 2.6.24, because it builds on top of commit 1b6d427bb7e
([TCP]: Reduce sacked_out with reno when purging write_queue) to keep
sacked_out from overflowing.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Reported-by: Alessandro Suardi <alessandro.suardi@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
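As a rough, self-contained sketch of the bookkeeping involved (not kernel code:
the struct and function names below are simplified stand-ins for struct
tcp_sock and the tcp_limit_reno_sacked() helper this patch introduces), the
S+L <= packets_out invariant and the clamp applied when it is violated look
roughly like this:

	/* Hypothetical, simplified model of the counters involved. */
	#include <stdio.h>

	struct tcp_sock_sketch {
		unsigned int packets_out;	/* segments currently in flight */
		unsigned int sacked_out;	/* S: dupack-estimated "SACKed" segments (Reno) */
		unsigned int lost_out;		/* L: segments marked lost */
	};

	/* Clamp S so that S + L never exceeds packets_out; returns 1 if an
	 * adjustment was needed, mirroring the helper added by this patch. */
	static int limit_reno_sacked_sketch(struct tcp_sock_sketch *tp)
	{
		unsigned int holes = tp->lost_out > 1 ? tp->lost_out : 1;

		if (tp->sacked_out + holes > tp->packets_out) {
			tp->sacked_out = tp->packets_out - holes;
			return 1;
		}
		return 0;
	}

	int main(void)
	{
		/* Scenario from the changelog: everything marked lost after an
		 * MTU-probe MSS reduction while stale dupack-derived sacked_out
		 * is still non-zero, so S + L > packets_out. */
		struct tcp_sock_sketch tp = {
			.packets_out = 10, .sacked_out = 3, .lost_out = 10,
		};

		if (limit_reno_sacked_sketch(&tp))
			printf("clamped sacked_out to %u\n", tp.sacked_out);
		return 0;
	}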
-rw-r--r--	include/net/tcp.h	2
-rw-r--r--	net/ipv4/tcp_input.c	24
-rw-r--r--	net/ipv4/tcp_output.c	3
3 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7de4ea3a04d9..4fd3eb2f8ec2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -752,6 +752,8 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
 	return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
 }
 
+extern int tcp_limit_reno_sacked(struct tcp_sock *tp);
+
 /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
  * The exception is rate halving phase, when cwnd is decreasing towards
  * ssthresh.
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7d0958785bfb..b4812c3cbbcf 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1625,13 +1625,11 @@ out:
 	return flag;
 }
 
-/* If we receive more dupacks than we expected counting segments
- * in assumption of absent reordering, interpret this as reordering.
- * The only another reason could be bug in receiver TCP.
+/* Limits sacked_out so that sum with lost_out isn't ever larger than
+ * packets_out. Returns zero if sacked_out adjustement wasn't necessary.
  */
-static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+int tcp_limit_reno_sacked(struct tcp_sock *tp)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	u32 holes;
 
 	holes = max(tp->lost_out, 1U);
@@ -1639,8 +1637,20 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
 
 	if ((tp->sacked_out + holes) > tp->packets_out) {
 		tp->sacked_out = tp->packets_out - holes;
-		tcp_update_reordering(sk, tp->packets_out + addend, 0);
+		return 1;
 	}
+	return 0;
+}
+
+/* If we receive more dupacks than we expected counting segments
+ * in assumption of absent reordering, interpret this as reordering.
+ * The only another reason could be bug in receiver TCP.
+ */
+static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	if (tcp_limit_reno_sacked(tp))
+		tcp_update_reordering(sk, tp->packets_out + addend, 0);
 }
 
 /* Emulate SACKs for SACKless connection: account for a new dupack. */
@@ -2600,6 +2610,8 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 	case TCP_CA_Loss:
 		if (flag & FLAG_DATA_ACKED)
 			icsk->icsk_retransmits = 0;
+		if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
+			tcp_reset_reno_sack(tp);
 		if (!tcp_try_undo_loss(sk)) {
 			tcp_moderate_cwnd(tp);
 			tcp_xmit_retransmit_queue(sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 72b9350006fe..d29ef79c00ca 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1808,6 +1808,9 @@ void tcp_simple_retransmit(struct sock *sk)
 	if (!lost)
 		return;
 
+	if (tcp_is_reno(tp))
+		tcp_limit_reno_sacked(tp);
+
 	tcp_verify_left_out(tp);
 
 	/* Don't muck with the congestion window here.