aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIlpo Järvinen <ilpo.jarvinen@helsinki.fi>2007-11-15 22:39:31 -0500
committerDavid S. Miller <davem@davemloft.net>2008-01-28 17:54:03 -0500
commit85cc391c0e4584db594bfc4005c63c07c76c5077 (patch)
treed62f02260161ca6d0fa03986ae70a24e260d1c75
parentf577111302677e6d1448475821cc19ba8835f60e (diff)
[TCP]: non-FACK SACK follows conservative SACK loss recovery
Many assumptions that are true when no reordering or other strange events happen are not part of RFC3517. The FACK implementation is based on such assumptions. Previously (before the rewrite) the non-FACK SACK was basically doing fast rexmit and then timing out all skbs when the first cumulative ACK arrived, which cannot really be called SACK based recovery :-). RFC3517 SACK disables these things: - Per SKB timeouts & head timeout entry to recovery - Marking at least one skb while in recovery (RFC3517 does this only for the fast retransmission but not for the other skbs when cumulative ACKs arrive in the recovery) - Sacktag's loss detection flavors B and C (see comment before tcp_sacktag_write_queue) This does not implement the "last resort" rule 3 of NextSeg, which allows retransmissions also when not enough SACK blocks have yet arrived above a segment for IsLost to return true [RFC3517]. The implementation differs from RFC3517 in these points: - Rate-halving is used instead of FlightSize / 2 - Instead of using dupACKs to trigger the recovery, the number of SACK blocks is used as FACK does with SACK blocks+holes (which provides a more accurate number). It seems that the difference can have a negative effect only if the receiver does not generate SACK blocks at all even though it claimed to be SACK-capable. - Dupthresh is not a constant one. Dynamical adjustments include both holes and sacked segments (equal to what FACK has) due to the complexity involved in determining the number of sacked blocks between highest_sack and the reordered segment. Thus it will be an over-estimate. Implementation note: tcp_clean_rtx_queue doesn't need a lost_cnt tweak because the head skb at that point cannot be SACKED_ACKED (nor would such a situation last long enough to cause problems). Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/ipv4/tcp_input.c80
1 files changed, 62 insertions, 18 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 26713e5d89db..c0e8f2b1fa7e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -863,6 +863,9 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
863 */ 863 */
864static void tcp_disable_fack(struct tcp_sock *tp) 864static void tcp_disable_fack(struct tcp_sock *tp)
865{ 865{
866 /* RFC3517 uses different metric in lost marker => reset on change */
867 if (tcp_is_fack(tp))
868 tp->lost_skb_hint = NULL;
866 tp->rx_opt.sack_ok &= ~2; 869 tp->rx_opt.sack_ok &= ~2;
867} 870}
868 871
@@ -1470,6 +1473,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1470 tp->sacked_out += tcp_skb_pcount(skb); 1473 tp->sacked_out += tcp_skb_pcount(skb);
1471 1474
1472 fack_count += tcp_skb_pcount(skb); 1475 fack_count += tcp_skb_pcount(skb);
1476
1477 /* Lost marker hint past SACKed? Tweak RFC3517 cnt */
1478 if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
1479 before(TCP_SKB_CB(skb)->seq,
1480 TCP_SKB_CB(tp->lost_skb_hint)->seq))
1481 tp->lost_cnt_hint += tcp_skb_pcount(skb);
1482
1473 if (fack_count > tp->fackets_out) 1483 if (fack_count > tp->fackets_out)
1474 tp->fackets_out = fack_count; 1484 tp->fackets_out = fack_count;
1475 1485
@@ -1504,7 +1514,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1504 flag &= ~FLAG_ONLY_ORIG_SACKED; 1514 flag &= ~FLAG_ONLY_ORIG_SACKED;
1505 } 1515 }
1506 1516
1507 if (tp->retrans_out && 1517 if (tcp_is_fack(tp) && tp->retrans_out &&
1508 after(highest_sack_end_seq, tp->lost_retrans_low) && 1518 after(highest_sack_end_seq, tp->lost_retrans_low) &&
1509 icsk->icsk_ca_state == TCP_CA_Recovery) 1519 icsk->icsk_ca_state == TCP_CA_Recovery)
1510 flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq); 1520 flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq);
@@ -1858,6 +1868,26 @@ static inline int tcp_fackets_out(struct tcp_sock *tp)
1858 return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out; 1868 return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out;
1859} 1869}
1860 1870
1871/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
1872 * counter when SACK is enabled (without SACK, sacked_out is used for
1873 * that purpose).
1874 *
1875 * Instead, with FACK TCP uses fackets_out that includes both SACKed
1876 * segments up to the highest received SACK block so far and holes in
1877 * between them.
1878 *
1879 * With reordering, holes may still be in flight, so RFC3517 recovery
1880 * uses pure sacked_out (total number of SACKed segments) even though
1881 * it violates the RFC that uses duplicate ACKs, often these are equal
1882 * but when e.g. out-of-window ACKs or packet duplication occurs,
1883 * they differ. Since neither occurs due to loss, TCP should really
1884 * ignore them.
1885 */
1886static inline int tcp_dupack_heurestics(struct tcp_sock *tp)
1887{
1888 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
1889}
1890
1861static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) 1891static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
1862{ 1892{
1863 return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto); 1893 return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
@@ -1978,13 +2008,13 @@ static int tcp_time_to_recover(struct sock *sk)
1978 return 1; 2008 return 1;
1979 2009
1980 /* Not-A-Trick#2 : Classic rule... */ 2010 /* Not-A-Trick#2 : Classic rule... */
1981 if (tcp_fackets_out(tp) > tp->reordering) 2011 if (tcp_dupack_heurestics(tp) > tp->reordering)
1982 return 1; 2012 return 1;
1983 2013
1984 /* Trick#3 : when we use RFC2988 timer restart, fast 2014 /* Trick#3 : when we use RFC2988 timer restart, fast
1985 * retransmit can be triggered by timeout of queue head. 2015 * retransmit can be triggered by timeout of queue head.
1986 */ 2016 */
1987 if (tcp_head_timedout(sk)) 2017 if (tcp_is_fack(tp) && tcp_head_timedout(sk))
1988 return 1; 2018 return 1;
1989 2019
1990 /* Trick#4: It is still not OK... But will it be useful to delay 2020 /* Trick#4: It is still not OK... But will it be useful to delay
@@ -2017,8 +2047,10 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp,
2017 tp->retransmit_skb_hint = NULL; 2047 tp->retransmit_skb_hint = NULL;
2018} 2048}
2019 2049
2020/* Mark head of queue up as lost. */ 2050/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
2021static void tcp_mark_head_lost(struct sock *sk, int packets) 2051 * is against sacked "cnt", otherwise it's against facked "cnt"
2052 */
2053static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
2022{ 2054{
2023 struct tcp_sock *tp = tcp_sk(sk); 2055 struct tcp_sock *tp = tcp_sk(sk);
2024 struct sk_buff *skb; 2056 struct sk_buff *skb;
@@ -2040,8 +2072,13 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
2040 /* this is not the most efficient way to do this... */ 2072 /* this is not the most efficient way to do this... */
2041 tp->lost_skb_hint = skb; 2073 tp->lost_skb_hint = skb;
2042 tp->lost_cnt_hint = cnt; 2074 tp->lost_cnt_hint = cnt;
2043 cnt += tcp_skb_pcount(skb); 2075
2044 if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) 2076 if (tcp_is_fack(tp) ||
2077 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2078 cnt += tcp_skb_pcount(skb);
2079
2080 if (((!fast_rexmit || (tp->lost_out > 0)) && (cnt > packets)) ||
2081 after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
2045 break; 2082 break;
2046 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { 2083 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
2047 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 2084 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -2054,17 +2091,22 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
2054 2091
2055/* Account newly detected lost packet(s) */ 2092/* Account newly detected lost packet(s) */
2056 2093
2057static void tcp_update_scoreboard(struct sock *sk) 2094static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2058{ 2095{
2059 struct tcp_sock *tp = tcp_sk(sk); 2096 struct tcp_sock *tp = tcp_sk(sk);
2060 2097
2061 if (tcp_is_fack(tp)) { 2098 if (tcp_is_reno(tp)) {
2099 tcp_mark_head_lost(sk, 1, fast_rexmit);
2100 } else if (tcp_is_fack(tp)) {
2062 int lost = tp->fackets_out - tp->reordering; 2101 int lost = tp->fackets_out - tp->reordering;
2063 if (lost <= 0) 2102 if (lost <= 0)
2064 lost = 1; 2103 lost = 1;
2065 tcp_mark_head_lost(sk, lost); 2104 tcp_mark_head_lost(sk, lost, fast_rexmit);
2066 } else { 2105 } else {
2067 tcp_mark_head_lost(sk, 1); 2106 int sacked_upto = tp->sacked_out - tp->reordering;
2107 if (sacked_upto < 0)
2108 sacked_upto = 0;
2109 tcp_mark_head_lost(sk, sacked_upto, fast_rexmit);
2068 } 2110 }
2069 2111
2070 /* New heuristics: it is possible only after we switched 2112 /* New heuristics: it is possible only after we switched
@@ -2072,7 +2114,7 @@ static void tcp_update_scoreboard(struct sock *sk)
2072 * Hence, we can detect timed out packets during fast 2114 * Hence, we can detect timed out packets during fast
2073 * retransmit without falling to slow start. 2115 * retransmit without falling to slow start.
2074 */ 2116 */
2075 if (!tcp_is_reno(tp) && tcp_head_timedout(sk)) { 2117 if (tcp_is_fack(tp) && tcp_head_timedout(sk)) {
2076 struct sk_buff *skb; 2118 struct sk_buff *skb;
2077 2119
2078 skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint 2120 skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
@@ -2245,7 +2287,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
2245{ 2287{
2246 struct tcp_sock *tp = tcp_sk(sk); 2288 struct tcp_sock *tp = tcp_sk(sk);
2247 /* Partial ACK arrived. Force Hoe's retransmit. */ 2289 /* Partial ACK arrived. Force Hoe's retransmit. */
2248 int failed = tcp_is_reno(tp) || tp->fackets_out>tp->reordering; 2290 int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
2249 2291
2250 if (tcp_may_undo(tp)) { 2292 if (tcp_may_undo(tp)) {
2251 /* Plain luck! Hole if filled with delayed 2293 /* Plain luck! Hole if filled with delayed
@@ -2379,7 +2421,8 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2379 struct tcp_sock *tp = tcp_sk(sk); 2421 struct tcp_sock *tp = tcp_sk(sk);
2380 int is_dupack = !(flag&(FLAG_SND_UNA_ADVANCED|FLAG_NOT_DUP)); 2422 int is_dupack = !(flag&(FLAG_SND_UNA_ADVANCED|FLAG_NOT_DUP));
2381 int do_lost = is_dupack || ((flag&FLAG_DATA_SACKED) && 2423 int do_lost = is_dupack || ((flag&FLAG_DATA_SACKED) &&
2382 (tp->fackets_out > tp->reordering)); 2424 (tcp_fackets_out(tp) > tp->reordering));
2425 int fast_rexmit = 0;
2383 2426
2384 /* Some technical things: 2427 /* Some technical things:
2385 * 1. Reno does not count dupacks (sacked_out) automatically. */ 2428 * 1. Reno does not count dupacks (sacked_out) automatically. */
@@ -2399,11 +2442,11 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2399 return; 2442 return;
2400 2443
2401 /* C. Process data loss notification, provided it is valid. */ 2444 /* C. Process data loss notification, provided it is valid. */
2402 if ((flag&FLAG_DATA_LOST) && 2445 if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
2403 before(tp->snd_una, tp->high_seq) && 2446 before(tp->snd_una, tp->high_seq) &&
2404 icsk->icsk_ca_state != TCP_CA_Open && 2447 icsk->icsk_ca_state != TCP_CA_Open &&
2405 tp->fackets_out > tp->reordering) { 2448 tp->fackets_out > tp->reordering) {
2406 tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering); 2449 tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, 0);
2407 NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); 2450 NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
2408 } 2451 }
2409 2452
@@ -2522,10 +2565,11 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2522 tp->bytes_acked = 0; 2565 tp->bytes_acked = 0;
2523 tp->snd_cwnd_cnt = 0; 2566 tp->snd_cwnd_cnt = 0;
2524 tcp_set_ca_state(sk, TCP_CA_Recovery); 2567 tcp_set_ca_state(sk, TCP_CA_Recovery);
2568 fast_rexmit = 1;
2525 } 2569 }
2526 2570
2527 if (do_lost || tcp_head_timedout(sk)) 2571 if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
2528 tcp_update_scoreboard(sk); 2572 tcp_update_scoreboard(sk, fast_rexmit);
2529 tcp_cwnd_down(sk, flag); 2573 tcp_cwnd_down(sk, flag);
2530 tcp_xmit_retransmit_queue(sk); 2574 tcp_xmit_retransmit_queue(sk);
2531} 2575}