diff options
author | Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> | 2007-02-22 02:16:11 -0500 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-04-26 01:23:16 -0400 |
commit | 4dc2665e3634d720a62bd27128fc8781fcdad2dc (patch) | |
tree | 6829101a6eeadb446e45b4a825f148988332d84b /net/ipv4/tcp_input.c | |
parent | 288035f915686a9a9e85e0358c5392bb5d7ae58d (diff) |
[TCP]: SACK enhanced FRTO
Implements the SACK-enhanced FRTO given in RFC4138 using the
variant given in Appendix B.
RFC4138, Appendix B:
"This means that in order to declare timeout spurious, the TCP
sender must receive an acknowledgment for non-retransmitted
segment between SND.UNA and RecoveryPoint in algorithm step 3.
RecoveryPoint is defined in conservative SACK-recovery
algorithm [RFC3517]"
The basic version of the FRTO algorithm can still be used
when SACK is enabled. To enable the SACK-enhanced version, set the
tcp_frto sysctl to 2.
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 76 |
1 files changed, 65 insertions, 11 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index df516d4eca96..bb3f234668b3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -100,6 +100,7 @@ int sysctl_tcp_abc __read_mostly; | |||
100 | #define FLAG_ECE 0x40 /* ECE in this ACK */ | 100 | #define FLAG_ECE 0x40 /* ECE in this ACK */ |
101 | #define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ | 101 | #define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ |
102 | #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ | 102 | #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ |
103 | #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ | ||
103 | 104 | ||
104 | #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) | 105 | #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) |
105 | #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) | 106 | #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) |
@@ -110,6 +111,8 @@ int sysctl_tcp_abc __read_mostly; | |||
110 | #define IsFack(tp) ((tp)->rx_opt.sack_ok & 2) | 111 | #define IsFack(tp) ((tp)->rx_opt.sack_ok & 2) |
111 | #define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4) | 112 | #define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4) |
112 | 113 | ||
114 | #define IsSackFrto() (sysctl_tcp_frto == 0x2) | ||
115 | |||
113 | #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) | 116 | #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) |
114 | 117 | ||
115 | /* Adapt the MSS value used to make delayed ack decision to the | 118 | /* Adapt the MSS value used to make delayed ack decision to the |
@@ -1159,6 +1162,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1159 | /* clear lost hint */ | 1162 | /* clear lost hint */ |
1160 | tp->retransmit_skb_hint = NULL; | 1163 | tp->retransmit_skb_hint = NULL; |
1161 | } | 1164 | } |
1165 | /* SACK enhanced F-RTO detection. | ||
1166 | * Set flag if and only if non-rexmitted | ||
1167 | * segments below frto_highmark are | ||
1168 | * SACKed (RFC4138; Appendix B). | ||
1169 | * Clearing correct due to in-order walk | ||
1170 | */ | ||
1171 | if (after(end_seq, tp->frto_highmark)) { | ||
1172 | flag &= ~FLAG_ONLY_ORIG_SACKED; | ||
1173 | } else { | ||
1174 | if (!(sacked & TCPCB_RETRANS)) | ||
1175 | flag |= FLAG_ONLY_ORIG_SACKED; | ||
1176 | } | ||
1162 | } | 1177 | } |
1163 | 1178 | ||
1164 | TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; | 1179 | TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; |
@@ -1240,7 +1255,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1240 | /* F-RTO can only be used if these conditions are satisfied: | 1255 | /* F-RTO can only be used if these conditions are satisfied: |
1241 | * - there must be some unsent new data | 1256 | * - there must be some unsent new data |
1242 | * - the advertised window should allow sending it | 1257 | * - the advertised window should allow sending it |
1243 | * - TCP has never retransmitted anything other than head | 1258 | * - TCP has never retransmitted anything other than head (SACK enhanced |
1259 | * variant from Appendix B of RFC4138 is more robust here) | ||
1244 | */ | 1260 | */ |
1245 | int tcp_use_frto(struct sock *sk) | 1261 | int tcp_use_frto(struct sock *sk) |
1246 | { | 1262 | { |
@@ -1252,6 +1268,9 @@ int tcp_use_frto(struct sock *sk) | |||
1252 | tp->snd_una + tp->snd_wnd)) | 1268 | tp->snd_una + tp->snd_wnd)) |
1253 | return 0; | 1269 | return 0; |
1254 | 1270 | ||
1271 | if (IsSackFrto()) | ||
1272 | return 1; | ||
1273 | |||
1255 | /* Avoid expensive walking of rexmit queue if possible */ | 1274 | /* Avoid expensive walking of rexmit queue if possible */ |
1256 | if (tp->retrans_out > 1) | 1275 | if (tp->retrans_out > 1) |
1257 | return 0; | 1276 | return 0; |
@@ -1328,9 +1347,18 @@ void tcp_enter_frto(struct sock *sk) | |||
1328 | } | 1347 | } |
1329 | tcp_sync_left_out(tp); | 1348 | tcp_sync_left_out(tp); |
1330 | 1349 | ||
1350 | /* Earlier loss recovery underway (see RFC4138; Appendix B). | ||
1351 | * The last condition is necessary at least in tp->frto_counter case. | ||
1352 | */ | ||
1353 | if (IsSackFrto() && (tp->frto_counter || | ||
1354 | ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && | ||
1355 | after(tp->high_seq, tp->snd_una)) { | ||
1356 | tp->frto_highmark = tp->high_seq; | ||
1357 | } else { | ||
1358 | tp->frto_highmark = tp->snd_nxt; | ||
1359 | } | ||
1331 | tcp_set_ca_state(sk, TCP_CA_Disorder); | 1360 | tcp_set_ca_state(sk, TCP_CA_Disorder); |
1332 | tp->high_seq = tp->snd_nxt; | 1361 | tp->high_seq = tp->snd_nxt; |
1333 | tp->frto_highmark = tp->snd_nxt; | ||
1334 | tp->frto_counter = 1; | 1362 | tp->frto_counter = 1; |
1335 | } | 1363 | } |
1336 | 1364 | ||
@@ -2566,6 +2594,10 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp) | |||
2566 | * Rationale: if the RTO was spurious, new ACKs should arrive from the | 2594 | * Rationale: if the RTO was spurious, new ACKs should arrive from the |
2567 | * original window even after we transmit two new data segments. | 2595 | * original window even after we transmit two new data segments. |
2568 | * | 2596 | * |
2597 | * SACK version: | ||
2598 | * on first step, wait until first cumulative ACK arrives, then move to | ||
2599 | * the second step. In second step, the next ACK decides. | ||
2600 | * | ||
2569 | * F-RTO is implemented (mainly) in four functions: | 2601 | * F-RTO is implemented (mainly) in four functions: |
2570 | * - tcp_use_frto() is used to determine if TCP is can use F-RTO | 2602 | * - tcp_use_frto() is used to determine if TCP is can use F-RTO |
2571 | * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is | 2603 | * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is |
@@ -2590,16 +2622,38 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag) | |||
2590 | return 1; | 2622 | return 1; |
2591 | } | 2623 | } |
2592 | 2624 | ||
2593 | /* RFC4138 shortcoming in step 2; should also have case c): ACK isn't | 2625 | if (!IsSackFrto() || IsReno(tp)) { |
2594 | * duplicate nor advances window, e.g., opposite dir data, winupdate | 2626 | /* RFC4138 shortcoming in step 2; should also have case c): |
2595 | */ | 2627 | * ACK isn't duplicate nor advances window, e.g., opposite dir |
2596 | if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) && | 2628 | * data, winupdate |
2597 | !(flag&FLAG_FORWARD_PROGRESS)) | 2629 | */ |
2598 | return 1; | 2630 | if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) && |
2631 | !(flag&FLAG_FORWARD_PROGRESS)) | ||
2632 | return 1; | ||
2599 | 2633 | ||
2600 | if (!(flag&FLAG_DATA_ACKED)) { | 2634 | if (!(flag&FLAG_DATA_ACKED)) { |
2601 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), flag); | 2635 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), |
2602 | return 1; | 2636 | flag); |
2637 | return 1; | ||
2638 | } | ||
2639 | } else { | ||
2640 | if (!(flag&FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { | ||
2641 | /* Prevent sending of new data. */ | ||
2642 | tp->snd_cwnd = min(tp->snd_cwnd, | ||
2643 | tcp_packets_in_flight(tp)); | ||
2644 | return 1; | ||
2645 | } | ||
2646 | |||
2647 | if ((tp->frto_counter == 2) && | ||
2648 | (!(flag&FLAG_FORWARD_PROGRESS) || | ||
2649 | ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) { | ||
2650 | /* RFC4138 shortcoming (see comment above) */ | ||
2651 | if (!(flag&FLAG_FORWARD_PROGRESS) && (flag&FLAG_NOT_DUP)) | ||
2652 | return 1; | ||
2653 | |||
2654 | tcp_enter_frto_loss(sk, 3, flag); | ||
2655 | return 1; | ||
2656 | } | ||
2603 | } | 2657 | } |
2604 | 2658 | ||
2605 | if (tp->frto_counter == 1) { | 2659 | if (tp->frto_counter == 1) { |