aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
authorIlpo Järvinen <ilpo.jarvinen@helsinki.fi>2007-02-22 02:16:11 -0500
committerDavid S. Miller <davem@sunset.davemloft.net>2007-04-26 01:23:16 -0400
commit4dc2665e3634d720a62bd27128fc8781fcdad2dc (patch)
tree6829101a6eeadb446e45b4a825f148988332d84b /net/ipv4/tcp_input.c
parent288035f915686a9a9e85e0358c5392bb5d7ae58d (diff)
[TCP]: SACK enhanced FRTO
Implements the SACK-enhanced FRTO given in RFC4138 using the variant given in Appendix B. RFC4138, Appendix B: "This means that in order to declare timeout spurious, the TCP sender must receive an acknowledgment for non-retransmitted segment between SND.UNA and RecoveryPoint in algorithm step 3. RecoveryPoint is defined in conservative SACK-recovery algorithm [RFC3517]" The basic version of the FRTO algorithm can still be used when SACK is enabled. To enable the SACK-enhanced version, set the tcp_frto sysctl to 2. Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--net/ipv4/tcp_input.c76
1 files changed, 65 insertions, 11 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index df516d4eca96..bb3f234668b3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -100,6 +100,7 @@ int sysctl_tcp_abc __read_mostly;
100#define FLAG_ECE 0x40 /* ECE in this ACK */ 100#define FLAG_ECE 0x40 /* ECE in this ACK */
101#define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ 101#define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */
102#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ 102#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
103#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */
103 104
104#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) 105#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
105#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) 106#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -110,6 +111,8 @@ int sysctl_tcp_abc __read_mostly;
110#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2) 111#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2)
111#define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4) 112#define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4)
112 113
114#define IsSackFrto() (sysctl_tcp_frto == 0x2)
115
113#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) 116#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
114 117
115/* Adapt the MSS value used to make delayed ack decision to the 118/* Adapt the MSS value used to make delayed ack decision to the
@@ -1159,6 +1162,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1159 /* clear lost hint */ 1162 /* clear lost hint */
1160 tp->retransmit_skb_hint = NULL; 1163 tp->retransmit_skb_hint = NULL;
1161 } 1164 }
1165 /* SACK enhanced F-RTO detection.
1166 * Set flag if and only if non-rexmitted
1167 * segments below frto_highmark are
1168 * SACKed (RFC4138; Appendix B).
1169 * Clearing correct due to in-order walk
1170 */
1171 if (after(end_seq, tp->frto_highmark)) {
1172 flag &= ~FLAG_ONLY_ORIG_SACKED;
1173 } else {
1174 if (!(sacked & TCPCB_RETRANS))
1175 flag |= FLAG_ONLY_ORIG_SACKED;
1176 }
1162 } 1177 }
1163 1178
1164 TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; 1179 TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
@@ -1240,7 +1255,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1240/* F-RTO can only be used if these conditions are satisfied: 1255/* F-RTO can only be used if these conditions are satisfied:
1241 * - there must be some unsent new data 1256 * - there must be some unsent new data
1242 * - the advertised window should allow sending it 1257 * - the advertised window should allow sending it
1243 * - TCP has never retransmitted anything other than head 1258 * - TCP has never retransmitted anything other than head (SACK enhanced
1259 * variant from Appendix B of RFC4138 is more robust here)
1244 */ 1260 */
1245int tcp_use_frto(struct sock *sk) 1261int tcp_use_frto(struct sock *sk)
1246{ 1262{
@@ -1252,6 +1268,9 @@ int tcp_use_frto(struct sock *sk)
1252 tp->snd_una + tp->snd_wnd)) 1268 tp->snd_una + tp->snd_wnd))
1253 return 0; 1269 return 0;
1254 1270
1271 if (IsSackFrto())
1272 return 1;
1273
1255 /* Avoid expensive walking of rexmit queue if possible */ 1274 /* Avoid expensive walking of rexmit queue if possible */
1256 if (tp->retrans_out > 1) 1275 if (tp->retrans_out > 1)
1257 return 0; 1276 return 0;
@@ -1328,9 +1347,18 @@ void tcp_enter_frto(struct sock *sk)
1328 } 1347 }
1329 tcp_sync_left_out(tp); 1348 tcp_sync_left_out(tp);
1330 1349
1350 /* Earlier loss recovery underway (see RFC4138; Appendix B).
1351 * The last condition is necessary at least in tp->frto_counter case.
1352 */
1353 if (IsSackFrto() && (tp->frto_counter ||
1354 ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
1355 after(tp->high_seq, tp->snd_una)) {
1356 tp->frto_highmark = tp->high_seq;
1357 } else {
1358 tp->frto_highmark = tp->snd_nxt;
1359 }
1331 tcp_set_ca_state(sk, TCP_CA_Disorder); 1360 tcp_set_ca_state(sk, TCP_CA_Disorder);
1332 tp->high_seq = tp->snd_nxt; 1361 tp->high_seq = tp->snd_nxt;
1333 tp->frto_highmark = tp->snd_nxt;
1334 tp->frto_counter = 1; 1362 tp->frto_counter = 1;
1335} 1363}
1336 1364
@@ -2566,6 +2594,10 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
2566 * Rationale: if the RTO was spurious, new ACKs should arrive from the 2594 * Rationale: if the RTO was spurious, new ACKs should arrive from the
2567 * original window even after we transmit two new data segments. 2595 * original window even after we transmit two new data segments.
2568 * 2596 *
2597 * SACK version:
2598 * on first step, wait until first cumulative ACK arrives, then move to
2599 * the second step. In second step, the next ACK decides.
2600 *
2569 * F-RTO is implemented (mainly) in four functions: 2601 * F-RTO is implemented (mainly) in four functions:
2570 * - tcp_use_frto() is used to determine if TCP is can use F-RTO 2602 * - tcp_use_frto() is used to determine if TCP is can use F-RTO
2571 * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is 2603 * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is
@@ -2590,16 +2622,38 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
2590 return 1; 2622 return 1;
2591 } 2623 }
2592 2624
2593 /* RFC4138 shortcoming in step 2; should also have case c): ACK isn't 2625 if (!IsSackFrto() || IsReno(tp)) {
2594 * duplicate nor advances window, e.g., opposite dir data, winupdate 2626 /* RFC4138 shortcoming in step 2; should also have case c):
2595 */ 2627 * ACK isn't duplicate nor advances window, e.g., opposite dir
2596 if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) && 2628 * data, winupdate
2597 !(flag&FLAG_FORWARD_PROGRESS)) 2629 */
2598 return 1; 2630 if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) &&
2631 !(flag&FLAG_FORWARD_PROGRESS))
2632 return 1;
2599 2633
2600 if (!(flag&FLAG_DATA_ACKED)) { 2634 if (!(flag&FLAG_DATA_ACKED)) {
2601 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), flag); 2635 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
2602 return 1; 2636 flag);
2637 return 1;
2638 }
2639 } else {
2640 if (!(flag&FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
2641 /* Prevent sending of new data. */
2642 tp->snd_cwnd = min(tp->snd_cwnd,
2643 tcp_packets_in_flight(tp));
2644 return 1;
2645 }
2646
2647 if ((tp->frto_counter == 2) &&
2648 (!(flag&FLAG_FORWARD_PROGRESS) ||
2649 ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) {
2650 /* RFC4138 shortcoming (see comment above) */
2651 if (!(flag&FLAG_FORWARD_PROGRESS) && (flag&FLAG_NOT_DUP))
2652 return 1;
2653
2654 tcp_enter_frto_loss(sk, 3, flag);
2655 return 1;
2656 }
2603 } 2657 }
2604 2658
2605 if (tp->frto_counter == 1) { 2659 if (tp->frto_counter == 1) {