-rw-r--r--   Documentation/networking/ip-sysctl.txt |  18
-rw-r--r--   include/linux/tcp.h                    |   3
-rw-r--r--   net/ipv4/tcp_input.c                   |  73
3 files changed, 68 insertions, 26 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 8a977a0aaede..f98ca633b528 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -225,19 +225,13 @@ tcp_fin_timeout - INTEGER
         Default: 60 seconds
 
 tcp_frto - INTEGER
-        Enables Forward RTO-Recovery (F-RTO) defined in RFC4138.
+        Enables Forward RTO-Recovery (F-RTO) defined in RFC5682.
         F-RTO is an enhanced recovery algorithm for TCP retransmission
-        timeouts.  It is particularly beneficial in wireless environments
-        where packet loss is typically due to random radio interference
-        rather than intermediate router congestion.  F-RTO is sender-side
-        only modification. Therefore it does not require any support from
-        the peer.
-
-        If set to 1, basic version is enabled.  2 enables SACK enhanced
-        F-RTO if flow uses SACK.  The basic version can be used also when
-        SACK is in use though scenario(s) with it exists where F-RTO
-        interacts badly with the packet counting of the SACK enabled TCP
-        flow.
+        timeouts.  It is particularly beneficial in networks where the
+        RTT fluctuates (e.g., wireless).  F-RTO is a sender-side-only
+        modification; it does not require any support from the peer.
+
+        By default it's enabled with a non-zero value. 0 disables F-RTO.
 
 tcp_keepalive_time - INTEGER
         How often TCP sends out keepalive messages when keepalive is enabled.
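
The new semantics of the knob are simply non-zero = F-RTO enabled, 0 = disabled. As a quick illustration, here is a minimal user-space sketch (not part of the patch) that reads the value and then turns F-RTO off through the usual procfs path, assuming root privileges for the write:

#include <stdio.h>

int main(void)
{
        const char *path = "/proc/sys/net/ipv4/tcp_frto";
        FILE *f = fopen(path, "r");
        int val = -1;

        if (f) {
                if (fscanf(f, "%d", &val) == 1)
                        printf("tcp_frto = %d (%s)\n", val,
                               val ? "F-RTO enabled" : "F-RTO disabled");
                fclose(f);
        }

        /* Writing requires root; 0 disables F-RTO, any non-zero value enables it. */
        f = fopen(path, "w");
        if (f) {
                fputs("0\n", f);
                fclose(f);
        }
        return 0;
}
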
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index f5f203b36379..5adbc33d1ab3 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -192,7 +192,8 @@ struct tcp_sock {
         u8      nonagle     : 4,/* Disable Nagle algorithm?             */
                 thin_lto    : 1,/* Use linear timeouts for thin streams */
                 thin_dupack : 1,/* Fast retransmit on first dupack      */
-                repair      : 1;
+                repair      : 1,
+                frto        : 1;/* F-RTO (RFC5682) activated in CA_Loss */
         u8      repair_queue;
         u8      do_early_retrans:1,/* Enable RFC5827 early-retransmit  */
                 syn_data:1,     /* SYN includes data */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8d821e45b917..b2b36196b342 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -107,6 +107,7 @@ int sysctl_tcp_early_retrans __read_mostly = 3;
 #define FLAG_DATA_SACKED        0x20 /* New SACK. */
 #define FLAG_ECE                0x40 /* ECE in this ACK */
 #define FLAG_SLOWPATH           0x100 /* Do not skip RFC checks for window update.*/
+#define FLAG_ORIG_SACK_ACKED    0x200 /* Never retransmitted data are (s)acked */
 #define FLAG_SND_UNA_ADVANCED   0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
 #define FLAG_DSACKING_ACK       0x800 /* SACK blocks contained D-SACK info */
 #define FLAG_SACK_RENEGING      0x2000 /* snd_una advanced to a sacked seq */
@@ -1155,6 +1156,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
                                            tcp_highest_sack_seq(tp)))
                                         state->reord = min(fack_count,
                                                            state->reord);
+                                if (!after(end_seq, tp->high_seq))
+                                        state->flag |= FLAG_ORIG_SACK_ACKED;
                         }
 
                         if (sacked & TCPCB_LOST) {
@@ -1835,10 +1838,13 @@ void tcp_enter_loss(struct sock *sk, int how)
         const struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *skb;
+        bool new_recovery = false;
 
         /* Reduce ssthresh if it has not yet been made inside this window. */
-        if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
+        if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
+            !after(tp->high_seq, tp->snd_una) ||
             (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
+                new_recovery = true;
                 tp->prior_ssthresh = tcp_current_ssthresh(sk);
                 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
                 tcp_ca_event(sk, CA_EVENT_LOSS);
@@ -1883,6 +1889,14 @@ void tcp_enter_loss(struct sock *sk, int how)
         tcp_set_ca_state(sk, TCP_CA_Loss);
         tp->high_seq = tp->snd_nxt;
         TCP_ECN_queue_cwr(tp);
+
+        /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
+         * loss recovery is underway except recurring timeout(s) on
+         * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
+         */
+        tp->frto = sysctl_tcp_frto &&
+                   (new_recovery || icsk->icsk_retransmits) &&
+                   !inet_csk(sk)->icsk_mtup.probe_size;
 }
 
 /* If ACK arrived pointing to a remembered SACK, it means that our
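
The comment above compresses RFC 5682 sec 3.1 step 1 and sec 3.2 into one expression. A simplified, self-contained restatement of the arming condition (plain C with hypothetical parameter names, not the kernel code):

#include <stdbool.h>

/* Simplified restatement of the condition that arms tp->frto at an RTO.
 * The parameters are hypothetical plain values standing in for the real
 * sysctl and socket state.
 */
bool frto_armed_at_rto(int sysctl_tcp_frto, bool new_recovery,
                       int rto_retransmits, bool mtu_probe_outstanding)
{
        /* Use F-RTO only when no earlier loss recovery was underway, or the
         * timeout keeps recurring on the same SND.UNA (sec 3.2), and never
         * while a path-MTU probe is in flight.
         */
        return sysctl_tcp_frto != 0 &&
               (new_recovery || rto_retransmits > 0) &&
               !mtu_probe_outstanding;
}
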
@@ -2426,12 +2440,12 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
         return failed;
 }
 
-/* Undo during loss recovery after partial ACK. */
-static bool tcp_try_undo_loss(struct sock *sk)
+/* Undo during loss recovery after partial ACK or using F-RTO. */
+static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
 {
         struct tcp_sock *tp = tcp_sk(sk);
 
-        if (tcp_may_undo(tp)) {
+        if (frto_undo || tcp_may_undo(tp)) {
                 struct sk_buff *skb;
                 tcp_for_write_queue(skb, sk) {
                         if (skb == tcp_send_head(sk))
@@ -2445,9 +2459,12 @@ static bool tcp_try_undo_loss(struct sock *sk)
                 tp->lost_out = 0;
                 tcp_undo_cwr(sk, true);
                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
+                if (frto_undo)
+                        NET_INC_STATS_BH(sock_net(sk),
+                                         LINUX_MIB_TCPSPURIOUSRTOS);
                 inet_csk(sk)->icsk_retransmits = 0;
                 tp->undo_marker = 0;
-                if (tcp_is_sack(tp))
+                if (frto_undo || tcp_is_sack(tp))
                         tcp_set_ca_state(sk, TCP_CA_Open);
                 return true;
         }
@@ -2667,24 +2684,52 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 /* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
  * recovered or spurious. Otherwise retransmits more on partial ACKs.
  */
-static void tcp_process_loss(struct sock *sk, int flag)
+static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 {
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
+        bool recovered = !before(tp->snd_una, tp->high_seq);
 
-        if (!before(tp->snd_una, tp->high_seq)) {
+        if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
+                if (flag & FLAG_ORIG_SACK_ACKED) {
+                        /* Step 3.b. A timeout is spurious if not all data are
+                         * lost, i.e., never-retransmitted data are (s)acked.
+                         */
+                        tcp_try_undo_loss(sk, true);
+                        return;
+                }
+                if (after(tp->snd_nxt, tp->high_seq) &&
+                    (flag & FLAG_DATA_SACKED || is_dupack)) {
+                        tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
+                } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
+                        tp->high_seq = tp->snd_nxt;
+                        __tcp_push_pending_frames(sk, tcp_current_mss(sk),
+                                                  TCP_NAGLE_OFF);
+                        if (after(tp->snd_nxt, tp->high_seq))
+                                return; /* Step 2.b */
+                        tp->frto = 0;
+                }
+        }
+
+        if (recovered) {
+                /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
                 icsk->icsk_retransmits = 0;
                 tcp_try_undo_recovery(sk);
                 return;
         }
-
         if (flag & FLAG_DATA_ACKED)
                 icsk->icsk_retransmits = 0;
-        if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
-                tcp_reset_reno_sack(tp);
-        if (tcp_try_undo_loss(sk))
+        if (tcp_is_reno(tp)) {
+                /* A Reno DUPACK means new data in F-RTO step 2.b above are
+                 * delivered. Lower inflight to clock out (re)transmissions.
+                 */
+                if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
+                        tcp_add_reno_sack(sk);
+                else if (flag & FLAG_SND_UNA_ADVANCED)
+                        tcp_reset_reno_sack(tp);
+        }
+        if (tcp_try_undo_loss(sk, false))
                 return;
-        tcp_moderate_cwnd(tp);
         tcp_xmit_retransmit_queue(sk);
 }
 
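
tcp_process_loss() interleaves the RFC 5682 sec 3.1 steps with the existing CA_Loss handling. A compact sketch of just the F-RTO decision, using hypothetical types and names rather than the kernel's socket state, may help map the branches to the RFC steps:

#include <stdbool.h>

/* Hypothetical summary of what the incoming ACK tells us in CA_Loss. */
struct frto_ack {
        bool orig_sack_acked;           /* never-retransmitted data <= high_seq (s)acked */
        bool new_data_sacked_or_dupack; /* SACK/dupack arrived after new data was sent */
        bool snd_una_advanced;          /* cumulative ACK moved forward */
        bool recovered;                 /* snd_una has reached high_seq */
        bool new_data_was_sent;         /* pushing pending frames put new data on the wire */
};

enum frto_action {
        FRTO_UNDO,              /* step 3.b: RTO was spurious, undo cwnd/ssthresh */
        FRTO_KEEP_WAITING,      /* step 2.b: new data sent, wait for the next ACK */
        FRTO_CONVENTIONAL       /* loss was real (or undecided): conventional recovery */
};

enum frto_action frto_decide(const struct frto_ack *a, bool *frto_armed)
{
        if (!*frto_armed)
                return FRTO_CONVENTIONAL;

        if (a->orig_sack_acked)                 /* step 3.b */
                return FRTO_UNDO;

        if (a->new_data_sacked_or_dupack) {     /* 2nd part of step 3.a */
                *frto_armed = false;
                return FRTO_CONVENTIONAL;
        }

        if (a->snd_una_advanced && !a->recovered) {
                if (a->new_data_was_sent)       /* step 2.b */
                        return FRTO_KEEP_WAITING;
                *frto_armed = false;            /* nothing new to send */
        }
        return FRTO_CONVENTIONAL;
}
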
@@ -2764,7 +2809,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
                 newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
                 break;
         case TCP_CA_Loss:
-                tcp_process_loss(sk, flag);
+                tcp_process_loss(sk, flag, is_dupack);
                 if (icsk->icsk_ca_state != TCP_CA_Open)
                         return;
                 /* Fall through to processing in Open state. */
@@ -3003,6 +3048,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                         }
                         if (!(sacked & TCPCB_SACKED_ACKED))
                                 reord = min(pkts_acked, reord);
+                        if (!after(scb->end_seq, tp->high_seq))
+                                flag |= FLAG_ORIG_SACK_ACKED;
                 }
 
                 if (sacked & TCPCB_SACKED_ACKED)
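
Both the SACK path (tcp_sacktag_one) and the cumulative-ACK path (tcp_clean_rtx_queue) raise FLAG_ORIG_SACK_ACKED under the same condition. A tiny illustrative predicate (hypothetical helper, not from the patch) makes the condition explicit:

#include <stdbool.h>
#include <stdint.h>

/* An ACK/SACK covers "original" data if that data was never retransmitted
 * and ends at or below high_seq (everything outstanding when the RTO fired).
 * Sequence-number wrap-around, which the kernel's before()/after() macros
 * handle, is ignored here for brevity.
 */
bool acks_original_data(uint32_t end_seq, uint32_t high_seq,
                        bool ever_retransmitted)
{
        return !ever_retransmitted && end_seq <= high_seq;
}
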