diff options
author | Yuchung Cheng <ycheng@google.com> | 2012-01-19 09:42:21 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-01-22 15:08:44 -0500 |
commit | 974c12360dfe6ab01201fe9e708e7755c413f8b6 (patch) | |
tree | 8cdf87f44c8be550af2ec1530f8cb271fb1a2e1f /net/ipv4 | |
parent | d0249e44432aa0ffcf710b64449b8eaa3722547e (diff) |
tcp: detect loss above high_seq in recovery
Correctly implement a loss detection heuristic: New sequences (above
high_seq) sent during the fast recovery are deemed lost when higher
sequences are SACKed.
Current code does not catch these losses, because tcp_mark_head_lost()
does not check packets beyond high_seq. The fix is straight-forward by
checking packets until the highest sacked packet. In addition, all the
FLAG_DATA_LOST logic are in-effective and redundant and can be removed.
Update the loss heuristic comments. The algorithm above is documented
as heuristic B, but it is redundant too because heuristic A already
covers B.
Note that this change only marks some forward-retransmitted packets LOST.
It does NOT forbid TCP performing further CWR on new losses. A potential
follow-up patch under preparation is to perform another CWR on "new"
losses such as
1) sequence above high_seq is lost (by resetting high_seq to snd_nxt)
2) retransmission is lost.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/proc.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 41 |
2 files changed, 15 insertions, 27 deletions
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3569d8ecaeac..6afc807ee2ad 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -216,7 +216,6 @@ static const struct snmp_mib snmp4_net_list[] = { | |||
216 | SNMP_MIB_ITEM("TCPPartialUndo", LINUX_MIB_TCPPARTIALUNDO), | 216 | SNMP_MIB_ITEM("TCPPartialUndo", LINUX_MIB_TCPPARTIALUNDO), |
217 | SNMP_MIB_ITEM("TCPDSACKUndo", LINUX_MIB_TCPDSACKUNDO), | 217 | SNMP_MIB_ITEM("TCPDSACKUndo", LINUX_MIB_TCPDSACKUNDO), |
218 | SNMP_MIB_ITEM("TCPLossUndo", LINUX_MIB_TCPLOSSUNDO), | 218 | SNMP_MIB_ITEM("TCPLossUndo", LINUX_MIB_TCPLOSSUNDO), |
219 | SNMP_MIB_ITEM("TCPLoss", LINUX_MIB_TCPLOSS), | ||
220 | SNMP_MIB_ITEM("TCPLostRetransmit", LINUX_MIB_TCPLOSTRETRANSMIT), | 219 | SNMP_MIB_ITEM("TCPLostRetransmit", LINUX_MIB_TCPLOSTRETRANSMIT), |
221 | SNMP_MIB_ITEM("TCPRenoFailures", LINUX_MIB_TCPRENOFAILURES), | 220 | SNMP_MIB_ITEM("TCPRenoFailures", LINUX_MIB_TCPRENOFAILURES), |
222 | SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES), | 221 | SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES), |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2877c3e09587..976034f82320 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -105,7 +105,6 @@ int sysctl_tcp_abc __read_mostly; | |||
105 | #define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ | 105 | #define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ |
106 | #define FLAG_DATA_SACKED 0x20 /* New SACK. */ | 106 | #define FLAG_DATA_SACKED 0x20 /* New SACK. */ |
107 | #define FLAG_ECE 0x40 /* ECE in this ACK */ | 107 | #define FLAG_ECE 0x40 /* ECE in this ACK */ |
108 | #define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ | ||
109 | #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ | 108 | #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ |
110 | #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ | 109 | #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ |
111 | #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ | 110 | #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ |
@@ -1040,13 +1039,11 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, | |||
1040 | * These 6 states form finite state machine, controlled by the following events: | 1039 | * These 6 states form finite state machine, controlled by the following events: |
1041 | * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue()) | 1040 | * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue()) |
1042 | * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) | 1041 | * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) |
1043 | * 3. Loss detection event of one of three flavors: | 1042 | * 3. Loss detection event of two flavors: |
1044 | * A. Scoreboard estimator decided the packet is lost. | 1043 | * A. Scoreboard estimator decided the packet is lost. |
1045 | * A'. Reno "three dupacks" marks head of queue lost. | 1044 | * A'. Reno "three dupacks" marks head of queue lost. |
1046 | * A''. Its FACK modfication, head until snd.fack is lost. | 1045 | * A''. Its FACK modification, head until snd.fack is lost. |
1047 | * B. SACK arrives sacking data transmitted after never retransmitted | 1046 | * B. SACK arrives sacking SND.NXT at the moment, when the |
1048 | * hole was sent out. | ||
1049 | * C. SACK arrives sacking SND.NXT at the moment, when the | ||
1050 | * segment was retransmitted. | 1047 | * segment was retransmitted. |
1051 | * 4. D-SACK added new rule: D-SACK changes any tag to S. | 1048 | * 4. D-SACK added new rule: D-SACK changes any tag to S. |
1052 | * | 1049 | * |
@@ -1153,7 +1150,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, | |||
1153 | } | 1150 | } |
1154 | 1151 | ||
1155 | /* Check for lost retransmit. This superb idea is borrowed from "ratehalving". | 1152 | /* Check for lost retransmit. This superb idea is borrowed from "ratehalving". |
1156 | * Event "C". Later note: FACK people cheated me again 8), we have to account | 1153 | * Event "B". Later note: FACK people cheated me again 8), we have to account |
1157 | * for reordering! Ugly, but should help. | 1154 | * for reordering! Ugly, but should help. |
1158 | * | 1155 | * |
1159 | * Search retransmitted skbs from write_queue that were sent when snd_nxt was | 1156 | * Search retransmitted skbs from write_queue that were sent when snd_nxt was |
@@ -1844,10 +1841,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, | |||
1844 | if (found_dup_sack && ((i + 1) == first_sack_index)) | 1841 | if (found_dup_sack && ((i + 1) == first_sack_index)) |
1845 | next_dup = &sp[i + 1]; | 1842 | next_dup = &sp[i + 1]; |
1846 | 1843 | ||
1847 | /* Event "B" in the comment above. */ | ||
1848 | if (after(end_seq, tp->high_seq)) | ||
1849 | state.flag |= FLAG_DATA_LOST; | ||
1850 | |||
1851 | /* Skip too early cached blocks */ | 1844 | /* Skip too early cached blocks */ |
1852 | while (tcp_sack_cache_ok(tp, cache) && | 1845 | while (tcp_sack_cache_ok(tp, cache) && |
1853 | !before(start_seq, cache->end_seq)) | 1846 | !before(start_seq, cache->end_seq)) |
@@ -2515,8 +2508,11 @@ static void tcp_timeout_skbs(struct sock *sk) | |||
2515 | tcp_verify_left_out(tp); | 2508 | tcp_verify_left_out(tp); |
2516 | } | 2509 | } |
2517 | 2510 | ||
2518 | /* Mark head of queue up as lost. With RFC3517 SACK, the packets is | 2511 | /* Detect loss in event "A" above by marking head of queue up as lost. |
2519 | * is against sacked "cnt", otherwise it's against facked "cnt" | 2512 | * For FACK or non-SACK(Reno) senders, the first "packets" number of segments |
2513 | * are considered lost. For RFC3517 SACK, a segment is considered lost if it | ||
2514 | * has at least tp->reordering SACKed seqments above it; "packets" refers to | ||
2515 | * the maximum SACKed segments to pass before reaching this limit. | ||
2520 | */ | 2516 | */ |
2521 | static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | 2517 | static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) |
2522 | { | 2518 | { |
@@ -2525,6 +2521,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | |||
2525 | int cnt, oldcnt; | 2521 | int cnt, oldcnt; |
2526 | int err; | 2522 | int err; |
2527 | unsigned int mss; | 2523 | unsigned int mss; |
2524 | /* Use SACK to deduce losses of new sequences sent during recovery */ | ||
2525 | const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; | ||
2528 | 2526 | ||
2529 | WARN_ON(packets > tp->packets_out); | 2527 | WARN_ON(packets > tp->packets_out); |
2530 | if (tp->lost_skb_hint) { | 2528 | if (tp->lost_skb_hint) { |
@@ -2546,7 +2544,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | |||
2546 | tp->lost_skb_hint = skb; | 2544 | tp->lost_skb_hint = skb; |
2547 | tp->lost_cnt_hint = cnt; | 2545 | tp->lost_cnt_hint = cnt; |
2548 | 2546 | ||
2549 | if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) | 2547 | if (after(TCP_SKB_CB(skb)->end_seq, loss_high)) |
2550 | break; | 2548 | break; |
2551 | 2549 | ||
2552 | oldcnt = cnt; | 2550 | oldcnt = cnt; |
@@ -3033,19 +3031,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3033 | if (tcp_check_sack_reneging(sk, flag)) | 3031 | if (tcp_check_sack_reneging(sk, flag)) |
3034 | return; | 3032 | return; |
3035 | 3033 | ||
3036 | /* C. Process data loss notification, provided it is valid. */ | 3034 | /* C. Check consistency of the current state. */ |
3037 | if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) && | ||
3038 | before(tp->snd_una, tp->high_seq) && | ||
3039 | icsk->icsk_ca_state != TCP_CA_Open && | ||
3040 | tp->fackets_out > tp->reordering) { | ||
3041 | tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0); | ||
3042 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS); | ||
3043 | } | ||
3044 | |||
3045 | /* D. Check consistency of the current state. */ | ||
3046 | tcp_verify_left_out(tp); | 3035 | tcp_verify_left_out(tp); |
3047 | 3036 | ||
3048 | /* E. Check state exit conditions. State can be terminated | 3037 | /* D. Check state exit conditions. State can be terminated |
3049 | * when high_seq is ACKed. */ | 3038 | * when high_seq is ACKed. */ |
3050 | if (icsk->icsk_ca_state == TCP_CA_Open) { | 3039 | if (icsk->icsk_ca_state == TCP_CA_Open) { |
3051 | WARN_ON(tp->retrans_out != 0); | 3040 | WARN_ON(tp->retrans_out != 0); |
@@ -3077,7 +3066,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3077 | } | 3066 | } |
3078 | } | 3067 | } |
3079 | 3068 | ||
3080 | /* F. Process state. */ | 3069 | /* E. Process state. */ |
3081 | switch (icsk->icsk_ca_state) { | 3070 | switch (icsk->icsk_ca_state) { |
3082 | case TCP_CA_Recovery: | 3071 | case TCP_CA_Recovery: |
3083 | if (!(flag & FLAG_SND_UNA_ADVANCED)) { | 3072 | if (!(flag & FLAG_SND_UNA_ADVANCED)) { |