Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c	|	86
1 file changed, 44 insertions(+), 42 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2877c3e09587..53c8ce4046b2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -105,7 +105,6 @@ int sysctl_tcp_abc __read_mostly;
 #define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/
 #define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
 #define FLAG_ECE		0x40 /* ECE in this ACK				*/
-#define FLAG_DATA_LOST		0x80 /* SACK detected data lossage.		*/
 #define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
 #define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO */
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
@@ -1040,13 +1039,11 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
  * These 6 states form finite state machine, controlled by the following events:
  * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
  * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
- * 3. Loss detection event of one of three flavors:
+ * 3. Loss detection event of two flavors:
  *	A. Scoreboard estimator decided the packet is lost.
  *	   A'. Reno "three dupacks" marks head of queue lost.
- *	   A''. Its FACK modfication, head until snd.fack is lost.
- *	B. SACK arrives sacking data transmitted after never retransmitted
- *	   hole was sent out.
- *	C. SACK arrives sacking SND.NXT at the moment, when the
+ *	   A''. Its FACK modification, head until snd.fack is lost.
+ *	B. SACK arrives sacking SND.NXT at the moment, when the
  *	   segment was retransmitted.
  * 4. D-SACK added new rule: D-SACK changes any tag to S.
  *
@@ -1153,7 +1150,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
 }
 
 /* Check for lost retransmit. This superb idea is borrowed from "ratehalving".
- * Event "C". Later note: FACK people cheated me again 8), we have to account
+ * Event "B". Later note: FACK people cheated me again 8), we have to account
  * for reordering! Ugly, but should help.
  *
  * Search retransmitted skbs from write_queue that were sent when snd_nxt was
@@ -1310,25 +1307,26 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 	return in_sack;
 }
 
-static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
-			  struct tcp_sacktag_state *state,
+/* Mark the given newly-SACKed range as such, adjusting counters and hints. */
+static u8 tcp_sacktag_one(struct sock *sk,
+			  struct tcp_sacktag_state *state, u8 sacked,
+			  u32 start_seq, u32 end_seq,
 			  int dup_sack, int pcount)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	u8 sacked = TCP_SKB_CB(skb)->sacked;
 	int fack_count = state->fack_count;
 
 	/* Account D-SACK for retransmitted packet. */
 	if (dup_sack && (sacked & TCPCB_RETRANS)) {
 		if (tp->undo_marker && tp->undo_retrans &&
-		    after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+		    after(end_seq, tp->undo_marker))
 			tp->undo_retrans--;
 		if (sacked & TCPCB_SACKED_ACKED)
 			state->reord = min(fack_count, state->reord);
 	}
 
 	/* Nothing to do; acked frame is about to be dropped (was ACKed). */
-	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+	if (!after(end_seq, tp->snd_una))
 		return sacked;
 
 	if (!(sacked & TCPCB_SACKED_ACKED)) {
@@ -1347,13 +1345,13 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
 			/* New sack for not retransmitted frame,
 			 * which was in hole. It is reordering.
 			 */
-			if (before(TCP_SKB_CB(skb)->seq,
+			if (before(start_seq,
 				   tcp_highest_sack_seq(tp)))
 				state->reord = min(fack_count,
 						   state->reord);
 
 			/* SACK enhanced F-RTO (RFC4138; Appendix B) */
-			if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
+			if (!after(end_seq, tp->frto_highmark))
 				state->flag |= FLAG_ONLY_ORIG_SACKED;
 		}
 
@@ -1371,8 +1369,7 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
 
 	/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
 	if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
-	    before(TCP_SKB_CB(skb)->seq,
-		   TCP_SKB_CB(tp->lost_skb_hint)->seq))
+	    before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
 		tp->lost_cnt_hint += pcount;
 
 	if (fack_count > tp->fackets_out)
@@ -1391,6 +1388,9 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
 	return sacked;
 }
 
+/* Shift newly-SACKed bytes from this skb to the immediately previous
+ * already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
+ */
 static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 			   struct tcp_sacktag_state *state,
 			   unsigned int pcount, int shifted, int mss,
@@ -1398,10 +1398,13 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
+	u32 start_seq = TCP_SKB_CB(skb)->seq;	/* start of newly-SACKed */
+	u32 end_seq = start_seq + shifted;	/* end of newly-SACKed */
 
 	BUG_ON(!pcount);
 
-	if (skb == tp->lost_skb_hint)
+	/* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). */
+	if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint))
 		tp->lost_cnt_hint += pcount;
 
 	TCP_SKB_CB(prev)->end_seq += shifted;
@@ -1427,8 +1430,11 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 		skb_shinfo(skb)->gso_type = 0;
 	}
 
-	/* We discard results */
-	tcp_sacktag_one(skb, sk, state, dup_sack, pcount);
+	/* Adjust counters and hints for the newly sacked sequence range but
+	 * discard the return value since prev is already marked.
+	 */
+	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
+			start_seq, end_seq, dup_sack, pcount);
 
 	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
 	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
@@ -1667,10 +1673,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 			break;
 
 		if (in_sack) {
-			TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk,
-								  state,
-								  dup_sack,
-								  tcp_skb_pcount(skb));
+			TCP_SKB_CB(skb)->sacked =
+				tcp_sacktag_one(sk,
+						state,
+						TCP_SKB_CB(skb)->sacked,
+						TCP_SKB_CB(skb)->seq,
+						TCP_SKB_CB(skb)->end_seq,
+						dup_sack,
+						tcp_skb_pcount(skb));
 
 			if (!before(TCP_SKB_CB(skb)->seq,
 				    tcp_highest_sack_seq(tp)))
@@ -1844,10 +1854,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 		if (found_dup_sack && ((i + 1) == first_sack_index))
 			next_dup = &sp[i + 1];
 
-		/* Event "B" in the comment above. */
-		if (after(end_seq, tp->high_seq))
-			state.flag |= FLAG_DATA_LOST;
-
 		/* Skip too early cached blocks */
 		while (tcp_sack_cache_ok(tp, cache) &&
 		       !before(start_seq, cache->end_seq))
@@ -2515,8 +2521,11 @@ static void tcp_timeout_skbs(struct sock *sk)
 	tcp_verify_left_out(tp);
 }
 
-/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
- * is against sacked "cnt", otherwise it's against facked "cnt"
+/* Detect loss in event "A" above by marking head of queue up as lost.
+ * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
+ * are considered lost. For RFC3517 SACK, a segment is considered lost if it
+ * has at least tp->reordering SACKed segments above it; "packets" refers to
+ * the maximum SACKed segments to pass before reaching this limit.
  */
 static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 {
@@ -2525,6 +2534,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 	int cnt, oldcnt;
 	int err;
 	unsigned int mss;
+	/* Use SACK to deduce losses of new sequences sent during recovery */
+	const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
 
 	WARN_ON(packets > tp->packets_out);
 	if (tp->lost_skb_hint) {
@@ -2546,7 +2557,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 		tp->lost_skb_hint = skb;
 		tp->lost_cnt_hint = cnt;
 
-		if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+		if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
 			break;
 
 		oldcnt = cnt;
@@ -3033,19 +3044,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 	if (tcp_check_sack_reneging(sk, flag))
 		return;
 
-	/* C. Process data loss notification, provided it is valid. */
-	if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
-	    before(tp->snd_una, tp->high_seq) &&
-	    icsk->icsk_ca_state != TCP_CA_Open &&
-	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
-	}
-
-	/* D. Check consistency of the current state. */
+	/* C. Check consistency of the current state. */
 	tcp_verify_left_out(tp);
 
-	/* E. Check state exit conditions. State can be terminated
+	/* D. Check state exit conditions. State can be terminated
 	 * when high_seq is ACKed. */
 	if (icsk->icsk_ca_state == TCP_CA_Open) {
 		WARN_ON(tp->retrans_out != 0);
@@ -3077,7 +3079,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 		}
 	}
 
-	/* F. Process state. */
+	/* E. Process state. */
 	switch (icsk->icsk_ca_state) {
 	case TCP_CA_Recovery:
 		if (!(flag & FLAG_SND_UNA_ADVANCED)) {
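
The core of this patch is that tcp_sacktag_one() now tags an explicit [start_seq, end_seq) range instead of deriving the range from an skb, which lets tcp_shifted_skb() account only for the bytes actually shifted into the already-SACKed prev skb. The standalone userspace sketch below (not kernel code; names such as seg, sack_range and shift_to_prev are hypothetical) illustrates that idea under those assumptions:

/*
 * Sketch: when "shifted" bytes move from an skb into the already-SACKed
 * previous skb, only those bytes are newly SACKed, so accounting must work
 * on a sequence range rather than on whole-skb boundaries.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct seg {
	uint32_t seq;		/* first sequence number carried     */
	uint32_t end_seq;	/* one past the last sequence number */
	bool sacked;		/* already marked SACKed?            */
};

/* Account for a newly-SACKed range; no skb needed, just the range. */
static void sack_range(uint32_t start_seq, uint32_t end_seq)
{
	printf("newly SACKed range: [%u, %u)\n",
	       (unsigned)start_seq, (unsigned)end_seq);
}

/* Rough analogue of tcp_shifted_skb(): move "shifted" bytes to prev. */
static void shift_to_prev(struct seg *prev, struct seg *skb, uint32_t shifted)
{
	uint32_t start_seq = skb->seq;			/* start of newly-SACKed */
	uint32_t end_seq = start_seq + shifted;		/* end of newly-SACKed   */

	prev->end_seq += shifted;	/* prev grows by the shifted bytes */
	skb->seq += shifted;		/* skb shrinks from the front      */

	/* Tag only the shifted bytes, not the skb's original full range. */
	sack_range(start_seq, end_seq);
}

int main(void)
{
	struct seg prev = { 1000, 2000, true };		/* already SACKed             */
	struct seg skb = { 2000, 3000, false };		/* first 500 bytes now SACKed */

	shift_to_prev(&prev, &skb, 500);		/* prints [2000, 2500)        */
	return 0;
}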
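
The rewritten tcp_mark_head_lost() comment describes the RFC 3517 rule that an un-SACKed segment is treated as lost once at least tp->reordering SACKed segments lie above it. A minimal standalone model of that counting rule (userspace C over a toy scoreboard array; all names hypothetical, not kernel code) might look like:

#include <stdbool.h>
#include <stdio.h>

#define NSEGS 8

int main(void)
{
	/* Toy scoreboard: which in-flight segments have been SACKed. */
	bool sacked[NSEGS] = { false, true, false, true, true, true, false, false };
	int reordering = 3;	/* dupthresh */

	for (int i = 0; i < NSEGS; i++) {
		if (sacked[i])
			continue;
		/* Count SACKed segments above (after) this hole. */
		int above = 0;
		for (int j = i + 1; j < NSEGS; j++)
			above += sacked[j];
		if (above >= reordering)
			printf("segment %d considered lost (%d SACKed above)\n",
			       i, above);
	}
	return 0;
}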