diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 86 |
1 files changed, 44 insertions, 42 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2877c3e09587..53c8ce4046b2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -105,7 +105,6 @@ int sysctl_tcp_abc __read_mostly; | |||
105 | #define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ | 105 | #define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ |
106 | #define FLAG_DATA_SACKED 0x20 /* New SACK. */ | 106 | #define FLAG_DATA_SACKED 0x20 /* New SACK. */ |
107 | #define FLAG_ECE 0x40 /* ECE in this ACK */ | 107 | #define FLAG_ECE 0x40 /* ECE in this ACK */ |
108 | #define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ | ||
109 | #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ | 108 | #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ |
110 | #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ | 109 | #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ |
111 | #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ | 110 | #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ |
@@ -1040,13 +1039,11 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, | |||
1040 | * These 6 states form finite state machine, controlled by the following events: | 1039 | * These 6 states form finite state machine, controlled by the following events: |
1041 | * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue()) | 1040 | * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue()) |
1042 | * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) | 1041 | * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) |
1043 | * 3. Loss detection event of one of three flavors: | 1042 | * 3. Loss detection event of two flavors: |
1044 | * A. Scoreboard estimator decided the packet is lost. | 1043 | * A. Scoreboard estimator decided the packet is lost. |
1045 | * A'. Reno "three dupacks" marks head of queue lost. | 1044 | * A'. Reno "three dupacks" marks head of queue lost. |
1046 | * A''. Its FACK modfication, head until snd.fack is lost. | 1045 | * A''. Its FACK modification, head until snd.fack is lost. |
1047 | * B. SACK arrives sacking data transmitted after never retransmitted | 1046 | * B. SACK arrives sacking SND.NXT at the moment, when the |
1048 | * hole was sent out. | ||
1049 | * C. SACK arrives sacking SND.NXT at the moment, when the | ||
1050 | * segment was retransmitted. | 1047 | * segment was retransmitted. |
1051 | * 4. D-SACK added new rule: D-SACK changes any tag to S. | 1048 | * 4. D-SACK added new rule: D-SACK changes any tag to S. |
1052 | * | 1049 | * |
@@ -1153,7 +1150,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, | |||
1153 | } | 1150 | } |
1154 | 1151 | ||
1155 | /* Check for lost retransmit. This superb idea is borrowed from "ratehalving". | 1152 | /* Check for lost retransmit. This superb idea is borrowed from "ratehalving". |
1156 | * Event "C". Later note: FACK people cheated me again 8), we have to account | 1153 | * Event "B". Later note: FACK people cheated me again 8), we have to account |
1157 | * for reordering! Ugly, but should help. | 1154 | * for reordering! Ugly, but should help. |
1158 | * | 1155 | * |
1159 | * Search retransmitted skbs from write_queue that were sent when snd_nxt was | 1156 | * Search retransmitted skbs from write_queue that were sent when snd_nxt was |
@@ -1310,25 +1307,26 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, | |||
1310 | return in_sack; | 1307 | return in_sack; |
1311 | } | 1308 | } |
1312 | 1309 | ||
1313 | static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, | 1310 | /* Mark the given newly-SACKed range as such, adjusting counters and hints. */ |
1314 | struct tcp_sacktag_state *state, | 1311 | static u8 tcp_sacktag_one(struct sock *sk, |
1312 | struct tcp_sacktag_state *state, u8 sacked, | ||
1313 | u32 start_seq, u32 end_seq, | ||
1315 | int dup_sack, int pcount) | 1314 | int dup_sack, int pcount) |
1316 | { | 1315 | { |
1317 | struct tcp_sock *tp = tcp_sk(sk); | 1316 | struct tcp_sock *tp = tcp_sk(sk); |
1318 | u8 sacked = TCP_SKB_CB(skb)->sacked; | ||
1319 | int fack_count = state->fack_count; | 1317 | int fack_count = state->fack_count; |
1320 | 1318 | ||
1321 | /* Account D-SACK for retransmitted packet. */ | 1319 | /* Account D-SACK for retransmitted packet. */ |
1322 | if (dup_sack && (sacked & TCPCB_RETRANS)) { | 1320 | if (dup_sack && (sacked & TCPCB_RETRANS)) { |
1323 | if (tp->undo_marker && tp->undo_retrans && | 1321 | if (tp->undo_marker && tp->undo_retrans && |
1324 | after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) | 1322 | after(end_seq, tp->undo_marker)) |
1325 | tp->undo_retrans--; | 1323 | tp->undo_retrans--; |
1326 | if (sacked & TCPCB_SACKED_ACKED) | 1324 | if (sacked & TCPCB_SACKED_ACKED) |
1327 | state->reord = min(fack_count, state->reord); | 1325 | state->reord = min(fack_count, state->reord); |
1328 | } | 1326 | } |
1329 | 1327 | ||
1330 | /* Nothing to do; acked frame is about to be dropped (was ACKed). */ | 1328 | /* Nothing to do; acked frame is about to be dropped (was ACKed). */ |
1331 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) | 1329 | if (!after(end_seq, tp->snd_una)) |
1332 | return sacked; | 1330 | return sacked; |
1333 | 1331 | ||
1334 | if (!(sacked & TCPCB_SACKED_ACKED)) { | 1332 | if (!(sacked & TCPCB_SACKED_ACKED)) { |
@@ -1347,13 +1345,13 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, | |||
1347 | /* New sack for not retransmitted frame, | 1345 | /* New sack for not retransmitted frame, |
1348 | * which was in hole. It is reordering. | 1346 | * which was in hole. It is reordering. |
1349 | */ | 1347 | */ |
1350 | if (before(TCP_SKB_CB(skb)->seq, | 1348 | if (before(start_seq, |
1351 | tcp_highest_sack_seq(tp))) | 1349 | tcp_highest_sack_seq(tp))) |
1352 | state->reord = min(fack_count, | 1350 | state->reord = min(fack_count, |
1353 | state->reord); | 1351 | state->reord); |
1354 | 1352 | ||
1355 | /* SACK enhanced F-RTO (RFC4138; Appendix B) */ | 1353 | /* SACK enhanced F-RTO (RFC4138; Appendix B) */ |
1356 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) | 1354 | if (!after(end_seq, tp->frto_highmark)) |
1357 | state->flag |= FLAG_ONLY_ORIG_SACKED; | 1355 | state->flag |= FLAG_ONLY_ORIG_SACKED; |
1358 | } | 1356 | } |
1359 | 1357 | ||
@@ -1371,8 +1369,7 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, | |||
1371 | 1369 | ||
1372 | /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ | 1370 | /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ |
1373 | if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && | 1371 | if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && |
1374 | before(TCP_SKB_CB(skb)->seq, | 1372 | before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) |
1375 | TCP_SKB_CB(tp->lost_skb_hint)->seq)) | ||
1376 | tp->lost_cnt_hint += pcount; | 1373 | tp->lost_cnt_hint += pcount; |
1377 | 1374 | ||
1378 | if (fack_count > tp->fackets_out) | 1375 | if (fack_count > tp->fackets_out) |
@@ -1391,6 +1388,9 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, | |||
1391 | return sacked; | 1388 | return sacked; |
1392 | } | 1389 | } |
1393 | 1390 | ||
1391 | /* Shift newly-SACKed bytes from this skb to the immediately previous | ||
1392 | * already-SACKed sk_buff. Mark the newly-SACKed bytes as such. | ||
1393 | */ | ||
1394 | static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | 1394 | static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, |
1395 | struct tcp_sacktag_state *state, | 1395 | struct tcp_sacktag_state *state, |
1396 | unsigned int pcount, int shifted, int mss, | 1396 | unsigned int pcount, int shifted, int mss, |
@@ -1398,10 +1398,13 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
1398 | { | 1398 | { |
1399 | struct tcp_sock *tp = tcp_sk(sk); | 1399 | struct tcp_sock *tp = tcp_sk(sk); |
1400 | struct sk_buff *prev = tcp_write_queue_prev(sk, skb); | 1400 | struct sk_buff *prev = tcp_write_queue_prev(sk, skb); |
1401 | u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */ | ||
1402 | u32 end_seq = start_seq + shifted; /* end of newly-SACKed */ | ||
1401 | 1403 | ||
1402 | BUG_ON(!pcount); | 1404 | BUG_ON(!pcount); |
1403 | 1405 | ||
1404 | if (skb == tp->lost_skb_hint) | 1406 | /* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). */ |
1407 | if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint)) | ||
1405 | tp->lost_cnt_hint += pcount; | 1408 | tp->lost_cnt_hint += pcount; |
1406 | 1409 | ||
1407 | TCP_SKB_CB(prev)->end_seq += shifted; | 1410 | TCP_SKB_CB(prev)->end_seq += shifted; |
@@ -1427,8 +1430,11 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
1427 | skb_shinfo(skb)->gso_type = 0; | 1430 | skb_shinfo(skb)->gso_type = 0; |
1428 | } | 1431 | } |
1429 | 1432 | ||
1430 | /* We discard results */ | 1433 | /* Adjust counters and hints for the newly sacked sequence range but |
1431 | tcp_sacktag_one(skb, sk, state, dup_sack, pcount); | 1434 | * discard the return value since prev is already marked. |
1435 | */ | ||
1436 | tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, | ||
1437 | start_seq, end_seq, dup_sack, pcount); | ||
1432 | 1438 | ||
1433 | /* Difference in this won't matter, both ACKed by the same cumul. ACK */ | 1439 | /* Difference in this won't matter, both ACKed by the same cumul. ACK */ |
1434 | TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); | 1440 | TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); |
@@ -1667,10 +1673,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, | |||
1667 | break; | 1673 | break; |
1668 | 1674 | ||
1669 | if (in_sack) { | 1675 | if (in_sack) { |
1670 | TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk, | 1676 | TCP_SKB_CB(skb)->sacked = |
1671 | state, | 1677 | tcp_sacktag_one(sk, |
1672 | dup_sack, | 1678 | state, |
1673 | tcp_skb_pcount(skb)); | 1679 | TCP_SKB_CB(skb)->sacked, |
1680 | TCP_SKB_CB(skb)->seq, | ||
1681 | TCP_SKB_CB(skb)->end_seq, | ||
1682 | dup_sack, | ||
1683 | tcp_skb_pcount(skb)); | ||
1674 | 1684 | ||
1675 | if (!before(TCP_SKB_CB(skb)->seq, | 1685 | if (!before(TCP_SKB_CB(skb)->seq, |
1676 | tcp_highest_sack_seq(tp))) | 1686 | tcp_highest_sack_seq(tp))) |
@@ -1844,10 +1854,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, | |||
1844 | if (found_dup_sack && ((i + 1) == first_sack_index)) | 1854 | if (found_dup_sack && ((i + 1) == first_sack_index)) |
1845 | next_dup = &sp[i + 1]; | 1855 | next_dup = &sp[i + 1]; |
1846 | 1856 | ||
1847 | /* Event "B" in the comment above. */ | ||
1848 | if (after(end_seq, tp->high_seq)) | ||
1849 | state.flag |= FLAG_DATA_LOST; | ||
1850 | |||
1851 | /* Skip too early cached blocks */ | 1857 | /* Skip too early cached blocks */ |
1852 | while (tcp_sack_cache_ok(tp, cache) && | 1858 | while (tcp_sack_cache_ok(tp, cache) && |
1853 | !before(start_seq, cache->end_seq)) | 1859 | !before(start_seq, cache->end_seq)) |
@@ -2515,8 +2521,11 @@ static void tcp_timeout_skbs(struct sock *sk) | |||
2515 | tcp_verify_left_out(tp); | 2521 | tcp_verify_left_out(tp); |
2516 | } | 2522 | } |
2517 | 2523 | ||
2518 | /* Mark head of queue up as lost. With RFC3517 SACK, the packets is | 2524 | /* Detect loss in event "A" above by marking head of queue up as lost. |
2519 | * is against sacked "cnt", otherwise it's against facked "cnt" | 2525 | * For FACK or non-SACK(Reno) senders, the first "packets" number of segments |
2526 | * are considered lost. For RFC3517 SACK, a segment is considered lost if it | ||
2527 | * has at least tp->reordering SACKed segments above it; "packets" refers to | ||
2528 | * the maximum SACKed segments to pass before reaching this limit. | ||
2520 | */ | 2529 | */ |
2521 | static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | 2530 | static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) |
2522 | { | 2531 | { |
@@ -2525,6 +2534,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | |||
2525 | int cnt, oldcnt; | 2534 | int cnt, oldcnt; |
2526 | int err; | 2535 | int err; |
2527 | unsigned int mss; | 2536 | unsigned int mss; |
2537 | /* Use SACK to deduce losses of new sequences sent during recovery */ | ||
2538 | const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; | ||
2528 | 2539 | ||
2529 | WARN_ON(packets > tp->packets_out); | 2540 | WARN_ON(packets > tp->packets_out); |
2530 | if (tp->lost_skb_hint) { | 2541 | if (tp->lost_skb_hint) { |
@@ -2546,7 +2557,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | |||
2546 | tp->lost_skb_hint = skb; | 2557 | tp->lost_skb_hint = skb; |
2547 | tp->lost_cnt_hint = cnt; | 2558 | tp->lost_cnt_hint = cnt; |
2548 | 2559 | ||
2549 | if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) | 2560 | if (after(TCP_SKB_CB(skb)->end_seq, loss_high)) |
2550 | break; | 2561 | break; |
2551 | 2562 | ||
2552 | oldcnt = cnt; | 2563 | oldcnt = cnt; |
@@ -3033,19 +3044,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3033 | if (tcp_check_sack_reneging(sk, flag)) | 3044 | if (tcp_check_sack_reneging(sk, flag)) |
3034 | return; | 3045 | return; |
3035 | 3046 | ||
3036 | /* C. Process data loss notification, provided it is valid. */ | 3047 | /* C. Check consistency of the current state. */ |
3037 | if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) && | ||
3038 | before(tp->snd_una, tp->high_seq) && | ||
3039 | icsk->icsk_ca_state != TCP_CA_Open && | ||
3040 | tp->fackets_out > tp->reordering) { | ||
3041 | tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0); | ||
3042 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS); | ||
3043 | } | ||
3044 | |||
3045 | /* D. Check consistency of the current state. */ | ||
3046 | tcp_verify_left_out(tp); | 3048 | tcp_verify_left_out(tp); |
3047 | 3049 | ||
3048 | /* E. Check state exit conditions. State can be terminated | 3050 | /* D. Check state exit conditions. State can be terminated |
3049 | * when high_seq is ACKed. */ | 3051 | * when high_seq is ACKed. */ |
3050 | if (icsk->icsk_ca_state == TCP_CA_Open) { | 3052 | if (icsk->icsk_ca_state == TCP_CA_Open) { |
3051 | WARN_ON(tp->retrans_out != 0); | 3053 | WARN_ON(tp->retrans_out != 0); |
@@ -3077,7 +3079,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3077 | } | 3079 | } |
3078 | } | 3080 | } |
3079 | 3081 | ||
3080 | /* F. Process state. */ | 3082 | /* E. Process state. */ |
3081 | switch (icsk->icsk_ca_state) { | 3083 | switch (icsk->icsk_ca_state) { |
3082 | case TCP_CA_Recovery: | 3084 | case TCP_CA_Recovery: |
3083 | if (!(flag & FLAG_SND_UNA_ADVANCED)) { | 3085 | if (!(flag & FLAG_SND_UNA_ADVANCED)) { |