aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
authorIlpo Järvinen <ilpo.jarvinen@helsinki.fi>2007-10-11 20:35:41 -0400
committerDavid S. Miller <davem@davemloft.net>2007-10-11 20:35:41 -0400
commitf785a8e28b9d103c7473655743b6ac1bc3cd3a58 (patch)
tree44f3b88cca49ffa696a7e082cdb89e2f30550c63 /net/ipv4/tcp_input.c
parent4cd829995b86e0359796780d43d2f210cb5cf021 (diff)
[TCP]: Fix lost_retrans loop vs fastpath problems
Detection implemented with lost_retrans must work also when fastpath is taken, yet most of the queue is skipped including (very likely) those retransmitted skb's we're interested in. This problem appeared when the hints got added, which removed a need to always walk over the whole write queue head. Therefore decision for the lost_retrans worker loop entry must be separated from the sacktag processing more than it was necessary before. It turns out to be problematic to optimize the worker loop very heavily because ack_seqs of skb may have a number of discontinuity points. Maybe similar approach as currently is implemented could be attempted but that's becoming more and more complex because the trend is towards less skb walking in sacktag marker. Trying a simple work until all rexmitted skbs have been processed approach. Maybe after(highest_sack_end_seq, tp->high_seq) checking is not sufficiently accurate and causes entry too often in no-work-to-do cases. Since that's not known, I've separated solution to that from this patch. Noticed because of report against a related problem from TAKANO Ryousei <takano@axe-inc.co.jp>. He also provided a patch to that part of the problem. This patch includes solution to it (though this patch has to use somewhat different placement). TAKANO's description and patch is available here: http://marc.info/?l=linux-netdev&m=119149311913288&w=2 ...In short, TAKANO's problem is that end_seq the loop is using not necessarily the largest SACK block's end_seq because the current ACK may still have higher SACK blocks which are later by the loop. Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--net/ipv4/tcp_input.c37
1 files changed, 22 insertions, 15 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a0660391213..d5e0fcc22a3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1109,27 +1109,34 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
1109/* Check for lost retransmit. This superb idea is borrowed from "ratehalving". 1109/* Check for lost retransmit. This superb idea is borrowed from "ratehalving".
1110 * Event "C". Later note: FACK people cheated me again 8), we have to account 1110 * Event "C". Later note: FACK people cheated me again 8), we have to account
1111 * for reordering! Ugly, but should help. 1111 * for reordering! Ugly, but should help.
1112 *
1113 * Search retransmitted skbs from write_queue that were sent when snd_nxt was
1114 * less than what is now known to be received by the other end (derived from
1115 * SACK blocks by the caller).
1112 */ 1116 */
1113static int tcp_mark_lost_retrans(struct sock *sk, u32 lost_retrans) 1117static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
1114{ 1118{
1115 struct tcp_sock *tp = tcp_sk(sk); 1119 struct tcp_sock *tp = tcp_sk(sk);
1116 struct sk_buff *skb; 1120 struct sk_buff *skb;
1117 int flag = 0; 1121 int flag = 0;
1122 int cnt = 0;
1118 1123
1119 tcp_for_write_queue(skb, sk) { 1124 tcp_for_write_queue(skb, sk) {
1120 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq; 1125 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
1121 1126
1122 if (skb == tcp_send_head(sk)) 1127 if (skb == tcp_send_head(sk))
1123 break; 1128 break;
1124 if (after(TCP_SKB_CB(skb)->seq, lost_retrans)) 1129 if (cnt == tp->retrans_out)
1125 break; 1130 break;
1126 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) 1131 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1127 continue; 1132 continue;
1128 1133
1129 if ((TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) && 1134 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
1130 after(lost_retrans, ack_seq) && 1135 continue;
1136
1137 if (after(received_upto, ack_seq) &&
1131 (tcp_is_fack(tp) || 1138 (tcp_is_fack(tp) ||
1132 !before(lost_retrans, 1139 !before(received_upto,
1133 ack_seq + tp->reordering * tp->mss_cache))) { 1140 ack_seq + tp->reordering * tp->mss_cache))) {
1134 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1141 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1135 tp->retrans_out -= tcp_skb_pcount(skb); 1142 tp->retrans_out -= tcp_skb_pcount(skb);
@@ -1143,6 +1150,8 @@ static int tcp_mark_lost_retrans(struct sock *sk, u32 lost_retrans)
1143 flag |= FLAG_DATA_SACKED; 1150 flag |= FLAG_DATA_SACKED;
1144 NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); 1151 NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
1145 } 1152 }
1153 } else {
1154 cnt += tcp_skb_pcount(skb);
1146 } 1155 }
1147 } 1156 }
1148 return flag; 1157 return flag;
@@ -1225,7 +1234,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1225 int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; 1234 int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
1226 int reord = tp->packets_out; 1235 int reord = tp->packets_out;
1227 int prior_fackets; 1236 int prior_fackets;
1228 u32 lost_retrans = 0; 1237 u32 highest_sack_end_seq = 0;
1229 int flag = 0; 1238 int flag = 0;
1230 int found_dup_sack = 0; 1239 int found_dup_sack = 0;
1231 int cached_fack_count; 1240 int cached_fack_count;
@@ -1396,11 +1405,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1396 continue; 1405 continue;
1397 } 1406 }
1398 1407
1399 if ((sacked&TCPCB_SACKED_RETRANS) &&
1400 after(end_seq, TCP_SKB_CB(skb)->ack_seq) &&
1401 (!lost_retrans || after(end_seq, lost_retrans)))
1402 lost_retrans = end_seq;
1403
1404 if (!in_sack) 1408 if (!in_sack)
1405 continue; 1409 continue;
1406 1410
@@ -1454,9 +1458,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1454 if (fack_count > tp->fackets_out) 1458 if (fack_count > tp->fackets_out)
1455 tp->fackets_out = fack_count; 1459 tp->fackets_out = fack_count;
1456 1460
1457 if (after(TCP_SKB_CB(skb)->seq, 1461 if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) {
1458 tp->highest_sack))
1459 tp->highest_sack = TCP_SKB_CB(skb)->seq; 1462 tp->highest_sack = TCP_SKB_CB(skb)->seq;
1463 highest_sack_end_seq = TCP_SKB_CB(skb)->end_seq;
1464 }
1460 } else { 1465 } else {
1461 if (dup_sack && (sacked&TCPCB_RETRANS)) 1466 if (dup_sack && (sacked&TCPCB_RETRANS))
1462 reord = min(fack_count, reord); 1467 reord = min(fack_count, reord);
@@ -1476,8 +1481,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1476 } 1481 }
1477 } 1482 }
1478 1483
1479 if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) 1484 if (tp->retrans_out && highest_sack_end_seq &&
1480 flag |= tcp_mark_lost_retrans(sk, lost_retrans); 1485 after(highest_sack_end_seq, tp->high_seq) &&
1486 icsk->icsk_ca_state == TCP_CA_Recovery)
1487 flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq);
1481 1488
1482 tcp_verify_left_out(tp); 1489 tcp_verify_left_out(tp);
1483 1490