aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2017-10-04 15:59:58 -0400
committer: David S. Miller <davem@davemloft.net> 2017-10-06 00:24:47 -0400
commit: e2080072ed2d98a55ae69d95dea60ff7a17cddd5 (patch)
tree: 6ad479e3db638db9c6469cab047b59685d04c5b1 /net/ipv4/tcp_input.c
parent: b1fb67fa501c4787035317f84db6caf013385581 (diff)
tcp: new list for sent but unacked skbs for RACK recovery
This patch adds a new queue (list) that tracks the sent but not yet acked or SACKed skbs for a TCP connection. The list is chronologically ordered by skb->skb_mstamp (the head is the oldest sent skb). This list will be used to optimize TCP RACK recovery, which checks an skb's timestamp to judge if it has been lost and needs to be retransmitted. Since the TCP write queue is ordered by sequence instead of sent time, RACK has to scan over the write queue to catch all eligible packets to detect lost retransmissions, and iterates through SACKed skbs repeatedly. Special care is needed for rare events: 1. TCP repair fakes skb transmission, so the send queue needs to be adjusted. 2. SACK reneging would require re-inserting SACKed skbs into the send queue. For now I believe it's not worth the complexity to make RACK work perfectly on SACK reneging, so we do nothing here. 3. Fast Open: currently for non-TFO, the send queue correctly queues the pure SYN packet. For TFO, which queues a pure SYN and then a data packet, the send queue only queues the data packet but not the pure SYN due to the structure of the TFO code. This is okay because the SYN receiver would never respond with a SACK on a missing SYN (i.e. SYN is never fast-retransmitted by SACK/RACK). In order to not grow sk_buff, we use a union for the new list and the _skb_refdst/destructor fields. This is a bit complicated because we need to make sure _skb_refdst and destructor are properly zeroed before the skb is cloned/copied at transmit, and before being freed. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- net/ipv4/tcp_input.c 9
1 files changed, 7 insertions, 2 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c5b8d61846c2..fb0d7ed84b94 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1593,6 +1593,8 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1593 tcp_skb_pcount(skb), 1593 tcp_skb_pcount(skb),
1594 skb->skb_mstamp); 1594 skb->skb_mstamp);
1595 tcp_rate_skb_delivered(sk, skb, state->rate); 1595 tcp_rate_skb_delivered(sk, skb, state->rate);
1596 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1597 list_del_init(&skb->tcp_tsorted_anchor);
1596 1598
1597 if (!before(TCP_SKB_CB(skb)->seq, 1599 if (!before(TCP_SKB_CB(skb)->seq,
1598 tcp_highest_sack_seq(tp))) 1600 tcp_highest_sack_seq(tp)))
@@ -3054,8 +3056,11 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3054 3056
3055 shinfo = skb_shinfo(skb); 3057 shinfo = skb_shinfo(skb);
3056 if (!before(shinfo->tskey, prior_snd_una) && 3058 if (!before(shinfo->tskey, prior_snd_una) &&
3057 before(shinfo->tskey, tcp_sk(sk)->snd_una)) 3059 before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
3058 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); 3060 tcp_skb_tsorted_save(skb) {
3061 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
3062 } tcp_skb_tsorted_restore(skb);
3063 }
3059} 3064}
3060 3065
3061/* Remove acknowledged frames from the retransmission queue. If our packet 3066/* Remove acknowledged frames from the retransmission queue. If our packet