diff options
author | Mike Maloney <maloney@google.com> | 2017-08-22 17:08:48 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-08-23 23:30:47 -0400 |
commit | 98aaa913b4ed250324429f0a9e6d5f77a3b5276c (patch) | |
tree | 651cb3f820b76e3b199d25c4997dd0e494cdcc06 /net/ipv4/tcp_input.c | |
parent | b28547728d4fd42a004df2b662724e16ff778db6 (diff) |
tcp: Extend SOF_TIMESTAMPING_RX_SOFTWARE to TCP recvmsg
When SOF_TIMESTAMPING_RX_SOFTWARE is enabled for tcp sockets, return the
timestamp corresponding to the highest sequence number data returned.
Previously, skb->tstamp was overwritten (by the rbnode) when a TCP packet
was placed in the out-of-order (ooo) queue. While the packet is in the ooo
queue, save the timestamp in the TCP_SKB_CB. This space is shared with the
gso_* options, which are only used on the tx path, and a previously unused
4-byte hole.
When skbs are coalesced in either the sk_receive_queue or the
out_of_order_queue, always choose the timestamp of the appended skb to
maintain the invariant of returning the timestamp of the last byte in
the recvmsg buffer.
Signed-off-by: Mike Maloney <maloney@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 35 |
1 files changed, 31 insertions, 4 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d3421ee9a10a..568ccfd6dd37 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -4246,9 +4246,15 @@ static void tcp_sack_remove(struct tcp_sock *tp) | |||
4246 | tp->rx_opt.num_sacks = num_sacks; | 4246 | tp->rx_opt.num_sacks = num_sacks; |
4247 | } | 4247 | } |
4248 | 4248 | ||
4249 | enum tcp_queue { | ||
4250 | OOO_QUEUE, | ||
4251 | RCV_QUEUE, | ||
4252 | }; | ||
4253 | |||
4249 | /** | 4254 | /** |
4250 | * tcp_try_coalesce - try to merge skb to prior one | 4255 | * tcp_try_coalesce - try to merge skb to prior one |
4251 | * @sk: socket | 4256 | * @sk: socket |
4257 | * @dest: destination queue | ||
4252 | * @to: prior buffer | 4258 | * @to: prior buffer |
4253 | * @from: buffer to add in queue | 4259 | * @from: buffer to add in queue |
4254 | * @fragstolen: pointer to boolean | 4260 | * @fragstolen: pointer to boolean |
@@ -4260,6 +4266,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) | |||
4260 | * Returns true if caller should free @from instead of queueing it | 4266 | * Returns true if caller should free @from instead of queueing it |
4261 | */ | 4267 | */ |
4262 | static bool tcp_try_coalesce(struct sock *sk, | 4268 | static bool tcp_try_coalesce(struct sock *sk, |
4269 | enum tcp_queue dest, | ||
4263 | struct sk_buff *to, | 4270 | struct sk_buff *to, |
4264 | struct sk_buff *from, | 4271 | struct sk_buff *from, |
4265 | bool *fragstolen) | 4272 | bool *fragstolen) |
@@ -4281,6 +4288,15 @@ static bool tcp_try_coalesce(struct sock *sk, | |||
4281 | TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; | 4288 | TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; |
4282 | TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; | 4289 | TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; |
4283 | TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags; | 4290 | TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags; |
4291 | |||
4292 | if (TCP_SKB_CB(from)->has_rxtstamp) { | ||
4293 | TCP_SKB_CB(to)->has_rxtstamp = true; | ||
4294 | if (dest == OOO_QUEUE) | ||
4295 | TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp; | ||
4296 | else | ||
4297 | to->tstamp = from->tstamp; | ||
4298 | } | ||
4299 | |||
4284 | return true; | 4300 | return true; |
4285 | } | 4301 | } |
4286 | 4302 | ||
@@ -4315,6 +4331,9 @@ static void tcp_ofo_queue(struct sock *sk) | |||
4315 | } | 4331 | } |
4316 | p = rb_next(p); | 4332 | p = rb_next(p); |
4317 | rb_erase(&skb->rbnode, &tp->out_of_order_queue); | 4333 | rb_erase(&skb->rbnode, &tp->out_of_order_queue); |
4334 | /* Replace tstamp which was stomped by rbnode */ | ||
4335 | if (TCP_SKB_CB(skb)->has_rxtstamp) | ||
4336 | skb->tstamp = TCP_SKB_CB(skb)->swtstamp; | ||
4318 | 4337 | ||
4319 | if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { | 4338 | if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { |
4320 | SOCK_DEBUG(sk, "ofo packet was already received\n"); | 4339 | SOCK_DEBUG(sk, "ofo packet was already received\n"); |
@@ -4326,7 +4345,8 @@ static void tcp_ofo_queue(struct sock *sk) | |||
4326 | TCP_SKB_CB(skb)->end_seq); | 4345 | TCP_SKB_CB(skb)->end_seq); |
4327 | 4346 | ||
4328 | tail = skb_peek_tail(&sk->sk_receive_queue); | 4347 | tail = skb_peek_tail(&sk->sk_receive_queue); |
4329 | eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); | 4348 | eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE, |
4349 | tail, skb, &fragstolen); | ||
4330 | tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); | 4350 | tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); |
4331 | fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; | 4351 | fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; |
4332 | if (!eaten) | 4352 | if (!eaten) |
@@ -4380,6 +4400,10 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | |||
4380 | return; | 4400 | return; |
4381 | } | 4401 | } |
4382 | 4402 | ||
4403 | /* Stash tstamp to avoid being stomped on by rbnode */ | ||
4404 | if (TCP_SKB_CB(skb)->has_rxtstamp) | ||
4405 | TCP_SKB_CB(skb)->swtstamp = skb->tstamp; | ||
4406 | |||
4383 | inet_csk_schedule_ack(sk); | 4407 | inet_csk_schedule_ack(sk); |
4384 | 4408 | ||
4385 | NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); | 4409 | NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); |
@@ -4405,7 +4429,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | |||
4405 | /* In the typical case, we are adding an skb to the end of the list. | 4429 | /* In the typical case, we are adding an skb to the end of the list. |
4406 | * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. | 4430 | * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. |
4407 | */ | 4431 | */ |
4408 | if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) { | 4432 | if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb, |
4433 | skb, &fragstolen)) { | ||
4409 | coalesce_done: | 4434 | coalesce_done: |
4410 | tcp_grow_window(sk, skb); | 4435 | tcp_grow_window(sk, skb); |
4411 | kfree_skb_partial(skb, fragstolen); | 4436 | kfree_skb_partial(skb, fragstolen); |
@@ -4455,7 +4480,8 @@ coalesce_done: | |||
4455 | __kfree_skb(skb1); | 4480 | __kfree_skb(skb1); |
4456 | goto merge_right; | 4481 | goto merge_right; |
4457 | } | 4482 | } |
4458 | } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { | 4483 | } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1, |
4484 | skb, &fragstolen)) { | ||
4459 | goto coalesce_done; | 4485 | goto coalesce_done; |
4460 | } | 4486 | } |
4461 | p = &parent->rb_right; | 4487 | p = &parent->rb_right; |
@@ -4506,7 +4532,8 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int | |||
4506 | 4532 | ||
4507 | __skb_pull(skb, hdrlen); | 4533 | __skb_pull(skb, hdrlen); |
4508 | eaten = (tail && | 4534 | eaten = (tail && |
4509 | tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0; | 4535 | tcp_try_coalesce(sk, RCV_QUEUE, tail, |
4536 | skb, fragstolen)) ? 1 : 0; | ||
4510 | tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); | 4537 | tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); |
4511 | if (!eaten) { | 4538 | if (!eaten) { |
4512 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 4539 | __skb_queue_tail(&sk->sk_receive_queue, skb); |