aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
author: Mike Maloney <maloney@google.com> 2017-08-22 17:08:48 -0400
committer: David S. Miller <davem@davemloft.net> 2017-08-23 23:30:47 -0400
commit: 98aaa913b4ed250324429f0a9e6d5f77a3b5276c (patch)
tree: 651cb3f820b76e3b199d25c4997dd0e494cdcc06 /net/ipv4/tcp_input.c
parent: b28547728d4fd42a004df2b662724e16ff778db6 (diff)
tcp: Extend SOF_TIMESTAMPING_RX_SOFTWARE to TCP recvmsg
When SOF_TIMESTAMPING_RX_SOFTWARE is enabled for TCP sockets, return the timestamp corresponding to the highest sequence number data returned.

Previously, skb->tstamp was overwritten when a TCP packet was placed in the out-of-order queue. While the packet is in the ooo queue, save the timestamp in the TCP_SKB_CB. This space is shared with the gso_* options, which are only used on the tx path, and a previously unused 4-byte hole.

When skbs are coalesced, either in the sk_receive_queue or the out_of_order_queue, always choose the timestamp of the appended skb to maintain the invariant of returning the timestamp of the last byte in the recvmsg buffer.

Signed-off-by: Mike Maloney <maloney@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- net/ipv4/tcp_input.c 35
1 files changed, 31 insertions, 4 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d3421ee9a10a..568ccfd6dd37 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4246,9 +4246,15 @@ static void tcp_sack_remove(struct tcp_sock *tp)
4246 tp->rx_opt.num_sacks = num_sacks; 4246 tp->rx_opt.num_sacks = num_sacks;
4247} 4247}
4248 4248
4249enum tcp_queue {
4250 OOO_QUEUE,
4251 RCV_QUEUE,
4252};
4253
4249/** 4254/**
4250 * tcp_try_coalesce - try to merge skb to prior one 4255 * tcp_try_coalesce - try to merge skb to prior one
4251 * @sk: socket 4256 * @sk: socket
4257 * @dest: destination queue
4252 * @to: prior buffer 4258 * @to: prior buffer
4253 * @from: buffer to add in queue 4259 * @from: buffer to add in queue
4254 * @fragstolen: pointer to boolean 4260 * @fragstolen: pointer to boolean
@@ -4260,6 +4266,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
4260 * Returns true if caller should free @from instead of queueing it 4266 * Returns true if caller should free @from instead of queueing it
4261 */ 4267 */
4262static bool tcp_try_coalesce(struct sock *sk, 4268static bool tcp_try_coalesce(struct sock *sk,
4269 enum tcp_queue dest,
4263 struct sk_buff *to, 4270 struct sk_buff *to,
4264 struct sk_buff *from, 4271 struct sk_buff *from,
4265 bool *fragstolen) 4272 bool *fragstolen)
@@ -4281,6 +4288,15 @@ static bool tcp_try_coalesce(struct sock *sk,
4281 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; 4288 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4282 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; 4289 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4283 TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags; 4290 TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
4291
4292 if (TCP_SKB_CB(from)->has_rxtstamp) {
4293 TCP_SKB_CB(to)->has_rxtstamp = true;
4294 if (dest == OOO_QUEUE)
4295 TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
4296 else
4297 to->tstamp = from->tstamp;
4298 }
4299
4284 return true; 4300 return true;
4285} 4301}
4286 4302
@@ -4315,6 +4331,9 @@ static void tcp_ofo_queue(struct sock *sk)
4315 } 4331 }
4316 p = rb_next(p); 4332 p = rb_next(p);
4317 rb_erase(&skb->rbnode, &tp->out_of_order_queue); 4333 rb_erase(&skb->rbnode, &tp->out_of_order_queue);
4334 /* Replace tstamp which was stomped by rbnode */
4335 if (TCP_SKB_CB(skb)->has_rxtstamp)
4336 skb->tstamp = TCP_SKB_CB(skb)->swtstamp;
4318 4337
4319 if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { 4338 if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
4320 SOCK_DEBUG(sk, "ofo packet was already received\n"); 4339 SOCK_DEBUG(sk, "ofo packet was already received\n");
@@ -4326,7 +4345,8 @@ static void tcp_ofo_queue(struct sock *sk)
4326 TCP_SKB_CB(skb)->end_seq); 4345 TCP_SKB_CB(skb)->end_seq);
4327 4346
4328 tail = skb_peek_tail(&sk->sk_receive_queue); 4347 tail = skb_peek_tail(&sk->sk_receive_queue);
4329 eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); 4348 eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE,
4349 tail, skb, &fragstolen);
4330 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); 4350 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
4331 fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; 4351 fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
4332 if (!eaten) 4352 if (!eaten)
@@ -4380,6 +4400,10 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4380 return; 4400 return;
4381 } 4401 }
4382 4402
4403 /* Stash tstamp to avoid being stomped on by rbnode */
4404 if (TCP_SKB_CB(skb)->has_rxtstamp)
4405 TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
4406
4383 inet_csk_schedule_ack(sk); 4407 inet_csk_schedule_ack(sk);
4384 4408
4385 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); 4409 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
@@ -4405,7 +4429,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4405 /* In the typical case, we are adding an skb to the end of the list. 4429 /* In the typical case, we are adding an skb to the end of the list.
4406 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. 4430 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
4407 */ 4431 */
4408 if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) { 4432 if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
4433 skb, &fragstolen)) {
4409coalesce_done: 4434coalesce_done:
4410 tcp_grow_window(sk, skb); 4435 tcp_grow_window(sk, skb);
4411 kfree_skb_partial(skb, fragstolen); 4436 kfree_skb_partial(skb, fragstolen);
@@ -4455,7 +4480,8 @@ coalesce_done:
4455 __kfree_skb(skb1); 4480 __kfree_skb(skb1);
4456 goto merge_right; 4481 goto merge_right;
4457 } 4482 }
4458 } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { 4483 } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
4484 skb, &fragstolen)) {
4459 goto coalesce_done; 4485 goto coalesce_done;
4460 } 4486 }
4461 p = &parent->rb_right; 4487 p = &parent->rb_right;
@@ -4506,7 +4532,8 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
4506 4532
4507 __skb_pull(skb, hdrlen); 4533 __skb_pull(skb, hdrlen);
4508 eaten = (tail && 4534 eaten = (tail &&
4509 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0; 4535 tcp_try_coalesce(sk, RCV_QUEUE, tail,
4536 skb, fragstolen)) ? 1 : 0;
4510 tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); 4537 tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
4511 if (!eaten) { 4538 if (!eaten) {
4512 __skb_queue_tail(&sk->sk_receive_queue, skb); 4539 __skb_queue_tail(&sk->sk_receive_queue, skb);