author | Eric Dumazet <edumazet@google.com> | 2017-09-19 08:14:24 -0400
committer | David S. Miller <davem@davemloft.net> | 2017-09-19 18:20:22 -0400
commit | bffa72cf7f9df842f0016ba03586039296b4caaf
tree | e27752d22b270c0c607ff8025e44dd8f884aa1cb
parent | a38b2fa37e2e2ac897e7159738c5763ee65ee405
net: sk_buff rbnode reorg
skb->rbnode shares space with skb->next, skb->prev and skb->tstamp.

Current users (the TCP receive out-of-order queue and netem) therefore need to
save/restore tstamp, while skb->dev is either NULL (TCP) or a constant for a
given queue (netem).

Since we plan to use an RB tree for the TCP retransmit queue to speed up SACK
processing with large BDP, this patch exchanges the positions of skb->dev and
skb->tstamp.

This saves some overhead in both TCP and netem.

v2: removes the swtstamp field from struct tcp_skb_cb
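
For illustration, a simplified sketch of the post-patch overlap (this is not
the actual struct sk_buff definition; unrelated members are omitted and names
are abbreviated). skb->rbnode now overlays next/prev/dev rather than the
timestamp, so rbtree users only have to put skb->dev back afterwards:

	/* Simplified sketch only -- not the real struct sk_buff.
	 * rbnode overlays next/prev/dev(_scratch); tstamp lives outside
	 * the union and is no longer clobbered by rbtree use.
	 */
	#include <linux/rbtree.h>
	#include <linux/ktime.h>

	struct net_device;

	struct skb_layout_sketch {
		union {
			struct {
				struct skb_layout_sketch	*next;
				struct skb_layout_sketch	*prev;
				union {
					struct net_device	*dev;
					unsigned long		dev_scratch;
				};
			};
			struct rb_node	rbnode;	/* used in netem & tcp stack */
		};
		ktime_t		tstamp;		/* survives rbtree insertion */
	};

With this layout, TCP can simply leave skb->dev NULL and netem restores it
from qdisc_dev(sch) at dequeue, so the per-skb stashes (swtstamp in
tcp_skb_cb, tstamp_save in netem_skb_cb) can be removed.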
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Cc: Wei Wang <weiwan@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/skbuff.h | 16
-rw-r--r-- | include/net/tcp.h | 6
-rw-r--r-- | net/ipv4/tcp_input.c | 27
-rw-r--r-- | net/sched/sch_netem.c | 7
4 files changed, 17 insertions(+), 39 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 72299ef00061..492828801acb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -661,8 +661,12 @@ struct sk_buff {
 			struct sk_buff		*prev;
 
 			union {
-				ktime_t		tstamp;
-				u64		skb_mstamp;
+				struct net_device	*dev;
+				/* Some protocols might use this space to store information,
+				 * while device pointer would be NULL.
+				 * UDP receive path is one user.
+				 */
+				unsigned long		dev_scratch;
 			};
 		};
 		struct rb_node		rbnode; /* used in netem & tcp stack */
@@ -670,12 +674,8 @@ struct sk_buff {
 	struct sock		*sk;
 
 	union {
-		struct net_device	*dev;
-		/* Some protocols might use this space to store information,
-		 * while device pointer would be NULL.
-		 * UDP receive path is one user.
-		 */
-		unsigned long		dev_scratch;
+		ktime_t		tstamp;
+		u64		skb_mstamp;
 	};
 	/*
 	 * This is the control buffer. It is free to use for every
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b510f284427a..49a8a46466f3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -797,12 +797,6 @@ struct tcp_skb_cb {
 			u16	tcp_gso_segs;
 			u16	tcp_gso_size;
 		};
-
-		/* Used to stash the receive timestamp while this skb is in the
-		 * out of order queue, as skb->tstamp is overwritten by the
-		 * rbnode.
-		 */
-		ktime_t		swtstamp;
 	};
 	__u8		tcp_flags;	/* TCP header flags. (tcp[13]) */
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bddf724f5c02..db9bb46b5776 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4266,11 +4266,6 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 	tp->rx_opt.num_sacks = num_sacks;
 }
 
-enum tcp_queue {
-	OOO_QUEUE,
-	RCV_QUEUE,
-};
-
 /**
  * tcp_try_coalesce - try to merge skb to prior one
  * @sk: socket
@@ -4286,7 +4281,6 @@ enum tcp_queue {
  * Returns true if caller should free @from instead of queueing it
  */
 static bool tcp_try_coalesce(struct sock *sk,
-			     enum tcp_queue dest,
 			     struct sk_buff *to,
 			     struct sk_buff *from,
 			     bool *fragstolen)
@@ -4311,10 +4305,7 @@ static bool tcp_try_coalesce(struct sock *sk,
 
 	if (TCP_SKB_CB(from)->has_rxtstamp) {
 		TCP_SKB_CB(to)->has_rxtstamp = true;
-		if (dest == OOO_QUEUE)
-			TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
-		else
-			to->tstamp = from->tstamp;
+		to->tstamp = from->tstamp;
 	}
 
 	return true;
@@ -4351,9 +4342,6 @@ static void tcp_ofo_queue(struct sock *sk)
 		}
 		p = rb_next(p);
 		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
-		/* Replace tstamp which was stomped by rbnode */
-		if (TCP_SKB_CB(skb)->has_rxtstamp)
-			skb->tstamp = TCP_SKB_CB(skb)->swtstamp;
 
 		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
 			SOCK_DEBUG(sk, "ofo packet was already received\n");
@@ -4365,8 +4353,7 @@ static void tcp_ofo_queue(struct sock *sk)
 			   TCP_SKB_CB(skb)->end_seq);
 
 		tail = skb_peek_tail(&sk->sk_receive_queue);
-		eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE,
-						 tail, skb, &fragstolen);
+		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
 		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
 		if (!eaten)
@@ -4420,10 +4407,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		return;
 	}
 
-	/* Stash tstamp to avoid being stomped on by rbnode */
-	if (TCP_SKB_CB(skb)->has_rxtstamp)
-		TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
-
 	/* Disable header prediction. */
 	tp->pred_flags = 0;
 	inet_csk_schedule_ack(sk);
@@ -4451,7 +4434,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	/* In the typical case, we are adding an skb to the end of the list.
 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
 	 */
-	if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
+	if (tcp_try_coalesce(sk, tp->ooo_last_skb,
 			     skb, &fragstolen)) {
 coalesce_done:
 		tcp_grow_window(sk, skb);
@@ -4502,7 +4485,7 @@ coalesce_done:
 			__kfree_skb(skb1);
 			goto merge_right;
 		}
-	} else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
+	} else if (tcp_try_coalesce(sk, skb1,
 				    skb, &fragstolen)) {
 		goto coalesce_done;
 	}
@@ -4554,7 +4537,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 
 	__skb_pull(skb, hdrlen);
 	eaten = (tail &&
-		 tcp_try_coalesce(sk, RCV_QUEUE, tail,
+		 tcp_try_coalesce(sk, tail,
 				  skb, fragstolen)) ? 1 : 0;
 	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
 	if (!eaten) {
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index b1266e75ca43..063a4bdb9ee6 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -146,7 +146,6 @@ struct netem_sched_data {
  */
 struct netem_skb_cb {
 	psched_time_t	time_to_send;
-	ktime_t		tstamp_save;
 };
 
 
@@ -561,7 +560,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		}
 
 		cb->time_to_send = now + delay;
-		cb->tstamp_save = skb->tstamp;
 		++q->counter;
 		tfifo_enqueue(skb, sch);
 	} else {
@@ -629,7 +627,10 @@ deliver:
 			qdisc_qstats_backlog_dec(sch, skb);
 			skb->next = NULL;
 			skb->prev = NULL;
-			skb->tstamp = netem_skb_cb(skb)->tstamp_save;
+			/* skb->dev shares skb->rbnode area,
+			 * we need to restore its value.
+			 */
+			skb->dev = qdisc_dev(sch);
 
 #ifdef CONFIG_NET_CLS_ACT
 			/*