about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2017-09-19 08:14:24 -0400
committerDavid S. Miller <davem@davemloft.net>2017-09-19 18:20:22 -0400
commitbffa72cf7f9df842f0016ba03586039296b4caaf (patch)
treee27752d22b270c0c607ff8025e44dd8f884aa1cb
parenta38b2fa37e2e2ac897e7159738c5763ee65ee405 (diff)
net: sk_buff rbnode reorg
skb->rbnode shares space with skb->next, skb->prev and skb->tstamp. Current uses (the TCP receive out-of-order queue and netem) need to save/restore tstamp, while skb->dev is either NULL (TCP) or a constant for a given queue (netem).

Since we plan using an RB tree for the TCP retransmit queue to speed up SACK processing with large BDP, this patch exchanges skb->dev and skb->tstamp. This saves some overhead in both TCP and netem.

v2: removes the swtstamp field from struct tcp_skb_cb

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Cc: Wei Wang <weiwan@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/skbuff.h16
-rw-r--r--include/net/tcp.h6
-rw-r--r--net/ipv4/tcp_input.c27
-rw-r--r--net/sched/sch_netem.c7
4 files changed, 17 insertions, 39 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 72299ef00061..492828801acb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -661,8 +661,12 @@ struct sk_buff {
661 struct sk_buff *prev; 661 struct sk_buff *prev;
662 662
663 union { 663 union {
664 ktime_t tstamp; 664 struct net_device *dev;
665 u64 skb_mstamp; 665 /* Some protocols might use this space to store information,
666 * while device pointer would be NULL.
667 * UDP receive path is one user.
668 */
669 unsigned long dev_scratch;
666 }; 670 };
667 }; 671 };
668 struct rb_node rbnode; /* used in netem & tcp stack */ 672 struct rb_node rbnode; /* used in netem & tcp stack */
@@ -670,12 +674,8 @@ struct sk_buff {
670 struct sock *sk; 674 struct sock *sk;
671 675
672 union { 676 union {
673 struct net_device *dev; 677 ktime_t tstamp;
674 /* Some protocols might use this space to store information, 678 u64 skb_mstamp;
675 * while device pointer would be NULL.
676 * UDP receive path is one user.
677 */
678 unsigned long dev_scratch;
679 }; 679 };
680 /* 680 /*
681 * This is the control buffer. It is free to use for every 681 * This is the control buffer. It is free to use for every
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b510f284427a..49a8a46466f3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -797,12 +797,6 @@ struct tcp_skb_cb {
797 u16 tcp_gso_segs; 797 u16 tcp_gso_segs;
798 u16 tcp_gso_size; 798 u16 tcp_gso_size;
799 }; 799 };
800
801 /* Used to stash the receive timestamp while this skb is in the
802 * out of order queue, as skb->tstamp is overwritten by the
803 * rbnode.
804 */
805 ktime_t swtstamp;
806 }; 800 };
807 __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ 801 __u8 tcp_flags; /* TCP header flags. (tcp[13]) */
808 802
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bddf724f5c02..db9bb46b5776 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4266,11 +4266,6 @@ static void tcp_sack_remove(struct tcp_sock *tp)
4266 tp->rx_opt.num_sacks = num_sacks; 4266 tp->rx_opt.num_sacks = num_sacks;
4267} 4267}
4268 4268
4269enum tcp_queue {
4270 OOO_QUEUE,
4271 RCV_QUEUE,
4272};
4273
4274/** 4269/**
4275 * tcp_try_coalesce - try to merge skb to prior one 4270 * tcp_try_coalesce - try to merge skb to prior one
4276 * @sk: socket 4271 * @sk: socket
@@ -4286,7 +4281,6 @@ enum tcp_queue {
4286 * Returns true if caller should free @from instead of queueing it 4281 * Returns true if caller should free @from instead of queueing it
4287 */ 4282 */
4288static bool tcp_try_coalesce(struct sock *sk, 4283static bool tcp_try_coalesce(struct sock *sk,
4289 enum tcp_queue dest,
4290 struct sk_buff *to, 4284 struct sk_buff *to,
4291 struct sk_buff *from, 4285 struct sk_buff *from,
4292 bool *fragstolen) 4286 bool *fragstolen)
@@ -4311,10 +4305,7 @@ static bool tcp_try_coalesce(struct sock *sk,
4311 4305
4312 if (TCP_SKB_CB(from)->has_rxtstamp) { 4306 if (TCP_SKB_CB(from)->has_rxtstamp) {
4313 TCP_SKB_CB(to)->has_rxtstamp = true; 4307 TCP_SKB_CB(to)->has_rxtstamp = true;
4314 if (dest == OOO_QUEUE) 4308 to->tstamp = from->tstamp;
4315 TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
4316 else
4317 to->tstamp = from->tstamp;
4318 } 4309 }
4319 4310
4320 return true; 4311 return true;
@@ -4351,9 +4342,6 @@ static void tcp_ofo_queue(struct sock *sk)
4351 } 4342 }
4352 p = rb_next(p); 4343 p = rb_next(p);
4353 rb_erase(&skb->rbnode, &tp->out_of_order_queue); 4344 rb_erase(&skb->rbnode, &tp->out_of_order_queue);
4354 /* Replace tstamp which was stomped by rbnode */
4355 if (TCP_SKB_CB(skb)->has_rxtstamp)
4356 skb->tstamp = TCP_SKB_CB(skb)->swtstamp;
4357 4345
4358 if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { 4346 if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
4359 SOCK_DEBUG(sk, "ofo packet was already received\n"); 4347 SOCK_DEBUG(sk, "ofo packet was already received\n");
@@ -4365,8 +4353,7 @@ static void tcp_ofo_queue(struct sock *sk)
4365 TCP_SKB_CB(skb)->end_seq); 4353 TCP_SKB_CB(skb)->end_seq);
4366 4354
4367 tail = skb_peek_tail(&sk->sk_receive_queue); 4355 tail = skb_peek_tail(&sk->sk_receive_queue);
4368 eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE, 4356 eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
4369 tail, skb, &fragstolen);
4370 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); 4357 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
4371 fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; 4358 fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
4372 if (!eaten) 4359 if (!eaten)
@@ -4420,10 +4407,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4420 return; 4407 return;
4421 } 4408 }
4422 4409
4423 /* Stash tstamp to avoid being stomped on by rbnode */
4424 if (TCP_SKB_CB(skb)->has_rxtstamp)
4425 TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
4426
4427 /* Disable header prediction. */ 4410 /* Disable header prediction. */
4428 tp->pred_flags = 0; 4411 tp->pred_flags = 0;
4429 inet_csk_schedule_ack(sk); 4412 inet_csk_schedule_ack(sk);
@@ -4451,7 +4434,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4451 /* In the typical case, we are adding an skb to the end of the list. 4434 /* In the typical case, we are adding an skb to the end of the list.
4452 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. 4435 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
4453 */ 4436 */
4454 if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb, 4437 if (tcp_try_coalesce(sk, tp->ooo_last_skb,
4455 skb, &fragstolen)) { 4438 skb, &fragstolen)) {
4456coalesce_done: 4439coalesce_done:
4457 tcp_grow_window(sk, skb); 4440 tcp_grow_window(sk, skb);
@@ -4502,7 +4485,7 @@ coalesce_done:
4502 __kfree_skb(skb1); 4485 __kfree_skb(skb1);
4503 goto merge_right; 4486 goto merge_right;
4504 } 4487 }
4505 } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1, 4488 } else if (tcp_try_coalesce(sk, skb1,
4506 skb, &fragstolen)) { 4489 skb, &fragstolen)) {
4507 goto coalesce_done; 4490 goto coalesce_done;
4508 } 4491 }
@@ -4554,7 +4537,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
4554 4537
4555 __skb_pull(skb, hdrlen); 4538 __skb_pull(skb, hdrlen);
4556 eaten = (tail && 4539 eaten = (tail &&
4557 tcp_try_coalesce(sk, RCV_QUEUE, tail, 4540 tcp_try_coalesce(sk, tail,
4558 skb, fragstolen)) ? 1 : 0; 4541 skb, fragstolen)) ? 1 : 0;
4559 tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); 4542 tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
4560 if (!eaten) { 4543 if (!eaten) {
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index b1266e75ca43..063a4bdb9ee6 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -146,7 +146,6 @@ struct netem_sched_data {
146 */ 146 */
147struct netem_skb_cb { 147struct netem_skb_cb {
148 psched_time_t time_to_send; 148 psched_time_t time_to_send;
149 ktime_t tstamp_save;
150}; 149};
151 150
152 151
@@ -561,7 +560,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
561 } 560 }
562 561
563 cb->time_to_send = now + delay; 562 cb->time_to_send = now + delay;
564 cb->tstamp_save = skb->tstamp;
565 ++q->counter; 563 ++q->counter;
566 tfifo_enqueue(skb, sch); 564 tfifo_enqueue(skb, sch);
567 } else { 565 } else {
@@ -629,7 +627,10 @@ deliver:
629 qdisc_qstats_backlog_dec(sch, skb); 627 qdisc_qstats_backlog_dec(sch, skb);
630 skb->next = NULL; 628 skb->next = NULL;
631 skb->prev = NULL; 629 skb->prev = NULL;
632 skb->tstamp = netem_skb_cb(skb)->tstamp_save; 630 /* skb->dev shares skb->rbnode area,
631 * we need to restore its value.
632 */
633 skb->dev = qdisc_dev(sch);
633 634
634#ifdef CONFIG_NET_CLS_ACT 635#ifdef CONFIG_NET_CLS_ACT
635 /* 636 /*