aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2014-09-27 12:50:57 -0400
committerDavid S. Miller <davem@davemloft.net>2014-09-28 16:35:43 -0400
commit971f10eca186cab238c49daa91f703c5a001b0b1 (patch)
tree0d769e6155899c89ae95c3e31c79ce011eb96a39
parenta224772db8420ecb7ce91a9ba5d535ee3a50d982 (diff)
tcp: better TCP_SKB_CB layout to reduce cache line misses
TCP maintains lists of skb in write queue, and in receive queues (in order and out of order queues).

Scanning these lists both in input and output path usually requires access to skb->next, TCP_SKB_CB(skb)->seq, and TCP_SKB_CB(skb)->end_seq.

These fields are currently in two different cache lines, meaning we waste a lot of memory bandwidth when these queues are big and flows have either packet drops or packet reorders.

We can move TCP_SKB_CB(skb)->header at the end of TCP_SKB_CB, because this header is not used in fast path. This allows TCP to search much faster in the skb lists.

Even with regular flows, we save one cache line miss in fast path.

Thanks to Christoph Paasch for noticing we need to clean up skb->cb[] (IPCB/IP6CB) before entering IP stack in tx path, and that I forgot IPCB use in tcp_v4_hnd_req() and tcp_v4_save_options().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/tcp.h12
-rw-r--r--net/ipv4/tcp_ipv4.c19
-rw-r--r--net/ipv4/tcp_output.c5
-rw-r--r--net/ipv6/tcp_ipv6.c7
4 files changed, 30 insertions, 13 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a4201ef216e8..4dc6641ee990 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -696,12 +696,6 @@ static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
696 * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately. 696 * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately.
697 */ 697 */
698struct tcp_skb_cb { 698struct tcp_skb_cb {
699 union {
700 struct inet_skb_parm h4;
701#if IS_ENABLED(CONFIG_IPV6)
702 struct inet6_skb_parm h6;
703#endif
704 } header; /* For incoming frames */
705 __u32 seq; /* Starting sequence number */ 699 __u32 seq; /* Starting sequence number */
706 __u32 end_seq; /* SEQ + FIN + SYN + datalen */ 700 __u32 end_seq; /* SEQ + FIN + SYN + datalen */
707 __u32 tcp_tw_isn; /* isn chosen by tcp_timewait_state_process() */ 701 __u32 tcp_tw_isn; /* isn chosen by tcp_timewait_state_process() */
@@ -720,6 +714,12 @@ struct tcp_skb_cb {
720 __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ 714 __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
721 /* 1 byte hole */ 715 /* 1 byte hole */
722 __u32 ack_seq; /* Sequence number ACK'd */ 716 __u32 ack_seq; /* Sequence number ACK'd */
717 union {
718 struct inet_skb_parm h4;
719#if IS_ENABLED(CONFIG_IPV6)
720 struct inet6_skb_parm h6;
721#endif
722 } header; /* For incoming frames */
723}; 723};
724 724
725#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) 725#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 28ab90382c01..9ce3eac02957 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -886,18 +886,16 @@ EXPORT_SYMBOL(tcp_syn_flood_action);
886 */ 886 */
887static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) 887static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
888{ 888{
889 const struct ip_options *opt = &(IPCB(skb)->opt); 889 const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
890 struct ip_options_rcu *dopt = NULL; 890 struct ip_options_rcu *dopt = NULL;
891 891
892 if (opt && opt->optlen) { 892 if (opt && opt->optlen) {
893 int opt_size = sizeof(*dopt) + opt->optlen; 893 int opt_size = sizeof(*dopt) + opt->optlen;
894 894
895 dopt = kmalloc(opt_size, GFP_ATOMIC); 895 dopt = kmalloc(opt_size, GFP_ATOMIC);
896 if (dopt) { 896 if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) {
897 if (ip_options_echo(&dopt->opt, skb)) { 897 kfree(dopt);
898 kfree(dopt); 898 dopt = NULL;
899 dopt = NULL;
900 }
901 } 899 }
902 } 900 }
903 return dopt; 901 return dopt;
@@ -1431,7 +1429,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1431 1429
1432#ifdef CONFIG_SYN_COOKIES 1430#ifdef CONFIG_SYN_COOKIES
1433 if (!th->syn) 1431 if (!th->syn)
1434 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); 1432 sk = cookie_v4_check(sk, skb, &TCP_SKB_CB(skb)->header.h4.opt);
1435#endif 1433#endif
1436 return sk; 1434 return sk;
1437} 1435}
@@ -1636,6 +1634,13 @@ int tcp_v4_rcv(struct sk_buff *skb)
1636 1634
1637 th = tcp_hdr(skb); 1635 th = tcp_hdr(skb);
1638 iph = ip_hdr(skb); 1636 iph = ip_hdr(skb);
1637 /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
1638 * barrier() makes sure compiler wont play fool^Waliasing games.
1639 */
1640 memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1641 sizeof(struct inet_skb_parm));
1642 barrier();
1643
1639 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1644 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1640 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1645 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1641 skb->len - th->doff * 4); 1646 skb->len - th->doff * 4);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f173b1c4f815..a462fb1db896 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -974,6 +974,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
974 974
975 /* Our usage of tstamp should remain private */ 975 /* Our usage of tstamp should remain private */
976 skb->tstamp.tv64 = 0; 976 skb->tstamp.tv64 = 0;
977
978 /* Cleanup our debris for IP stacks */
979 memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
980 sizeof(struct inet6_skb_parm)));
981
977 err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); 982 err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
978 983
979 if (likely(err <= 0)) 984 if (likely(err <= 0))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9400b4326f22..132bac137aed 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1412,6 +1412,13 @@ static int tcp_v6_rcv(struct sk_buff *skb)
1412 1412
1413 th = tcp_hdr(skb); 1413 th = tcp_hdr(skb);
1414 hdr = ipv6_hdr(skb); 1414 hdr = ipv6_hdr(skb);
1415 /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
1416 * barrier() makes sure compiler wont play fool^Waliasing games.
1417 */
1418 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1419 sizeof(struct inet6_skb_parm));
1420 barrier();
1421
1415 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1422 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1416 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1423 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1417 skb->len - th->doff*4); 1424 skb->len - th->doff*4);