aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2014-09-29 01:18:47 -0400
committerDavid S. Miller <davem@davemloft.net>2014-09-29 12:27:20 -0400
commitb1937227316417aa7568d01e6fa1f272e98fb890 (patch)
tree93891f7672c803b767de6621c028f45edf242f17
parent842abe08aa6f81f1062cf9624e9f6afc117d73e4 (diff)
net: reorganize sk_buff for faster __copy_skb_header()
With the proliferation of bit fields in sk_buff, __copy_skb_header() became quite expensive, showing up as the most expensive function in a GSO workload. __copy_skb_header() performance is also critical for non-GSO TCP operations, as it is used from skb_clone(). This patch carefully moves all the fields that were not copied into a separate zone: cloned, nohdr, fclone, peeked, head_frag, xmit_more. Then I moved all other fields and all other copied fields into a section delimited by headers_start[0]/headers_end[0], so that we can use a single memcpy() call, inlined by the compiler using long word load/stores. I also tried to make all copies in the natural order of sk_buff, to help hardware prefetching. I made sure sk_buff size did not change. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/skbuff.h133
-rw-r--r--net/core/skbuff.c80
2 files changed, 113 insertions, 100 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8eaa62400fca..b6cced304b26 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -527,27 +527,41 @@ struct sk_buff {
527 char cb[48] __aligned(8); 527 char cb[48] __aligned(8);
528 528
529 unsigned long _skb_refdst; 529 unsigned long _skb_refdst;
530 void (*destructor)(struct sk_buff *skb);
530#ifdef CONFIG_XFRM 531#ifdef CONFIG_XFRM
531 struct sec_path *sp; 532 struct sec_path *sp;
532#endif 533#endif
534#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
535 struct nf_conntrack *nfct;
536#endif
537#ifdef CONFIG_BRIDGE_NETFILTER
538 struct nf_bridge_info *nf_bridge;
539#endif
533 unsigned int len, 540 unsigned int len,
534 data_len; 541 data_len;
535 __u16 mac_len, 542 __u16 mac_len,
536 hdr_len; 543 hdr_len;
537 union { 544
538 __wsum csum; 545 /* Following fields are _not_ copied in __copy_skb_header()
539 struct { 546 * Note that queue_mapping is here mostly to fill a hole.
540 __u16 csum_start; 547 */
541 __u16 csum_offset;
542 };
543 };
544 __u32 priority;
545 kmemcheck_bitfield_begin(flags1); 548 kmemcheck_bitfield_begin(flags1);
546 __u8 ignore_df:1, 549 __u16 queue_mapping;
547 cloned:1, 550 __u8 cloned:1,
548 ip_summed:2,
549 nohdr:1, 551 nohdr:1,
550 nfctinfo:3; 552 fclone:2,
553 peeked:1,
554 head_frag:1,
555 xmit_more:1;
556 /* one bit hole */
557 kmemcheck_bitfield_end(flags1);
558
559
560
561 /* fields enclosed in headers_start/headers_end are copied
562 * using a single memcpy() in __copy_skb_header()
563 */
564 __u32 headers_start[0];
551 565
552/* if you move pkt_type around you also must adapt those constants */ 566/* if you move pkt_type around you also must adapt those constants */
553#ifdef __BIG_ENDIAN_BITFIELD 567#ifdef __BIG_ENDIAN_BITFIELD
@@ -558,58 +572,53 @@ struct sk_buff {
558#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) 572#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset)
559 573
560 __u8 __pkt_type_offset[0]; 574 __u8 __pkt_type_offset[0];
561 __u8 pkt_type:3, 575 __u8 pkt_type:3;
562 fclone:2,
563 ipvs_property:1,
564 peeked:1,
565 nf_trace:1;
566 kmemcheck_bitfield_end(flags1);
567 __be16 protocol;
568
569 void (*destructor)(struct sk_buff *skb);
570#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
571 struct nf_conntrack *nfct;
572#endif
573#ifdef CONFIG_BRIDGE_NETFILTER
574 struct nf_bridge_info *nf_bridge;
575#endif
576
577 int skb_iif;
578
579 __u32 hash;
580
581 __be16 vlan_proto;
582 __u16 vlan_tci;
583
584#ifdef CONFIG_NET_SCHED
585 __u16 tc_index; /* traffic control index */
586#ifdef CONFIG_NET_CLS_ACT
587 __u16 tc_verd; /* traffic control verdict */
588#endif
589#endif
590
591 __u16 queue_mapping;
592 kmemcheck_bitfield_begin(flags2);
593 __u8 xmit_more:1;
594#ifdef CONFIG_IPV6_NDISC_NODETYPE
595 __u8 ndisc_nodetype:2;
596#endif
597 __u8 pfmemalloc:1; 576 __u8 pfmemalloc:1;
577 __u8 ignore_df:1;
578 __u8 nfctinfo:3;
579
580 __u8 nf_trace:1;
581 __u8 ip_summed:2;
598 __u8 ooo_okay:1; 582 __u8 ooo_okay:1;
599 __u8 l4_hash:1; 583 __u8 l4_hash:1;
600 __u8 sw_hash:1; 584 __u8 sw_hash:1;
601 __u8 wifi_acked_valid:1; 585 __u8 wifi_acked_valid:1;
602 __u8 wifi_acked:1; 586 __u8 wifi_acked:1;
587
603 __u8 no_fcs:1; 588 __u8 no_fcs:1;
604 __u8 head_frag:1;
605 /* Indicates the inner headers are valid in the skbuff. */ 589 /* Indicates the inner headers are valid in the skbuff. */
606 __u8 encapsulation:1; 590 __u8 encapsulation:1;
607 __u8 encap_hdr_csum:1; 591 __u8 encap_hdr_csum:1;
608 __u8 csum_valid:1; 592 __u8 csum_valid:1;
609 __u8 csum_complete_sw:1; 593 __u8 csum_complete_sw:1;
610 /* 1/3 bit hole (depending on ndisc_nodetype presence) */ 594 __u8 csum_level:2;
611 kmemcheck_bitfield_end(flags2); 595 __u8 csum_bad:1;
612 596
597#ifdef CONFIG_IPV6_NDISC_NODETYPE
598 __u8 ndisc_nodetype:2;
599#endif
600 __u8 ipvs_property:1;
601 /* 5 or 7 bit hole */
602
603#ifdef CONFIG_NET_SCHED
604 __u16 tc_index; /* traffic control index */
605#ifdef CONFIG_NET_CLS_ACT
606 __u16 tc_verd; /* traffic control verdict */
607#endif
608#endif
609
610 union {
611 __wsum csum;
612 struct {
613 __u16 csum_start;
614 __u16 csum_offset;
615 };
616 };
617 __u32 priority;
618 int skb_iif;
619 __u32 hash;
620 __be16 vlan_proto;
621 __u16 vlan_tci;
613#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL 622#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
614 union { 623 union {
615 unsigned int napi_id; 624 unsigned int napi_id;
@@ -625,19 +634,18 @@ struct sk_buff {
625 __u32 reserved_tailroom; 634 __u32 reserved_tailroom;
626 }; 635 };
627 636
628 kmemcheck_bitfield_begin(flags3);
629 __u8 csum_level:2;
630 __u8 csum_bad:1;
631 /* 13 bit hole */
632 kmemcheck_bitfield_end(flags3);
633
634 __be16 inner_protocol; 637 __be16 inner_protocol;
635 __u16 inner_transport_header; 638 __u16 inner_transport_header;
636 __u16 inner_network_header; 639 __u16 inner_network_header;
637 __u16 inner_mac_header; 640 __u16 inner_mac_header;
641
642 __be16 protocol;
638 __u16 transport_header; 643 __u16 transport_header;
639 __u16 network_header; 644 __u16 network_header;
640 __u16 mac_header; 645 __u16 mac_header;
646
647 __u32 headers_end[0];
648
641 /* These elements must be at the end, see alloc_skb() for details. */ 649 /* These elements must be at the end, see alloc_skb() for details. */
642 sk_buff_data_t tail; 650 sk_buff_data_t tail;
643 sk_buff_data_t end; 651 sk_buff_data_t end;
@@ -3040,19 +3048,22 @@ static inline void nf_reset_trace(struct sk_buff *skb)
3040} 3048}
3041 3049
3042/* Note: This doesn't put any conntrack and bridge info in dst. */ 3050/* Note: This doesn't put any conntrack and bridge info in dst. */
3043static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) 3051static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
3052 bool copy)
3044{ 3053{
3045#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 3054#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
3046 dst->nfct = src->nfct; 3055 dst->nfct = src->nfct;
3047 nf_conntrack_get(src->nfct); 3056 nf_conntrack_get(src->nfct);
3048 dst->nfctinfo = src->nfctinfo; 3057 if (copy)
3058 dst->nfctinfo = src->nfctinfo;
3049#endif 3059#endif
3050#ifdef CONFIG_BRIDGE_NETFILTER 3060#ifdef CONFIG_BRIDGE_NETFILTER
3051 dst->nf_bridge = src->nf_bridge; 3061 dst->nf_bridge = src->nf_bridge;
3052 nf_bridge_get(src->nf_bridge); 3062 nf_bridge_get(src->nf_bridge);
3053#endif 3063#endif
3054#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) 3064#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES)
3055 dst->nf_trace = src->nf_trace; 3065 if (copy)
3066 dst->nf_trace = src->nf_trace;
3056#endif 3067#endif
3057} 3068}
3058 3069
@@ -3064,7 +3075,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
3064#ifdef CONFIG_BRIDGE_NETFILTER 3075#ifdef CONFIG_BRIDGE_NETFILTER
3065 nf_bridge_put(dst->nf_bridge); 3076 nf_bridge_put(dst->nf_bridge);
3066#endif 3077#endif
3067 __nf_copy(dst, src); 3078 __nf_copy(dst, src, true);
3068} 3079}
3069 3080
3070#ifdef CONFIG_NETWORK_SECMARK 3081#ifdef CONFIG_NETWORK_SECMARK
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d4fdc649112c..4be570a4ab21 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -261,7 +261,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
261 atomic_t *fclone_ref = (atomic_t *) (child + 1); 261 atomic_t *fclone_ref = (atomic_t *) (child + 1);
262 262
263 kmemcheck_annotate_bitfield(child, flags1); 263 kmemcheck_annotate_bitfield(child, flags1);
264 kmemcheck_annotate_bitfield(child, flags2);
265 skb->fclone = SKB_FCLONE_ORIG; 264 skb->fclone = SKB_FCLONE_ORIG;
266 atomic_set(fclone_ref, 1); 265 atomic_set(fclone_ref, 1);
267 266
@@ -675,57 +674,61 @@ void consume_skb(struct sk_buff *skb)
675} 674}
676EXPORT_SYMBOL(consume_skb); 675EXPORT_SYMBOL(consume_skb);
677 676
677/* Make sure a field is enclosed inside headers_start/headers_end section */
678#define CHECK_SKB_FIELD(field) \
679 BUILD_BUG_ON(offsetof(struct sk_buff, field) < \
680 offsetof(struct sk_buff, headers_start)); \
681 BUILD_BUG_ON(offsetof(struct sk_buff, field) > \
682 offsetof(struct sk_buff, headers_end)); \
683
678static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) 684static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
679{ 685{
680 new->tstamp = old->tstamp; 686 new->tstamp = old->tstamp;
687 /* We do not copy old->sk */
681 new->dev = old->dev; 688 new->dev = old->dev;
682 new->transport_header = old->transport_header; 689 memcpy(new->cb, old->cb, sizeof(old->cb));
683 new->network_header = old->network_header;
684 new->mac_header = old->mac_header;
685 new->inner_protocol = old->inner_protocol;
686 new->inner_transport_header = old->inner_transport_header;
687 new->inner_network_header = old->inner_network_header;
688 new->inner_mac_header = old->inner_mac_header;
689 skb_dst_copy(new, old); 690 skb_dst_copy(new, old);
690 skb_copy_hash(new, old);
691 new->ooo_okay = old->ooo_okay;
692 new->no_fcs = old->no_fcs;
693 new->encapsulation = old->encapsulation;
694 new->encap_hdr_csum = old->encap_hdr_csum;
695 new->csum_valid = old->csum_valid;
696 new->csum_complete_sw = old->csum_complete_sw;
697#ifdef CONFIG_XFRM 691#ifdef CONFIG_XFRM
698 new->sp = secpath_get(old->sp); 692 new->sp = secpath_get(old->sp);
699#endif 693#endif
700 memcpy(new->cb, old->cb, sizeof(old->cb)); 694 __nf_copy(new, old, false);
701 new->csum = old->csum; 695
702 new->ignore_df = old->ignore_df; 696 /* Note : this field could be in headers_start/headers_end section
703 new->pkt_type = old->pkt_type; 697 * It is not yet because we do not want to have a 16 bit hole
704 new->ip_summed = old->ip_summed; 698 */
705 skb_copy_queue_mapping(new, old); 699 new->queue_mapping = old->queue_mapping;
706 new->priority = old->priority; 700
707#if IS_ENABLED(CONFIG_IP_VS) 701 memcpy(&new->headers_start, &old->headers_start,
708 new->ipvs_property = old->ipvs_property; 702 offsetof(struct sk_buff, headers_end) -
703 offsetof(struct sk_buff, headers_start));
704 CHECK_SKB_FIELD(protocol);
705 CHECK_SKB_FIELD(csum);
706 CHECK_SKB_FIELD(hash);
707 CHECK_SKB_FIELD(priority);
708 CHECK_SKB_FIELD(skb_iif);
709 CHECK_SKB_FIELD(vlan_proto);
710 CHECK_SKB_FIELD(vlan_tci);
711 CHECK_SKB_FIELD(transport_header);
712 CHECK_SKB_FIELD(network_header);
713 CHECK_SKB_FIELD(mac_header);
714 CHECK_SKB_FIELD(inner_protocol);
715 CHECK_SKB_FIELD(inner_transport_header);
716 CHECK_SKB_FIELD(inner_network_header);
717 CHECK_SKB_FIELD(inner_mac_header);
718 CHECK_SKB_FIELD(mark);
719#ifdef CONFIG_NETWORK_SECMARK
720 CHECK_SKB_FIELD(secmark);
721#endif
722#ifdef CONFIG_NET_RX_BUSY_POLL
723 CHECK_SKB_FIELD(napi_id);
709#endif 724#endif
710 new->pfmemalloc = old->pfmemalloc;
711 new->protocol = old->protocol;
712 new->mark = old->mark;
713 new->skb_iif = old->skb_iif;
714 __nf_copy(new, old);
715#ifdef CONFIG_NET_SCHED 725#ifdef CONFIG_NET_SCHED
716 new->tc_index = old->tc_index; 726 CHECK_SKB_FIELD(tc_index);
717#ifdef CONFIG_NET_CLS_ACT 727#ifdef CONFIG_NET_CLS_ACT
718 new->tc_verd = old->tc_verd; 728 CHECK_SKB_FIELD(tc_verd);
719#endif 729#endif
720#endif 730#endif
721 new->vlan_proto = old->vlan_proto;
722 new->vlan_tci = old->vlan_tci;
723
724 skb_copy_secmark(new, old);
725 731
726#ifdef CONFIG_NET_RX_BUSY_POLL
727 new->napi_id = old->napi_id;
728#endif
729} 732}
730 733
731/* 734/*
@@ -876,7 +879,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
876 return NULL; 879 return NULL;
877 880
878 kmemcheck_annotate_bitfield(n, flags1); 881 kmemcheck_annotate_bitfield(n, flags1);
879 kmemcheck_annotate_bitfield(n, flags2);
880 n->fclone = SKB_FCLONE_UNAVAILABLE; 882 n->fclone = SKB_FCLONE_UNAVAILABLE;
881 } 883 }
882 884