diff options
author | Eric Dumazet <edumazet@google.com> | 2014-09-29 01:18:47 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-09-29 12:27:20 -0400 |
commit | b1937227316417aa7568d01e6fa1f272e98fb890 (patch) | |
tree | 93891f7672c803b767de6621c028f45edf242f17 | |
parent | 842abe08aa6f81f1062cf9624e9f6afc117d73e4 (diff) |
net: reorganize sk_buff for faster __copy_skb_header()
With the proliferation of bit fields in sk_buff, __copy_skb_header() became
quite expensive, showing up as the most expensive function in a GSO
workload.
__copy_skb_header() performance is also critical for non-GSO TCP
operations, as it is used from skb_clone().
This patch carefully moves all the fields that were not copied into a
separate zone: cloned, nohdr, fclone, peeked, head_frag, xmit_more.
Then I moved all other fields and all other copied fields into a section
delimited by headers_start[0]/headers_end[0], so that we
can use a single memcpy() call, inlined by the compiler using long
word loads/stores.
I also tried to perform all copies in the natural order of sk_buff,
to help hardware prefetching.
I made sure sk_buff size did not change.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/skbuff.h | 133 | ||||
-rw-r--r-- | net/core/skbuff.c | 80 |
2 files changed, 113 insertions, 100 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8eaa62400fca..b6cced304b26 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h | |||
@@ -527,27 +527,41 @@ struct sk_buff { | |||
527 | char cb[48] __aligned(8); | 527 | char cb[48] __aligned(8); |
528 | 528 | ||
529 | unsigned long _skb_refdst; | 529 | unsigned long _skb_refdst; |
530 | void (*destructor)(struct sk_buff *skb); | ||
530 | #ifdef CONFIG_XFRM | 531 | #ifdef CONFIG_XFRM |
531 | struct sec_path *sp; | 532 | struct sec_path *sp; |
532 | #endif | 533 | #endif |
534 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
535 | struct nf_conntrack *nfct; | ||
536 | #endif | ||
537 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
538 | struct nf_bridge_info *nf_bridge; | ||
539 | #endif | ||
533 | unsigned int len, | 540 | unsigned int len, |
534 | data_len; | 541 | data_len; |
535 | __u16 mac_len, | 542 | __u16 mac_len, |
536 | hdr_len; | 543 | hdr_len; |
537 | union { | 544 | |
538 | __wsum csum; | 545 | /* Following fields are _not_ copied in __copy_skb_header() |
539 | struct { | 546 | * Note that queue_mapping is here mostly to fill a hole. |
540 | __u16 csum_start; | 547 | */ |
541 | __u16 csum_offset; | ||
542 | }; | ||
543 | }; | ||
544 | __u32 priority; | ||
545 | kmemcheck_bitfield_begin(flags1); | 548 | kmemcheck_bitfield_begin(flags1); |
546 | __u8 ignore_df:1, | 549 | __u16 queue_mapping; |
547 | cloned:1, | 550 | __u8 cloned:1, |
548 | ip_summed:2, | ||
549 | nohdr:1, | 551 | nohdr:1, |
550 | nfctinfo:3; | 552 | fclone:2, |
553 | peeked:1, | ||
554 | head_frag:1, | ||
555 | xmit_more:1; | ||
556 | /* one bit hole */ | ||
557 | kmemcheck_bitfield_end(flags1); | ||
558 | |||
559 | |||
560 | |||
561 | /* fields enclosed in headers_start/headers_end are copied | ||
562 | * using a single memcpy() in __copy_skb_header() | ||
563 | */ | ||
564 | __u32 headers_start[0]; | ||
551 | 565 | ||
552 | /* if you move pkt_type around you also must adapt those constants */ | 566 | /* if you move pkt_type around you also must adapt those constants */ |
553 | #ifdef __BIG_ENDIAN_BITFIELD | 567 | #ifdef __BIG_ENDIAN_BITFIELD |
@@ -558,58 +572,53 @@ struct sk_buff { | |||
558 | #define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) | 572 | #define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) |
559 | 573 | ||
560 | __u8 __pkt_type_offset[0]; | 574 | __u8 __pkt_type_offset[0]; |
561 | __u8 pkt_type:3, | 575 | __u8 pkt_type:3; |
562 | fclone:2, | ||
563 | ipvs_property:1, | ||
564 | peeked:1, | ||
565 | nf_trace:1; | ||
566 | kmemcheck_bitfield_end(flags1); | ||
567 | __be16 protocol; | ||
568 | |||
569 | void (*destructor)(struct sk_buff *skb); | ||
570 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
571 | struct nf_conntrack *nfct; | ||
572 | #endif | ||
573 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
574 | struct nf_bridge_info *nf_bridge; | ||
575 | #endif | ||
576 | |||
577 | int skb_iif; | ||
578 | |||
579 | __u32 hash; | ||
580 | |||
581 | __be16 vlan_proto; | ||
582 | __u16 vlan_tci; | ||
583 | |||
584 | #ifdef CONFIG_NET_SCHED | ||
585 | __u16 tc_index; /* traffic control index */ | ||
586 | #ifdef CONFIG_NET_CLS_ACT | ||
587 | __u16 tc_verd; /* traffic control verdict */ | ||
588 | #endif | ||
589 | #endif | ||
590 | |||
591 | __u16 queue_mapping; | ||
592 | kmemcheck_bitfield_begin(flags2); | ||
593 | __u8 xmit_more:1; | ||
594 | #ifdef CONFIG_IPV6_NDISC_NODETYPE | ||
595 | __u8 ndisc_nodetype:2; | ||
596 | #endif | ||
597 | __u8 pfmemalloc:1; | 576 | __u8 pfmemalloc:1; |
577 | __u8 ignore_df:1; | ||
578 | __u8 nfctinfo:3; | ||
579 | |||
580 | __u8 nf_trace:1; | ||
581 | __u8 ip_summed:2; | ||
598 | __u8 ooo_okay:1; | 582 | __u8 ooo_okay:1; |
599 | __u8 l4_hash:1; | 583 | __u8 l4_hash:1; |
600 | __u8 sw_hash:1; | 584 | __u8 sw_hash:1; |
601 | __u8 wifi_acked_valid:1; | 585 | __u8 wifi_acked_valid:1; |
602 | __u8 wifi_acked:1; | 586 | __u8 wifi_acked:1; |
587 | |||
603 | __u8 no_fcs:1; | 588 | __u8 no_fcs:1; |
604 | __u8 head_frag:1; | ||
605 | /* Indicates the inner headers are valid in the skbuff. */ | 589 | /* Indicates the inner headers are valid in the skbuff. */ |
606 | __u8 encapsulation:1; | 590 | __u8 encapsulation:1; |
607 | __u8 encap_hdr_csum:1; | 591 | __u8 encap_hdr_csum:1; |
608 | __u8 csum_valid:1; | 592 | __u8 csum_valid:1; |
609 | __u8 csum_complete_sw:1; | 593 | __u8 csum_complete_sw:1; |
610 | /* 1/3 bit hole (depending on ndisc_nodetype presence) */ | 594 | __u8 csum_level:2; |
611 | kmemcheck_bitfield_end(flags2); | 595 | __u8 csum_bad:1; |
612 | 596 | ||
597 | #ifdef CONFIG_IPV6_NDISC_NODETYPE | ||
598 | __u8 ndisc_nodetype:2; | ||
599 | #endif | ||
600 | __u8 ipvs_property:1; | ||
601 | /* 5 or 7 bit hole */ | ||
602 | |||
603 | #ifdef CONFIG_NET_SCHED | ||
604 | __u16 tc_index; /* traffic control index */ | ||
605 | #ifdef CONFIG_NET_CLS_ACT | ||
606 | __u16 tc_verd; /* traffic control verdict */ | ||
607 | #endif | ||
608 | #endif | ||
609 | |||
610 | union { | ||
611 | __wsum csum; | ||
612 | struct { | ||
613 | __u16 csum_start; | ||
614 | __u16 csum_offset; | ||
615 | }; | ||
616 | }; | ||
617 | __u32 priority; | ||
618 | int skb_iif; | ||
619 | __u32 hash; | ||
620 | __be16 vlan_proto; | ||
621 | __u16 vlan_tci; | ||
613 | #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL | 622 | #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL |
614 | union { | 623 | union { |
615 | unsigned int napi_id; | 624 | unsigned int napi_id; |
@@ -625,19 +634,18 @@ struct sk_buff { | |||
625 | __u32 reserved_tailroom; | 634 | __u32 reserved_tailroom; |
626 | }; | 635 | }; |
627 | 636 | ||
628 | kmemcheck_bitfield_begin(flags3); | ||
629 | __u8 csum_level:2; | ||
630 | __u8 csum_bad:1; | ||
631 | /* 13 bit hole */ | ||
632 | kmemcheck_bitfield_end(flags3); | ||
633 | |||
634 | __be16 inner_protocol; | 637 | __be16 inner_protocol; |
635 | __u16 inner_transport_header; | 638 | __u16 inner_transport_header; |
636 | __u16 inner_network_header; | 639 | __u16 inner_network_header; |
637 | __u16 inner_mac_header; | 640 | __u16 inner_mac_header; |
641 | |||
642 | __be16 protocol; | ||
638 | __u16 transport_header; | 643 | __u16 transport_header; |
639 | __u16 network_header; | 644 | __u16 network_header; |
640 | __u16 mac_header; | 645 | __u16 mac_header; |
646 | |||
647 | __u32 headers_end[0]; | ||
648 | |||
641 | /* These elements must be at the end, see alloc_skb() for details. */ | 649 | /* These elements must be at the end, see alloc_skb() for details. */ |
642 | sk_buff_data_t tail; | 650 | sk_buff_data_t tail; |
643 | sk_buff_data_t end; | 651 | sk_buff_data_t end; |
@@ -3040,19 +3048,22 @@ static inline void nf_reset_trace(struct sk_buff *skb) | |||
3040 | } | 3048 | } |
3041 | 3049 | ||
3042 | /* Note: This doesn't put any conntrack and bridge info in dst. */ | 3050 | /* Note: This doesn't put any conntrack and bridge info in dst. */ |
3043 | static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) | 3051 | static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, |
3052 | bool copy) | ||
3044 | { | 3053 | { |
3045 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 3054 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) |
3046 | dst->nfct = src->nfct; | 3055 | dst->nfct = src->nfct; |
3047 | nf_conntrack_get(src->nfct); | 3056 | nf_conntrack_get(src->nfct); |
3048 | dst->nfctinfo = src->nfctinfo; | 3057 | if (copy) |
3058 | dst->nfctinfo = src->nfctinfo; | ||
3049 | #endif | 3059 | #endif |
3050 | #ifdef CONFIG_BRIDGE_NETFILTER | 3060 | #ifdef CONFIG_BRIDGE_NETFILTER |
3051 | dst->nf_bridge = src->nf_bridge; | 3061 | dst->nf_bridge = src->nf_bridge; |
3052 | nf_bridge_get(src->nf_bridge); | 3062 | nf_bridge_get(src->nf_bridge); |
3053 | #endif | 3063 | #endif |
3054 | #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) | 3064 | #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) |
3055 | dst->nf_trace = src->nf_trace; | 3065 | if (copy) |
3066 | dst->nf_trace = src->nf_trace; | ||
3056 | #endif | 3067 | #endif |
3057 | } | 3068 | } |
3058 | 3069 | ||
@@ -3064,7 +3075,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) | |||
3064 | #ifdef CONFIG_BRIDGE_NETFILTER | 3075 | #ifdef CONFIG_BRIDGE_NETFILTER |
3065 | nf_bridge_put(dst->nf_bridge); | 3076 | nf_bridge_put(dst->nf_bridge); |
3066 | #endif | 3077 | #endif |
3067 | __nf_copy(dst, src); | 3078 | __nf_copy(dst, src, true); |
3068 | } | 3079 | } |
3069 | 3080 | ||
3070 | #ifdef CONFIG_NETWORK_SECMARK | 3081 | #ifdef CONFIG_NETWORK_SECMARK |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d4fdc649112c..4be570a4ab21 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -261,7 +261,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
261 | atomic_t *fclone_ref = (atomic_t *) (child + 1); | 261 | atomic_t *fclone_ref = (atomic_t *) (child + 1); |
262 | 262 | ||
263 | kmemcheck_annotate_bitfield(child, flags1); | 263 | kmemcheck_annotate_bitfield(child, flags1); |
264 | kmemcheck_annotate_bitfield(child, flags2); | ||
265 | skb->fclone = SKB_FCLONE_ORIG; | 264 | skb->fclone = SKB_FCLONE_ORIG; |
266 | atomic_set(fclone_ref, 1); | 265 | atomic_set(fclone_ref, 1); |
267 | 266 | ||
@@ -675,57 +674,61 @@ void consume_skb(struct sk_buff *skb) | |||
675 | } | 674 | } |
676 | EXPORT_SYMBOL(consume_skb); | 675 | EXPORT_SYMBOL(consume_skb); |
677 | 676 | ||
677 | /* Make sure a field is enclosed inside headers_start/headers_end section */ | ||
678 | #define CHECK_SKB_FIELD(field) \ | ||
679 | BUILD_BUG_ON(offsetof(struct sk_buff, field) < \ | ||
680 | offsetof(struct sk_buff, headers_start)); \ | ||
681 | BUILD_BUG_ON(offsetof(struct sk_buff, field) > \ | ||
682 | offsetof(struct sk_buff, headers_end)); \ | ||
683 | |||
678 | static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | 684 | static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) |
679 | { | 685 | { |
680 | new->tstamp = old->tstamp; | 686 | new->tstamp = old->tstamp; |
687 | /* We do not copy old->sk */ | ||
681 | new->dev = old->dev; | 688 | new->dev = old->dev; |
682 | new->transport_header = old->transport_header; | 689 | memcpy(new->cb, old->cb, sizeof(old->cb)); |
683 | new->network_header = old->network_header; | ||
684 | new->mac_header = old->mac_header; | ||
685 | new->inner_protocol = old->inner_protocol; | ||
686 | new->inner_transport_header = old->inner_transport_header; | ||
687 | new->inner_network_header = old->inner_network_header; | ||
688 | new->inner_mac_header = old->inner_mac_header; | ||
689 | skb_dst_copy(new, old); | 690 | skb_dst_copy(new, old); |
690 | skb_copy_hash(new, old); | ||
691 | new->ooo_okay = old->ooo_okay; | ||
692 | new->no_fcs = old->no_fcs; | ||
693 | new->encapsulation = old->encapsulation; | ||
694 | new->encap_hdr_csum = old->encap_hdr_csum; | ||
695 | new->csum_valid = old->csum_valid; | ||
696 | new->csum_complete_sw = old->csum_complete_sw; | ||
697 | #ifdef CONFIG_XFRM | 691 | #ifdef CONFIG_XFRM |
698 | new->sp = secpath_get(old->sp); | 692 | new->sp = secpath_get(old->sp); |
699 | #endif | 693 | #endif |
700 | memcpy(new->cb, old->cb, sizeof(old->cb)); | 694 | __nf_copy(new, old, false); |
701 | new->csum = old->csum; | 695 | |
702 | new->ignore_df = old->ignore_df; | 696 | /* Note : this field could be in headers_start/headers_end section |
703 | new->pkt_type = old->pkt_type; | 697 | * It is not yet because we do not want to have a 16 bit hole |
704 | new->ip_summed = old->ip_summed; | 698 | */ |
705 | skb_copy_queue_mapping(new, old); | 699 | new->queue_mapping = old->queue_mapping; |
706 | new->priority = old->priority; | 700 | |
707 | #if IS_ENABLED(CONFIG_IP_VS) | 701 | memcpy(&new->headers_start, &old->headers_start, |
708 | new->ipvs_property = old->ipvs_property; | 702 | offsetof(struct sk_buff, headers_end) - |
703 | offsetof(struct sk_buff, headers_start)); | ||
704 | CHECK_SKB_FIELD(protocol); | ||
705 | CHECK_SKB_FIELD(csum); | ||
706 | CHECK_SKB_FIELD(hash); | ||
707 | CHECK_SKB_FIELD(priority); | ||
708 | CHECK_SKB_FIELD(skb_iif); | ||
709 | CHECK_SKB_FIELD(vlan_proto); | ||
710 | CHECK_SKB_FIELD(vlan_tci); | ||
711 | CHECK_SKB_FIELD(transport_header); | ||
712 | CHECK_SKB_FIELD(network_header); | ||
713 | CHECK_SKB_FIELD(mac_header); | ||
714 | CHECK_SKB_FIELD(inner_protocol); | ||
715 | CHECK_SKB_FIELD(inner_transport_header); | ||
716 | CHECK_SKB_FIELD(inner_network_header); | ||
717 | CHECK_SKB_FIELD(inner_mac_header); | ||
718 | CHECK_SKB_FIELD(mark); | ||
719 | #ifdef CONFIG_NETWORK_SECMARK | ||
720 | CHECK_SKB_FIELD(secmark); | ||
721 | #endif | ||
722 | #ifdef CONFIG_NET_RX_BUSY_POLL | ||
723 | CHECK_SKB_FIELD(napi_id); | ||
709 | #endif | 724 | #endif |
710 | new->pfmemalloc = old->pfmemalloc; | ||
711 | new->protocol = old->protocol; | ||
712 | new->mark = old->mark; | ||
713 | new->skb_iif = old->skb_iif; | ||
714 | __nf_copy(new, old); | ||
715 | #ifdef CONFIG_NET_SCHED | 725 | #ifdef CONFIG_NET_SCHED |
716 | new->tc_index = old->tc_index; | 726 | CHECK_SKB_FIELD(tc_index); |
717 | #ifdef CONFIG_NET_CLS_ACT | 727 | #ifdef CONFIG_NET_CLS_ACT |
718 | new->tc_verd = old->tc_verd; | 728 | CHECK_SKB_FIELD(tc_verd); |
719 | #endif | 729 | #endif |
720 | #endif | 730 | #endif |
721 | new->vlan_proto = old->vlan_proto; | ||
722 | new->vlan_tci = old->vlan_tci; | ||
723 | |||
724 | skb_copy_secmark(new, old); | ||
725 | 731 | ||
726 | #ifdef CONFIG_NET_RX_BUSY_POLL | ||
727 | new->napi_id = old->napi_id; | ||
728 | #endif | ||
729 | } | 732 | } |
730 | 733 | ||
731 | /* | 734 | /* |
@@ -876,7 +879,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) | |||
876 | return NULL; | 879 | return NULL; |
877 | 880 | ||
878 | kmemcheck_annotate_bitfield(n, flags1); | 881 | kmemcheck_annotate_bitfield(n, flags1); |
879 | kmemcheck_annotate_bitfield(n, flags2); | ||
880 | n->fclone = SKB_FCLONE_UNAVAILABLE; | 882 | n->fclone = SKB_FCLONE_UNAVAILABLE; |
881 | } | 883 | } |
882 | 884 | ||