From bf1ac5ca6f0dede02635588704d216da474b4baa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Apr 2012 23:19:25 +0000 Subject: nf_bridge: remove holes in struct nf_bridge_info Put use & mask on same location to avoid two holes on 64bit arches Signed-off-by: Eric Dumazet Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 775292a66fa4..f25795ca6753 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -117,11 +117,11 @@ struct nf_conntrack { #ifdef CONFIG_BRIDGE_NETFILTER struct nf_bridge_info { - atomic_t use; - struct net_device *physindev; - struct net_device *physoutdev; - unsigned int mask; - unsigned long data[32 / sizeof(unsigned long)]; + atomic_t use; + unsigned int mask; + struct net_device *physindev; + struct net_device *physoutdev; + unsigned long data[32 / sizeof(unsigned long)]; }; #endif -- cgit v1.2.2 From 38ba0a65faf451dd46c7860b4fade84c0b8e444f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Apr 2012 17:48:27 +0000 Subject: net: skb_can_coalesce returns a boolean Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f25795ca6753..4a656b51825e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1963,8 +1963,8 @@ static inline int skb_add_data(struct sk_buff *skb, return -EFAULT; } -static inline int skb_can_coalesce(struct sk_buff *skb, int i, - const struct page *page, int off) +static inline bool skb_can_coalesce(struct sk_buff *skb, int i, + const struct page *page, int off) { if (i) { const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; @@ -1972,7 +1972,7 @@ static inline int skb_can_coalesce(struct sk_buff *skb, int i, return page == skb_frag_page(frag) && off == frag->page_offset + skb_frag_size(frag); } - return 0; + return false; } static inline int __skb_linearize(struct sk_buff *skb) -- cgit v1.2.2 From d3836f21b0af5513ef55701dd3f50b8c42e44c7a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Apr 2012 00:33:38 +0000 Subject: net: allow skb->head to be a page fragment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit skb->head is currently allocated from kmalloc(). This is convenient but has the drawback the data cannot be converted to a page fragment if needed. We have three spots were it hurts : 1) GRO aggregation When a linear skb must be appended to another skb, GRO uses the frag_list fallback, very inefficient since we keep all struct sk_buff around. So drivers enabling GRO but delivering linear skbs to network stack aren't enabling full GRO power. 2) splice(socket -> pipe). We must copy the linear part to a page fragment. This kind of defeats splice() purpose (zero copy claim) 3) TCP coalescing. Recently introduced, this permits to group several contiguous segments into a single skb. This shortens queue lengths and save kernel memory, and greatly reduce probabilities of TCP collapses. This coalescing doesnt work on linear skbs (or we would need to copy data, this would be too slow) Given all these issues, the following patch introduces the possibility of having skb->head be a fragment in itself. We use a new skb flag, skb->head_frag to carry this information. build_skb() is changed to accept a frag_size argument. Drivers willing to provide a page fragment instead of kmalloc() data will set a non zero value, set to the fragment size. Then, on situations we need to convert the skb head to a frag in itself, we can check if skb->head_frag is set and avoid the copies or various fallbacks we have. This means drivers currently using frags could be updated to avoid the current skb->head allocation and reduce their memory footprint (aka skb truesize). (thats 512 or 1024 bytes saved per skb). This also makes bpf/netfilter faster since the 'first frag' will be part of skb linear part, no need to copy data. Signed-off-by: Eric Dumazet Cc: Ilpo Järvinen Cc: Herbert Xu Cc: Maciej Żenczykowski Cc: Neal Cardwell Cc: Tom Herbert Cc: Jeff Kirsher Cc: Ben Hutchings Cc: Matt Carlson Cc: Michael Chan Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4a656b51825e..9d28a22a8554 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -470,7 +470,8 @@ struct sk_buff { __u8 wifi_acked_valid:1; __u8 wifi_acked:1; __u8 no_fcs:1; - /* 9/11 bit hole (depending on ndisc_nodetype presence) */ + __u8 head_frag:1; + /* 8/10 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); #ifdef CONFIG_NET_DMA @@ -562,7 +563,7 @@ extern void consume_skb(struct sk_buff *skb); extern void __kfree_skb(struct sk_buff *skb); extern struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int fclone, int node); -extern struct sk_buff *build_skb(void *data); +extern struct sk_buff *build_skb(void *data, unsigned int frag_size); static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) { -- cgit v1.2.2 From d7e8883cfcf4851afe74fb380cc62b7fa9cf66ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Apr 2012 08:10:34 +0000 Subject: net: make GRO aware of skb->head_frag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GRO can check if skb to be merged has its skb->head mapped to a page fragment, instead of a kmalloc() area. We 'upgrade' skb->head as a fragment in itself This avoids the frag_list fallback, and permits to build true GRO skb (one sk_buff and up to 16 fragments), using less memory. This reduces number of cache misses when user makes its copy, since a single sk_buff is fetched. This is a followup of patch "net: allow skb->head to be a page fragment" Signed-off-by: Eric Dumazet Cc: Ilpo Järvinen Cc: Herbert Xu Cc: Maciej Żenczykowski Cc: Neal Cardwell Cc: Tom Herbert Cc: Jeff Kirsher Cc: Ben Hutchings Cc: Matt Carlson Cc: Michael Chan Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9d28a22a8554..2c75e98953b2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -561,6 +561,7 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb) extern void kfree_skb(struct sk_buff *skb); extern void consume_skb(struct sk_buff *skb); extern void __kfree_skb(struct sk_buff *skb); +extern struct kmem_cache *skbuff_head_cache; extern struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int fclone, int node); extern struct sk_buff *build_skb(void *data, unsigned int frag_size); -- cgit v1.2.2 From 18d0700024d68a075c507b845d85eda2abb5aee7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Apr 2012 16:31:46 +0000 Subject: net: skb_peek()/skb_peek_tail() cleanups remove useless casts and rename variables for less confusion. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2c75e98953b2..988fc49667b1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -883,10 +883,11 @@ static inline struct sk_buff *skb_unshare(struct sk_buff *skb, */ static inline struct sk_buff *skb_peek(const struct sk_buff_head *list_) { - struct sk_buff *list = ((const struct sk_buff *)list_)->next; - if (list == (struct sk_buff *)list_) - list = NULL; - return list; + struct sk_buff *skb = list_->next; + + if (skb == (struct sk_buff *)list_) + skb = NULL; + return skb; } /** @@ -902,6 +903,7 @@ static inline struct sk_buff *skb_peek_next(struct sk_buff *skb, const struct sk_buff_head *list_) { struct sk_buff *next = skb->next; + if (next == (struct sk_buff *)list_) next = NULL; return next; @@ -922,10 +924,12 @@ static inline struct sk_buff *skb_peek_next(struct sk_buff *skb, */ static inline struct sk_buff *skb_peek_tail(const struct sk_buff_head *list_) { - struct sk_buff *list = ((const struct sk_buff *)list_)->prev; - if (list == (struct sk_buff *)list_) - list = NULL; - return list; + struct sk_buff *skb = list_->prev; + + if (skb == (struct sk_buff *)list_) + skb = NULL; + return skb; + } /** -- cgit v1.2.2 From 3a7c1ee4ab89f9250b8f82656a7be0ae14aa3691 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 3 May 2012 01:09:42 +0000 Subject: skb: Add skb_head_is_locked helper function This patch adds support for a skb_head_is_locked helper function. It is meant to be used any time we are considering transferring the head from skb->head to a paged frag. If the head is locked it means we cannot remove the head from the skb so it must be copied or we must take the skb as a whole. Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 988fc49667b1..37f539129d89 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2566,5 +2566,19 @@ static inline bool skb_is_recycleable(const struct sk_buff *skb, int skb_size) return true; } + +/** + * skb_head_is_locked - Determine if the skb->head is locked down + * @skb: skb to check + * + * The head on skbs build around a head frag can be removed if they are + * not cloned. This function returns true if the skb head is locked down + * due to either being allocated via kmalloc, or by being a clone with + * multiple references to the head. + */ +static inline bool skb_head_is_locked(const struct sk_buff *skb) +{ + return !skb->head_frag || skb_cloned(skb); +} #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit v1.2.2 From ec47ea82477404631d49b8e568c71826c9b663ac Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 4 May 2012 14:26:56 +0000 Subject: skb: Add inline helper for getting the skb end offset from head With the recent changes for how we compute the skb truesize it occurs to me we are probably going to have a lot of calls to skb_end_pointer - skb->head. Instead of running all over the place doing that it would make more sense to just make it a separate inline skb_end_offset(skb) that way we can return the correct value without having gcc having to do all the optimization to cancel out skb->head - skb->head. Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 37f539129d89..91ad5e227d1d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -645,11 +645,21 @@ static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { return skb->head + skb->end; } + +static inline unsigned int skb_end_offset(const struct sk_buff *skb) +{ + return skb->end; +} #else static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { return skb->end; } + +static inline unsigned int skb_end_offset(const struct sk_buff *skb) +{ + return skb->end - skb->head; +} #endif /* Internal */ @@ -2558,7 +2568,7 @@ static inline bool skb_is_recycleable(const struct sk_buff *skb, int skb_size) return false; skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); - if (skb_end_pointer(skb) - skb->head < skb_size) + if (skb_end_offset(skb) < skb_size) return false; if (skb_shared(skb) || skb_cloned(skb)) -- cgit v1.2.2 From 6f532612cc2410a5079ea0f83e7a5011adfbf70d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 May 2012 05:12:12 +0000 Subject: net: introduce netdev_alloc_frag() Fix two issues introduced in commit a1c7fff7e18f5 ( net: netdev_alloc_skb() use build_skb() ) - Must be IRQ safe (non NAPI drivers can use it) - Must not leak the frag if build_skb() fails to allocate sk_buff This patch introduces netdev_alloc_frag() for drivers willing to use build_skb() instead of __netdev_alloc_skb() variants. Factorize code so that : __dev_alloc_skb() is a wrapper around __netdev_alloc_skb(), and dev_alloc_skb() a wrapper around netdev_alloc_skb() Use __GFP_COLD flag. Almost all network drivers now benefit from skb->head_frag infrastructure. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bb47314c7179..fe37c21d3a60 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1680,31 +1680,11 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } -/** - * __dev_alloc_skb - allocate an skbuff for receiving - * @length: length to allocate - * @gfp_mask: get_free_pages mask, passed to alloc_skb - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned if there is no free memory. - */ -static inline struct sk_buff *__dev_alloc_skb(unsigned int length, - gfp_t gfp_mask) -{ - struct sk_buff *skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); - if (likely(skb)) - skb_reserve(skb, NET_SKB_PAD); - return skb; -} - -extern struct sk_buff *dev_alloc_skb(unsigned int length); +extern void *netdev_alloc_frag(unsigned int fragsz); extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev, - unsigned int length, gfp_t gfp_mask); + unsigned int length, + gfp_t gfp_mask); /** * netdev_alloc_skb - allocate an skbuff for rx on a specific device @@ -1720,11 +1700,25 @@ extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev, * allocates memory it can be called from an interrupt. */ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, - unsigned int length) + unsigned int length) { return __netdev_alloc_skb(dev, length, GFP_ATOMIC); } +/* legacy helper around __netdev_alloc_skb() */ +static inline struct sk_buff *__dev_alloc_skb(unsigned int length, + gfp_t gfp_mask) +{ + return __netdev_alloc_skb(NULL, length, gfp_mask); +} + +/* legacy helper around netdev_alloc_skb() */ +static inline struct sk_buff *dev_alloc_skb(unsigned int length) +{ + return netdev_alloc_skb(NULL, length); +} + + static inline struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, unsigned int length, gfp_t gfp) { -- cgit v1.2.2 From bad43ca8325f493dcaa0896c2f036276af059c7e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 19 May 2012 03:02:02 +0000 Subject: net: introduce skb_try_coalesce() Move tcp_try_coalesce() protocol independent part to skb_try_coalesce(). skb_try_coalesce() can be used in IPv4 defrag and IPv6 reassembly, to build optimized skbs (less sk_buff, and possibly less 'headers') skb_try_coalesce() is zero copy, unless the copy can fit in destination header (its a rare case) kfree_skb_partial() is also moved to net/core/skbuff.c and exported, because IPv6 will need it in patch (ipv6: use skb coalescing in reassembly). Signed-off-by: Eric Dumazet Cc: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fe37c21d3a60..0e501714d47f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -562,6 +562,11 @@ extern void kfree_skb(struct sk_buff *skb); extern void consume_skb(struct sk_buff *skb); extern void __kfree_skb(struct sk_buff *skb); extern struct kmem_cache *skbuff_head_cache; + +extern void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); +extern bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, + bool *fragstolen, int *delta_truesize); + extern struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int fclone, int node); extern struct sk_buff *build_skb(void *data, unsigned int frag_size); -- cgit v1.2.2 From 617c8c11236716dcbda877e764b7bf37c6fd8063 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 29 May 2012 03:35:08 +0000 Subject: skb: avoid unnecessary reallocations in __skb_cow At the beginning of __skb_cow, headroom gets set to a minimum of NET_SKB_PAD. This causes unnecessary reallocations if the buffer was not cloned and the headroom is just below NET_SKB_PAD, but still more than the amount requested by the caller. This was showing up frequently in my tests on VLAN tx, where vlan_insert_tag calls skb_cow_head(skb, VLAN_HLEN). Locally generated packets should have enough headroom, and for forward paths, we already have NET_SKB_PAD bytes of headroom, so we don't need to add any extra space here. Signed-off-by: Felix Fietkau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0e501714d47f..b534a1be540a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1896,8 +1896,6 @@ static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom, { int delta = 0; - if (headroom < NET_SKB_PAD) - headroom = NET_SKB_PAD; if (headroom > skb_headroom(skb)) delta = headroom - skb_headroom(skb); -- cgit v1.2.2