From ccb7c7732e2ceb4e81a7806faf1670be9681ccd2 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Tue, 20 Apr 2010 22:39:53 -0700 Subject: net: Remove two unnecessary exports (skbuff). There is no need to export skb_under_panic() and skb_over_panic() in skbuff.c, since these methods are used only in skbuff.c ; this patch removes these two exports. It also marks these functions as 'static' and removeS the extern declarations of them from include/linux/skbuff.h Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 38501d20650c..82f5116a89e4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -470,10 +470,6 @@ extern int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); extern int skb_pad(struct sk_buff *skb, int pad); #define dev_kfree_skb(a) consume_skb(a) -extern void skb_over_panic(struct sk_buff *skb, int len, - void *here); -extern void skb_under_panic(struct sk_buff *skb, int len, - void *here); extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, int getfrag(void *from, char *to, int offset, -- cgit v1.2.2 From 47d29646a2c1c147d8a7598aeac2c87dd71ed638 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 2 May 2010 02:21:44 -0700 Subject: net: Inline skb_pull() in eth_type_trans(). In commit 6be8ac2f ("[NET]: uninline skb_pull, de-bloats a lot") we uninlined skb_pull. But in some critical paths it makes sense to inline this thing and it helps performance significantly. Create an skb_pull_inline() so that we can do this in a way that serves also as annotation. Based upon a patch by Eric Dumazet. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 82f5116a89e4..746a652b9f6f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1128,6 +1128,11 @@ static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len) return skb->data += len; } +static inline unsigned char *skb_pull_inline(struct sk_buff *skb, unsigned int len) +{ + return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); +} + extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta); static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len) -- cgit v1.2.2 From ec7d2f2cf3a1b76202986519ec4f8ec75b2de232 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 May 2010 01:07:37 -0700 Subject: net: __alloc_skb() speedup With following patch I can reach maximum rate of my pktgen+udpsink simulator : - 'old' machine : dual quad core E5450 @3.00GHz - 64 UDP rx flows (only differ by destination port) - RPS enabled, NIC interrupts serviced on cpu0 - rps dispatched on 7 other cores. (~130.000 IPI per second) - SLAB allocator (faster than SLUB in this workload) - tg3 NIC - 1.080.000 pps without a single drop at NIC level. Idea is to add two prefetchw() calls in __alloc_skb(), one to prefetch first sk_buff cache line, the second to prefetch the shinfo part. Also using one memset() to initialize all skb_shared_info fields instead of one by one to reduce number of instructions, using long word moves. All skb_shared_info fields before 'dataref' are cleared in __alloc_skb(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 746a652b9f6f..88d55395a27c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -187,7 +187,6 @@ union skb_shared_tx { * the end of the header data, ie. at skb->end. */ struct skb_shared_info { - atomic_t dataref; unsigned short nr_frags; unsigned short gso_size; /* Warning: this field is not always filled in (UFO)! */ @@ -197,6 +196,12 @@ struct skb_shared_info { union skb_shared_tx tx_flags; struct sk_buff *frag_list; struct skb_shared_hwtstamps hwtstamps; + + /* + * Warning : all fields before dataref are cleared in __alloc_skb() + */ + atomic_t dataref; + skb_frag_t frags[MAX_SKB_FRAGS]; /* Intermediate layers must ensure that destructor_arg * remains valid until skb destructor */ -- cgit v1.2.2 From 18e8c134f4e984e6639e62846345192816f06d5c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 May 2010 21:58:51 -0700 Subject: net: Increase NET_SKB_PAD to 64 bytes eth_type_trans() & get_rps_cpus() currently need two 64bytes cache lines in packet to compute rxhash. Increasing NET_SKB_PAD from 32 to 64 reduces the need to one cache line only, and makes RPS faster. NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 88d55395a27c..c9525bce80f6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1361,9 +1361,12 @@ static inline int skb_network_offset(const struct sk_buff *skb) * * Various parts of the networking layer expect at least 32 bytes of * headroom, you should not reduce this. + * With RPS, we raised NET_SKB_PAD to 64 so that get_rps_cpus() fetches span + * a 64 bytes aligned block to fit modern (>= 64 bytes) cache line sizes + * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8) */ #ifndef NET_SKB_PAD -#define NET_SKB_PAD 32 +#define NET_SKB_PAD 64 #endif extern int ___pskb_trim(struct sk_buff *skb, unsigned int len); -- cgit v1.2.2