aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Eric Dumazet <eric.dumazet@gmail.com> 2010-05-05 04:07:37 -0400
committer: David S. Miller <davem@davemloft.net> 2010-05-05 04:07:37 -0400
commit: ec7d2f2cf3a1b76202986519ec4f8ec75b2de232 (patch)
tree: 177c324eb0cf7e687d1bbd10a6add3a7d5979002
parent: 8753d29fd5daf890004a38c80835e1eb3acda394 (diff)
net: __alloc_skb() speedup
With the following patch I can reach the maximum rate of my pktgen+udpsink simulator:

- 'old' machine: dual quad core E5450 @3.00GHz
- 64 UDP rx flows (which differ only by destination port)
- RPS enabled, NIC interrupts serviced on cpu0
- rps dispatched on 7 other cores (~130.000 IPI per second)
- SLAB allocator (faster than SLUB in this workload)
- tg3 NIC
- 1.080.000 pps without a single drop at NIC level.

The idea is to add two prefetchw() calls in __alloc_skb(): one to prefetch the first sk_buff cache line, the second to prefetch the shinfo part.

It also uses a single memset() to initialize all skb_shared_info fields, instead of setting them one by one, to reduce the number of instructions by using long word moves.

All skb_shared_info fields before 'dataref' are cleared in __alloc_skb().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/skbuff.h | 7
-rw-r--r-- net/core/skbuff.c | 21
2 files changed, 11 insertions, 17 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 746a652b9f6f..88d55395a27c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -187,7 +187,6 @@ union skb_shared_tx {
187 * the end of the header data, ie. at skb->end. 187 * the end of the header data, ie. at skb->end.
188 */ 188 */
189struct skb_shared_info { 189struct skb_shared_info {
190 atomic_t dataref;
191 unsigned short nr_frags; 190 unsigned short nr_frags;
192 unsigned short gso_size; 191 unsigned short gso_size;
193 /* Warning: this field is not always filled in (UFO)! */ 192 /* Warning: this field is not always filled in (UFO)! */
@@ -197,6 +196,12 @@ struct skb_shared_info {
197 union skb_shared_tx tx_flags; 196 union skb_shared_tx tx_flags;
198 struct sk_buff *frag_list; 197 struct sk_buff *frag_list;
199 struct skb_shared_hwtstamps hwtstamps; 198 struct skb_shared_hwtstamps hwtstamps;
199
200 /*
201 * Warning : all fields before dataref are cleared in __alloc_skb()
202 */
203 atomic_t dataref;
204
200 skb_frag_t frags[MAX_SKB_FRAGS]; 205 skb_frag_t frags[MAX_SKB_FRAGS];
201 /* Intermediate layers must ensure that destructor_arg 206 /* Intermediate layers must ensure that destructor_arg
202 * remains valid until skb destructor */ 207 * remains valid until skb destructor */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8b9c109166a7..a9b0e1f77806 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -181,12 +181,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
181 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); 181 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
182 if (!skb) 182 if (!skb)
183 goto out; 183 goto out;
184 prefetchw(skb);
184 185
185 size = SKB_DATA_ALIGN(size); 186 size = SKB_DATA_ALIGN(size);
186 data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), 187 data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
187 gfp_mask, node); 188 gfp_mask, node);
188 if (!data) 189 if (!data)
189 goto nodata; 190 goto nodata;
191 prefetchw(data + size);
190 192
191 /* 193 /*
192 * Only clear those fields we need to clear, not those that we will 194 * Only clear those fields we need to clear, not those that we will
@@ -208,15 +210,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
208 210
209 /* make sure we initialize shinfo sequentially */ 211 /* make sure we initialize shinfo sequentially */
210 shinfo = skb_shinfo(skb); 212 shinfo = skb_shinfo(skb);
213 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
211 atomic_set(&shinfo->dataref, 1); 214 atomic_set(&shinfo->dataref, 1);
212 shinfo->nr_frags = 0;
213 shinfo->gso_size = 0;
214 shinfo->gso_segs = 0;
215 shinfo->gso_type = 0;
216 shinfo->ip6_frag_id = 0;
217 shinfo->tx_flags.flags = 0;
218 skb_frag_list_init(skb);
219 memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
220 215
221 if (fclone) { 216 if (fclone) {
222 struct sk_buff *child = skb + 1; 217 struct sk_buff *child = skb + 1;
@@ -505,16 +500,10 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
505 return 0; 500 return 0;
506 501
507 skb_release_head_state(skb); 502 skb_release_head_state(skb);
503
508 shinfo = skb_shinfo(skb); 504 shinfo = skb_shinfo(skb);
505 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
509 atomic_set(&shinfo->dataref, 1); 506 atomic_set(&shinfo->dataref, 1);
510 shinfo->nr_frags = 0;
511 shinfo->gso_size = 0;
512 shinfo->gso_segs = 0;
513 shinfo->gso_type = 0;
514 shinfo->ip6_frag_id = 0;
515 shinfo->tx_flags.flags = 0;
516 skb_frag_list_init(skb);
517 memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
518 507
519 memset(skb, 0, offsetof(struct sk_buff, tail)); 508 memset(skb, 0, offsetof(struct sk_buff, tail));
520 skb->data = skb->head + NET_SKB_PAD; 509 skb->data = skb->head + NET_SKB_PAD;