author     Eric Dumazet <eric.dumazet@gmail.com>   2010-05-05 04:07:37 -0400
committer  David S. Miller <davem@davemloft.net>   2010-05-05 04:07:37 -0400
commit     ec7d2f2cf3a1b76202986519ec4f8ec75b2de232
tree       177c324eb0cf7e687d1bbd10a6add3a7d5979002 /net/core
parent     8753d29fd5daf890004a38c80835e1eb3acda394
net: __alloc_skb() speedup
With the following patch I can reach the maximum rate of my pktgen+udpsink
simulator:
- 'old' machine: dual quad-core E5450 @ 3.00GHz
- 64 UDP rx flows (differing only by destination port)
- RPS enabled, NIC interrupts serviced on cpu0
- RPS dispatched to the 7 other cores (~130,000 IPIs per second)
- SLAB allocator (faster than SLUB for this workload)
- tg3 NIC
- 1,080,000 pps without a single drop at the NIC level
The idea is to add two prefetchw() calls in __alloc_skb(): one to prefetch the
first sk_buff cache line, the second to prefetch the shinfo part.
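
For illustration only (not part of the patch): a minimal user-space sketch of
the same write-intent prefetch pattern, using GCC/Clang's __builtin_prefetch()
in place of the kernel's prefetchw(). The struct and helper names below are
made up.

#include <stddef.h>
#include <stdlib.h>

/* Stand-in for the kernel's prefetchw(): a write-intent prefetch hint.
 * __builtin_prefetch(addr, 1) requests the cache line in anticipation
 * of a write. */
static inline void prefetchw_hint(const void *addr)
{
	__builtin_prefetch(addr, 1);
}

/* Hypothetical descriptor + data buffer, loosely mimicking
 * sk_buff + its shared-info footer. */
struct buf {
	unsigned char *head;
	unsigned char *data;
	unsigned int len;
};

struct buf *buf_alloc(size_t size, size_t footer_size)
{
	struct buf *b = malloc(sizeof(*b));
	if (!b)
		return NULL;
	prefetchw_hint(b);		/* warm the descriptor before writing it */

	unsigned char *d = malloc(size + footer_size);
	if (!d) {
		free(b);
		return NULL;
	}
	prefetchw_hint(d + size);	/* warm the footer (shinfo-like) region */

	b->head = b->data = d;
	b->len = 0;
	return b;
}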
Also use a single memset() to initialize all skb_shared_info fields instead of
clearing them one by one, reducing the instruction count by using long word
moves. All skb_shared_info fields before 'dataref' are cleared in
__alloc_skb().
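
For illustration only: a minimal user-space sketch of the offsetof()-based
clearing pattern described above; 'struct footer' is a made-up stand-in for
skb_shared_info.

#include <stddef.h>
#include <string.h>

/* Made-up stand-in for skb_shared_info: every field that needs zeroing
 * is laid out before 'dataref', so one memset() covers them all. */
struct footer {
	unsigned short nr_frags;
	unsigned short gso_size;
	unsigned short gso_segs;
	unsigned short gso_type;
	unsigned int   frag_id;
	int            dataref;	/* intentionally left out of the memset */
};

static void footer_init(struct footer *f)
{
	/* One bulk clear (long word moves) instead of a store per field. */
	memset(f, 0, offsetof(struct footer, dataref));
	f->dataref = 1;
}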
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
 net/core/skbuff.c | 21 +++++----------------
 1 file changed, 5 insertions(+), 16 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8b9c109166a7..a9b0e1f77806 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -181,12 +181,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
 	if (!skb)
 		goto out;
+	prefetchw(skb);
 
 	size = SKB_DATA_ALIGN(size);
 	data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
 			gfp_mask, node);
 	if (!data)
 		goto nodata;
+	prefetchw(data + size);
 
 	/*
 	 * Only clear those fields we need to clear, not those that we will
@@ -208,15 +210,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	shinfo->nr_frags = 0;
-	shinfo->gso_size = 0;
-	shinfo->gso_segs = 0;
-	shinfo->gso_type = 0;
-	shinfo->ip6_frag_id = 0;
-	shinfo->tx_flags.flags = 0;
-	skb_frag_list_init(skb);
-	memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
 
 	if (fclone) {
 		struct sk_buff *child = skb + 1;
@@ -505,16 +500,10 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
 		return 0;
 
 	skb_release_head_state(skb);
+
 	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	shinfo->nr_frags = 0;
-	shinfo->gso_size = 0;
-	shinfo->gso_segs = 0;
-	shinfo->gso_type = 0;
-	shinfo->ip6_frag_id = 0;
-	shinfo->tx_flags.flags = 0;
-	skb_frag_list_init(skb);
-	memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
 
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->data = skb->head + NET_SKB_PAD;