author     Eric Dumazet <eric.dumazet@gmail.com>   2010-05-05 04:07:37 -0400
committer  David S. Miller <davem@davemloft.net>   2010-05-05 04:07:37 -0400
commit     ec7d2f2cf3a1b76202986519ec4f8ec75b2de232 (patch)
tree       177c324eb0cf7e687d1bbd10a6add3a7d5979002
parent     8753d29fd5daf890004a38c80835e1eb3acda394 (diff)
net: __alloc_skb() speedup
With the following patch I can reach the maximum rate of my pktgen+udpsink
simulator:
- 'old' machine: dual quad-core E5450 @ 3.00GHz
- 64 UDP rx flows (differing only by destination port)
- RPS enabled, NIC interrupts serviced on cpu0
- RPS dispatched on 7 other cores (~130,000 IPIs per second)
- SLAB allocator (faster than SLUB in this workload)
- tg3 NIC
- 1,080,000 pps without a single drop at NIC level.
The idea is to add two prefetchw() calls in __alloc_skb(): one to prefetch
the first sk_buff cache line, the second to prefetch the shinfo part.
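For reference, prefetchw() (from <linux/prefetch.h>) compiles down to a
write-intent prefetch hint; a rough user-space sketch, assuming GCC/Clang
builtins and a hypothetical helper name, would be:

    /* Rough user-space equivalent of the kernel's prefetchw():
     * ask the CPU to pull in the cache line containing 'addr' in
     * anticipation of a write (second builtin argument = 1), so the
     * line is already held exclusively when the stores land. */
    static inline void prefetchw_like(const void *addr)
    {
            __builtin_prefetch(addr, 1);
    }

In __alloc_skb() the two calls cover the sk_buff itself and the shinfo
area at data + size, both of which are written shortly afterwards.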
Also use a single memset() to initialize all skb_shared_info fields instead
of clearing them one by one, reducing the instruction count by letting the
compiler use long-word moves. All skb_shared_info fields before 'dataref'
are cleared in __alloc_skb().
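A minimal sketch of the same field-grouping trick, using a hypothetical
struct (not the kernel's), looks like this:

    #include <stddef.h>
    #include <string.h>

    /* Hypothetical struct mirroring the skb_shared_info layout rule:
     * every field that must start zeroed sits before the reference
     * count, so one memset() up to offsetof(..., dataref) clears them
     * all with a few long-word stores. */
    struct shinfo_like {
            unsigned short nr_frags;   /* cleared by the memset */
            unsigned short gso_size;   /* cleared by the memset */
            void *frag_list;           /* cleared by the memset */
            int dataref;               /* not cleared; set explicitly */
    };

    static void shinfo_like_init(struct shinfo_like *s)
    {
            memset(s, 0, offsetof(struct shinfo_like, dataref));
            s->dataref = 1;
    }

The cost is a hidden layout invariant, which is why the patch adds a
warning comment just before 'dataref' in the header.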
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  include/linux/skbuff.h |  7
-rw-r--r--  net/core/skbuff.c      | 21
2 files changed, 11 insertions(+), 17 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 746a652b9f6f..88d55395a27c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -187,7 +187,6 @@ union skb_shared_tx {
  * the end of the header data, ie. at skb->end.
  */
 struct skb_shared_info {
-	atomic_t	dataref;
 	unsigned short	nr_frags;
 	unsigned short	gso_size;
 	/* Warning: this field is not always filled in (UFO)! */
@@ -197,6 +196,12 @@ struct skb_shared_info {
 	union skb_shared_tx tx_flags;
 	struct sk_buff	*frag_list;
 	struct skb_shared_hwtstamps hwtstamps;
+
+	/*
+	 * Warning : all fields before dataref are cleared in __alloc_skb()
+	 */
+	atomic_t	dataref;
+
 	skb_frag_t	frags[MAX_SKB_FRAGS];
 	/* Intermediate layers must ensure that destructor_arg
 	 * remains valid until skb destructor */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8b9c109166a7..a9b0e1f77806 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -181,12 +181,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
 	if (!skb)
 		goto out;
+	prefetchw(skb);
 
 	size = SKB_DATA_ALIGN(size);
 	data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
 			gfp_mask, node);
 	if (!data)
 		goto nodata;
+	prefetchw(data + size);
 
 	/*
 	 * Only clear those fields we need to clear, not those that we will
@@ -208,15 +210,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	shinfo->nr_frags = 0;
-	shinfo->gso_size = 0;
-	shinfo->gso_segs = 0;
-	shinfo->gso_type = 0;
-	shinfo->ip6_frag_id = 0;
-	shinfo->tx_flags.flags = 0;
-	skb_frag_list_init(skb);
-	memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
 
 	if (fclone) {
 		struct sk_buff *child = skb + 1;
@@ -505,16 +500,10 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
 		return 0;
 
 	skb_release_head_state(skb);
+
 	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	shinfo->nr_frags = 0;
-	shinfo->gso_size = 0;
-	shinfo->gso_segs = 0;
-	shinfo->gso_type = 0;
-	shinfo->ip6_frag_id = 0;
-	shinfo->tx_flags.flags = 0;
-	skb_frag_list_init(skb);
-	memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
 
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->data = skb->head + NET_SKB_PAD;