about summary refs log tree commit diff stats
path: root/net/core
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2010-05-05 04:07:37 -0400
committerDavid S. Miller <davem@davemloft.net>2010-05-05 04:07:37 -0400
commitec7d2f2cf3a1b76202986519ec4f8ec75b2de232 (patch)
tree177c324eb0cf7e687d1bbd10a6add3a7d5979002 /net/core
parent8753d29fd5daf890004a38c80835e1eb3acda394 (diff)
net: __alloc_skb() speedup
With following patch I can reach maximum rate of my pktgen+udpsink simulator : - 'old' machine : dual quad core E5450 @3.00GHz - 64 UDP rx flows (only differ by destination port) - RPS enabled, NIC interrupts serviced on cpu0 - rps dispatched on 7 other cores. (~130.000 IPI per second) - SLAB allocator (faster than SLUB in this workload) - tg3 NIC - 1.080.000 pps without a single drop at NIC level. Idea is to add two prefetchw() calls in __alloc_skb(), one to prefetch first sk_buff cache line, the second to prefetch the shinfo part. Also using one memset() to initialize all skb_shared_info fields instead of one by one to reduce number of instructions, using long word moves. All skb_shared_info fields before 'dataref' are cleared in __alloc_skb(). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r--net/core/skbuff.c21
1 file changed, 5 insertions(+), 16 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8b9c109166a7..a9b0e1f77806 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -181,12 +181,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
 	if (!skb)
 		goto out;
+	prefetchw(skb);
 
 	size = SKB_DATA_ALIGN(size);
 	data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
 			gfp_mask, node);
 	if (!data)
 		goto nodata;
+	prefetchw(data + size);
 
 	/*
 	 * Only clear those fields we need to clear, not those that we will
@@ -208,15 +210,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	shinfo->nr_frags  = 0;
-	shinfo->gso_size = 0;
-	shinfo->gso_segs = 0;
-	shinfo->gso_type = 0;
-	shinfo->ip6_frag_id = 0;
-	shinfo->tx_flags.flags = 0;
-	skb_frag_list_init(skb);
-	memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
 
 	if (fclone) {
 		struct sk_buff *child = skb + 1;
@@ -505,16 +500,10 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
 		return 0;
 
 	skb_release_head_state(skb);
+
 	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	shinfo->nr_frags  = 0;
-	shinfo->gso_size = 0;
-	shinfo->gso_segs = 0;
-	shinfo->gso_type = 0;
-	shinfo->ip6_frag_id = 0;
-	shinfo->tx_flags.flags = 0;
-	skb_frag_list_init(skb);
-	memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
 
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->data = skb->head + NET_SKB_PAD;