aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2011-11-14 01:03:34 -0500
committerDavid S. Miller <davem@davemloft.net>2011-11-14 14:13:30 -0500
commitb2b5ce9d1ccf1c45f8ac68e5d901112ab76ba199 (patch)
tree60839b87de74c617f417eaccbbac98c770c9fc90
parentc3e072f8a6c5625028531c40ec65f7e301531be2 (diff)
net: introduce build_skb()
One of the things we discussed during the netdev 2011 conference was the idea to change some network drivers to allocate/populate their skb at RX completion time, right before feeding the skb to the network stack. In the old days, we allocated skbs when populating the RX ring. This means bringing into cpu cache sk_buff and skb_shared_info cache lines (since we clear/initialize them), then 'queue' skb->data to the NIC. By the time the NIC fills a frame in the skb->data buffer and the host can process it, the cpu probably threw away the cache lines from its caches, because a lot of things happened between the allocation and final use. So the deal would be to allocate only the data buffer for the NIC to populate its RX ring buffer. And use build_skb() at RX completion to attach a data buffer (now filled with an ethernet frame) to a new skb, initialize the skb_shared_info portion, and give the hot skb to the network stack. build_skb() is the function to allocate an skb, caller providing the data buffer that should be attached to it. Drivers are expected to call skb_reserve() right after build_skb() to adjust skb->data to the Ethernet frame (usually skipping NET_SKB_PAD and NET_IP_ALIGN, but some drivers might add a hardware provided alignment). Data provided to build_skb() MUST have been allocated by a prior kmalloc() call, with enough room to add SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) bytes at the end of the data without corrupting the incoming frame. data = kmalloc(NET_SKB_PAD + NET_IP_ALIGN + 1536 + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), GFP_ATOMIC); ... skb = build_skb(data); if (!skb) { recycle_data(data); } else { skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); ... 
} Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> CC: Eilon Greenstein <eilong@broadcom.com> CC: Ben Hutchings <bhutchings@solarflare.com> CC: Tom Herbert <therbert@google.com> CC: Jamal Hadi Salim <hadi@mojatatu.com> CC: Stephen Hemminger <shemminger@vyatta.com> CC: Thomas Graf <tgraf@infradead.org> CC: Herbert Xu <herbert@gondor.apana.org.au> CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/skbuff.h1
-rw-r--r--net/core/skbuff.c49
2 files changed, 50 insertions, 0 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index fe864885c1ed..abad8a0941e8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -540,6 +540,7 @@ extern void consume_skb(struct sk_buff *skb);
540extern void __kfree_skb(struct sk_buff *skb); 540extern void __kfree_skb(struct sk_buff *skb);
541extern struct sk_buff *__alloc_skb(unsigned int size, 541extern struct sk_buff *__alloc_skb(unsigned int size,
542 gfp_t priority, int fclone, int node); 542 gfp_t priority, int fclone, int node);
543extern struct sk_buff *build_skb(void *data);
543static inline struct sk_buff *alloc_skb(unsigned int size, 544static inline struct sk_buff *alloc_skb(unsigned int size,
544 gfp_t priority) 545 gfp_t priority)
545{ 546{
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 18a3cebb753d..8d2c5b32f172 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -245,6 +245,55 @@ nodata:
245EXPORT_SYMBOL(__alloc_skb); 245EXPORT_SYMBOL(__alloc_skb);
246 246
247/** 247/**
248 * build_skb - build a network buffer
249 * @data: data buffer provided by caller
250 *
251 * Allocate a new &sk_buff. Caller provides space holding head and
252 * skb_shared_info. @data must have been allocated by kmalloc().
253 * The return is the new skb buffer.
254 * On a failure the return is %NULL, and @data is not freed.
255 * Notes :
256 * Before IO, driver allocates only the data buffer where the NIC puts the incoming frame
257 * Driver should add room at head (NET_SKB_PAD) and
258 * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
259 * After IO, driver calls build_skb(), to allocate sk_buff and populate it
260 * before giving packet to stack.
261 * RX rings only contain data buffers, not full skbs.
262 */
263struct sk_buff *build_skb(void *data)
264{
265 struct skb_shared_info *shinfo;
266 struct sk_buff *skb;
267 unsigned int size; /* bytes of @data usable for packet data */
268
269 skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC); /* only the sk_buff head is allocated here */
270 if (!skb)
271 return NULL; /* @data has not been touched at this point */
272
273 size = ksize(data) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); /* ksize() result minus the aligned shinfo area; assumes ksize() reports the usable size of @data - confirm */
274
275 memset(skb, 0, offsetof(struct sk_buff, tail)); /* clears only the fields laid out before 'tail' */
276 skb->truesize = SKB_TRUESIZE(size);
277 atomic_set(&skb->users, 1);
278 skb->head = data; /* head and data both start at the beginning of the caller's buffer */
279 skb->data = data;
280 skb_reset_tail_pointer(skb);
281 skb->end = skb->tail + size; /* end is placed 'size' past tail, i.e. before the reserved shinfo area */
282#ifdef NET_SKBUFF_DATA_USES_OFFSET
283 skb->mac_header = ~0U; /* NOTE(review): presumably a "not set" sentinel for offset-based headers - confirm */
284#endif
285
286 /* make sure we initialize shinfo sequentially */
287 shinfo = skb_shinfo(skb);
288 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); /* zero every field laid out before 'dataref' */
289 atomic_set(&shinfo->dataref, 1);
290 kmemcheck_annotate_variable(shinfo->destructor_arg);
291
292 return skb;
293}
294EXPORT_SYMBOL(build_skb);
295
296/**
248 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device 297 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
249 * @dev: network device to receive on 298 * @dev: network device to receive on
250 * @length: length to allocate 299 * @length: length to allocate