author		David S. Miller <davem@davemloft.net>	2014-12-10 13:32:02 -0500
committer	David S. Miller <davem@davemloft.net>	2014-12-10 13:32:02 -0500
commit		7dbea3e8662eb491e9da109cf8ffc372e160fe9a (patch)
tree		e69f89b43fa65a1f4279bc6aa7dc068c0817c208 /net
parent		6e5f59aacbf9527dfe425541c78cb8c56623e7eb (diff)
parent		45abfb1069e4c365f6c1e2fc97c5927272725bfa (diff)
Merge branch 'napi_page_frags'
Alexander Duyck says:

====================
net: Alloc NAPI page frags from their own pool

This patch series implements a means of allocating page fragments without
the need for the local_irq_save/restore in __netdev_alloc_frag.  By doing
this I am able to decrease packet processing time by 11ns per packet in my
test environment.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
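For readers who want to see the carve-down fragment scheme the series builds on in isolation, the sketch below is a minimal userspace model of the logic __alloc_page_frag implements: a cached buffer is handed out from the top downward and refilled once the offset would go negative. The names frag_cache, frag_alloc and FRAG_PAGE_SIZE are invented for illustration; the real code in the diff below additionally uses per-CPU caches, atomic page-refcount biasing and NUMA-aware page allocation.

/* Minimal, single-threaded userspace model of the carve-down fragment
 * allocator behind the NAPI cache.  Purely illustrative: no per-CPU
 * state, no atomic page refcounts, no GFP flags.
 */
#include <stdio.h>
#include <stdlib.h>

#define FRAG_PAGE_SIZE 32768u	/* stands in for PAGE_SIZE << order */

struct frag_cache {
	unsigned char *page;		/* current backing buffer */
	unsigned int size;		/* usable bytes in the buffer */
	unsigned int offset;		/* carve-down position, counts toward 0 */
	unsigned int pagecnt_bias;	/* models the page->_count bias */
};

static void *frag_alloc(struct frag_cache *nc, unsigned int fragsz)
{
	int offset;

	if (fragsz > FRAG_PAGE_SIZE)	/* callers keep fragments <= one buffer */
		return NULL;

	if (!nc->page) {
refill:
		nc->page = malloc(FRAG_PAGE_SIZE);
		if (!nc->page)
			return NULL;
		nc->size = FRAG_PAGE_SIZE;
		/* reset bias and offset to the top of the new buffer */
		nc->pagecnt_bias = nc->size;
		nc->offset = nc->size;
	}

	offset = (int)nc->offset - (int)fragsz;
	if (offset < 0) {
		/* Buffer exhausted.  The kernel recycles the page here when
		 * the refcount shows every fragment was freed; the model
		 * simply drops it and grabs a fresh one.
		 */
		free(nc->page);
		nc->page = NULL;
		goto refill;
	}

	nc->pagecnt_bias--;	/* one more fragment handed out */
	nc->offset = offset;	/* next fragment lands just below this one */
	return nc->page + offset;
}

int main(void)
{
	struct frag_cache cache = { 0 };
	unsigned char *a = frag_alloc(&cache, 256);
	unsigned char *b = frag_alloc(&cache, 512);

	if (!a || !b)
		return 1;
	printf("frag a at offset %td, frag b at offset %td\n",
	       a - cache.page, b - cache.page);
	free(cache.page);
	return 0;
}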
Diffstat (limited to 'net')
-rw-r--r--	net/core/dev.c    |   2
-rw-r--r--	net/core/skbuff.c | 191
2 files changed, 145 insertions(+), 48 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 3f191da383f6..80f798da3d9f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4172,7 +4172,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 	struct sk_buff *skb = napi->skb;
 
 	if (!skb) {
-		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
+		skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
 		napi->skb = skb;
 	}
 	return skb;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7a338fb55cc4..ae13ef6b3ea7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -336,59 +336,85 @@ struct netdev_alloc_cache {
 	unsigned int pagecnt_bias;
 };
 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
+static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);
 
-static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
+				       gfp_t gfp_mask)
 {
-	struct netdev_alloc_cache *nc;
-	void *data = NULL;
-	int order;
-	unsigned long flags;
+	const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER;
+	struct page *page = NULL;
+	gfp_t gfp = gfp_mask;
+
+	if (order) {
+		gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
+		page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
+		nc->frag.size = PAGE_SIZE << (page ? order : 0);
+	}
 
-	local_irq_save(flags);
-	nc = this_cpu_ptr(&netdev_alloc_cache);
-	if (unlikely(!nc->frag.page)) {
+	if (unlikely(!page))
+		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+
+	nc->frag.page = page;
+
+	return page;
+}
+
+static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache,
+			       unsigned int fragsz, gfp_t gfp_mask)
+{
+	struct netdev_alloc_cache *nc = this_cpu_ptr(cache);
+	struct page *page = nc->frag.page;
+	unsigned int size;
+	int offset;
+
+	if (unlikely(!page)) {
 refill:
-		for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
-			gfp_t gfp = gfp_mask;
+		page = __page_frag_refill(nc, gfp_mask);
+		if (!page)
+			return NULL;
+
+		/* if size can vary use frag.size else just use PAGE_SIZE */
+		size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
 
-			if (order)
-				gfp |= __GFP_COMP | __GFP_NOWARN;
-			nc->frag.page = alloc_pages(gfp, order);
-			if (likely(nc->frag.page))
-				break;
-			if (--order < 0)
-				goto end;
-		}
-		nc->frag.size = PAGE_SIZE << order;
 		/* Even if we own the page, we do not use atomic_set().
 		 * This would break get_page_unless_zero() users.
 		 */
-		atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1,
-			   &nc->frag.page->_count);
-		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
-		nc->frag.offset = 0;
+		atomic_add(size - 1, &page->_count);
+
+		/* reset page count bias and offset to start of new frag */
+		nc->pagecnt_bias = size;
+		nc->frag.offset = size;
 	}
 
-	if (nc->frag.offset + fragsz > nc->frag.size) {
-		if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) {
-			if (!atomic_sub_and_test(nc->pagecnt_bias,
-						 &nc->frag.page->_count))
-				goto refill;
-			/* OK, page count is 0, we can safely set it */
-			atomic_set(&nc->frag.page->_count,
-				   NETDEV_PAGECNT_MAX_BIAS);
-		} else {
-			atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias,
-				   &nc->frag.page->_count);
-		}
-		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
-		nc->frag.offset = 0;
+	offset = nc->frag.offset - fragsz;
+	if (unlikely(offset < 0)) {
+		if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
+			goto refill;
+
+		/* if size can vary use frag.size else just use PAGE_SIZE */
+		size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
+
+		/* OK, page count is 0, we can safely set it */
+		atomic_set(&page->_count, size);
+
+		/* reset page count bias and offset to start of new frag */
+		nc->pagecnt_bias = size;
+		offset = size - fragsz;
 	}
 
-	data = page_address(nc->frag.page) + nc->frag.offset;
-	nc->frag.offset += fragsz;
 	nc->pagecnt_bias--;
-end:
+	nc->frag.offset = offset;
+
+	return page_address(page) + offset;
+}
+
+static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+{
+	unsigned long flags;
+	void *data;
+
+	local_irq_save(flags);
+	data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask);
 	local_irq_restore(flags);
 	return data;
 }
@@ -406,11 +432,25 @@ void *netdev_alloc_frag(unsigned int fragsz)
 }
 EXPORT_SYMBOL(netdev_alloc_frag);
 
+static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+{
+	return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask);
+}
+
+void *napi_alloc_frag(unsigned int fragsz)
+{
+	return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+}
+EXPORT_SYMBOL(napi_alloc_frag);
+
 /**
- * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
- * @dev: network device to receive on
+ * __alloc_rx_skb - allocate an skbuff for rx
  * @length: length to allocate
  * @gfp_mask: get_free_pages mask, passed to alloc_skb
+ * @flags:	If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
+ *		allocations in case we have to fallback to __alloc_skb()
+ *		If SKB_ALLOC_NAPI is set, page fragment will be allocated
+ *		from napi_cache instead of netdev_cache.
  *
  * Allocate a new &sk_buff and assign it a usage count of one. The
  * buffer has unspecified headroom built in. Users should allocate
@@ -419,11 +459,11 @@ EXPORT_SYMBOL(netdev_alloc_frag);
  *
  * %NULL is returned if there is no free memory.
  */
-struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
-				   unsigned int length, gfp_t gfp_mask)
+static struct sk_buff *__alloc_rx_skb(unsigned int length, gfp_t gfp_mask,
+				      int flags)
 {
 	struct sk_buff *skb = NULL;
-	unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) +
+	unsigned int fragsz = SKB_DATA_ALIGN(length) +
 			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
@@ -432,7 +472,9 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 		if (sk_memalloc_socks())
 			gfp_mask |= __GFP_MEMALLOC;
 
-		data = __netdev_alloc_frag(fragsz, gfp_mask);
+		data = (flags & SKB_ALLOC_NAPI) ?
+			__napi_alloc_frag(fragsz, gfp_mask) :
+			__netdev_alloc_frag(fragsz, gfp_mask);
 
 		if (likely(data)) {
 			skb = build_skb(data, fragsz);
@@ -440,17 +482,72 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 				put_page(virt_to_head_page(data));
 		}
 	} else {
-		skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
+		skb = __alloc_skb(length, gfp_mask,
 				  SKB_ALLOC_RX, NUMA_NO_NODE);
 	}
+	return skb;
+}
+
+/**
+ * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
+ * @dev: network device to receive on
+ * @length: length to allocate
+ * @gfp_mask: get_free_pages mask, passed to alloc_skb
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has NET_SKB_PAD headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
+				   unsigned int length, gfp_t gfp_mask)
+{
+	struct sk_buff *skb;
+
+	length += NET_SKB_PAD;
+	skb = __alloc_rx_skb(length, gfp_mask, 0);
+
 	if (likely(skb)) {
 		skb_reserve(skb, NET_SKB_PAD);
 		skb->dev = dev;
 	}
+
 	return skb;
 }
 EXPORT_SYMBOL(__netdev_alloc_skb);
 
+/**
+ * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
+ * @napi: napi instance this buffer was allocated for
+ * @length: length to allocate
+ * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
+ *
+ * Allocate a new sk_buff for use in NAPI receive.  This buffer will
+ * attempt to allocate the head from a special reserved region used
+ * only for NAPI Rx allocation.  By doing this we can save several
+ * CPU cycles by avoiding having to disable and re-enable IRQs.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
+				 unsigned int length, gfp_t gfp_mask)
+{
+	struct sk_buff *skb;
+
+	length += NET_SKB_PAD + NET_IP_ALIGN;
+	skb = __alloc_rx_skb(length, gfp_mask, SKB_ALLOC_NAPI);
+
+	if (likely(skb)) {
+		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+		skb->dev = napi->dev;
+	}
+
+	return skb;
+}
+EXPORT_SYMBOL(__napi_alloc_skb);
+
 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 		     int size, unsigned int truesize)
 {