author    Alexander Duyck <alexander.h.duyck@redhat.com>    2014-12-09 22:40:42 -0500
committer David S. Miller <davem@davemloft.net>    2014-12-10 13:31:57 -0500
commit    ffde7328a36d16e626bae8468571858d71cd010b (patch)
tree      c84689c687b51d6fe70306f28cb045fa43f9ba59
parent    6e5f59aacbf9527dfe425541c78cb8c56623e7eb (diff)
net: Split netdev_alloc_frag into __alloc_page_frag and add __napi_alloc_frag
This patch splits the netdev_alloc_frag function up so that it can be used on one of two page frag pools instead of being fixed on the netdev_alloc_cache.

By doing this we can add a NAPI-specific function, __napi_alloc_frag, that accesses a pool which is only used from softirq context. The advantage is that we do not need to call local_irq_save/restore, which can be a significant savings.

I also took the opportunity to refactor the core bits that were placed in __alloc_page_frag. First, I updated the allocation to do either a 32K allocation or an order-0 page. This is based on the changes in commit d9b2938aa, where it was found that latencies could be reduced in case of failures. Then I rewrote the logic to work from the end of the page to the start. By doing this the size value doesn't have to be used unless we have run out of space for page fragments. Finally, I cleaned up the atomic bits so that we just do an atomic_sub_and_test, and if that returns true we set page->_count via an atomic_set. This way we can remove the extra conditional for the atomic_read, since it would have led to an atomic_inc in the case of success anyway.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
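For reference, a minimal usage sketch (not part of the patch): a hypothetical NAPI poll routine carving its receive buffers out of the new softirq-only pool with napi_alloc_frag(). EXAMPLE_RX_BUF_LEN, example_poll() and the surrounding driver logic are made up for illustration; the point is that the call happens strictly in softirq context, so the napi_alloc_cache path never has to disable interrupts.

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical receive buffer length, for illustration only. */
#define EXAMPLE_RX_BUF_LEN	2048

static int example_poll(struct napi_struct *napi, int budget)
{
	int work_done = 0;

	while (work_done < budget) {
		/* Reserve room for the packet data plus the shared info
		 * block that build_skb() expects at the end of the buffer.
		 */
		void *frag = napi_alloc_frag(SKB_DATA_ALIGN(EXAMPLE_RX_BUF_LEN) +
				SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));

		if (unlikely(!frag))
			break;	/* out of memory; try again on the next poll */

		/* ... post frag to the hardware and later wrap the filled
		 * buffer with build_skb() ...
		 */
		work_done++;
	}

	if (work_done < budget)
		napi_complete(napi);

	return work_done;
}

Callers that cannot guarantee softirq context should keep using netdev_alloc_frag(), which still takes the local_irq_save()/restore() path.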
-rw-r--r--  include/linux/skbuff.h |   2
-rw-r--r--  net/core/skbuff.c      | 117
2 files changed, 79 insertions, 40 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ab0bc43c82a4..736cc99f3f6c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2164,6 +2164,8 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
 	return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
 }
 
+void *napi_alloc_frag(unsigned int fragsz);
+
 /**
  * __dev_alloc_pages - allocate page for network Rx
  * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7a338fb55cc4..56ed17cd2151 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -336,59 +336,85 @@ struct netdev_alloc_cache {
 	unsigned int		pagecnt_bias;
 };
 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
+static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);
 
-static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
+				       gfp_t gfp_mask)
 {
-	struct netdev_alloc_cache *nc;
-	void *data = NULL;
-	int order;
-	unsigned long flags;
+	const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER;
+	struct page *page = NULL;
+	gfp_t gfp = gfp_mask;
+
+	if (order) {
+		gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
+		page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
+		nc->frag.size = PAGE_SIZE << (page ? order : 0);
+	}
 
-	local_irq_save(flags);
-	nc = this_cpu_ptr(&netdev_alloc_cache);
-	if (unlikely(!nc->frag.page)) {
+	if (unlikely(!page))
+		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+
+	nc->frag.page = page;
+
+	return page;
+}
+
+static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache,
+			       unsigned int fragsz, gfp_t gfp_mask)
+{
+	struct netdev_alloc_cache *nc = this_cpu_ptr(cache);
+	struct page *page = nc->frag.page;
+	unsigned int size;
+	int offset;
+
+	if (unlikely(!page)) {
 refill:
-		for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
-			gfp_t gfp = gfp_mask;
+		page = __page_frag_refill(nc, gfp_mask);
+		if (!page)
+			return NULL;
+
+		/* if size can vary use frag.size else just use PAGE_SIZE */
+		size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
 
-			if (order)
-				gfp |= __GFP_COMP | __GFP_NOWARN;
-			nc->frag.page = alloc_pages(gfp, order);
-			if (likely(nc->frag.page))
-				break;
-			if (--order < 0)
-				goto end;
-		}
-		nc->frag.size = PAGE_SIZE << order;
 		/* Even if we own the page, we do not use atomic_set().
 		 * This would break get_page_unless_zero() users.
 		 */
-		atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1,
-			   &nc->frag.page->_count);
-		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
-		nc->frag.offset = 0;
+		atomic_add(size - 1, &page->_count);
+
+		/* reset page count bias and offset to start of new frag */
+		nc->pagecnt_bias = size;
+		nc->frag.offset = size;
 	}
 
-	if (nc->frag.offset + fragsz > nc->frag.size) {
-		if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) {
-			if (!atomic_sub_and_test(nc->pagecnt_bias,
-						 &nc->frag.page->_count))
-				goto refill;
-			/* OK, page count is 0, we can safely set it */
-			atomic_set(&nc->frag.page->_count,
-				   NETDEV_PAGECNT_MAX_BIAS);
-		} else {
-			atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias,
-				   &nc->frag.page->_count);
-		}
-		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
-		nc->frag.offset = 0;
+	offset = nc->frag.offset - fragsz;
+	if (unlikely(offset < 0)) {
+		if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
+			goto refill;
+
+		/* if size can vary use frag.size else just use PAGE_SIZE */
+		size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
+
+		/* OK, page count is 0, we can safely set it */
+		atomic_set(&page->_count, size);
+
+		/* reset page count bias and offset to start of new frag */
+		nc->pagecnt_bias = size;
+		offset = size - fragsz;
 	}
 
-	data = page_address(nc->frag.page) + nc->frag.offset;
-	nc->frag.offset += fragsz;
 	nc->pagecnt_bias--;
-end:
+	nc->frag.offset = offset;
+
+	return page_address(page) + offset;
+}
+
+static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+{
+	unsigned long flags;
+	void *data;
+
+	local_irq_save(flags);
+	data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask);
 	local_irq_restore(flags);
 	return data;
 }
@@ -406,6 +432,17 @@ void *netdev_alloc_frag(unsigned int fragsz)
 }
 EXPORT_SYMBOL(netdev_alloc_frag);
 
+static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+{
+	return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask);
+}
+
+void *napi_alloc_frag(unsigned int fragsz)
+{
+	return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+}
+EXPORT_SYMBOL(napi_alloc_frag);
+
 /**
  * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
  * @dev: network device to receive on