author		Eric Dumazet <edumazet@google.com>	2012-09-26 02:46:57 -0400
committer	David S. Miller <davem@davemloft.net>	2012-09-27 19:29:35 -0400
commit		69b08f62e17439ee3d436faf0b9a7ca6fffb78db (patch)
tree		a59ad1118535432b42821b65efe96ac27450c26a /net/core
parent		5dff747b7038d10f9c174a1245263fd1c36a644d (diff)
net: use bigger pages in __netdev_alloc_frag
We currently use percpu order-0 pages in __netdev_alloc_frag()
to deliver fragments used by __netdev_alloc_skb().

Depending on the NIC driver and whether the arch is 32 or 64 bit, this allows
a page to be split into several fragments (between 1 and 8), assuming
PAGE_SIZE=4096.
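For a rough sense of that 1-to-8 range, ignoring alignment details, an order-0
page simply yields PAGE_SIZE / fragsz fragments. The fragment sizes below are
made-up examples, not values taken from the kernel:

	/* Hypothetical fragment sizes, only to illustrate how many fragments
	 * an order-0 (4096-byte) page can be split into.
	 */
	#include <stdio.h>

	int main(void)
	{
		const unsigned int page_size = 4096;
		const unsigned int fragsz[] = { 512, 1024, 2048, 4096 }; /* examples */

		for (unsigned int i = 0; i < sizeof(fragsz) / sizeof(fragsz[0]); i++)
			printf("fragsz=%4u -> %u fragment(s) per page\n",
			       fragsz[i], page_size / fragsz[i]);
		return 0;
	}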
Switching to bigger pages (32768 bytes for the PAGE_SIZE=4096 case) allows
(see the sketch after this list):
- Better filling of space (the ending hole overhead is less of an issue)
- Fewer calls to the page allocator and fewer accesses to page->_count
- Could allow future changes to struct skb_shared_info without a major
  performance impact.
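With PAGE_SIZE=4096, the patch's get_order(32768) evaluates to 3, i.e. an
order-3 (8-page) allocation. The "better filling of space" point is easy to
see with a hypothetical fragment size, chosen here only because it does not
divide the page size evenly; the numbers are illustrative, not from the patch:

	/* Compare the ending-hole overhead of a 4K page vs a 32K frag page for
	 * a hypothetical fragment size (2432 bytes is an example value).
	 */
	#include <stdio.h>

	static void report(unsigned int size, unsigned int fragsz)
	{
		unsigned int frags = size / fragsz;
		unsigned int waste = size - frags * fragsz;

		printf("size=%5u: %2u fragments, %4u bytes wasted (%.1f%%)\n",
		       size, frags, waste, 100.0 * waste / size);
	}

	int main(void)
	{
		const unsigned int fragsz = 2432;	/* hypothetical */

		report(4096, fragsz);	/*  1 fragment,  1664 bytes wasted (40.6%) */
		report(32768, fragsz);	/* 13 fragments, 1152 bytes wasted ( 3.5%) */
		return 0;
	}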
This patch implements a transparent fallback to smaller
pages in case of memory pressure.
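A minimal userspace model of that fallback strategy, assuming PAGE_SIZE=4096
(the allocator stub and its failure behaviour are invented for illustration;
the real code uses alloc_pages() with __GFP_COMP | __GFP_NOWARN at the higher
orders, as the diff below shows):

	#include <stdio.h>
	#include <stdlib.h>

	#define FRAG_PAGE_MAX_ORDER 3			/* get_order(32768) with 4K pages */

	/* Stub for alloc_pages(): pretend orders above 'avail' fail under pressure. */
	static void *stub_alloc_pages(int order, int avail)
	{
		return order <= avail ? malloc(4096u << order) : NULL;
	}

	/* Try the largest order first, then transparently step down to order 0. */
	static void *alloc_frag_page(int avail, int *got_order)
	{
		for (int order = FRAG_PAGE_MAX_ORDER; ; ) {
			void *page = stub_alloc_pages(order, avail);

			if (page) {
				*got_order = order;
				return page;
			}
			if (--order < 0)
				return NULL;	/* even an order-0 page failed */
		}
	}

	int main(void)
	{
		int order;
		void *page = alloc_frag_page(1, &order);	/* simulate pressure */

		if (page)
			printf("fell back to an order-%d page (%u bytes)\n",
			       order, 4096u << order);
		free(page);
		return 0;
	}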
It also uses a standard "struct page_frag" instead of a custom one.
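struct page_frag bundles a page pointer and an offset, plus an explicit size
now that the backing page can be larger than PAGE_SIZE. A paraphrased sketch
(the real kernel definition uses arch-dependent field widths, so this is not a
verbatim copy):

	/* Paraphrased sketch of struct page_frag; the diff below accesses
	 * exactly these fields as nc->frag.page, nc->frag.offset and
	 * nc->frag.size.
	 */
	struct page;				/* opaque here; defined by the kernel */

	struct page_frag {
		struct page *page;		/* backing (possibly compound) page      */
		unsigned int offset;		/* first unused byte within the page     */
		unsigned int size;		/* usable bytes, i.e. PAGE_SIZE << order */
	};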
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r--	net/core/skbuff.c	46
1 file changed, 30 insertions, 16 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2ede3cfa8ffa..607a70ff2cc2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -340,43 +340,57 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 EXPORT_SYMBOL(build_skb);
 
 struct netdev_alloc_cache {
-	struct page *page;
-	unsigned int offset;
-	unsigned int pagecnt_bias;
+	struct page_frag	frag;
+	/* we maintain a pagecount bias, so that we dont dirty cache line
+	 * containing page->_count every time we allocate a fragment.
+	 */
+	unsigned int		pagecnt_bias;
 };
 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
 
-#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
+#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
+#define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
+#define NETDEV_PAGECNT_MAX_BIAS	   NETDEV_FRAG_PAGE_MAX_SIZE
 
 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
 	struct netdev_alloc_cache *nc;
 	void *data = NULL;
+	int order;
 	unsigned long flags;
 
 	local_irq_save(flags);
 	nc = &__get_cpu_var(netdev_alloc_cache);
-	if (unlikely(!nc->page)) {
+	if (unlikely(!nc->frag.page)) {
 refill:
-		nc->page = alloc_page(gfp_mask);
-		if (unlikely(!nc->page))
-			goto end;
+		for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
+			gfp_t gfp = gfp_mask;
+
+			if (order)
+				gfp |= __GFP_COMP | __GFP_NOWARN;
+			nc->frag.page = alloc_pages(gfp, order);
+			if (likely(nc->frag.page))
+				break;
+			if (--order < 0)
+				goto end;
+		}
+		nc->frag.size = PAGE_SIZE << order;
 recycle:
-		atomic_set(&nc->page->_count, NETDEV_PAGECNT_BIAS);
-		nc->pagecnt_bias = NETDEV_PAGECNT_BIAS;
-		nc->offset = 0;
+		atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
+		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
+		nc->frag.offset = 0;
 	}
 
-	if (nc->offset + fragsz > PAGE_SIZE) {
+	if (nc->frag.offset + fragsz > nc->frag.size) {
 		/* avoid unnecessary locked operations if possible */
-		if ((atomic_read(&nc->page->_count) == nc->pagecnt_bias) ||
-		    atomic_sub_and_test(nc->pagecnt_bias, &nc->page->_count))
+		if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
+		    atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
 			goto recycle;
 		goto refill;
 	}
 
-	data = page_address(nc->page) + nc->offset;
-	nc->offset += fragsz;
+	data = page_address(nc->frag.page) + nc->frag.offset;
+	nc->frag.offset += fragsz;
 	nc->pagecnt_bias--;
 end:
 	local_irq_restore(flags);
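The pagecnt_bias comment in the diff is the heart of the fast path: at refill
time the cache charges page->_count once with the maximum bias, then hands out
one of those pre-charged references per fragment by decrementing a purely
local counter, so the shared refcount cache line is not dirtied on every
allocation. When the page is exhausted, the remaining bias is settled with the
single atomic_sub_and_test() shown above, and the page is recycled if no
fragment still holds a reference. A small userspace model of that accounting
(names and the atomic type are stand-ins, not kernel API):

	#include <stdatomic.h>
	#include <stdio.h>

	#define FRAG_PAGE_SIZE   32768		/* NETDEV_FRAG_PAGE_MAX_SIZE with 4K pages */
	#define PAGECNT_MAX_BIAS FRAG_PAGE_SIZE

	struct fake_page {
		atomic_int refcount;		/* stands in for page->_count */
		char mem[FRAG_PAGE_SIZE];
	};

	struct frag_cache {
		struct fake_page *page;
		unsigned int offset;
		unsigned int pagecnt_bias;	/* references still owned by the cache */
	};

	/* Hand out one fragment and one pre-charged reference without touching
	 * the shared refcount; only the local bias moves on the fast path.
	 */
	static void *alloc_frag(struct frag_cache *nc, struct fake_page *pg,
				unsigned int fragsz)
	{
		if (!nc->page) {		/* refill: charge all references up front */
			nc->page = pg;
			atomic_store(&pg->refcount, PAGECNT_MAX_BIAS);
			nc->pagecnt_bias = PAGECNT_MAX_BIAS;
			nc->offset = 0;
		}
		if (nc->offset + fragsz > FRAG_PAGE_SIZE)
			return NULL;		/* the real code recycles or refills here */

		void *data = pg->mem + nc->offset;
		nc->offset += fragsz;
		nc->pagecnt_bias--;		/* this reference now belongs to the fragment */
		return data;
	}

	int main(void)
	{
		static struct fake_page pg;
		struct frag_cache nc = { 0 };

		for (int i = 0; i < 4; i++)
			alloc_frag(&nc, &pg, 2048);

		/* refcount was written once at refill time; each fragment only
		 * cost a plain decrement of nc.pagecnt_bias.
		 */
		printf("refcount=%d bias=%u offset=%u\n",
		       atomic_load(&pg.refcount), nc.pagecnt_bias, nc.offset);
		return 0;
	}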