author		Eric Dumazet <edumazet@google.com>	2012-09-26 02:46:57 -0400
committer	David S. Miller <davem@davemloft.net>	2012-09-27 19:29:35 -0400
commit		69b08f62e17439ee3d436faf0b9a7ca6fffb78db (patch)
tree		a59ad1118535432b42821b65efe96ac27450c26a /net/core
parent		5dff747b7038d10f9c174a1245263fd1c36a644d (diff)
net: use bigger pages in __netdev_alloc_frag
We currently use percpu order-0 pages in __netdev_alloc_frag()
to deliver fragments used by __netdev_alloc_skb().

Depending on the NIC driver and whether the arch is 32 or 64 bit, this allows
a page to be split into several fragments (between 1 and 8), assuming
PAGE_SIZE=4096.
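For a rough sense of that 1-to-8 range, ignoring alignment details, an order-0
page simply yields PAGE_SIZE / fragsz fragments. The fragment sizes below are
made-up examples, not values taken from the kernel:

	/* Hypothetical fragment sizes, only to illustrate how many fragments
	 * an order-0 (4096-byte) page can be split into.
	 */
	#include <stdio.h>

	int main(void)
	{
		const unsigned int page_size = 4096;
		const unsigned int fragsz[] = { 512, 1024, 2048, 4096 }; /* examples */

		for (unsigned int i = 0; i < sizeof(fragsz) / sizeof(fragsz[0]); i++)
			printf("fragsz=%4u -> %u fragment(s) per page\n",
			       fragsz[i], page_size / fragsz[i]);
		return 0;
	}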
Switching to bigger pages (32768 bytes for the PAGE_SIZE=4096 case) allows
(see the sketch after this list):
- Better filling of space (the ending hole overhead is less of an issue)
- Fewer calls to the page allocator and fewer accesses to page->_count
- Could allow future changes to struct skb_shared_info without a major
  performance impact.
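With PAGE_SIZE=4096, the patch's get_order(32768) evaluates to 3, i.e. an
order-3 (8-page) allocation. The "better filling of space" point is easy to
see with a hypothetical fragment size, chosen here only because it does not
divide the page size evenly; the numbers are illustrative, not from the patch:

	/* Compare the ending-hole overhead of a 4K page vs a 32K frag page for
	 * a hypothetical fragment size (2432 bytes is an example value).
	 */
	#include <stdio.h>

	static void report(unsigned int size, unsigned int fragsz)
	{
		unsigned int frags = size / fragsz;
		unsigned int waste = size - frags * fragsz;

		printf("size=%5u: %2u fragments, %4u bytes wasted (%.1f%%)\n",
		       size, frags, waste, 100.0 * waste / size);
	}

	int main(void)
	{
		const unsigned int fragsz = 2432;	/* hypothetical */

		report(4096, fragsz);	/*  1 fragment,  1664 bytes wasted (40.6%) */
		report(32768, fragsz);	/* 13 fragments, 1152 bytes wasted ( 3.5%) */
		return 0;
	}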
This patch implements a transparent fallback to smaller
pages in case of memory pressure.
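A minimal userspace model of that fallback strategy, assuming PAGE_SIZE=4096
(the allocator stub and its failure behaviour are invented for illustration;
the real code uses alloc_pages() with __GFP_COMP | __GFP_NOWARN at the higher
orders, as the diff below shows):

	#include <stdio.h>
	#include <stdlib.h>

	#define FRAG_PAGE_MAX_ORDER 3			/* get_order(32768) with 4K pages */

	/* Stub for alloc_pages(): pretend orders above 'avail' fail under pressure. */
	static void *stub_alloc_pages(int order, int avail)
	{
		return order <= avail ? malloc(4096u << order) : NULL;
	}

	/* Try the largest order first, then transparently step down to order 0. */
	static void *alloc_frag_page(int avail, int *got_order)
	{
		for (int order = FRAG_PAGE_MAX_ORDER; ; ) {
			void *page = stub_alloc_pages(order, avail);

			if (page) {
				*got_order = order;
				return page;
			}
			if (--order < 0)
				return NULL;	/* even an order-0 page failed */
		}
	}

	int main(void)
	{
		int order;
		void *page = alloc_frag_page(1, &order);	/* simulate pressure */

		if (page)
			printf("fell back to an order-%d page (%u bytes)\n",
			       order, 4096u << order);
		free(page);
		return 0;
	}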
It also uses a standard "struct page_frag" instead of a custom one.
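struct page_frag bundles a page pointer and an offset, plus an explicit size
now that the backing page can be larger than PAGE_SIZE. A paraphrased sketch
(the real kernel definition uses arch-dependent field widths, so this is not a
verbatim copy):

	/* Paraphrased sketch of struct page_frag; the diff below accesses
	 * exactly these fields as nc->frag.page, nc->frag.offset and
	 * nc->frag.size.
	 */
	struct page;				/* opaque here; defined by the kernel */

	struct page_frag {
		struct page *page;		/* backing (possibly compound) page      */
		unsigned int offset;		/* first unused byte within the page     */
		unsigned int size;		/* usable bytes, i.e. PAGE_SIZE << order */
	};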
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r--	net/core/skbuff.c	46
1 file changed, 30 insertions, 16 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2ede3cfa8ffa..607a70ff2cc2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -340,43 +340,57 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 EXPORT_SYMBOL(build_skb);
 
 struct netdev_alloc_cache {
-	struct page *page;
-	unsigned int offset;
-	unsigned int pagecnt_bias;
+	struct page_frag	frag;
+	/* we maintain a pagecount bias, so that we dont dirty cache line
+	 * containing page->_count every time we allocate a fragment.
+	 */
+	unsigned int		pagecnt_bias;
 };
 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
 
-#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
+#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
+#define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
+#define NETDEV_PAGECNT_MAX_BIAS	   NETDEV_FRAG_PAGE_MAX_SIZE
 
 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
 	struct netdev_alloc_cache *nc;
 	void *data = NULL;
+	int order;
 	unsigned long flags;
 
 	local_irq_save(flags);
 	nc = &__get_cpu_var(netdev_alloc_cache);
-	if (unlikely(!nc->page)) {
+	if (unlikely(!nc->frag.page)) {
 refill:
-		nc->page = alloc_page(gfp_mask);
-		if (unlikely(!nc->page))
-			goto end;
+		for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
+			gfp_t gfp = gfp_mask;
+
+			if (order)
+				gfp |= __GFP_COMP | __GFP_NOWARN;
+			nc->frag.page = alloc_pages(gfp, order);
+			if (likely(nc->frag.page))
+				break;
+			if (--order < 0)
+				goto end;
+		}
+		nc->frag.size = PAGE_SIZE << order;
 recycle:
-		atomic_set(&nc->page->_count, NETDEV_PAGECNT_BIAS);
-		nc->pagecnt_bias = NETDEV_PAGECNT_BIAS;
-		nc->offset = 0;
+		atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
+		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
+		nc->frag.offset = 0;
 	}
 
-	if (nc->offset + fragsz > PAGE_SIZE) {
+	if (nc->frag.offset + fragsz > nc->frag.size) {
 		/* avoid unnecessary locked operations if possible */
-		if ((atomic_read(&nc->page->_count) == nc->pagecnt_bias) ||
-		    atomic_sub_and_test(nc->pagecnt_bias, &nc->page->_count))
+		if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
+		    atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
 			goto recycle;
 		goto refill;
 	}
 
-	data = page_address(nc->page) + nc->offset;
-	nc->offset += fragsz;
+	data = page_address(nc->frag.page) + nc->frag.offset;
+	nc->frag.offset += fragsz;
 	nc->pagecnt_bias--;
 end:
 	local_irq_restore(flags);
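The pagecnt_bias comment in the diff is the heart of the fast path: at refill
time the cache charges page->_count once with the maximum bias, then hands out
one of those pre-charged references per fragment by decrementing a purely
local counter, so the shared refcount cache line is not dirtied on every
allocation. When the page is exhausted, the remaining bias is settled with the
single atomic_sub_and_test() shown above, and the page is recycled if no
fragment still holds a reference. A small userspace model of that accounting
(names and the atomic type are stand-ins, not kernel API):

	#include <stdatomic.h>
	#include <stdio.h>

	#define FRAG_PAGE_SIZE   32768		/* NETDEV_FRAG_PAGE_MAX_SIZE with 4K pages */
	#define PAGECNT_MAX_BIAS FRAG_PAGE_SIZE

	struct fake_page {
		atomic_int refcount;		/* stands in for page->_count */
		char mem[FRAG_PAGE_SIZE];
	};

	struct frag_cache {
		struct fake_page *page;
		unsigned int offset;
		unsigned int pagecnt_bias;	/* references still owned by the cache */
	};

	/* Hand out one fragment and one pre-charged reference without touching
	 * the shared refcount; only the local bias moves on the fast path.
	 */
	static void *alloc_frag(struct frag_cache *nc, struct fake_page *pg,
				unsigned int fragsz)
	{
		if (!nc->page) {		/* refill: charge all references up front */
			nc->page = pg;
			atomic_store(&pg->refcount, PAGECNT_MAX_BIAS);
			nc->pagecnt_bias = PAGECNT_MAX_BIAS;
			nc->offset = 0;
		}
		if (nc->offset + fragsz > FRAG_PAGE_SIZE)
			return NULL;		/* the real code recycles or refills here */

		void *data = pg->mem + nc->offset;
		nc->offset += fragsz;
		nc->pagecnt_bias--;		/* this reference now belongs to the fragment */
		return data;
	}

	int main(void)
	{
		static struct fake_page pg;
		struct frag_cache nc = { 0 };

		for (int i = 0; i < 4; i++)
			alloc_frag(&nc, &pg, 2048);

		/* refcount was written once at refill time; each fragment only
		 * cost a plain decrement of nc.pagecnt_bias.
		 */
		printf("refcount=%d bias=%u offset=%u\n",
		       atomic_load(&pg.refcount), nc.pagecnt_bias, nc.offset);
		return 0;
	}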