-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |   2
-rw-r--r--  drivers/net/ethernet/broadcom/tg3.c             |   2
-rw-r--r--  drivers/net/ethernet/hisilicon/hip04_eth.c      |   2
-rw-r--r--  drivers/net/ethernet/intel/e1000/e1000_main.c   |  19
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_main.c       |  11
-rw-r--r--  drivers/net/ethernet/marvell/mvneta.c           |   2
-rw-r--r--  drivers/net/ethernet/ti/netcp_core.c            |   2
-rw-r--r--  include/linux/gfp.h                             |   5
-rw-r--r--  include/linux/mm_types.h                        |  18
-rw-r--r--  include/linux/skbuff.h                          |   9
-rw-r--r--  mm/page_alloc.c                                 |  98
-rw-r--r--  net/core/skbuff.c                               | 224
12 files changed, 223 insertions(+), 171 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index a8bb8f664d3d..b10d1744e5ae 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -662,7 +662,7 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
 static void bnx2x_frag_free(const struct bnx2x_fastpath *fp, void *data)
 {
         if (fp->rx_frag_size)
-                put_page(virt_to_head_page(data));
+                skb_free_frag(data);
         else
                 kfree(data);
 }
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 069952fa5d64..73c934cf6c61 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6618,7 +6618,7 @@ static void tg3_tx(struct tg3_napi *tnapi)
 static void tg3_frag_free(bool is_frag, void *data)
 {
         if (is_frag)
-                put_page(virt_to_head_page(data));
+                skb_free_frag(data);
         else
                 kfree(data);
 }
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
index 3b39fdddeb57..d49bee38cd31 100644
--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -798,7 +798,7 @@ static void hip04_free_ring(struct net_device *ndev, struct device *d)
 
         for (i = 0; i < RX_DESC_NUM; i++)
                 if (priv->rx_buf[i])
-                        put_page(virt_to_head_page(priv->rx_buf[i]));
+                        skb_free_frag(priv->rx_buf[i]);
 
         for (i = 0; i < TX_DESC_NUM; i++)
                 if (priv->tx_skb[i])
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 983eb4e6f7aa..74dc15055971 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -2079,11 +2079,6 @@ static void *e1000_alloc_frag(const struct e1000_adapter *a)
         return data;
 }
 
-static void e1000_free_frag(const void *data)
-{
-        put_page(virt_to_head_page(data));
-}
-
 /**
  * e1000_clean_rx_ring - Free Rx Buffers per Queue
  * @adapter: board private structure
@@ -2107,7 +2102,7 @@ static void e1000_clean_rx_ring(struct e1000_adapter *adapter,
                                          adapter->rx_buffer_len,
                                          DMA_FROM_DEVICE);
                         if (buffer_info->rxbuf.data) {
-                                e1000_free_frag(buffer_info->rxbuf.data);
+                                skb_free_frag(buffer_info->rxbuf.data);
                                 buffer_info->rxbuf.data = NULL;
                         }
                 } else if (adapter->clean_rx == e1000_clean_jumbo_rx_irq) {
@@ -4594,28 +4589,28 @@ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
                         data = e1000_alloc_frag(adapter);
                         /* Failed allocation, critical failure */
                         if (!data) {
-                                e1000_free_frag(olddata);
+                                skb_free_frag(olddata);
                                 adapter->alloc_rx_buff_failed++;
                                 break;
                         }
 
                         if (!e1000_check_64k_bound(adapter, data, bufsz)) {
                                 /* give up */
-                                e1000_free_frag(data);
-                                e1000_free_frag(olddata);
+                                skb_free_frag(data);
+                                skb_free_frag(olddata);
                                 adapter->alloc_rx_buff_failed++;
                                 break;
                         }
 
                         /* Use new allocation */
-                        e1000_free_frag(olddata);
+                        skb_free_frag(olddata);
                 }
                 buffer_info->dma = dma_map_single(&pdev->dev,
                                                   data,
                                                   adapter->rx_buffer_len,
                                                   DMA_FROM_DEVICE);
                 if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
-                        e1000_free_frag(data);
+                        skb_free_frag(data);
                         buffer_info->dma = 0;
                         adapter->alloc_rx_buff_failed++;
                         break;
@@ -4637,7 +4632,7 @@ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
                                  adapter->rx_buffer_len,
                                  DMA_FROM_DEVICE);
 
-                e1000_free_frag(data);
+                skb_free_frag(data);
                 buffer_info->rxbuf.data = NULL;
                 buffer_info->dma = 0;
 
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index e63664649029..8a45ed7506c5 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -4974,6 +4974,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
         struct igb_tx_buffer *first;
         int tso;
         u32 tx_flags = 0;
+        unsigned short f;
         u16 count = TXD_USE_COUNT(skb_headlen(skb));
         __be16 protocol = vlan_get_protocol(skb);
         u8 hdr_len = 0;
@@ -4984,14 +4985,8 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
          * + 1 desc for context descriptor,
          * otherwise try next time
          */
-        if (NETDEV_FRAG_PAGE_MAX_SIZE > IGB_MAX_DATA_PER_TXD) {
-                unsigned short f;
-
-                for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
-                        count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
-        } else {
-                count += skb_shinfo(skb)->nr_frags;
-        }
+        for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+                count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
 
         if (igb_maybe_stop_tx(tx_ring, count + 3)) {
                 /* this is a hard error */
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index ce5f7f9cff06..ecce8261ce3b 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1359,7 +1359,7 @@ static void *mvneta_frag_alloc(const struct mvneta_port *pp)
 static void mvneta_frag_free(const struct mvneta_port *pp, void *data)
 {
         if (likely(pp->frag_size <= PAGE_SIZE))
-                put_page(virt_to_head_page(data));
+                skb_free_frag(data);
         else
                 kfree(data);
 }
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 43efc3a0cda5..0a28c07361cf 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -537,7 +537,7 @@ int netcp_unregister_rxhook(struct netcp_intf *netcp_priv, int order,
 static void netcp_frag_free(bool is_frag, void *ptr)
 {
         if (is_frag)
-                put_page(virt_to_head_page(ptr));
+                skb_free_frag(ptr);
         else
                 kfree(ptr);
 }
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 97a9373e61e8..70a7fee1efb3 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -366,6 +366,11 @@ extern void free_pages(unsigned long addr, unsigned int order);
 extern void free_hot_cold_page(struct page *page, bool cold);
 extern void free_hot_cold_page_list(struct list_head *list, bool cold);
 
+struct page_frag_cache;
+extern void *__alloc_page_frag(struct page_frag_cache *nc,
+                               unsigned int fragsz, gfp_t gfp_mask);
+extern void __free_page_frag(void *addr);
+
 extern void __free_kmem_pages(struct page *page, unsigned int order);
 extern void free_kmem_pages(unsigned long addr, unsigned int order);
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8d37e26a1007..0038ac7466fd 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -226,6 +226,24 @@ struct page_frag {
 #endif
 };
 
+#define PAGE_FRAG_CACHE_MAX_SIZE        __ALIGN_MASK(32768, ~PAGE_MASK)
+#define PAGE_FRAG_CACHE_MAX_ORDER       get_order(PAGE_FRAG_CACHE_MAX_SIZE)
+
+struct page_frag_cache {
+        void * va;
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+        __u16 offset;
+        __u16 size;
+#else
+        __u32 offset;
+#endif
+        /* we maintain a pagecount bias, so that we dont dirty cache line
+         * containing page->_count every time we allocate a fragment.
+         */
+        unsigned int pagecnt_bias;
+        bool pfmemalloc;
+};
+
 typedef unsigned long __nocast vm_flags_t;
 
 /*
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9c2f793573fa..c0b574a414e7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2128,10 +2128,6 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
                 kfree_skb(skb);
 }
 
-#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
-#define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
-#define NETDEV_PAGECNT_MAX_BIAS    NETDEV_FRAG_PAGE_MAX_SIZE
-
 void *netdev_alloc_frag(unsigned int fragsz);
 
 struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
@@ -2186,6 +2182,11 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
         return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
 }
 
+static inline void skb_free_frag(void *addr)
+{
+        __free_page_frag(addr);
+}
+
 void *napi_alloc_frag(unsigned int fragsz);
 struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
                                  unsigned int length, gfp_t gfp_mask);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ebffa0e4a9c0..2fd31aebef30 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2967,6 +2967,104 @@ void free_pages(unsigned long addr, unsigned int order)
 EXPORT_SYMBOL(free_pages);
 
 /*
+ * Page Fragment:
+ *  An arbitrary-length arbitrary-offset area of memory which resides
+ *  within a 0 or higher order page.  Multiple fragments within that page
+ *  are individually refcounted, in the page's reference counter.
+ *
+ * The page_frag functions below provide a simple allocation framework for
+ * page fragments.  This is used by the network stack and network device
+ * drivers to provide a backing region of memory for use as either an
+ * sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
+ */
+static struct page *__page_frag_refill(struct page_frag_cache *nc,
+                                       gfp_t gfp_mask)
+{
+        struct page *page = NULL;
+        gfp_t gfp = gfp_mask;
+
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+        gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
+                    __GFP_NOMEMALLOC;
+        page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
+                                PAGE_FRAG_CACHE_MAX_ORDER);
+        nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
+#endif
+        if (unlikely(!page))
+                page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+
+        nc->va = page ? page_address(page) : NULL;
+
+        return page;
+}
+
+void *__alloc_page_frag(struct page_frag_cache *nc,
+                        unsigned int fragsz, gfp_t gfp_mask)
+{
+        unsigned int size = PAGE_SIZE;
+        struct page *page;
+        int offset;
+
+        if (unlikely(!nc->va)) {
+refill:
+                page = __page_frag_refill(nc, gfp_mask);
+                if (!page)
+                        return NULL;
+
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+                /* if size can vary use size else just use PAGE_SIZE */
+                size = nc->size;
+#endif
+                /* Even if we own the page, we do not use atomic_set().
+                 * This would break get_page_unless_zero() users.
+                 */
+                atomic_add(size - 1, &page->_count);
+
+                /* reset page count bias and offset to start of new frag */
+                nc->pfmemalloc = page->pfmemalloc;
+                nc->pagecnt_bias = size;
+                nc->offset = size;
+        }
+
+        offset = nc->offset - fragsz;
+        if (unlikely(offset < 0)) {
+                page = virt_to_page(nc->va);
+
+                if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
+                        goto refill;
+
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+                /* if size can vary use size else just use PAGE_SIZE */
+                size = nc->size;
+#endif
+                /* OK, page count is 0, we can safely set it */
+                atomic_set(&page->_count, size);
+
+                /* reset page count bias and offset to start of new frag */
+                nc->pagecnt_bias = size;
+                offset = size - fragsz;
+        }
+
+        nc->pagecnt_bias--;
+        nc->offset = offset;
+
+        return nc->va + offset;
+}
+EXPORT_SYMBOL(__alloc_page_frag);
+
+/*
+ * Frees a page fragment allocated out of either a compound or order 0 page.
+ */
+void __free_page_frag(void *addr)
+{
+        struct page *page = virt_to_head_page(addr);
+
+        if (unlikely(put_page_testzero(page)))
+                __free_pages_ok(page, compound_order(page));
+}
+EXPORT_SYMBOL(__free_page_frag);
+
+/*
  * alloc_kmem_pages charges newly allocated pages to the kmem resource counter
  * of the current memory cgroup.
  *
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b9eb90b39ac7..d67e612bf0ef 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -347,94 +347,18 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 }
 EXPORT_SYMBOL(build_skb);
 
-struct netdev_alloc_cache {
-        struct page_frag frag;
-        /* we maintain a pagecount bias, so that we dont dirty cache line
-         * containing page->_count every time we allocate a fragment.
-         */
-        unsigned int pagecnt_bias;
-};
-static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
-static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);
-
-static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
-                                       gfp_t gfp_mask)
-{
-        const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER;
-        struct page *page = NULL;
-        gfp_t gfp = gfp_mask;
-
-        if (order) {
-                gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
-                            __GFP_NOMEMALLOC;
-                page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
-                nc->frag.size = PAGE_SIZE << (page ? order : 0);
-        }
-
-        if (unlikely(!page))
-                page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
-
-        nc->frag.page = page;
-
-        return page;
-}
-
-static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache,
-                               unsigned int fragsz, gfp_t gfp_mask)
-{
-        struct netdev_alloc_cache *nc = this_cpu_ptr(cache);
-        struct page *page = nc->frag.page;
-        unsigned int size;
-        int offset;
-
-        if (unlikely(!page)) {
-refill:
-                page = __page_frag_refill(nc, gfp_mask);
-                if (!page)
-                        return NULL;
-
-                /* if size can vary use frag.size else just use PAGE_SIZE */
-                size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
-
-                /* Even if we own the page, we do not use atomic_set().
-                 * This would break get_page_unless_zero() users.
-                 */
-                atomic_add(size - 1, &page->_count);
-
-                /* reset page count bias and offset to start of new frag */
-                nc->pagecnt_bias = size;
-                nc->frag.offset = size;
-        }
-
-        offset = nc->frag.offset - fragsz;
-        if (unlikely(offset < 0)) {
-                if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
-                        goto refill;
-
-                /* if size can vary use frag.size else just use PAGE_SIZE */
-                size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
-
-                /* OK, page count is 0, we can safely set it */
-                atomic_set(&page->_count, size);
-
-                /* reset page count bias and offset to start of new frag */
-                nc->pagecnt_bias = size;
-                offset = size - fragsz;
-        }
-
-        nc->pagecnt_bias--;
-        nc->frag.offset = offset;
-
-        return page_address(page) + offset;
-}
+static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
+static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
 
 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
+        struct page_frag_cache *nc;
         unsigned long flags;
         void *data;
 
         local_irq_save(flags);
-        data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask);
+        nc = this_cpu_ptr(&netdev_alloc_cache);
+        data = __alloc_page_frag(nc, fragsz, gfp_mask);
         local_irq_restore(flags);
         return data;
 }
@@ -454,7 +378,9 @@ EXPORT_SYMBOL(netdev_alloc_frag);
 
 static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
-        return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask);
+        struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+        return __alloc_page_frag(nc, fragsz, gfp_mask);
 }
 
 void *napi_alloc_frag(unsigned int fragsz)
@@ -464,76 +390,64 @@ void *napi_alloc_frag(unsigned int fragsz)
 EXPORT_SYMBOL(napi_alloc_frag);
 
 /**
- * __alloc_rx_skb - allocate an skbuff for rx
+ * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
+ * @dev: network device to receive on
  * @length: length to allocate
  * @gfp_mask: get_free_pages mask, passed to alloc_skb
- * @flags: If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
- *      allocations in case we have to fallback to __alloc_skb()
- *      If SKB_ALLOC_NAPI is set, page fragment will be allocated
- *      from napi_cache instead of netdev_cache.
  *
  * Allocate a new &sk_buff and assign it a usage count of one. The
- * buffer has unspecified headroom built in. Users should allocate
+ * buffer has NET_SKB_PAD headroom built in. Users should allocate
  * the headroom they think they need without accounting for the
  * built in space. The built in space is used for optimisations.
  *
  * %NULL is returned if there is no free memory.
  */
-static struct sk_buff *__alloc_rx_skb(unsigned int length, gfp_t gfp_mask,
-                                      int flags)
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
+                                   gfp_t gfp_mask)
 {
-        struct sk_buff *skb = NULL;
-        unsigned int fragsz = SKB_DATA_ALIGN(length) +
-                              SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+        struct page_frag_cache *nc;
+        unsigned long flags;
+        struct sk_buff *skb;
+        bool pfmemalloc;
+        void *data;
 
-        if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
-                void *data;
+        len += NET_SKB_PAD;
 
-                if (sk_memalloc_socks())
-                        gfp_mask |= __GFP_MEMALLOC;
+        if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+            (gfp_mask & (__GFP_WAIT | GFP_DMA)))
+                return __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
 
-                data = (flags & SKB_ALLOC_NAPI) ?
-                        __napi_alloc_frag(fragsz, gfp_mask) :
-                        __netdev_alloc_frag(fragsz, gfp_mask);
+        len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+        len = SKB_DATA_ALIGN(len);
 
-                if (likely(data)) {
-                        skb = build_skb(data, fragsz);
-                        if (unlikely(!skb))
-                                put_page(virt_to_head_page(data));
-                }
-        } else {
-                skb = __alloc_skb(length, gfp_mask,
-                                  SKB_ALLOC_RX, NUMA_NO_NODE);
-        }
-        return skb;
-}
+        if (sk_memalloc_socks())
+                gfp_mask |= __GFP_MEMALLOC;
 
-/**
- * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
- * @dev: network device to receive on
- * @length: length to allocate
- * @gfp_mask: get_free_pages mask, passed to alloc_skb
- *
- * Allocate a new &sk_buff and assign it a usage count of one. The
- * buffer has NET_SKB_PAD headroom built in. Users should allocate
- * the headroom they think they need without accounting for the
- * built in space. The built in space is used for optimisations.
- *
- * %NULL is returned if there is no free memory.
- */
-struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
-                                   unsigned int length, gfp_t gfp_mask)
-{
-        struct sk_buff *skb;
+        local_irq_save(flags);
 
-        length += NET_SKB_PAD;
-        skb = __alloc_rx_skb(length, gfp_mask, 0);
+        nc = this_cpu_ptr(&netdev_alloc_cache);
+        data = __alloc_page_frag(nc, len, gfp_mask);
+        pfmemalloc = nc->pfmemalloc;
 
-        if (likely(skb)) {
-                skb_reserve(skb, NET_SKB_PAD);
-                skb->dev = dev;
+        local_irq_restore(flags);
+
+        if (unlikely(!data))
+                return NULL;
+
+        skb = __build_skb(data, len);
+        if (unlikely(!skb)) {
+                skb_free_frag(data);
+                return NULL;
         }
 
+        /* use OR instead of assignment to avoid clearing of bits in mask */
+        if (pfmemalloc)
+                skb->pfmemalloc = 1;
+        skb->head_frag = 1;
+
+        skb_reserve(skb, NET_SKB_PAD);
+        skb->dev = dev;
+
         return skb;
 }
 EXPORT_SYMBOL(__netdev_alloc_skb);
@@ -551,19 +465,43 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
  *
  * %NULL is returned if there is no free memory.
  */
-struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
-                                 unsigned int length, gfp_t gfp_mask)
+struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
+                                 gfp_t gfp_mask)
 {
+        struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
         struct sk_buff *skb;
+        void *data;
+
+        len += NET_SKB_PAD + NET_IP_ALIGN;
 
-        length += NET_SKB_PAD + NET_IP_ALIGN;
-        skb = __alloc_rx_skb(length, gfp_mask, SKB_ALLOC_NAPI);
+        if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+            (gfp_mask & (__GFP_WAIT | GFP_DMA)))
+                return __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
 
-        if (likely(skb)) {
-                skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-                skb->dev = napi->dev;
+        len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+        len = SKB_DATA_ALIGN(len);
+
+        if (sk_memalloc_socks())
+                gfp_mask |= __GFP_MEMALLOC;
+
+        data = __alloc_page_frag(nc, len, gfp_mask);
+        if (unlikely(!data))
+                return NULL;
+
+        skb = __build_skb(data, len);
+        if (unlikely(!skb)) {
+                skb_free_frag(data);
+                return NULL;
         }
 
+        /* use OR instead of assignment to avoid clearing of bits in mask */
+        if (nc->pfmemalloc)
+                skb->pfmemalloc = 1;
+        skb->head_frag = 1;
+
+        skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+        skb->dev = napi->dev;
+
         return skb;
 }
 EXPORT_SYMBOL(__napi_alloc_skb);
@@ -611,10 +549,12 @@ static void skb_clone_fraglist(struct sk_buff *skb)
 
 static void skb_free_head(struct sk_buff *skb)
 {
+        unsigned char *head = skb->head;
+
         if (skb->head_frag)
-                put_page(virt_to_head_page(skb->head));
+                skb_free_frag(head);
         else
-                kfree(skb->head);
+                kfree(head);
 }
 
 static void skb_release_data(struct sk_buff *skb)
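
For context, a minimal usage sketch of the per-CPU page-fragment API introduced by this patch (illustrative only, not part of the change; the my_rx_* names and my_rx_cache are hypothetical):

#include <linux/gfp.h>
#include <linux/mm_types.h>
#include <linux/skbuff.h>

/* hypothetical driver-private fragment cache backed by the new allocator;
 * a page_frag_cache has no internal locking, so a real driver must ensure
 * it is not used concurrently (the stack uses per-CPU caches with IRQs off).
 */
static struct page_frag_cache my_rx_cache;

static void *my_rx_alloc(unsigned int fragsz)
{
        /* carve fragsz bytes out of the cached compound (or order-0) page;
         * returns NULL on allocation failure.
         */
        return __alloc_page_frag(&my_rx_cache, fragsz, GFP_ATOMIC);
}

static void my_rx_free(void *data)
{
        /* drop one reference on the page backing this fragment */
        skb_free_frag(data);
}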