-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |   2
-rw-r--r--  drivers/net/ethernet/broadcom/tg3.c             |   2
-rw-r--r--  drivers/net/ethernet/hisilicon/hip04_eth.c      |   2
-rw-r--r--  drivers/net/ethernet/intel/e1000/e1000_main.c   |  19
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_main.c       |  11
-rw-r--r--  drivers/net/ethernet/marvell/mvneta.c           |   2
-rw-r--r--  drivers/net/ethernet/ti/netcp_core.c            |   2
-rw-r--r--  include/linux/gfp.h                             |   5
-rw-r--r--  include/linux/mm_types.h                        |  18
-rw-r--r--  include/linux/skbuff.h                          |   9
-rw-r--r--  mm/page_alloc.c                                 |  98
-rw-r--r--  net/core/skbuff.c                               | 224
12 files changed, 223 insertions(+), 171 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index a8bb8f664d3d..b10d1744e5ae 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -662,7 +662,7 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
 static void bnx2x_frag_free(const struct bnx2x_fastpath *fp, void *data)
 {
 	if (fp->rx_frag_size)
-		put_page(virt_to_head_page(data));
+		skb_free_frag(data);
 	else
 		kfree(data);
 }
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 069952fa5d64..73c934cf6c61 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6618,7 +6618,7 @@ static void tg3_tx(struct tg3_napi *tnapi)
 static void tg3_frag_free(bool is_frag, void *data)
 {
 	if (is_frag)
-		put_page(virt_to_head_page(data));
+		skb_free_frag(data);
 	else
 		kfree(data);
 }
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
index 3b39fdddeb57..d49bee38cd31 100644
--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -798,7 +798,7 @@ static void hip04_free_ring(struct net_device *ndev, struct device *d)
 
 	for (i = 0; i < RX_DESC_NUM; i++)
 		if (priv->rx_buf[i])
-			put_page(virt_to_head_page(priv->rx_buf[i]));
+			skb_free_frag(priv->rx_buf[i]);
 
 	for (i = 0; i < TX_DESC_NUM; i++)
 		if (priv->tx_skb[i])
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 983eb4e6f7aa..74dc15055971 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -2079,11 +2079,6 @@ static void *e1000_alloc_frag(const struct e1000_adapter *a)
 	return data;
 }
 
-static void e1000_free_frag(const void *data)
-{
-	put_page(virt_to_head_page(data));
-}
-
 /**
  * e1000_clean_rx_ring - Free Rx Buffers per Queue
  * @adapter: board private structure
@@ -2107,7 +2102,7 @@ static void e1000_clean_rx_ring(struct e1000_adapter *adapter,
 					 adapter->rx_buffer_len,
 					 DMA_FROM_DEVICE);
 			if (buffer_info->rxbuf.data) {
-				e1000_free_frag(buffer_info->rxbuf.data);
+				skb_free_frag(buffer_info->rxbuf.data);
 				buffer_info->rxbuf.data = NULL;
 			}
 		} else if (adapter->clean_rx == e1000_clean_jumbo_rx_irq) {
@@ -4594,28 +4589,28 @@ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
 			data = e1000_alloc_frag(adapter);
 			/* Failed allocation, critical failure */
 			if (!data) {
-				e1000_free_frag(olddata);
+				skb_free_frag(olddata);
 				adapter->alloc_rx_buff_failed++;
 				break;
 			}
 
 			if (!e1000_check_64k_bound(adapter, data, bufsz)) {
 				/* give up */
-				e1000_free_frag(data);
-				e1000_free_frag(olddata);
+				skb_free_frag(data);
+				skb_free_frag(olddata);
 				adapter->alloc_rx_buff_failed++;
 				break;
 			}
 
 			/* Use new allocation */
-			e1000_free_frag(olddata);
+			skb_free_frag(olddata);
 		}
 		buffer_info->dma = dma_map_single(&pdev->dev,
 						  data,
 						  adapter->rx_buffer_len,
 						  DMA_FROM_DEVICE);
 		if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
-			e1000_free_frag(data);
+			skb_free_frag(data);
 			buffer_info->dma = 0;
 			adapter->alloc_rx_buff_failed++;
 			break;
@@ -4637,7 +4632,7 @@ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
 					 adapter->rx_buffer_len,
 					 DMA_FROM_DEVICE);
 
-		e1000_free_frag(data);
+		skb_free_frag(data);
 		buffer_info->rxbuf.data = NULL;
 		buffer_info->dma = 0;
 
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index e63664649029..8a45ed7506c5 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -4974,6 +4974,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
 	struct igb_tx_buffer *first;
 	int tso;
 	u32 tx_flags = 0;
+	unsigned short f;
 	u16 count = TXD_USE_COUNT(skb_headlen(skb));
 	__be16 protocol = vlan_get_protocol(skb);
 	u8 hdr_len = 0;
@@ -4984,14 +4985,8 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
 	 * + 1 desc for context descriptor,
 	 * otherwise try next time
 	 */
-	if (NETDEV_FRAG_PAGE_MAX_SIZE > IGB_MAX_DATA_PER_TXD) {
-		unsigned short f;
-
-		for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
-			count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
-	} else {
-		count += skb_shinfo(skb)->nr_frags;
-	}
+	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
 
 	if (igb_maybe_stop_tx(tx_ring, count + 3)) {
 		/* this is a hard error */
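The igb hunk exists because the old code could take a one-descriptor-per-frag shortcut whenever the compile-time fragment-size bound fit inside a single Tx descriptor; with NETDEV_FRAG_PAGE_MAX_SIZE removed from skbuff.h, that bound is no longer visible to the driver, so every fragment has to be rounded up individually. A minimal sketch of the arithmetic, with assumed constants rather than the driver's exact header definitions:

	/* Illustrative version of the descriptor math (MY_* names and the
	 * 32KB cap are assumptions, not the driver's exact macros): a
	 * fragment of size S needs DIV_ROUND_UP(S, max-per-desc) descriptors.
	 */
	#define MY_MAX_DATA_PER_TXD	(1u << 15)	/* assumed 32KB per desc */
	#define MY_TXD_USE_COUNT(S)	DIV_ROUND_UP((S), MY_MAX_DATA_PER_TXD)

	static u16 my_count_tx_descs(const struct sk_buff *skb)
	{
		u16 count = MY_TXD_USE_COUNT(skb_headlen(skb));
		unsigned short f;

		/* a page frag may now span up to PAGE_FRAG_CACHE_MAX_SIZE,
		 * so one descriptor per frag is no longer a safe shortcut
		 */
		for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
			count += MY_TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);

		return count;
	}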
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index ce5f7f9cff06..ecce8261ce3b 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1359,7 +1359,7 @@ static void *mvneta_frag_alloc(const struct mvneta_port *pp)
 static void mvneta_frag_free(const struct mvneta_port *pp, void *data)
 {
 	if (likely(pp->frag_size <= PAGE_SIZE))
-		put_page(virt_to_head_page(data));
+		skb_free_frag(data);
 	else
 		kfree(data);
 }
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 43efc3a0cda5..0a28c07361cf 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -537,7 +537,7 @@ int netcp_unregister_rxhook(struct netcp_intf *netcp_priv, int order,
 static void netcp_frag_free(bool is_frag, void *ptr)
 {
 	if (is_frag)
-		put_page(virt_to_head_page(ptr));
+		skb_free_frag(ptr);
 	else
 		kfree(ptr);
 }
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 97a9373e61e8..70a7fee1efb3 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -366,6 +366,11 @@ extern void free_pages(unsigned long addr, unsigned int order);
 extern void free_hot_cold_page(struct page *page, bool cold);
 extern void free_hot_cold_page_list(struct list_head *list, bool cold);
 
+struct page_frag_cache;
+extern void *__alloc_page_frag(struct page_frag_cache *nc,
+			       unsigned int fragsz, gfp_t gfp_mask);
+extern void __free_page_frag(void *addr);
+
 extern void __free_kmem_pages(struct page *page, unsigned int order);
 extern void free_kmem_pages(unsigned long addr, unsigned int order);
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8d37e26a1007..0038ac7466fd 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -226,6 +226,24 @@ struct page_frag {
 #endif
 };
 
+#define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
+#define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
+
+struct page_frag_cache {
+	void * va;
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+	__u16 offset;
+	__u16 size;
+#else
+	__u32 offset;
+#endif
+	/* we maintain a pagecount bias, so that we dont dirty cache line
+	 * containing page->_count every time we allocate a fragment.
+	 */
+	unsigned int pagecnt_bias;
+	bool pfmemalloc;
+};
+
 typedef unsigned long __nocast vm_flags_t;
 
 /*
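The pagecnt_bias field is the heart of the scheme: instead of a get_page()/put_page() pair per fragment, the cache front-loads a large count into page->_count once per refill and hands fragments out against a purely local bias counter, so the shared atomic is only touched when the page is recycled. A rough user-space model of the accounting (names invented; a plain int stands in for the atomic page->_count):

	/* Hypothetical model of the bias bookkeeping, not kernel code. */
	struct frag_cache_model {
		int page_refs;		/* models page->_count */
		unsigned int bias;	/* pre-charged refs not yet handed out */
		unsigned int offset;
	};

	static void model_refill(struct frag_cache_model *c, unsigned int size)
	{
		c->page_refs = size;	/* kernel: atomic_add(size - 1, &_count) */
		c->bias = size;		/* plain store: no atomic traffic */
		c->offset = size;
	}

	static void *model_alloc(struct frag_cache_model *c, void *va,
				 unsigned int fragsz)
	{
		if (c->offset < fragsz)
			return NULL;	/* real code drops the bias and refills */
		c->bias--;		/* the only per-allocation bookkeeping */
		c->offset -= fragsz;
		return (char *)va + c->offset;
	}

Each freed fragment costs one atomic decrement; when the offset underflows, the remaining bias is subtracted from page->_count in a single atomic_sub_and_test(), and a zero result means the page is exclusively ours again and can be reused without a fresh allocation.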
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9c2f793573fa..c0b574a414e7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2128,10 +2128,6 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
 		kfree_skb(skb);
 }
 
-#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
-#define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
-#define NETDEV_PAGECNT_MAX_BIAS	   NETDEV_FRAG_PAGE_MAX_SIZE
-
 void *netdev_alloc_frag(unsigned int fragsz);
 
 struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
@@ -2186,6 +2182,11 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
 	return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
 }
 
+static inline void skb_free_frag(void *addr)
+{
+	__free_page_frag(addr);
+}
+
 void *napi_alloc_frag(unsigned int fragsz);
 struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
 				 unsigned int length, gfp_t gfp_mask);
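The skb_free_frag() helper is what lets all the driver hunks above drop their open-coded put_page(virt_to_head_page(...)) wrappers, and it keeps the freeing path changeable inside mm later. A minimal sketch of the resulting driver-side pattern, with hypothetical my_* names; the fragment must come from netdev_alloc_frag()/napi_alloc_frag() (or the underlying __alloc_page_frag()) for this to be the right release function:

	/* Hypothetical driver helpers showing the alloc/free pairing. */
	static void *my_rx_buf_alloc(unsigned int buf_len)
	{
		/* reserve room for skb_shared_info if the buffer may back
		 * an skb head via build_skb()
		 */
		unsigned int fragsz = SKB_DATA_ALIGN(buf_len) +
				      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

		return netdev_alloc_frag(fragsz);
	}

	static void my_rx_buf_free(void *buf)
	{
		skb_free_frag(buf);	/* was: put_page(virt_to_head_page(buf)) */
	}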
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ebffa0e4a9c0..2fd31aebef30 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2967,6 +2967,104 @@ void free_pages(unsigned long addr, unsigned int order)
 EXPORT_SYMBOL(free_pages);
 
 /*
+ * Page Fragment:
+ *  An arbitrary-length arbitrary-offset area of memory which resides
+ *  within a 0 or higher order page.  Multiple fragments within that page
+ *  are individually refcounted, in the page's reference counter.
+ *
+ * The page_frag functions below provide a simple allocation framework for
+ * page fragments.  This is used by the network stack and network device
+ * drivers to provide a backing region of memory for use as either an
+ * sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
+ */
+static struct page *__page_frag_refill(struct page_frag_cache *nc,
+				       gfp_t gfp_mask)
+{
+	struct page *page = NULL;
+	gfp_t gfp = gfp_mask;
+
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+	gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
+		    __GFP_NOMEMALLOC;
+	page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
+				PAGE_FRAG_CACHE_MAX_ORDER);
+	nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
+#endif
+	if (unlikely(!page))
+		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+
+	nc->va = page ? page_address(page) : NULL;
+
+	return page;
+}
+
+void *__alloc_page_frag(struct page_frag_cache *nc,
+			unsigned int fragsz, gfp_t gfp_mask)
+{
+	unsigned int size = PAGE_SIZE;
+	struct page *page;
+	int offset;
+
+	if (unlikely(!nc->va)) {
+refill:
+		page = __page_frag_refill(nc, gfp_mask);
+		if (!page)
+			return NULL;
+
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+		/* if size can vary use size else just use PAGE_SIZE */
+		size = nc->size;
+#endif
+		/* Even if we own the page, we do not use atomic_set().
+		 * This would break get_page_unless_zero() users.
+		 */
+		atomic_add(size - 1, &page->_count);
+
+		/* reset page count bias and offset to start of new frag */
+		nc->pfmemalloc = page->pfmemalloc;
+		nc->pagecnt_bias = size;
+		nc->offset = size;
+	}
+
+	offset = nc->offset - fragsz;
+	if (unlikely(offset < 0)) {
+		page = virt_to_page(nc->va);
+
+		if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
+			goto refill;
+
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+		/* if size can vary use size else just use PAGE_SIZE */
+		size = nc->size;
+#endif
+		/* OK, page count is 0, we can safely set it */
+		atomic_set(&page->_count, size);
+
+		/* reset page count bias and offset to start of new frag */
+		nc->pagecnt_bias = size;
+		offset = size - fragsz;
+	}
+
+	nc->pagecnt_bias--;
+	nc->offset = offset;
+
+	return nc->va + offset;
+}
+EXPORT_SYMBOL(__alloc_page_frag);
+
+/*
+ * Frees a page fragment allocated out of either a compound or order 0 page.
+ */
+void __free_page_frag(void *addr)
+{
+	struct page *page = virt_to_head_page(addr);
+
+	if (unlikely(put_page_testzero(page)))
+		__free_pages_ok(page, compound_order(page));
+}
+EXPORT_SYMBOL(__free_page_frag);
+
+/*
  * alloc_kmem_pages charges newly allocated pages to the kmem resource counter
  * of the current memory cgroup.
  *
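Since __alloc_page_frag() and __free_page_frag() are exported from mm here, a subsystem can in principle run a fragment cache of its own rather than relying on the per-cpu network caches. A hedged sketch under that assumption (the struct and function names are invented; serialization is the caller's job, just as the netdev path disables IRQs around its per-cpu cache):

	/* Hypothetical subsystem-private pool built on the new mm API. */
	struct my_buf_pool {
		struct page_frag_cache frag_cache;	/* zeroed at init => !va */
		spinlock_t lock;
	};

	static void *my_buf_alloc(struct my_buf_pool *pool, unsigned int len)
	{
		void *buf;

		spin_lock_bh(&pool->lock);
		buf = __alloc_page_frag(&pool->frag_cache, len, GFP_ATOMIC);
		spin_unlock_bh(&pool->lock);

		return buf;
	}

	static void my_buf_free(void *buf)
	{
		/* each fragment carries its own page reference, so the
		 * pool is not needed on the free side
		 */
		__free_page_frag(buf);
	}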
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b9eb90b39ac7..d67e612bf0ef 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -347,94 +347,18 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 }
 EXPORT_SYMBOL(build_skb);
 
-struct netdev_alloc_cache {
-	struct page_frag	frag;
-	/* we maintain a pagecount bias, so that we dont dirty cache line
-	 * containing page->_count every time we allocate a fragment.
-	 */
-	unsigned int		pagecnt_bias;
-};
-static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
-static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);
-
-static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
-				       gfp_t gfp_mask)
-{
-	const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER;
-	struct page *page = NULL;
-	gfp_t gfp = gfp_mask;
-
-	if (order) {
-		gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
-			    __GFP_NOMEMALLOC;
-		page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
-		nc->frag.size = PAGE_SIZE << (page ? order : 0);
-	}
-
-	if (unlikely(!page))
-		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
-
-	nc->frag.page = page;
-
-	return page;
-}
-
-static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache,
-			       unsigned int fragsz, gfp_t gfp_mask)
-{
-	struct netdev_alloc_cache *nc = this_cpu_ptr(cache);
-	struct page *page = nc->frag.page;
-	unsigned int size;
-	int offset;
-
-	if (unlikely(!page)) {
-refill:
-		page = __page_frag_refill(nc, gfp_mask);
-		if (!page)
-			return NULL;
-
-		/* if size can vary use frag.size else just use PAGE_SIZE */
-		size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
-
-		/* Even if we own the page, we do not use atomic_set().
-		 * This would break get_page_unless_zero() users.
-		 */
-		atomic_add(size - 1, &page->_count);
-
-		/* reset page count bias and offset to start of new frag */
-		nc->pagecnt_bias = size;
-		nc->frag.offset = size;
-	}
-
-	offset = nc->frag.offset - fragsz;
-	if (unlikely(offset < 0)) {
-		if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
-			goto refill;
-
-		/* if size can vary use frag.size else just use PAGE_SIZE */
-		size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
-
-		/* OK, page count is 0, we can safely set it */
-		atomic_set(&page->_count, size);
-
-		/* reset page count bias and offset to start of new frag */
-		nc->pagecnt_bias = size;
-		offset = size - fragsz;
-	}
-
-	nc->pagecnt_bias--;
-	nc->frag.offset = offset;
-
-	return page_address(page) + offset;
-}
+static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
+static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
 
 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
+	struct page_frag_cache *nc;
 	unsigned long flags;
 	void *data;
 
 	local_irq_save(flags);
-	data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask);
+	nc = this_cpu_ptr(&netdev_alloc_cache);
+	data = __alloc_page_frag(nc, fragsz, gfp_mask);
 	local_irq_restore(flags);
 	return data;
 }
@@ -454,7 +378,9 @@ EXPORT_SYMBOL(netdev_alloc_frag);
 
 static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
-	return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask);
+	struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+	return __alloc_page_frag(nc, fragsz, gfp_mask);
 }
 
 void *napi_alloc_frag(unsigned int fragsz)
@@ -464,76 +390,64 @@ void *napi_alloc_frag(unsigned int fragsz)
 EXPORT_SYMBOL(napi_alloc_frag);
 
 /**
- *	__alloc_rx_skb - allocate an skbuff for rx
+ *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
+ *	@dev: network device to receive on
  *	@length: length to allocate
  *	@gfp_mask: get_free_pages mask, passed to alloc_skb
- *	@flags:	If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
- *		allocations in case we have to fallback to __alloc_skb()
- *		If SKB_ALLOC_NAPI is set, page fragment will be allocated
- *		from napi_cache instead of netdev_cache.
  *
  *	Allocate a new &sk_buff and assign it a usage count of one. The
- *	buffer has unspecified headroom built in. Users should allocate
+ *	buffer has NET_SKB_PAD headroom built in. Users should allocate
  *	the headroom they think they need without accounting for the
  *	built in space. The built in space is used for optimisations.
 *
 *	%NULL is returned if there is no free memory.
 */
-static struct sk_buff *__alloc_rx_skb(unsigned int length, gfp_t gfp_mask,
-				      int flags)
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
+				   gfp_t gfp_mask)
 {
-	struct sk_buff *skb = NULL;
-	unsigned int fragsz = SKB_DATA_ALIGN(length) +
-			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	struct page_frag_cache *nc;
+	unsigned long flags;
+	struct sk_buff *skb;
+	bool pfmemalloc;
+	void *data;
 
-	if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
-		void *data;
+	len += NET_SKB_PAD;
 
-		if (sk_memalloc_socks())
-			gfp_mask |= __GFP_MEMALLOC;
+	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+	    (gfp_mask & (__GFP_WAIT | GFP_DMA)))
+		return __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
 
-		data = (flags & SKB_ALLOC_NAPI) ?
-			__napi_alloc_frag(fragsz, gfp_mask) :
-			__netdev_alloc_frag(fragsz, gfp_mask);
+	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	len = SKB_DATA_ALIGN(len);
 
-		if (likely(data)) {
-			skb = build_skb(data, fragsz);
-			if (unlikely(!skb))
-				put_page(virt_to_head_page(data));
-		}
-	} else {
-		skb = __alloc_skb(length, gfp_mask,
-				  SKB_ALLOC_RX, NUMA_NO_NODE);
-	}
-	return skb;
-}
+	if (sk_memalloc_socks())
+		gfp_mask |= __GFP_MEMALLOC;
 
-/**
- *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
- *	@dev: network device to receive on
- *	@length: length to allocate
- *	@gfp_mask: get_free_pages mask, passed to alloc_skb
- *
- *	Allocate a new &sk_buff and assign it a usage count of one. The
- *	buffer has NET_SKB_PAD headroom built in. Users should allocate
- *	the headroom they think they need without accounting for the
- *	built in space. The built in space is used for optimisations.
- *
- *	%NULL is returned if there is no free memory.
- */
-struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
-				   unsigned int length, gfp_t gfp_mask)
-{
-	struct sk_buff *skb;
+	local_irq_save(flags);
 
-	length += NET_SKB_PAD;
-	skb = __alloc_rx_skb(length, gfp_mask, 0);
+	nc = this_cpu_ptr(&netdev_alloc_cache);
+	data = __alloc_page_frag(nc, len, gfp_mask);
+	pfmemalloc = nc->pfmemalloc;
 
-	if (likely(skb)) {
-		skb_reserve(skb, NET_SKB_PAD);
-		skb->dev = dev;
+	local_irq_restore(flags);
+
+	if (unlikely(!data))
+		return NULL;
+
+	skb = __build_skb(data, len);
+	if (unlikely(!skb)) {
+		skb_free_frag(data);
+		return NULL;
 	}
 
+	/* use OR instead of assignment to avoid clearing of bits in mask */
+	if (pfmemalloc)
+		skb->pfmemalloc = 1;
+	skb->head_frag = 1;
+
+	skb_reserve(skb, NET_SKB_PAD);
+	skb->dev = dev;
+
 	return skb;
 }
 EXPORT_SYMBOL(__netdev_alloc_skb);
@@ -551,19 +465,43 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
 *
 *	%NULL is returned if there is no free memory.
 */
-struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
-				 unsigned int length, gfp_t gfp_mask)
+struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
+				 gfp_t gfp_mask)
 {
+	struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 	struct sk_buff *skb;
+	void *data;
+
+	len += NET_SKB_PAD + NET_IP_ALIGN;
 
-	length += NET_SKB_PAD + NET_IP_ALIGN;
-	skb = __alloc_rx_skb(length, gfp_mask, SKB_ALLOC_NAPI);
+	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+	    (gfp_mask & (__GFP_WAIT | GFP_DMA)))
+		return __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
 
-	if (likely(skb)) {
-		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-		skb->dev = napi->dev;
+	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	len = SKB_DATA_ALIGN(len);
+
+	if (sk_memalloc_socks())
+		gfp_mask |= __GFP_MEMALLOC;
+
+	data = __alloc_page_frag(nc, len, gfp_mask);
+	if (unlikely(!data))
+		return NULL;
+
+	skb = __build_skb(data, len);
+	if (unlikely(!skb)) {
+		skb_free_frag(data);
+		return NULL;
 	}
 
+	/* use OR instead of assignment to avoid clearing of bits in mask */
+	if (nc->pfmemalloc)
+		skb->pfmemalloc = 1;
+	skb->head_frag = 1;
+
+	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+	skb->dev = napi->dev;
+
 	return skb;
 }
 EXPORT_SYMBOL(__napi_alloc_skb);
@@ -611,10 +549,12 @@ static void skb_clone_fraglist(struct sk_buff *skb)
 
 static void skb_free_head(struct sk_buff *skb)
 {
+	unsigned char *head = skb->head;
+
 	if (skb->head_frag)
-		put_page(virt_to_head_page(skb->head));
+		skb_free_frag(head);
 	else
-		kfree(skb->head);
+		kfree(head);
 }
 
 static void skb_release_data(struct sk_buff *skb)
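After this rewrite, __netdev_alloc_skb() and __napi_alloc_skb() share the same shape: fall back to __alloc_skb() for oversized or DMA/__GFP_WAIT requests, otherwise carve the head out of a per-cpu fragment and wrap it with __build_skb(), propagating the cached pfmemalloc flag. A sketch of the consumer side in a NAPI poll routine, where my_dev_rx_frame_len() and my_dev_rx_copy() are stand-ins for real device specifics:

	/* Hypothetical poll loop showing where napi_alloc_skb() fits. */
	static int my_poll(struct napi_struct *napi, int budget)
	{
		int done = 0;

		while (done < budget) {
			unsigned int len = my_dev_rx_frame_len(napi->dev);
			struct sk_buff *skb;

			if (!len)
				break;

			/* head comes from the per-cpu napi_alloc_cache;
			 * NET_SKB_PAD + NET_IP_ALIGN headroom is reserved
			 */
			skb = napi_alloc_skb(napi, len);
			if (!skb)
				break;

			my_dev_rx_copy(napi->dev, skb_put(skb, len));
			skb->protocol = eth_type_trans(skb, napi->dev);
			napi_gro_receive(napi, skb);
			done++;
		}

		if (done < budget)
			napi_complete(napi);

		return done;
	}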