author     David S. Miller <davem@davemloft.net>    2014-12-10 13:32:02 -0500
committer  David S. Miller <davem@davemloft.net>    2014-12-10 13:32:02 -0500
commit     7dbea3e8662eb491e9da109cf8ffc372e160fe9a (patch)
tree       e69f89b43fa65a1f4279bc6aa7dc068c0817c208
parent     6e5f59aacbf9527dfe425541c78cb8c56623e7eb (diff)
parent     45abfb1069e4c365f6c1e2fc97c5927272725bfa (diff)
Merge branch 'napi_page_frags'
Alexander Duyck says:

====================
net: Alloc NAPI page frags from their own pool

This patch series implements a means of allocating page fragments without
the need for the local_irq_save/restore in __netdev_alloc_frag. By doing
this I am able to decrease packet processing time by 11ns per packet in my
test environment.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
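For context, the receive path this series touches typically looks like the minimal sketch below. It is a hypothetical driver copybreak handler (struct my_priv, my_rx_copybreak() and their fields are illustrative placeholders, not code from any of the patched drivers); the one-line change asked of drivers is to take the copy skb from the per-NAPI fragment cache with the new napi_alloc_skb() helper instead of netdev_alloc_skb_ip_align(), which has to disable IRQs around its shared per-CPU cache.

/* Minimal sketch of a driver copybreak path, assuming a private struct
 * that embeds the napi_struct registered for the Rx queue.  Names here
 * are illustrative; only napi_alloc_skb() (added by this series) and the
 * generic skb/NAPI helpers are real kernel APIs.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/etherdevice.h>

struct my_priv {
	struct napi_struct napi;	/* registered via netif_napi_add() */
	struct net_device *netdev;
};

/* Called from the driver's NAPI poll routine (softirq context) for
 * packets short enough to be copied out of the DMA buffer.
 */
static void my_rx_copybreak(struct my_priv *priv, const void *data,
			    unsigned int len)
{
	struct sk_buff *skb;

	/* Before this series: netdev_alloc_skb_ip_align(priv->netdev, len),
	 * whose per-CPU fragment cache is shared with process context and
	 * therefore wrapped in local_irq_save()/restore().  The NAPI cache
	 * is only touched from softirq context, so no IRQ toggling is needed.
	 */
	skb = napi_alloc_skb(&priv->napi, len);
	if (unlikely(!skb)) {
		priv->netdev->stats.rx_dropped++;
		return;
	}

	skb_copy_to_linear_data(skb, data, len);
	skb_put(skb, len);
	skb->protocol = eth_type_trans(skb, priv->netdev);
	napi_gro_receive(&priv->napi, skb);
}

Note that napi_alloc_skb() reserves NET_SKB_PAD + NET_IP_ALIGN headroom just as netdev_alloc_skb_ip_align() does, as the new __napi_alloc_skb() in net/core/skbuff.c below shows.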
-rw-r--r--  drivers/net/ethernet/broadcom/b44.c               2
-rw-r--r--  drivers/net/ethernet/broadcom/bcm63xx_enet.c      2
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c   2
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb/sge.c          11
-rw-r--r--  drivers/net/ethernet/intel/e1000/e1000_main.c     2
-rw-r--r--  drivers/net/ethernet/intel/e1000e/netdev.c        2
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_main.c     4
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_main.c         3
-rw-r--r--  drivers/net/ethernet/intel/ixgb/ixgb_main.c       6
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c     4
-rw-r--r--  drivers/net/ethernet/realtek/8139cp.c             2
-rw-r--r--  drivers/net/ethernet/realtek/8139too.c            2
-rw-r--r--  drivers/net/ethernet/realtek/r8169.c              2
-rw-r--r--  include/linux/skbuff.h                           11
-rw-r--r--  net/core/dev.c                                    2
-rw-r--r--  net/core/skbuff.c                               191
16 files changed, 178 insertions, 70 deletions
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index ffeaf476a120..d86d6baf9681 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -836,7 +836,7 @@ static int b44_rx(struct b44 *bp, int budget)
 			struct sk_buff *copy_skb;
 
 			b44_recycle_rx(bp, cons, bp->rx_prod);
-			copy_skb = netdev_alloc_skb_ip_align(bp->dev, len);
+			copy_skb = napi_alloc_skb(&bp->napi, len);
 			if (copy_skb == NULL)
 				goto drop_it_no_recycle;
 
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 3e8d1a88ed3d..21206d33b638 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -385,7 +385,7 @@ static int bcm_enet_receive_queue(struct net_device *dev, int budget)
 		if (len < copybreak) {
 			struct sk_buff *nskb;
 
-			nskb = netdev_alloc_skb_ip_align(dev, len);
+			nskb = napi_alloc_skb(&priv->napi, len);
 			if (!nskb) {
 				/* forget packet, just rearm desc */
 				dev->stats.rx_dropped++;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index b4d71fd909ee..1d1147c93d59 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1015,7 +1015,7 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
 			 */
 			if ((bp->dev->mtu > ETH_MAX_PACKET_SIZE) &&
 			    (len <= RX_COPY_THRESH)) {
-				skb = netdev_alloc_skb_ip_align(bp->dev, len);
+				skb = napi_alloc_skb(&fp->napi, len);
 				if (skb == NULL) {
 					DP(NETIF_MSG_RX_ERR | NETIF_MSG_RX_STATUS,
 					   "ERROR packet dropped because of alloc failure\n");
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index 86222a1bdb12..babe2a915b00 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -1025,7 +1025,7 @@ MODULE_PARM_DESC(copybreak, "Receive copy threshold");
 
 /**
  * get_packet - return the next ingress packet buffer
- * @pdev: the PCI device that received the packet
+ * @adapter: the adapter that received the packet
  * @fl: the SGE free list holding the packet
  * @len: the actual packet length, excluding any SGE padding
  *
@@ -1037,14 +1037,15 @@ MODULE_PARM_DESC(copybreak, "Receive copy threshold");
  * threshold and the packet is too big to copy, or (b) the packet should
  * be copied but there is no memory for the copy.
  */
-static inline struct sk_buff *get_packet(struct pci_dev *pdev,
+static inline struct sk_buff *get_packet(struct adapter *adapter,
 					 struct freelQ *fl, unsigned int len)
 {
-	struct sk_buff *skb;
 	const struct freelQ_ce *ce = &fl->centries[fl->cidx];
+	struct pci_dev *pdev = adapter->pdev;
+	struct sk_buff *skb;
 
 	if (len < copybreak) {
-		skb = netdev_alloc_skb_ip_align(NULL, len);
+		skb = napi_alloc_skb(&adapter->napi, len);
 		if (!skb)
 			goto use_orig_buf;
 
@@ -1357,7 +1358,7 @@ static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len)
 	struct sge_port_stats *st;
 	struct net_device *dev;
 
-	skb = get_packet(adapter->pdev, fl, len - sge->rx_pkt_pad);
+	skb = get_packet(adapter, fl, len - sge->rx_pkt_pad);
 	if (unlikely(!skb)) {
 		sge->stats.rx_drops++;
 		return;
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 862d1989ae1c..83140cbb5f01 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -4100,7 +4100,7 @@ static bool e1000_tbi_should_accept(struct e1000_adapter *adapter,
 static struct sk_buff *e1000_alloc_rx_skb(struct e1000_adapter *adapter,
 					   unsigned int bufsz)
 {
-	struct sk_buff *skb = netdev_alloc_skb_ip_align(adapter->netdev, bufsz);
+	struct sk_buff *skb = napi_alloc_skb(&adapter->napi, bufsz);
 
 	if (unlikely(!skb))
 		adapter->alloc_rx_buff_failed++;
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 88936aa0029d..5c82c8065501 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1016,7 +1016,7 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done,
 			 */
 			if (length < copybreak) {
 				struct sk_buff *new_skb =
-					netdev_alloc_skb_ip_align(netdev, length);
+					napi_alloc_skb(&adapter->napi, length);
 				if (new_skb) {
 					skb_copy_to_linear_data_offset(new_skb,
 								       -NET_IP_ALIGN,
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 91516aed373e..ee1ecb146df7 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -308,8 +308,8 @@ static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring,
 #endif
 
 	/* allocate a skb to store the frags */
-	skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
-					FM10K_RX_HDR_LEN);
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi,
+			     FM10K_RX_HDR_LEN);
 	if (unlikely(!skb)) {
 		rx_ring->rx_stats.alloc_failed++;
 		return NULL;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index f04ad13f7159..485d2c609d5d 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6644,8 +6644,7 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
 #endif
 
 	/* allocate a skb to store the frags */
-	skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
-					IGB_RX_HDR_LEN);
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN);
 	if (unlikely(!skb)) {
 		rx_ring->rx_stats.alloc_failed++;
 		return NULL;
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
index 055961b0f24b..aa87605b144a 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
@@ -1963,7 +1963,7 @@ ixgb_rx_checksum(struct ixgb_adapter *adapter,
  * this should improve performance for small packets with large amounts
  * of reassembly being done in the stack
  */
-static void ixgb_check_copybreak(struct net_device *netdev,
+static void ixgb_check_copybreak(struct napi_struct *napi,
 				 struct ixgb_buffer *buffer_info,
 				 u32 length, struct sk_buff **skb)
 {
@@ -1972,7 +1972,7 @@ static void ixgb_check_copybreak(struct net_device *netdev,
 	if (length > copybreak)
 		return;
 
-	new_skb = netdev_alloc_skb_ip_align(netdev, length);
+	new_skb = napi_alloc_skb(napi, length);
 	if (!new_skb)
 		return;
 
@@ -2064,7 +2064,7 @@ ixgb_clean_rx_irq(struct ixgb_adapter *adapter, int *work_done, int work_to_do)
 			goto rxdesc_done;
 		}
 
-		ixgb_check_copybreak(netdev, buffer_info, length, &skb);
+		ixgb_check_copybreak(&adapter->napi, buffer_info, length, &skb);
 
 		/* Good Receive */
 		skb_put(skb, length);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index fbd52924ee34..798b05556e1b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1913,8 +1913,8 @@ static struct sk_buff *ixgbe_fetch_rx_buffer(struct ixgbe_ring *rx_ring,
 #endif
 
 	/* allocate a skb to store the frags */
-	skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
-					IXGBE_RX_HDR_SIZE);
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi,
+			     IXGBE_RX_HDR_SIZE);
 	if (unlikely(!skb)) {
 		rx_ring->rx_stats.alloc_rx_buff_failed++;
 		return NULL;
diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index 75b1693ec8bf..9c31e46d1eee 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -507,7 +507,7 @@ rx_status_loop:
 		netif_dbg(cp, rx_status, dev, "rx slot %d status 0x%x len %d\n",
 			  rx_tail, status, len);
 
-		new_skb = netdev_alloc_skb_ip_align(dev, buflen);
+		new_skb = napi_alloc_skb(napi, buflen);
 		if (!new_skb) {
 			dev->stats.rx_dropped++;
 			goto rx_next;
diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index 63dc0f95d050..6d0b9dfac313 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -2037,7 +2037,7 @@ keep_pkt:
 		/* Malloc up new buffer, compatible with net-2e. */
 		/* Omit the four octet CRC from the length. */
 
-		skb = netdev_alloc_skb_ip_align(dev, pkt_size);
+		skb = napi_alloc_skb(&tp->napi, pkt_size);
 		if (likely(skb)) {
 #if RX_BUF_IDX == 3
 			wrap_copy(skb, rx_ring, ring_offset+4, pkt_size);
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index b9c2f33b463d..3dad7e884952 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -7260,7 +7260,7 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data,
 	data = rtl8169_align(data);
 	dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
 	prefetch(data);
-	skb = netdev_alloc_skb_ip_align(tp->dev, pkt_size);
+	skb = napi_alloc_skb(&tp->napi, pkt_size);
 	if (skb)
 		memcpy(skb->data, data, pkt_size);
 	dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ab0bc43c82a4..85ab7d72b54c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -151,6 +151,7 @@ struct net_device;
 struct scatterlist;
 struct pipe_inode_info;
 struct iov_iter;
+struct napi_struct;
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 struct nf_conntrack {
@@ -673,6 +674,7 @@ struct sk_buff {
 
 #define SKB_ALLOC_FCLONE	0x01
 #define SKB_ALLOC_RX		0x02
+#define SKB_ALLOC_NAPI		0x04
 
 /* Returns true if the skb was allocated from PFMEMALLOC reserves */
 static inline bool skb_pfmemalloc(const struct sk_buff *skb)
@@ -2164,6 +2166,15 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
 	return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
 }
 
+void *napi_alloc_frag(unsigned int fragsz);
+struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
+				 unsigned int length, gfp_t gfp_mask);
+static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
+					     unsigned int length)
+{
+	return __napi_alloc_skb(napi, length, GFP_ATOMIC);
+}
+
 /**
  * __dev_alloc_pages - allocate page for network Rx
  * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx
diff --git a/net/core/dev.c b/net/core/dev.c
index 3f191da383f6..80f798da3d9f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4172,7 +4172,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 	struct sk_buff *skb = napi->skb;
 
 	if (!skb) {
-		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
+		skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
 		napi->skb = skb;
 	}
 	return skb;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7a338fb55cc4..ae13ef6b3ea7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -336,59 +336,85 @@ struct netdev_alloc_cache {
 	unsigned int		pagecnt_bias;
 };
 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
+static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);
 
-static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
+				       gfp_t gfp_mask)
 {
-	struct netdev_alloc_cache *nc;
-	void *data = NULL;
-	int order;
-	unsigned long flags;
+	const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER;
+	struct page *page = NULL;
+	gfp_t gfp = gfp_mask;
+
+	if (order) {
+		gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
+		page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
+		nc->frag.size = PAGE_SIZE << (page ? order : 0);
+	}
 
-	local_irq_save(flags);
-	nc = this_cpu_ptr(&netdev_alloc_cache);
-	if (unlikely(!nc->frag.page)) {
+	if (unlikely(!page))
+		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+
+	nc->frag.page = page;
+
+	return page;
+}
+
+static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache,
+			       unsigned int fragsz, gfp_t gfp_mask)
+{
+	struct netdev_alloc_cache *nc = this_cpu_ptr(cache);
+	struct page *page = nc->frag.page;
+	unsigned int size;
+	int offset;
+
+	if (unlikely(!page)) {
 refill:
-		for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
-			gfp_t gfp = gfp_mask;
+		page = __page_frag_refill(nc, gfp_mask);
+		if (!page)
+			return NULL;
+
+		/* if size can vary use frag.size else just use PAGE_SIZE */
+		size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
 
-			if (order)
-				gfp |= __GFP_COMP | __GFP_NOWARN;
-			nc->frag.page = alloc_pages(gfp, order);
-			if (likely(nc->frag.page))
-				break;
-			if (--order < 0)
-				goto end;
-		}
-		nc->frag.size = PAGE_SIZE << order;
 		/* Even if we own the page, we do not use atomic_set().
 		 * This would break get_page_unless_zero() users.
 		 */
-		atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1,
-			   &nc->frag.page->_count);
-		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
-		nc->frag.offset = 0;
+		atomic_add(size - 1, &page->_count);
+
+		/* reset page count bias and offset to start of new frag */
+		nc->pagecnt_bias = size;
+		nc->frag.offset = size;
 	}
 
-	if (nc->frag.offset + fragsz > nc->frag.size) {
-		if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) {
-			if (!atomic_sub_and_test(nc->pagecnt_bias,
-						 &nc->frag.page->_count))
-				goto refill;
-			/* OK, page count is 0, we can safely set it */
-			atomic_set(&nc->frag.page->_count,
-				   NETDEV_PAGECNT_MAX_BIAS);
-		} else {
-			atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias,
-				   &nc->frag.page->_count);
-		}
-		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
-		nc->frag.offset = 0;
+	offset = nc->frag.offset - fragsz;
+	if (unlikely(offset < 0)) {
+		if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
+			goto refill;
+
+		/* if size can vary use frag.size else just use PAGE_SIZE */
+		size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
+
+		/* OK, page count is 0, we can safely set it */
+		atomic_set(&page->_count, size);
+
+		/* reset page count bias and offset to start of new frag */
+		nc->pagecnt_bias = size;
+		offset = size - fragsz;
 	}
 
-	data = page_address(nc->frag.page) + nc->frag.offset;
-	nc->frag.offset += fragsz;
 	nc->pagecnt_bias--;
-end:
+	nc->frag.offset = offset;
+
+	return page_address(page) + offset;
+}
+
+static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+{
+	unsigned long flags;
+	void *data;
+
+	local_irq_save(flags);
+	data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask);
 	local_irq_restore(flags);
 	return data;
 }
@@ -406,11 +432,25 @@ void *netdev_alloc_frag(unsigned int fragsz)
 }
 EXPORT_SYMBOL(netdev_alloc_frag);
 
+static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+{
+	return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask);
+}
+
+void *napi_alloc_frag(unsigned int fragsz)
+{
+	return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+}
+EXPORT_SYMBOL(napi_alloc_frag);
+
 /**
- * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
- * @dev: network device to receive on
+ * __alloc_rx_skb - allocate an skbuff for rx
  * @length: length to allocate
  * @gfp_mask: get_free_pages mask, passed to alloc_skb
+ * @flags: If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
+ *	allocations in case we have to fallback to __alloc_skb()
+ *	If SKB_ALLOC_NAPI is set, page fragment will be allocated
+ *	from napi_cache instead of netdev_cache.
  *
  * Allocate a new &sk_buff and assign it a usage count of one. The
  * buffer has unspecified headroom built in. Users should allocate
@@ -419,11 +459,11 @@ EXPORT_SYMBOL(netdev_alloc_frag);
  *
  * %NULL is returned if there is no free memory.
  */
-struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
-				   unsigned int length, gfp_t gfp_mask)
+static struct sk_buff *__alloc_rx_skb(unsigned int length, gfp_t gfp_mask,
+				      int flags)
 {
 	struct sk_buff *skb = NULL;
-	unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) +
+	unsigned int fragsz = SKB_DATA_ALIGN(length) +
 			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
@@ -432,7 +472,9 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 		if (sk_memalloc_socks())
 			gfp_mask |= __GFP_MEMALLOC;
 
-		data = __netdev_alloc_frag(fragsz, gfp_mask);
+		data = (flags & SKB_ALLOC_NAPI) ?
+			__napi_alloc_frag(fragsz, gfp_mask) :
+			__netdev_alloc_frag(fragsz, gfp_mask);
 
 		if (likely(data)) {
 			skb = build_skb(data, fragsz);
@@ -440,17 +482,72 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 			put_page(virt_to_head_page(data));
 		}
 	} else {
-		skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
+		skb = __alloc_skb(length, gfp_mask,
 				  SKB_ALLOC_RX, NUMA_NO_NODE);
 	}
+	return skb;
+}
+
+/**
+ * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
+ * @dev: network device to receive on
+ * @length: length to allocate
+ * @gfp_mask: get_free_pages mask, passed to alloc_skb
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has NET_SKB_PAD headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
+				   unsigned int length, gfp_t gfp_mask)
+{
+	struct sk_buff *skb;
+
+	length += NET_SKB_PAD;
+	skb = __alloc_rx_skb(length, gfp_mask, 0);
+
 	if (likely(skb)) {
 		skb_reserve(skb, NET_SKB_PAD);
 		skb->dev = dev;
 	}
+
 	return skb;
 }
 EXPORT_SYMBOL(__netdev_alloc_skb);
 
+/**
+ * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
+ * @napi: napi instance this buffer was allocated for
+ * @length: length to allocate
+ * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
+ *
+ * Allocate a new sk_buff for use in NAPI receive.  This buffer will
+ * attempt to allocate the head from a special reserved region used
+ * only for NAPI Rx allocation.  By doing this we can save several
+ * CPU cycles by avoiding having to disable and re-enable IRQs.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
+				 unsigned int length, gfp_t gfp_mask)
+{
+	struct sk_buff *skb;
+
+	length += NET_SKB_PAD + NET_IP_ALIGN;
+	skb = __alloc_rx_skb(length, gfp_mask, SKB_ALLOC_NAPI);
+
+	if (likely(skb)) {
+		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+		skb->dev = napi->dev;
+	}
+
+	return skb;
+}
+EXPORT_SYMBOL(__napi_alloc_skb);
+
 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 		     int size, unsigned int truesize)
 {