author		David S. Miller <davem@davemloft.net>	2014-12-10 13:32:02 -0500
committer	David S. Miller <davem@davemloft.net>	2014-12-10 13:32:02 -0500
commit		7dbea3e8662eb491e9da109cf8ffc372e160fe9a (patch)
tree		e69f89b43fa65a1f4279bc6aa7dc068c0817c208
parent		6e5f59aacbf9527dfe425541c78cb8c56623e7eb (diff)
parent		45abfb1069e4c365f6c1e2fc97c5927272725bfa (diff)
Merge branch 'napi_page_frags'
Alexander Duyck says:
====================
net: Alloc NAPI page frags from their own pool
This patch series implements a means of allocating page fragments without
the need for the local_irq_save/restore in __netdev_alloc_frag. By doing
this I am able to decrease packet processing time by 11ns per packet in my
test environment.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
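
For orientation, the sketch below (not taken from these patches) shows what the driver-side conversion looks like in a typical copybreak receive path; "my_rx_ring" and its "q_vector" member are hypothetical stand-ins for whatever names a given driver uses:

/*
 * Illustrative sketch only, not part of this series: a copybreak-style
 * receive path switched over to the new helper. The ring and q_vector
 * names are hypothetical driver-specific fields.
 */
static struct sk_buff *my_copybreak_rx(struct my_rx_ring *rx_ring,
				       const void *data, unsigned int len)
{
	struct sk_buff *skb;

	/* Head comes from the per-CPU NAPI frag cache; this runs in NAPI
	 * (softirq) context, so no local_irq_save/restore is needed.
	 */
	skb = napi_alloc_skb(&rx_ring->q_vector->napi, len);
	if (unlikely(!skb))
		return NULL;

	memcpy(skb_put(skb, len), data, len);
	return skb;
}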
-rw-r--r--  drivers/net/ethernet/broadcom/b44.c               2
-rw-r--r--  drivers/net/ethernet/broadcom/bcm63xx_enet.c      2
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c   2
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb/sge.c          11
-rw-r--r--  drivers/net/ethernet/intel/e1000/e1000_main.c     2
-rw-r--r--  drivers/net/ethernet/intel/e1000e/netdev.c        2
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_main.c     4
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_main.c         3
-rw-r--r--  drivers/net/ethernet/intel/ixgb/ixgb_main.c       6
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c     4
-rw-r--r--  drivers/net/ethernet/realtek/8139cp.c             2
-rw-r--r--  drivers/net/ethernet/realtek/8139too.c            2
-rw-r--r--  drivers/net/ethernet/realtek/r8169.c              2
-rw-r--r--  include/linux/skbuff.h                           11
-rw-r--r--  net/core/dev.c                                    2
-rw-r--r--  net/core/skbuff.c                               191
16 files changed, 178 insertions, 70 deletions
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index ffeaf476a120..d86d6baf9681 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -836,7 +836,7 @@ static int b44_rx(struct b44 *bp, int budget)
 			struct sk_buff *copy_skb;

 			b44_recycle_rx(bp, cons, bp->rx_prod);
-			copy_skb = netdev_alloc_skb_ip_align(bp->dev, len);
+			copy_skb = napi_alloc_skb(&bp->napi, len);
 			if (copy_skb == NULL)
 				goto drop_it_no_recycle;

diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 3e8d1a88ed3d..21206d33b638 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -385,7 +385,7 @@ static int bcm_enet_receive_queue(struct net_device *dev, int budget)
 		if (len < copybreak) {
 			struct sk_buff *nskb;

-			nskb = netdev_alloc_skb_ip_align(dev, len);
+			nskb = napi_alloc_skb(&priv->napi, len);
 			if (!nskb) {
 				/* forget packet, just rearm desc */
 				dev->stats.rx_dropped++;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index b4d71fd909ee..1d1147c93d59 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1015,7 +1015,7 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
 			 */
 			if ((bp->dev->mtu > ETH_MAX_PACKET_SIZE) &&
 			    (len <= RX_COPY_THRESH)) {
-				skb = netdev_alloc_skb_ip_align(bp->dev, len);
+				skb = napi_alloc_skb(&fp->napi, len);
 				if (skb == NULL) {
 					DP(NETIF_MSG_RX_ERR | NETIF_MSG_RX_STATUS,
 					   "ERROR packet dropped because of alloc failure\n");
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index 86222a1bdb12..babe2a915b00 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -1025,7 +1025,7 @@ MODULE_PARM_DESC(copybreak, "Receive copy threshold");

 /**
  * get_packet - return the next ingress packet buffer
- * @pdev: the PCI device that received the packet
+ * @adapter: the adapter that received the packet
  * @fl: the SGE free list holding the packet
  * @len: the actual packet length, excluding any SGE padding
  *
@@ -1037,14 +1037,15 @@ MODULE_PARM_DESC(copybreak, "Receive copy threshold");
  * threshold and the packet is too big to copy, or (b) the packet should
  * be copied but there is no memory for the copy.
  */
-static inline struct sk_buff *get_packet(struct pci_dev *pdev,
+static inline struct sk_buff *get_packet(struct adapter *adapter,
 					 struct freelQ *fl, unsigned int len)
 {
-	struct sk_buff *skb;
 	const struct freelQ_ce *ce = &fl->centries[fl->cidx];
+	struct pci_dev *pdev = adapter->pdev;
+	struct sk_buff *skb;

 	if (len < copybreak) {
-		skb = netdev_alloc_skb_ip_align(NULL, len);
+		skb = napi_alloc_skb(&adapter->napi, len);
 		if (!skb)
 			goto use_orig_buf;

@@ -1357,7 +1358,7 @@ static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len)
 	struct sge_port_stats *st;
 	struct net_device *dev;

-	skb = get_packet(adapter->pdev, fl, len - sge->rx_pkt_pad);
+	skb = get_packet(adapter, fl, len - sge->rx_pkt_pad);
 	if (unlikely(!skb)) {
 		sge->stats.rx_drops++;
 		return;
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 862d1989ae1c..83140cbb5f01 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -4100,7 +4100,7 @@ static bool e1000_tbi_should_accept(struct e1000_adapter *adapter,
 static struct sk_buff *e1000_alloc_rx_skb(struct e1000_adapter *adapter,
 					   unsigned int bufsz)
 {
-	struct sk_buff *skb = netdev_alloc_skb_ip_align(adapter->netdev, bufsz);
+	struct sk_buff *skb = napi_alloc_skb(&adapter->napi, bufsz);

 	if (unlikely(!skb))
 		adapter->alloc_rx_buff_failed++;
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 88936aa0029d..5c82c8065501 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1016,7 +1016,7 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done,
 		 */
 		if (length < copybreak) {
 			struct sk_buff *new_skb =
-				netdev_alloc_skb_ip_align(netdev, length);
+				napi_alloc_skb(&adapter->napi, length);
 			if (new_skb) {
 				skb_copy_to_linear_data_offset(new_skb,
 							       -NET_IP_ALIGN,
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 91516aed373e..ee1ecb146df7 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -308,8 +308,8 @@ static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring,
 #endif

 	/* allocate a skb to store the frags */
-	skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
-					FM10K_RX_HDR_LEN);
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi,
+			     FM10K_RX_HDR_LEN);
 	if (unlikely(!skb)) {
 		rx_ring->rx_stats.alloc_failed++;
 		return NULL;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index f04ad13f7159..485d2c609d5d 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6644,8 +6644,7 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
 #endif

 	/* allocate a skb to store the frags */
-	skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
-					IGB_RX_HDR_LEN);
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN);
 	if (unlikely(!skb)) {
 		rx_ring->rx_stats.alloc_failed++;
 		return NULL;
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
index 055961b0f24b..aa87605b144a 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
@@ -1963,7 +1963,7 @@ ixgb_rx_checksum(struct ixgb_adapter *adapter,
 * this should improve performance for small packets with large amounts
 * of reassembly being done in the stack
 */
-static void ixgb_check_copybreak(struct net_device *netdev,
+static void ixgb_check_copybreak(struct napi_struct *napi,
 				 struct ixgb_buffer *buffer_info,
 				 u32 length, struct sk_buff **skb)
 {
@@ -1972,7 +1972,7 @@ static void ixgb_check_copybreak(struct net_device *netdev,
 	if (length > copybreak)
 		return;

-	new_skb = netdev_alloc_skb_ip_align(netdev, length);
+	new_skb = napi_alloc_skb(napi, length);
 	if (!new_skb)
 		return;

@@ -2064,7 +2064,7 @@ ixgb_clean_rx_irq(struct ixgb_adapter *adapter, int *work_done, int work_to_do)
 			goto rxdesc_done;
 		}

-		ixgb_check_copybreak(netdev, buffer_info, length, &skb);
+		ixgb_check_copybreak(&adapter->napi, buffer_info, length, &skb);

 		/* Good Receive */
 		skb_put(skb, length);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index fbd52924ee34..798b05556e1b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1913,8 +1913,8 @@ static struct sk_buff *ixgbe_fetch_rx_buffer(struct ixgbe_ring *rx_ring,
 #endif

 	/* allocate a skb to store the frags */
-	skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
-					IXGBE_RX_HDR_SIZE);
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi,
+			     IXGBE_RX_HDR_SIZE);
 	if (unlikely(!skb)) {
 		rx_ring->rx_stats.alloc_rx_buff_failed++;
 		return NULL;
diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index 75b1693ec8bf..9c31e46d1eee 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -507,7 +507,7 @@ rx_status_loop:
 		netif_dbg(cp, rx_status, dev, "rx slot %d status 0x%x len %d\n",
 			  rx_tail, status, len);

-		new_skb = netdev_alloc_skb_ip_align(dev, buflen);
+		new_skb = napi_alloc_skb(napi, buflen);
 		if (!new_skb) {
 			dev->stats.rx_dropped++;
 			goto rx_next;
diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index 63dc0f95d050..6d0b9dfac313 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -2037,7 +2037,7 @@ keep_pkt:
 		/* Malloc up new buffer, compatible with net-2e. */
 		/* Omit the four octet CRC from the length. */

-		skb = netdev_alloc_skb_ip_align(dev, pkt_size);
+		skb = napi_alloc_skb(&tp->napi, pkt_size);
 		if (likely(skb)) {
 #if RX_BUF_IDX == 3
 			wrap_copy(skb, rx_ring, ring_offset+4, pkt_size);
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index b9c2f33b463d..3dad7e884952 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -7260,7 +7260,7 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data,
 	data = rtl8169_align(data);
 	dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
 	prefetch(data);
-	skb = netdev_alloc_skb_ip_align(tp->dev, pkt_size);
+	skb = napi_alloc_skb(&tp->napi, pkt_size);
 	if (skb)
 		memcpy(skb->data, data, pkt_size);
 	dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ab0bc43c82a4..85ab7d72b54c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -151,6 +151,7 @@ struct net_device;
 struct scatterlist;
 struct pipe_inode_info;
 struct iov_iter;
+struct napi_struct;

 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 struct nf_conntrack {
@@ -673,6 +674,7 @@ struct sk_buff {

 #define SKB_ALLOC_FCLONE	0x01
 #define SKB_ALLOC_RX		0x02
+#define SKB_ALLOC_NAPI		0x04

 /* Returns true if the skb was allocated from PFMEMALLOC reserves */
 static inline bool skb_pfmemalloc(const struct sk_buff *skb)
@@ -2164,6 +2166,15 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
 	return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
 }

+void *napi_alloc_frag(unsigned int fragsz);
+struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
+				 unsigned int length, gfp_t gfp_mask);
+static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
+					     unsigned int length)
+{
+	return __napi_alloc_skb(napi, length, GFP_ATOMIC);
+}
+
 /**
  * __dev_alloc_pages - allocate page for network Rx
  * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx
diff --git a/net/core/dev.c b/net/core/dev.c
index 3f191da383f6..80f798da3d9f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4172,7 +4172,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 	struct sk_buff *skb = napi->skb;

 	if (!skb) {
-		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
+		skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
 		napi->skb = skb;
 	}
 	return skb;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7a338fb55cc4..ae13ef6b3ea7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -336,59 +336,85 @@ struct netdev_alloc_cache {
 	unsigned int pagecnt_bias;
 };
 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
+static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);

-static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
+				       gfp_t gfp_mask)
 {
-	struct netdev_alloc_cache *nc;
-	void *data = NULL;
-	int order;
-	unsigned long flags;
+	const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER;
+	struct page *page = NULL;
+	gfp_t gfp = gfp_mask;
+
+	if (order) {
+		gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
+		page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
+		nc->frag.size = PAGE_SIZE << (page ? order : 0);
+	}

-	local_irq_save(flags);
-	nc = this_cpu_ptr(&netdev_alloc_cache);
-	if (unlikely(!nc->frag.page)) {
+	if (unlikely(!page))
+		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+
+	nc->frag.page = page;
+
+	return page;
+}
+
+static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache,
+			       unsigned int fragsz, gfp_t gfp_mask)
+{
+	struct netdev_alloc_cache *nc = this_cpu_ptr(cache);
+	struct page *page = nc->frag.page;
+	unsigned int size;
+	int offset;
+
+	if (unlikely(!page)) {
 refill:
-		for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
-			gfp_t gfp = gfp_mask;
+		page = __page_frag_refill(nc, gfp_mask);
+		if (!page)
+			return NULL;
+
+		/* if size can vary use frag.size else just use PAGE_SIZE */
+		size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;

-			if (order)
-				gfp |= __GFP_COMP | __GFP_NOWARN;
-			nc->frag.page = alloc_pages(gfp, order);
-			if (likely(nc->frag.page))
-				break;
-			if (--order < 0)
-				goto end;
-		}
-		nc->frag.size = PAGE_SIZE << order;
 		/* Even if we own the page, we do not use atomic_set().
 		 * This would break get_page_unless_zero() users.
 		 */
-		atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1,
-			   &nc->frag.page->_count);
-		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
-		nc->frag.offset = 0;
+		atomic_add(size - 1, &page->_count);
+
+		/* reset page count bias and offset to start of new frag */
+		nc->pagecnt_bias = size;
+		nc->frag.offset = size;
 	}

-	if (nc->frag.offset + fragsz > nc->frag.size) {
-		if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) {
-			if (!atomic_sub_and_test(nc->pagecnt_bias,
-						 &nc->frag.page->_count))
-				goto refill;
-			/* OK, page count is 0, we can safely set it */
-			atomic_set(&nc->frag.page->_count,
-				   NETDEV_PAGECNT_MAX_BIAS);
-		} else {
-			atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias,
-				   &nc->frag.page->_count);
-		}
-		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
-		nc->frag.offset = 0;
+	offset = nc->frag.offset - fragsz;
+	if (unlikely(offset < 0)) {
+		if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
+			goto refill;
+
+		/* if size can vary use frag.size else just use PAGE_SIZE */
+		size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
+
+		/* OK, page count is 0, we can safely set it */
+		atomic_set(&page->_count, size);
+
+		/* reset page count bias and offset to start of new frag */
+		nc->pagecnt_bias = size;
+		offset = size - fragsz;
 	}

-	data = page_address(nc->frag.page) + nc->frag.offset;
-	nc->frag.offset += fragsz;
 	nc->pagecnt_bias--;
-end:
+	nc->frag.offset = offset;
+
+	return page_address(page) + offset;
+}
+
+static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+{
+	unsigned long flags;
+	void *data;
+
+	local_irq_save(flags);
+	data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask);
 	local_irq_restore(flags);
 	return data;
 }
@@ -406,11 +432,25 @@ void *netdev_alloc_frag(unsigned int fragsz)
 }
 EXPORT_SYMBOL(netdev_alloc_frag);

+static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+{
+	return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask);
+}
+
+void *napi_alloc_frag(unsigned int fragsz)
+{
+	return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+}
+EXPORT_SYMBOL(napi_alloc_frag);
+
 /**
- * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
- * @dev: network device to receive on
+ * __alloc_rx_skb - allocate an skbuff for rx
  * @length: length to allocate
  * @gfp_mask: get_free_pages mask, passed to alloc_skb
+ * @flags: If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
+ *	allocations in case we have to fallback to __alloc_skb()
+ *	If SKB_ALLOC_NAPI is set, page fragment will be allocated
+ *	from napi_cache instead of netdev_cache.
 *
 * Allocate a new &sk_buff and assign it a usage count of one. The
 * buffer has unspecified headroom built in. Users should allocate
@@ -419,11 +459,11 @@ EXPORT_SYMBOL(netdev_alloc_frag);
 *
 * %NULL is returned if there is no free memory.
 */
-struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
-				   unsigned int length, gfp_t gfp_mask)
+static struct sk_buff *__alloc_rx_skb(unsigned int length, gfp_t gfp_mask,
+				      int flags)
 {
 	struct sk_buff *skb = NULL;
-	unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) +
+	unsigned int fragsz = SKB_DATA_ALIGN(length) +
 			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

 	if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
@@ -432,7 +472,9 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 		if (sk_memalloc_socks())
 			gfp_mask |= __GFP_MEMALLOC;

-		data = __netdev_alloc_frag(fragsz, gfp_mask);
+		data = (flags & SKB_ALLOC_NAPI) ?
+			__napi_alloc_frag(fragsz, gfp_mask) :
+			__netdev_alloc_frag(fragsz, gfp_mask);

 		if (likely(data)) {
 			skb = build_skb(data, fragsz);
@@ -440,17 +482,72 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 			put_page(virt_to_head_page(data));
 		}
 	} else {
-		skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
+		skb = __alloc_skb(length, gfp_mask,
 				  SKB_ALLOC_RX, NUMA_NO_NODE);
 	}
+	return skb;
+}
+
+/**
+ * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
+ * @dev: network device to receive on
+ * @length: length to allocate
+ * @gfp_mask: get_free_pages mask, passed to alloc_skb
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has NET_SKB_PAD headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
+				   unsigned int length, gfp_t gfp_mask)
+{
+	struct sk_buff *skb;
+
+	length += NET_SKB_PAD;
+	skb = __alloc_rx_skb(length, gfp_mask, 0);
+
 	if (likely(skb)) {
 		skb_reserve(skb, NET_SKB_PAD);
 		skb->dev = dev;
 	}
+
 	return skb;
 }
 EXPORT_SYMBOL(__netdev_alloc_skb);

+/**
+ * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
+ * @napi: napi instance this buffer was allocated for
+ * @length: length to allocate
+ * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
+ *
+ * Allocate a new sk_buff for use in NAPI receive. This buffer will
+ * attempt to allocate the head from a special reserved region used
+ * only for NAPI Rx allocation. By doing this we can save several
+ * CPU cycles by avoiding having to disable and re-enable IRQs.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
+				 unsigned int length, gfp_t gfp_mask)
+{
+	struct sk_buff *skb;
+
+	length += NET_SKB_PAD + NET_IP_ALIGN;
+	skb = __alloc_rx_skb(length, gfp_mask, SKB_ALLOC_NAPI);
+
+	if (likely(skb)) {
+		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+		skb->dev = napi->dev;
+	}
+
+	return skb;
+}
+EXPORT_SYMBOL(__napi_alloc_skb);
+
 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 		     int size, unsigned int truesize)
 {