author    | Alexander Duyck <alexander.h.duyck@intel.com>    | 2011-08-26 03:43:54 -0400
committer | Jeff Kirsher <jeffrey.t.kirsher@intel.com>       | 2011-09-20 02:58:20 -0400
commit    | 16eb8815c2355b50bff218513367778e6303e9f9 (patch)
tree      | 007e294a9e6d167dd95342357cc0bbeb95be4b6c /drivers/net
parent    | 238ac817fd23f7dd5f61a8c51b4678f8d199db57 (diff)
igb: Refactor clean_rx_irq to reduce overhead and improve performance
This change is meant to be a general cleanup and performance improvement
for clean_rx_irq. The previous patch should have updated the allocation so
that the rings can be treated as read-only within the clean_rx_irq
function. In addition, I am reordering the operations to accomplish several
goals: reducing the overhead of packet accounting, reducing the number of
items on the stack, and improving overall performance.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
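The "packet accounting" overhead mentioned above is how completed work is counted: the old loop bumped a caller-owned counter through the `int *work_done` pointer on every frame and re-compared it against the budget, while the new loop simply decrements a by-value `budget` parameter and reports completion through its boolean return. A hedged sketch of the two shapes; `struct ring`, `ring_has_work()` and `process_one_frame()` are illustrative stand-ins, not igb code:

```c
#include <stdbool.h>

/* Illustrative stand-ins; not igb definitions. */
struct ring;
bool ring_has_work(struct ring *ring);
void process_one_frame(struct ring *ring);

/* Old shape: every frame bumps a caller-owned counter through a
 * pointer and re-checks it against the budget. */
static bool clean_ring_old(struct ring *ring, int *work_done, int budget)
{
	while (ring_has_work(ring)) {
		if (*work_done >= budget)
			break;
		(*work_done)++;
		process_one_frame(ring);
	}
	return true;
}

/* New shape: the budget is a plain local the compiler can keep in a
 * register; whatever is left over doubles as the "fully cleaned"
 * return value. */
static bool clean_ring_new(struct ring *ring, int budget)
{
	while (ring_has_work(ring)) {
		process_one_frame(ring);
		if (!--budget)
			break;
	}
	return !!budget;
}
```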
Diffstat (limited to 'drivers/net')
-rw-r--r-- | drivers/net/ethernet/intel/igb/igb_main.c | 96
1 file changed, 47 insertions(+), 49 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 9fa2ad01c6b7..dd85df0ed7f2 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -138,7 +138,7 @@ static void igb_setup_dca(struct igb_adapter *);
138 | #endif /* CONFIG_IGB_DCA */ | 138 | #endif /* CONFIG_IGB_DCA */ |
139 | static bool igb_clean_tx_irq(struct igb_q_vector *); | 139 | static bool igb_clean_tx_irq(struct igb_q_vector *); |
140 | static int igb_poll(struct napi_struct *, int); | 140 | static int igb_poll(struct napi_struct *, int); |
141 | static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int); | 141 | static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int); |
142 | static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); | 142 | static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); |
143 | static void igb_tx_timeout(struct net_device *); | 143 | static void igb_tx_timeout(struct net_device *); |
144 | static void igb_reset_task(struct work_struct *); | 144 | static void igb_reset_task(struct work_struct *); |
@@ -5481,28 +5481,27 @@ static int igb_poll(struct napi_struct *napi, int budget)
5481 | struct igb_q_vector *q_vector = container_of(napi, | 5481 | struct igb_q_vector *q_vector = container_of(napi, |
5482 | struct igb_q_vector, | 5482 | struct igb_q_vector, |
5483 | napi); | 5483 | napi); |
5484 | int tx_clean_complete = 1, work_done = 0; | 5484 | bool clean_complete = true; |
5485 | 5485 | ||
5486 | #ifdef CONFIG_IGB_DCA | 5486 | #ifdef CONFIG_IGB_DCA |
5487 | if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED) | 5487 | if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED) |
5488 | igb_update_dca(q_vector); | 5488 | igb_update_dca(q_vector); |
5489 | #endif | 5489 | #endif |
5490 | if (q_vector->tx_ring) | 5490 | if (q_vector->tx_ring) |
5491 | tx_clean_complete = igb_clean_tx_irq(q_vector); | 5491 | clean_complete = !!igb_clean_tx_irq(q_vector); |
5492 | 5492 | ||
5493 | if (q_vector->rx_ring) | 5493 | if (q_vector->rx_ring) |
5494 | igb_clean_rx_irq_adv(q_vector, &work_done, budget); | 5494 | clean_complete &= igb_clean_rx_irq_adv(q_vector, budget); |
5495 | 5495 | ||
5496 | if (!tx_clean_complete) | 5496 | /* If all work not completed, return budget and keep polling */ |
5497 | work_done = budget; | 5497 | if (!clean_complete) |
5498 | return budget; | ||
5498 | 5499 | ||
5499 | /* If not enough Rx work done, exit the polling mode */ | 5500 | /* If not enough Rx work done, exit the polling mode */ |
5500 | if (work_done < budget) { | 5501 | napi_complete(napi); |
5501 | napi_complete(napi); | 5502 | igb_ring_irq_enable(q_vector); |
5502 | igb_ring_irq_enable(q_vector); | ||
5503 | } | ||
5504 | 5503 | ||
5505 | return work_done; | 5504 | return 0; |
5506 | } | 5505 | } |
5507 | 5506 | ||
5508 | /** | 5507 | /** |
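The reworked igb_poll above follows the standard NAPI contract: if either the Tx or the Rx cleanup left work pending, return the full budget so the kernel keeps polling; only when both rings are clean does it call napi_complete(), re-enable interrupts, and return 0. A minimal sketch of that control flow; clean_tx(), clean_rx() and reenable_irqs() are stand-ins for the driver's own helpers (igb_clean_tx_irq(), igb_clean_rx_irq_adv(), igb_ring_irq_enable()), not real igb calls:

```c
#include <linux/netdevice.h>

/* Illustrative helpers; not real igb functions. */
bool clean_tx(struct napi_struct *napi);
bool clean_rx(struct napi_struct *napi, int budget);
void reenable_irqs(struct napi_struct *napi);

/* Minimal sketch of the NAPI completion pattern used by the new igb_poll. */
static int example_poll(struct napi_struct *napi, int budget)
{
	bool clean_complete = true;

	clean_complete = clean_tx(napi);          /* Tx ring fully reclaimed?  */
	clean_complete &= clean_rx(napi, budget); /* Rx finished under budget? */

	if (!clean_complete)
		return budget;	/* more to do: the kernel will poll again */

	napi_complete(napi);	/* done: leave polling mode...            */
	reenable_irqs(napi);	/* ...and let the next interrupt wake us  */
	return 0;
}
```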
@@ -5751,37 +5750,26 @@ static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5751 | return hlen; | 5750 | return hlen; |
5752 | } | 5751 | } |
5753 | 5752 | ||
5754 | static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector, | 5753 | static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector, int budget) |
5755 | int *work_done, int budget) | ||
5756 | { | 5754 | { |
5757 | struct igb_ring *rx_ring = q_vector->rx_ring; | 5755 | struct igb_ring *rx_ring = q_vector->rx_ring; |
5758 | struct net_device *netdev = rx_ring->netdev; | 5756 | union e1000_adv_rx_desc *rx_desc; |
5759 | struct device *dev = rx_ring->dev; | 5757 | const int current_node = numa_node_id(); |
5760 | union e1000_adv_rx_desc *rx_desc , *next_rxd; | ||
5761 | struct igb_buffer *buffer_info , *next_buffer; | ||
5762 | struct sk_buff *skb; | ||
5763 | bool cleaned = false; | ||
5764 | u16 cleaned_count = igb_desc_unused(rx_ring); | ||
5765 | int current_node = numa_node_id(); | ||
5766 | unsigned int total_bytes = 0, total_packets = 0; | 5758 | unsigned int total_bytes = 0, total_packets = 0; |
5767 | unsigned int i; | ||
5768 | u32 staterr; | 5759 | u32 staterr; |
5769 | u16 length; | 5760 | u16 cleaned_count = igb_desc_unused(rx_ring); |
5761 | u16 i = rx_ring->next_to_clean; | ||
5770 | 5762 | ||
5771 | i = rx_ring->next_to_clean; | ||
5772 | buffer_info = &rx_ring->buffer_info[i]; | ||
5773 | rx_desc = E1000_RX_DESC_ADV(*rx_ring, i); | 5763 | rx_desc = E1000_RX_DESC_ADV(*rx_ring, i); |
5774 | staterr = le32_to_cpu(rx_desc->wb.upper.status_error); | 5764 | staterr = le32_to_cpu(rx_desc->wb.upper.status_error); |
5775 | 5765 | ||
5776 | while (staterr & E1000_RXD_STAT_DD) { | 5766 | while (staterr & E1000_RXD_STAT_DD) { |
5777 | if (*work_done >= budget) | 5767 | struct igb_buffer *buffer_info = &rx_ring->buffer_info[i]; |
5778 | break; | 5768 | struct sk_buff *skb = buffer_info->skb; |
5779 | (*work_done)++; | 5769 | union e1000_adv_rx_desc *next_rxd; |
5780 | rmb(); /* read descriptor and rx_buffer_info after status DD */ | ||
5781 | 5770 | ||
5782 | skb = buffer_info->skb; | ||
5783 | prefetch(skb->data - NET_IP_ALIGN); | ||
5784 | buffer_info->skb = NULL; | 5771 | buffer_info->skb = NULL; |
5772 | prefetch(skb->data); | ||
5785 | 5773 | ||
5786 | i++; | 5774 | i++; |
5787 | if (i == rx_ring->count) | 5775 | if (i == rx_ring->count) |
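Inside the loop the index walk is the usual descriptor-ring pattern: advance `i`, wrap it at `rx_ring->count`, and prefetch both the skb data (`prefetch(skb->data)`) and the next descriptor (`prefetch(next_rxd)`) before the current frame is finished, so the memory is warm on the next pass. A small sketch of the wrap-and-prefetch idiom; `struct toy_ring` is an illustrative type, not the igb structure:

```c
#include <linux/prefetch.h>

/* Illustrative ring; igb keeps separate descriptor and buffer arrays. */
struct toy_ring {
	void **desc;		/* descriptor array                */
	unsigned int count;	/* number of entries before wrap   */
};

/* Advance a wrapping ring index (as the clean loop does with
 * next_to_clean) and warm the cache line of the descriptor that the
 * next iteration will inspect. */
static void *next_desc_prefetched(struct toy_ring *ring, unsigned int *i)
{
	void *next;

	(*i)++;
	if (*i == ring->count)
		*i = 0;

	next = ring->desc[*i];
	prefetch(next);
	return next;
}
```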
@@ -5789,42 +5777,48 @@ static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5789 | 5777 | ||
5790 | next_rxd = E1000_RX_DESC_ADV(*rx_ring, i); | 5778 | next_rxd = E1000_RX_DESC_ADV(*rx_ring, i); |
5791 | prefetch(next_rxd); | 5779 | prefetch(next_rxd); |
5792 | next_buffer = &rx_ring->buffer_info[i]; | ||
5793 | 5780 | ||
5794 | length = le16_to_cpu(rx_desc->wb.upper.length); | 5781 | /* |
5795 | cleaned = true; | 5782 | * This memory barrier is needed to keep us from reading |
5796 | cleaned_count++; | 5783 | * any other fields out of the rx_desc until we know the |
5784 | * RXD_STAT_DD bit is set | ||
5785 | */ | ||
5786 | rmb(); | ||
5797 | 5787 | ||
5798 | if (buffer_info->dma) { | 5788 | if (!skb_is_nonlinear(skb)) { |
5799 | dma_unmap_single(dev, buffer_info->dma, | 5789 | __skb_put(skb, igb_get_hlen(rx_desc)); |
5790 | dma_unmap_single(rx_ring->dev, buffer_info->dma, | ||
5800 | IGB_RX_HDR_LEN, | 5791 | IGB_RX_HDR_LEN, |
5801 | DMA_FROM_DEVICE); | 5792 | DMA_FROM_DEVICE); |
5802 | buffer_info->dma = 0; | 5793 | buffer_info->dma = 0; |
5803 | skb_put(skb, igb_get_hlen(rx_desc)); | ||
5804 | } | 5794 | } |
5805 | 5795 | ||
5806 | if (length) { | 5796 | if (rx_desc->wb.upper.length) { |
5807 | dma_unmap_page(dev, buffer_info->page_dma, | 5797 | u16 length = le16_to_cpu(rx_desc->wb.upper.length); |
5808 | PAGE_SIZE / 2, DMA_FROM_DEVICE); | ||
5809 | buffer_info->page_dma = 0; | ||
5810 | 5798 | ||
5811 | skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, | 5799 | skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, |
5812 | buffer_info->page, | 5800 | buffer_info->page, |
5813 | buffer_info->page_offset, | 5801 | buffer_info->page_offset, |
5814 | length); | 5802 | length); |
5815 | 5803 | ||
5804 | skb->len += length; | ||
5805 | skb->data_len += length; | ||
5806 | skb->truesize += length; | ||
5807 | |||
5816 | if ((page_count(buffer_info->page) != 1) || | 5808 | if ((page_count(buffer_info->page) != 1) || |
5817 | (page_to_nid(buffer_info->page) != current_node)) | 5809 | (page_to_nid(buffer_info->page) != current_node)) |
5818 | buffer_info->page = NULL; | 5810 | buffer_info->page = NULL; |
5819 | else | 5811 | else |
5820 | get_page(buffer_info->page); | 5812 | get_page(buffer_info->page); |
5821 | 5813 | ||
5822 | skb->len += length; | 5814 | dma_unmap_page(rx_ring->dev, buffer_info->page_dma, |
5823 | skb->data_len += length; | 5815 | PAGE_SIZE / 2, DMA_FROM_DEVICE); |
5824 | skb->truesize += length; | 5816 | buffer_info->page_dma = 0; |
5825 | } | 5817 | } |
5826 | 5818 | ||
5827 | if (!(staterr & E1000_RXD_STAT_EOP)) { | 5819 | if (!(staterr & E1000_RXD_STAT_EOP)) { |
5820 | struct igb_buffer *next_buffer; | ||
5821 | next_buffer = &rx_ring->buffer_info[i]; | ||
5828 | buffer_info->skb = next_buffer->skb; | 5822 | buffer_info->skb = next_buffer->skb; |
5829 | buffer_info->dma = next_buffer->dma; | 5823 | buffer_info->dma = next_buffer->dma; |
5830 | next_buffer->skb = skb; | 5824 | next_buffer->skb = skb; |
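The reordered body above implements a header-split receive: the header bytes land in the skb's linear area (`__skb_put(skb, igb_get_hlen(rx_desc))` after unmapping the small header buffer), and any remaining payload stays in the half-page buffer, which is attached to the skb as a page fragment. `skb_fill_page_desc()` only fills in the fragment slot, which is why `len`, `data_len` and `truesize` are bumped by hand; the page is then kept for reuse only if the driver holds the sole reference and the page sits on the local NUMA node. A hedged sketch of those two pieces, with helper names of my own rather than igb's:

```c
#include <linux/skbuff.h>
#include <linux/mm.h>
#include <linux/topology.h>

/* Attach page-resident payload to an skb as a fragment.
 * skb_fill_page_desc() records page/offset/size but does not touch the
 * skb byte counters, so they are updated explicitly. */
static void attach_rx_payload(struct sk_buff *skb, struct page *page,
			      unsigned int offset, unsigned int len)
{
	skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
			   page, offset, len);
	skb->len += len;
	skb->data_len += len;
	skb->truesize += len;
}

/* Recycle the half page only if nobody else holds a reference and it
 * was allocated on the node this CPU is running on. */
static bool can_reuse_rx_page(struct page *page)
{
	return page_count(page) == 1 &&
	       page_to_nid(page) == numa_node_id();
}
```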
@@ -5833,7 +5827,7 @@ static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5833 | } | 5827 | } |
5834 | 5828 | ||
5835 | if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) { | 5829 | if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) { |
5836 | dev_kfree_skb_irq(skb); | 5830 | dev_kfree_skb_any(skb); |
5837 | goto next_desc; | 5831 | goto next_desc; |
5838 | } | 5832 | } |
5839 | 5833 | ||
@@ -5844,7 +5838,7 @@ static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5844 | 5838 | ||
5845 | igb_rx_checksum_adv(rx_ring, staterr, skb); | 5839 | igb_rx_checksum_adv(rx_ring, staterr, skb); |
5846 | 5840 | ||
5847 | skb->protocol = eth_type_trans(skb, netdev); | 5841 | skb->protocol = eth_type_trans(skb, rx_ring->netdev); |
5848 | 5842 | ||
5849 | if (staterr & E1000_RXD_STAT_VP) { | 5843 | if (staterr & E1000_RXD_STAT_VP) { |
5850 | u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan); | 5844 | u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan); |
@@ -5853,7 +5847,12 @@ static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5853 | } | 5847 | } |
5854 | napi_gro_receive(&q_vector->napi, skb); | 5848 | napi_gro_receive(&q_vector->napi, skb); |
5855 | 5849 | ||
5850 | budget--; | ||
5856 | next_desc: | 5851 | next_desc: |
5852 | if (!budget) | ||
5853 | break; | ||
5854 | |||
5855 | cleaned_count++; | ||
5857 | /* return some buffers to hardware, one at a time is too slow */ | 5856 | /* return some buffers to hardware, one at a time is too slow */ |
5858 | if (cleaned_count >= IGB_RX_BUFFER_WRITE) { | 5857 | if (cleaned_count >= IGB_RX_BUFFER_WRITE) { |
5859 | igb_alloc_rx_buffers_adv(rx_ring, cleaned_count); | 5858 | igb_alloc_rx_buffers_adv(rx_ring, cleaned_count); |
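Replenishment stays batched: `cleaned_count` tracks how many descriptors have been handed to the stack, and only once it reaches `IGB_RX_BUFFER_WRITE` are buffers given back via `igb_alloc_rx_buffers_adv()` — per the in-code comment, "one at a time is too slow", since each refill ultimately ends in an MMIO tail-register write. A small sketch of the batching pattern; `RX_REFILL_BATCH`, `struct toy_rx_ring` and `refill_ring()` are illustrative stand-ins, not igb names:

```c
#include <linux/types.h>

/* Illustrative threshold; igb uses IGB_RX_BUFFER_WRITE. */
#define RX_REFILL_BATCH 16

struct toy_rx_ring;				/* stand-in for struct igb_ring      */
void refill_ring(struct toy_rx_ring *ring, u16 count);	/* stand-in allocator */

/* Give buffers back to the NIC in batches: the expensive per-refill
 * work (allocation plus the tail write) is amortized over
 * RX_REFILL_BATCH descriptors instead of paid per frame. */
static void maybe_refill(struct toy_rx_ring *ring, u16 *cleaned_count)
{
	if (*cleaned_count >= RX_REFILL_BATCH) {
		refill_ring(ring, *cleaned_count);
		*cleaned_count = 0;
	}
}
```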
@@ -5862,7 +5861,6 @@ next_desc:
5862 | 5861 | ||
5863 | /* use prefetched values */ | 5862 | /* use prefetched values */ |
5864 | rx_desc = next_rxd; | 5863 | rx_desc = next_rxd; |
5865 | buffer_info = next_buffer; | ||
5866 | staterr = le32_to_cpu(rx_desc->wb.upper.status_error); | 5864 | staterr = le32_to_cpu(rx_desc->wb.upper.status_error); |
5867 | } | 5865 | } |
5868 | 5866 | ||
@@ -5877,7 +5875,7 @@ next_desc:
5877 | if (cleaned_count) | 5875 | if (cleaned_count) |
5878 | igb_alloc_rx_buffers_adv(rx_ring, cleaned_count); | 5876 | igb_alloc_rx_buffers_adv(rx_ring, cleaned_count); |
5879 | 5877 | ||
5880 | return cleaned; | 5878 | return !!budget; |
5881 | } | 5879 | } |
5882 | 5880 | ||
5883 | static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring, | 5881 | static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring, |
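Finally, the leftover budget doubles as the function's return value: `return !!budget` reports true when the ring ran out of completed descriptors before the limit was hit, and false when the budget was exhausted, which is exactly what the new igb_poll feeds into its `clean_complete` flag. A trivial illustration of the `!!` idiom:

```c
#include <stdbool.h>

/* !!x normalizes any non-zero value to 1: a leftover budget of, say,
 * 17 becomes true ("ring drained, work complete"), while 0 stays false
 * ("budget used up, more packets may be pending"). */
static bool work_complete(int remaining_budget)
{
	return !!remaining_budget;
}
```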