about summary refs log tree commit diff stats
path: root/drivers
diff options
context:
space:
mode:
authorJesse Brandeburg <jesse.brandeburg@intel.com>2009-12-03 06:33:29 -0500
committerDavid S. Miller <davem@davemloft.net>2009-12-03 18:43:22 -0500
commit7ca3bc582cd24c2e6c0693a2ba2c71f3c2419c8b (patch)
tree4d49eb3c71f7b42a1007dce4c895acbef6c35454 /drivers
parent6bacb3007928deeb30ddd2a3b6d5011bc7e5a01f (diff)
ixgbe: performance tweaks
drop variables that had cache lines modified in simultaneous hot paths. keep some variables modified on hot paths but make their storage per queue. cache align DMA data buffer start addresses. cache align (padding) some structures that end within a cacheline. Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com> Acked-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/net/ixgbe/ixgbe.h20
-rw-r--r--drivers/net/ixgbe/ixgbe_ethtool.c5
-rw-r--r--drivers/net/ixgbe/ixgbe_main.c32
3 files changed, 27 insertions, 30 deletions
diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index 91d80b731352..8da8eb535084 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -161,10 +161,12 @@ struct ixgbe_ring {
161 unsigned long reinit_state; 161 unsigned long reinit_state;
162 u64 rsc_count; /* stat for coalesced packets */ 162 u64 rsc_count; /* stat for coalesced packets */
163 u64 rsc_flush; /* stats for flushed packets */ 163 u64 rsc_flush; /* stats for flushed packets */
164 u32 restart_queue; /* track tx queue restarts */
165 u32 non_eop_descs; /* track hardware descriptor chaining */
164 166
165 unsigned int size; /* length in bytes */ 167 unsigned int size; /* length in bytes */
166 dma_addr_t dma; /* phys. address of descriptor ring */ 168 dma_addr_t dma; /* phys. address of descriptor ring */
167}; 169} ____cacheline_internodealigned_in_smp;
168 170
169enum ixgbe_ring_f_enum { 171enum ixgbe_ring_f_enum {
170 RING_F_NONE = 0, 172 RING_F_NONE = 0,
@@ -189,7 +191,7 @@ enum ixgbe_ring_f_enum {
189struct ixgbe_ring_feature { 191struct ixgbe_ring_feature {
190 int indices; 192 int indices;
191 int mask; 193 int mask;
192}; 194} ____cacheline_internodealigned_in_smp;
193 195
194#define MAX_RX_QUEUES 128 196#define MAX_RX_QUEUES 128
195#define MAX_TX_QUEUES 128 197#define MAX_TX_QUEUES 128
@@ -275,29 +277,25 @@ struct ixgbe_adapter {
275 u16 eitr_high; 277 u16 eitr_high;
276 278
277 /* TX */ 279 /* TX */
278 struct ixgbe_ring *tx_ring; /* One per active queue */ 280 struct ixgbe_ring *tx_ring ____cacheline_aligned_in_smp; /* One per active queue */
279 int num_tx_queues; 281 int num_tx_queues;
280 u64 restart_queue;
281 u64 hw_csum_tx_good;
282 u64 lsc_int;
283 u64 hw_tso_ctxt;
284 u64 hw_tso6_ctxt;
285 u32 tx_timeout_count; 282 u32 tx_timeout_count;
286 bool detect_tx_hung; 283 bool detect_tx_hung;
287 284
285 u64 restart_queue;
286 u64 lsc_int;
287
288 /* RX */ 288 /* RX */
289 struct ixgbe_ring *rx_ring; /* One per active queue */ 289 struct ixgbe_ring *rx_ring ____cacheline_aligned_in_smp; /* One per active queue */
290 int num_rx_queues; 290 int num_rx_queues;
291 u64 hw_csum_rx_error; 291 u64 hw_csum_rx_error;
292 u64 hw_rx_no_dma_resources; 292 u64 hw_rx_no_dma_resources;
293 u64 hw_csum_rx_good;
294 u64 non_eop_descs; 293 u64 non_eop_descs;
295 int num_msix_vectors; 294 int num_msix_vectors;
296 int max_msix_q_vectors; /* true count of q_vectors for device */ 295 int max_msix_q_vectors; /* true count of q_vectors for device */
297 struct ixgbe_ring_feature ring_feature[RING_F_ARRAY_SIZE]; 296 struct ixgbe_ring_feature ring_feature[RING_F_ARRAY_SIZE];
298 struct msix_entry *msix_entries; 297 struct msix_entry *msix_entries;
299 298
300 u64 rx_hdr_split;
301 u32 alloc_rx_page_failed; 299 u32 alloc_rx_page_failed;
302 u32 alloc_rx_buff_failed; 300 u32 alloc_rx_buff_failed;
303 301
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 1928d559e65f..06a9d18bbdbc 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -93,16 +93,11 @@ static struct ixgbe_stats ixgbe_gstrings_stats[] = {
93 {"tx_restart_queue", IXGBE_STAT(restart_queue)}, 93 {"tx_restart_queue", IXGBE_STAT(restart_queue)},
94 {"rx_long_length_errors", IXGBE_STAT(stats.roc)}, 94 {"rx_long_length_errors", IXGBE_STAT(stats.roc)},
95 {"rx_short_length_errors", IXGBE_STAT(stats.ruc)}, 95 {"rx_short_length_errors", IXGBE_STAT(stats.ruc)},
96 {"tx_tcp4_seg_ctxt", IXGBE_STAT(hw_tso_ctxt)},
97 {"tx_tcp6_seg_ctxt", IXGBE_STAT(hw_tso6_ctxt)},
98 {"tx_flow_control_xon", IXGBE_STAT(stats.lxontxc)}, 96 {"tx_flow_control_xon", IXGBE_STAT(stats.lxontxc)},
99 {"rx_flow_control_xon", IXGBE_STAT(stats.lxonrxc)}, 97 {"rx_flow_control_xon", IXGBE_STAT(stats.lxonrxc)},
100 {"tx_flow_control_xoff", IXGBE_STAT(stats.lxofftxc)}, 98 {"tx_flow_control_xoff", IXGBE_STAT(stats.lxofftxc)},
101 {"rx_flow_control_xoff", IXGBE_STAT(stats.lxoffrxc)}, 99 {"rx_flow_control_xoff", IXGBE_STAT(stats.lxoffrxc)},
102 {"rx_csum_offload_good", IXGBE_STAT(hw_csum_rx_good)},
103 {"rx_csum_offload_errors", IXGBE_STAT(hw_csum_rx_error)}, 100 {"rx_csum_offload_errors", IXGBE_STAT(hw_csum_rx_error)},
104 {"tx_csum_offload_ctxt", IXGBE_STAT(hw_csum_tx_good)},
105 {"rx_header_split", IXGBE_STAT(rx_hdr_split)},
106 {"alloc_rx_page_failed", IXGBE_STAT(alloc_rx_page_failed)}, 101 {"alloc_rx_page_failed", IXGBE_STAT(alloc_rx_page_failed)},
107 {"alloc_rx_buff_failed", IXGBE_STAT(alloc_rx_buff_failed)}, 102 {"alloc_rx_buff_failed", IXGBE_STAT(alloc_rx_buff_failed)},
108 {"rx_no_dma_resources", IXGBE_STAT(hw_rx_no_dma_resources)}, 103 {"rx_no_dma_resources", IXGBE_STAT(hw_rx_no_dma_resources)},
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index e3dc68ba4b70..db05030a30ec 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -413,7 +413,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
413 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) && 413 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
414 !test_bit(__IXGBE_DOWN, &adapter->state)) { 414 !test_bit(__IXGBE_DOWN, &adapter->state)) {
415 netif_wake_subqueue(netdev, tx_ring->queue_index); 415 netif_wake_subqueue(netdev, tx_ring->queue_index);
416 ++adapter->restart_queue; 416 ++tx_ring->restart_queue;
417 } 417 }
418 } 418 }
419 419
@@ -624,7 +624,6 @@ static inline void ixgbe_rx_checksum(struct ixgbe_adapter *adapter,
624 624
625 /* It must be a TCP or UDP packet with a valid checksum */ 625 /* It must be a TCP or UDP packet with a valid checksum */
626 skb->ip_summed = CHECKSUM_UNNECESSARY; 626 skb->ip_summed = CHECKSUM_UNNECESSARY;
627 adapter->hw_csum_rx_good++;
628} 627}
629 628
630static inline void ixgbe_release_rx_desc(struct ixgbe_hw *hw, 629static inline void ixgbe_release_rx_desc(struct ixgbe_hw *hw,
@@ -681,14 +680,19 @@ static void ixgbe_alloc_rx_buffers(struct ixgbe_adapter *adapter,
681 680
682 if (!bi->skb) { 681 if (!bi->skb) {
683 struct sk_buff *skb; 682 struct sk_buff *skb;
684 skb = netdev_alloc_skb_ip_align(adapter->netdev, 683 /* netdev_alloc_skb reserves 32 bytes up front!! */
685 rx_ring->rx_buf_len); 684 uint bufsz = rx_ring->rx_buf_len + SMP_CACHE_BYTES;
685 skb = netdev_alloc_skb(adapter->netdev, bufsz);
686 686
687 if (!skb) { 687 if (!skb) {
688 adapter->alloc_rx_buff_failed++; 688 adapter->alloc_rx_buff_failed++;
689 goto no_buffers; 689 goto no_buffers;
690 } 690 }
691 691
692 /* advance the data pointer to the next cache line */
693 skb_reserve(skb, (PTR_ALIGN(skb->data, SMP_CACHE_BYTES)
694 - skb->data));
695
692 bi->skb = skb; 696 bi->skb = skb;
693 bi->dma = pci_map_single(pdev, skb->data, 697 bi->dma = pci_map_single(pdev, skb->data,
694 rx_ring->rx_buf_len, 698 rx_ring->rx_buf_len,
@@ -801,8 +805,6 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
801 hdr_info = le16_to_cpu(ixgbe_get_hdr_info(rx_desc)); 805 hdr_info = le16_to_cpu(ixgbe_get_hdr_info(rx_desc));
802 len = (hdr_info & IXGBE_RXDADV_HDRBUFLEN_MASK) >> 806 len = (hdr_info & IXGBE_RXDADV_HDRBUFLEN_MASK) >>
803 IXGBE_RXDADV_HDRBUFLEN_SHIFT; 807 IXGBE_RXDADV_HDRBUFLEN_SHIFT;
804 if (hdr_info & IXGBE_RXDADV_SPH)
805 adapter->rx_hdr_split++;
806 if (len > IXGBE_RX_HDR_SIZE) 808 if (len > IXGBE_RX_HDR_SIZE)
807 len = IXGBE_RX_HDR_SIZE; 809 len = IXGBE_RX_HDR_SIZE;
808 upper_len = le16_to_cpu(rx_desc->wb.upper.length); 810 upper_len = le16_to_cpu(rx_desc->wb.upper.length);
@@ -812,7 +814,7 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
812 814
813 cleaned = true; 815 cleaned = true;
814 skb = rx_buffer_info->skb; 816 skb = rx_buffer_info->skb;
815 prefetch(skb->data - NET_IP_ALIGN); 817 prefetch(skb->data);
816 rx_buffer_info->skb = NULL; 818 rx_buffer_info->skb = NULL;
817 819
818 if (rx_buffer_info->dma) { 820 if (rx_buffer_info->dma) {
@@ -884,7 +886,7 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
884 skb->next = next_buffer->skb; 886 skb->next = next_buffer->skb;
885 skb->next->prev = skb; 887 skb->next->prev = skb;
886 } 888 }
887 adapter->non_eop_descs++; 889 rx_ring->non_eop_descs++;
888 goto next_desc; 890 goto next_desc;
889 } 891 }
890 892
@@ -4511,6 +4513,13 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
4511 adapter->rsc_total_flush = rsc_flush; 4513 adapter->rsc_total_flush = rsc_flush;
4512 } 4514 }
4513 4515
4516 /* gather some stats to the adapter struct that are per queue */
4517 for (i = 0; i < adapter->num_tx_queues; i++)
4518 adapter->restart_queue += adapter->tx_ring[i].restart_queue;
4519
4520 for (i = 0; i < adapter->num_rx_queues; i++)
4521 adapter->non_eop_descs += adapter->rx_ring[i].non_eop_descs; /* NOTE(review): was tx_ring[i] — copy-paste bug; non_eop_descs is incremented on rx_ring in ixgbe_clean_rx_irq */
4522
4514 adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS); 4523 adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
4515 for (i = 0; i < 8; i++) { 4524 for (i = 0; i < 8; i++) {
4516 /* for packet buffers not used, the register should read 0 */ 4525 /* for packet buffers not used, the register should read 0 */
@@ -4893,14 +4902,12 @@ static int ixgbe_tso(struct ixgbe_adapter *adapter,
4893 iph->daddr, 0, 4902 iph->daddr, 0,
4894 IPPROTO_TCP, 4903 IPPROTO_TCP,
4895 0); 4904 0);
4896 adapter->hw_tso_ctxt++;
4897 } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) { 4905 } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
4898 ipv6_hdr(skb)->payload_len = 0; 4906 ipv6_hdr(skb)->payload_len = 0;
4899 tcp_hdr(skb)->check = 4907 tcp_hdr(skb)->check =
4900 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 4908 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4901 &ipv6_hdr(skb)->daddr, 4909 &ipv6_hdr(skb)->daddr,
4902 0, IPPROTO_TCP, 0); 4910 0, IPPROTO_TCP, 0);
4903 adapter->hw_tso6_ctxt++;
4904 } 4911 }
4905 4912
4906 i = tx_ring->next_to_use; 4913 i = tx_ring->next_to_use;
@@ -5019,7 +5026,6 @@ static bool ixgbe_tx_csum(struct ixgbe_adapter *adapter,
5019 tx_buffer_info->time_stamp = jiffies; 5026 tx_buffer_info->time_stamp = jiffies;
5020 tx_buffer_info->next_to_watch = i; 5027 tx_buffer_info->next_to_watch = i;
5021 5028
5022 adapter->hw_csum_tx_good++;
5023 i++; 5029 i++;
5024 if (i == tx_ring->count) 5030 if (i == tx_ring->count)
5025 i = 0; 5031 i = 0;
@@ -5256,8 +5262,6 @@ static void ixgbe_atr(struct ixgbe_adapter *adapter, struct sk_buff *skb,
5256static int __ixgbe_maybe_stop_tx(struct net_device *netdev, 5262static int __ixgbe_maybe_stop_tx(struct net_device *netdev,
5257 struct ixgbe_ring *tx_ring, int size) 5263 struct ixgbe_ring *tx_ring, int size)
5258{ 5264{
5259 struct ixgbe_adapter *adapter = netdev_priv(netdev);
5260
5261 netif_stop_subqueue(netdev, tx_ring->queue_index); 5265 netif_stop_subqueue(netdev, tx_ring->queue_index);
5262 /* Herbert's original patch had: 5266 /* Herbert's original patch had:
5263 * smp_mb__after_netif_stop_queue(); 5267 * smp_mb__after_netif_stop_queue();
@@ -5271,7 +5275,7 @@ static int __ixgbe_maybe_stop_tx(struct net_device *netdev,
5271 5275
5272 /* A reprieve! - use start_queue because it doesn't call schedule */ 5276 /* A reprieve! - use start_queue because it doesn't call schedule */
5273 netif_start_subqueue(netdev, tx_ring->queue_index); 5277 netif_start_subqueue(netdev, tx_ring->queue_index);
5274 ++adapter->restart_queue; 5278 ++tx_ring->restart_queue;
5275 return 0; 5279 return 0;
5276} 5280}
5277 5281