author     Jesse Brandeburg <jesse.brandeburg@intel.com>    2014-02-13 21:14:40 -0500
committer  Jeff Kirsher <jeffrey.t.kirsher@intel.com>       2014-03-19 20:17:21 -0400
commit     1943d8ba9507d49fa5cdb51eb1b63810d94e1969 (patch)
tree       7fe5b89bf44b8c5ec8c4f3177b0159073c94a4e8
parent     6c167f582ea93d0b66a187ec06a7c015fecd723a (diff)
i40e/i40evf: enable hardware feature head write back
The hardware supports a feature to avoid updating the descriptor ring by marking each descriptor with a DD bit, and instead writes a memory location with an update to where the driver should clean up to. Enable this feature.

Change-ID: I5da4e0681f0b581a6401c950a81808792267fe57
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Signed-off-by: Catherine Sullivan <catherine.sullivan@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
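With head write-back enabled the hardware no longer sets the DD (descriptor done) bit in each completed Tx descriptor; it DMA-writes the index it has finished with into a u32 slot placed immediately after the descriptor ring, and i40e_clean_tx_irq() reclaims buffers until it catches up to that index. A minimal standalone C sketch of that cleanup model follows; the types and values are hypothetical and only illustrate the idea, not the driver's actual structures.

/* Minimal sketch of head write-back cleanup (hypothetical types, not the
 * driver's). The "hardware" publishes the index of the next descriptor it
 * will complete into a u32 placed just past the last descriptor; software
 * reclaims entries until it catches up to that index instead of testing a
 * DD bit in every descriptor. (The driver additionally converts the value
 * from little-endian via le32_to_cpu(); omitted here for a host-only demo.)
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define RING_COUNT 8

struct tx_desc {                       /* stand-in for struct i40e_tx_desc */
        uint64_t buffer_addr;
        uint64_t cmd_type_offset_bsz;
};

int main(void)
{
        /* ring->count descriptors plus one trailing u32 for head write-back,
         * mirroring the sizing change in i40e_setup_tx_descriptors() */
        size_t size = RING_COUNT * sizeof(struct tx_desc) + sizeof(uint32_t);
        struct tx_desc *ring = calloc(1, size);
        volatile uint32_t *head_wb;
        unsigned int next_to_clean = 0;

        if (!ring)
                return 1;
        head_wb = (volatile uint32_t *)(ring + RING_COUNT);

        *head_wb = 5;   /* pretend the NIC finished descriptors 0..4 */

        /* cleanup loop: reclaim until we reach the written-back head */
        while (next_to_clean != *head_wb) {
                printf("reclaiming descriptor %u\n", next_to_clean);
                next_to_clean = (next_to_clean + 1) % RING_COUNT;
        }

        free(ring);
        return 0;
}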
-rw-r--r--   drivers/net/ethernet/intel/i40e/i40e_main.c           5
-rw-r--r--   drivers/net/ethernet/intel/i40e/i40e_txrx.c          46
-rw-r--r--   drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c    3
-rw-r--r--   drivers/net/ethernet/intel/i40evf/i40e_txrx.c        46
4 files changed, 88 insertions, 12 deletions
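Before the hunks, note the arithmetic the patch relies on: the head_wb_addr programmed into the Tx queue context is the ring's DMA base plus count * sizeof(struct i40e_tx_desc), i.e. the first byte after the last descriptor, and the setup routines grow the allocation by sizeof(u32) before the 4 KiB ALIGN so that slot always lives inside the mapping. A small sketch of the numbers for an assumed 512-entry ring (the 16-byte size below matches the two-quadword i40e Tx descriptor):

/* Sketch of the Tx ring sizing used by the patch, for an assumed 512-entry
 * ring; only the arithmetic is shown, no DMA allocation is performed. */
#include <stdint.h>
#include <stdio.h>

/* same rounding as the kernel's ALIGN() macro */
#define ALIGN_UP(x, a) (((x) + (size_t)(a) - 1) & ~((size_t)(a) - 1))

int main(void)
{
        size_t count = 512;                      /* assumed ring->count */
        size_t desc_size = 2 * sizeof(uint64_t); /* sizeof(struct i40e_tx_desc) */

        size_t size = count * desc_size;         /* descriptor array: 8192 bytes */
        size_t head_wb_offset = size;            /* head_wb_addr = ring->dma + 8192 */

        size += sizeof(uint32_t);                /* + u32 head write-back slot */
        size = ALIGN_UP(size, 4096);             /* round up to nearest 4K: 12288 */

        printf("head write-back slot at offset %zu, allocation %zu bytes\n",
               head_wb_offset, size);
        return 0;
}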
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 661cd427bce6..95433d4d2f6a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2181,6 +2181,11 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
         tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FD_SB_ENABLED |
                                                I40E_FLAG_FD_ATR_ENABLED));
         tx_ctx.timesync_ena = !!(vsi->back->flags & I40E_FLAG_PTP);
+        /* FDIR VSI tx ring can still use RS bit and writebacks */
+        if (vsi->type != I40E_VSI_FDIR)
+                tx_ctx.head_wb_ena = 1;
+        tx_ctx.head_wb_addr = ring->dma +
+                              (ring->count * sizeof(struct i40e_tx_desc));
 
         /* As part of VSI creation/update, FW allocates certain
          * Tx arbitration queue sets for each TC enabled for
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 88666adb0743..079c6b2bafc3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -619,6 +619,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
 }
 
 /**
+ * i40e_get_head - Retrieve head from head writeback
+ * @tx_ring:  tx ring to fetch head of
+ *
+ * Returns value of Tx ring head based on value stored
+ * in head write-back location
+ **/
+static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
+{
+        void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
+
+        return le32_to_cpu(*(volatile __le32 *)head);
+}
+
+/**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @tx_ring:  tx ring to clean
  * @budget:   how many cleans we're allowed
@@ -629,6 +643,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 {
         u16 i = tx_ring->next_to_clean;
         struct i40e_tx_buffer *tx_buf;
+        struct i40e_tx_desc *tx_head;
         struct i40e_tx_desc *tx_desc;
         unsigned int total_packets = 0;
         unsigned int total_bytes = 0;
@@ -637,6 +652,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
         tx_desc = I40E_TX_DESC(tx_ring, i);
         i -= tx_ring->count;
 
+        tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
+
         do {
                 struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
 
@@ -647,9 +664,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
                 /* prevent any other reads prior to eop_desc */
                 read_barrier_depends();
 
-                /* if the descriptor isn't done, no work yet to do */
-                if (!(eop_desc->cmd_type_offset_bsz &
-                      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
+                /* we have caught up to head, no work left to do */
+                if (tx_head == tx_desc)
                         break;
 
                 /* clear next_to_watch to prevent false hangs */
@@ -905,6 +921,10 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
 
         /* round up to nearest 4K */
         tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
+        /* add u32 for head writeback, align after this takes care of
+         * guaranteeing this is at least one cache line in size
+         */
+        tx_ring->size += sizeof(u32);
         tx_ring->size = ALIGN(tx_ring->size, 4096);
         tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
                                            &tx_ring->dma, GFP_KERNEL);
@@ -2042,9 +2062,23 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
                 tx_bi = &tx_ring->tx_bi[i];
         }
 
-        tx_desc->cmd_type_offset_bsz =
-                build_ctob(td_cmd, td_offset, size, td_tag) |
-                cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
+        /* Place RS bit on last descriptor of any packet that spans across the
+         * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
+         */
+#define WB_STRIDE 0x3
+        if (((i & WB_STRIDE) != WB_STRIDE) &&
+            (first <= &tx_ring->tx_bi[i]) &&
+            (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
+                tx_desc->cmd_type_offset_bsz =
+                        build_ctob(td_cmd, td_offset, size, td_tag) |
+                        cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
+                                    I40E_TXD_QW1_CMD_SHIFT);
+        } else {
+                tx_desc->cmd_type_offset_bsz =
+                        build_ctob(td_cmd, td_offset, size, td_tag) |
+                        cpu_to_le64((u64)I40E_TXD_CMD <<
+                                    I40E_TXD_QW1_CMD_SHIFT);
+        }
 
         netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
                                                  tx_ring->queue_index),
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 2086a62062c2..b2da079cd696 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -230,6 +230,9 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_idx,
         tx_ctx.qlen = info->ring_len;
         tx_ctx.rdylist = le16_to_cpu(pf->vsi[vsi_idx]->info.qs_handle[0]);
         tx_ctx.rdylist_act = 0;
+        tx_ctx.head_wb_ena = 1;
+        tx_ctx.head_wb_addr = info->dma_ring_addr +
+                              (info->ring_len * sizeof(struct i40e_tx_desc));
 
         /* clear the context in the HMC */
         ret = i40e_clear_lan_tx_queue_context(hw, pf_queue_id);
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index b1d87c6a5c35..626c08a98edb 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -170,6 +170,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
 }
 
 /**
+ * i40e_get_head - Retrieve head from head writeback
+ * @tx_ring:  tx ring to fetch head of
+ *
+ * Returns value of Tx ring head based on value stored
+ * in head write-back location
+ **/
+static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
+{
+        void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
+
+        return le32_to_cpu(*(volatile __le32 *)head);
+}
+
+/**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @tx_ring:  tx ring to clean
  * @budget:   how many cleans we're allowed
@@ -180,6 +194,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 {
         u16 i = tx_ring->next_to_clean;
         struct i40e_tx_buffer *tx_buf;
+        struct i40e_tx_desc *tx_head;
         struct i40e_tx_desc *tx_desc;
         unsigned int total_packets = 0;
         unsigned int total_bytes = 0;
@@ -188,6 +203,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
         tx_desc = I40E_TX_DESC(tx_ring, i);
         i -= tx_ring->count;
 
+        tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
+
         do {
                 struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
 
@@ -198,9 +215,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
                 /* prevent any other reads prior to eop_desc */
                 read_barrier_depends();
 
-                /* if the descriptor isn't done, no work yet to do */
-                if (!(eop_desc->cmd_type_offset_bsz &
-                      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
+                /* we have caught up to head, no work left to do */
+                if (tx_head == tx_desc)
                         break;
 
                 /* clear next_to_watch to prevent false hangs */
@@ -432,6 +448,10 @@ int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring)
 
         /* round up to nearest 4K */
         tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
+        /* add u32 for head writeback, align after this takes care of
+         * guaranteeing this is at least one cache line in size
+         */
+        tx_ring->size += sizeof(u32);
         tx_ring->size = ALIGN(tx_ring->size, 4096);
         tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
                                            &tx_ring->dma, GFP_KERNEL);
@@ -1377,9 +1397,23 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
                 tx_bi = &tx_ring->tx_bi[i];
         }
 
-        tx_desc->cmd_type_offset_bsz =
-                build_ctob(td_cmd, td_offset, size, td_tag) |
-                cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
+        /* Place RS bit on last descriptor of any packet that spans across the
+         * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
+         */
+#define WB_STRIDE 0x3
+        if (((i & WB_STRIDE) != WB_STRIDE) &&
+            (first <= &tx_ring->tx_bi[i]) &&
+            (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
+                tx_desc->cmd_type_offset_bsz =
+                        build_ctob(td_cmd, td_offset, size, td_tag) |
+                        cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
+                                    I40E_TXD_QW1_CMD_SHIFT);
+        } else {
+                tx_desc->cmd_type_offset_bsz =
+                        build_ctob(td_cmd, td_offset, size, td_tag) |
+                        cpu_to_le64((u64)I40E_TXD_CMD <<
+                                    I40E_TXD_QW1_CMD_SHIFT);
+        }
 
         netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
                                                  tx_ring->queue_index),