author    | Jesse Brandeburg <jesse.brandeburg@intel.com> | 2014-02-13 21:14:40 -0500
committer | Jeff Kirsher <jeffrey.t.kirsher@intel.com>    | 2014-03-19 20:17:21 -0400
commit    | 1943d8ba9507d49fa5cdb51eb1b63810d94e1969
tree      | 7fe5b89bf44b8c5ec8c4f3177b0159073c94a4e8
parent    | 6c167f582ea93d0b66a187ec06a7c015fecd723a
i40e/i40evf: enable hardware feature head write back
The hardware supports a feature that avoids writing a DD (descriptor done) bit
back into each descriptor in the ring; instead, it writes the position up to
which the driver should clean into a dedicated memory location (head
write-back). Enable this feature.
Change-ID: I5da4e0681f0b581a6401c950a81808792267fe57
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Signed-off-by: Catherine Sullivan <catherine.sullivan@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
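For context on the mechanism: the hardware DMA-writes the current Tx head index into a u32 slot that the driver places immediately after the descriptor ring (see the i40e_get_head() helper and the head_wb_addr setup in the diff below), and the cleanup path compares its own position against that value instead of polling a DD bit in every descriptor. The following is only a minimal, hedged sketch of that idea using simplified stand-in types (tx_desc, tx_ring, ring_get_head, ring_clean are illustrative names, not the i40e driver's structures), and it omits the endianness conversion and read barriers the real driver needs:

```c
#include <stdint.h>

/* Simplified stand-in for a Tx ring: 'count' descriptors followed by a
 * 32-bit head write-back slot that the device updates via DMA. */
struct tx_desc {
	uint64_t qw0;
	uint64_t qw1;
};

struct tx_ring {
	struct tx_desc *desc;   /* base of the descriptor ring        */
	uint16_t count;         /* number of descriptors in the ring  */
	uint16_t next_to_clean; /* first descriptor not yet reclaimed */
};

/* Read the head index the device wrote just past the last descriptor.
 * The volatile access keeps the compiler from caching a value that the
 * hardware changes behind its back. */
static inline uint32_t ring_get_head(const struct tx_ring *ring)
{
	const volatile uint32_t *head =
		(const volatile uint32_t *)(ring->desc + ring->count);

	return *head;
}

/* Hypothetical cleanup loop: reclaim everything up to (but not including)
 * the reported head, rather than testing a done bit per descriptor. */
static unsigned int ring_clean(struct tx_ring *ring)
{
	uint32_t head = ring_get_head(ring);
	unsigned int cleaned = 0;

	while (ring->next_to_clean != head) {
		/* release the buffer for ring->desc[ring->next_to_clean] here */
		ring->next_to_clean = (ring->next_to_clean + 1) % ring->count;
		cleaned++;
	}

	return cleaned;
}
```

The benefit is that completion state lives in one location the CPU can read cheaply, so the cleanup loop compares indices instead of reading a flag out of every descriptor.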
 -rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_main.c        |  5
 -rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.c        | 46
 -rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c |  3
 -rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_txrx.c      | 46
 4 files changed, 88 insertions, 12 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 661cd427bce6..95433d4d2f6a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2181,6 +2181,11 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
 	tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FD_SB_ENABLED |
 					       I40E_FLAG_FD_ATR_ENABLED));
 	tx_ctx.timesync_ena = !!(vsi->back->flags & I40E_FLAG_PTP);
+	/* FDIR VSI tx ring can still use RS bit and writebacks */
+	if (vsi->type != I40E_VSI_FDIR)
+		tx_ctx.head_wb_ena = 1;
+	tx_ctx.head_wb_addr = ring->dma +
+			      (ring->count * sizeof(struct i40e_tx_desc));
 
 	/* As part of VSI creation/update, FW allocates certain
 	 * Tx arbitration queue sets for each TC enabled for
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 88666adb0743..079c6b2bafc3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -619,6 +619,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
 }
 
 /**
+ * i40e_get_head - Retrieve head from head writeback
+ * @tx_ring: tx ring to fetch head of
+ *
+ * Returns value of Tx ring head based on value stored
+ * in head write-back location
+ **/
+static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
+{
+	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
+
+	return le32_to_cpu(*(volatile __le32 *)head);
+}
+
+/**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @tx_ring: tx ring to clean
  * @budget: how many cleans we're allowed
@@ -629,6 +643,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 {
 	u16 i = tx_ring->next_to_clean;
 	struct i40e_tx_buffer *tx_buf;
+	struct i40e_tx_desc *tx_head;
 	struct i40e_tx_desc *tx_desc;
 	unsigned int total_packets = 0;
 	unsigned int total_bytes = 0;
@@ -637,6 +652,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 	tx_desc = I40E_TX_DESC(tx_ring, i);
 	i -= tx_ring->count;
 
+	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
+
 	do {
 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
 
@@ -647,9 +664,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 		/* prevent any other reads prior to eop_desc */
 		read_barrier_depends();
 
-		/* if the descriptor isn't done, no work yet to do */
-		if (!(eop_desc->cmd_type_offset_bsz &
-		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
+		/* we have caught up to head, no work left to do */
+		if (tx_head == tx_desc)
 			break;
 
 		/* clear next_to_watch to prevent false hangs */
@@ -905,6 +921,10 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
 
 	/* round up to nearest 4K */
 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
+	/* add u32 for head writeback, align after this takes care of
+	 * guaranteeing this is at least one cache line in size
+	 */
+	tx_ring->size += sizeof(u32);
 	tx_ring->size = ALIGN(tx_ring->size, 4096);
 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
 					   &tx_ring->dma, GFP_KERNEL);
@@ -2042,9 +2062,23 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 		tx_bi = &tx_ring->tx_bi[i];
 	}
 
-	tx_desc->cmd_type_offset_bsz =
-		build_ctob(td_cmd, td_offset, size, td_tag) |
-		cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
+	/* Place RS bit on last descriptor of any packet that spans across the
+	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
+	 */
+#define WB_STRIDE 0x3
+	if (((i & WB_STRIDE) != WB_STRIDE) &&
+	    (first <= &tx_ring->tx_bi[i]) &&
+	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
+				    I40E_TXD_QW1_CMD_SHIFT);
+	} else {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TXD_CMD <<
+				    I40E_TXD_QW1_CMD_SHIFT);
+	}
 
 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
 						 tx_ring->queue_index),
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 2086a62062c2..b2da079cd696 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -230,6 +230,9 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_idx,
 	tx_ctx.qlen = info->ring_len;
 	tx_ctx.rdylist = le16_to_cpu(pf->vsi[vsi_idx]->info.qs_handle[0]);
 	tx_ctx.rdylist_act = 0;
+	tx_ctx.head_wb_ena = 1;
+	tx_ctx.head_wb_addr = info->dma_ring_addr +
+			      (info->ring_len * sizeof(struct i40e_tx_desc));
 
 	/* clear the context in the HMC */
 	ret = i40e_clear_lan_tx_queue_context(hw, pf_queue_id);
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index b1d87c6a5c35..626c08a98edb 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -170,6 +170,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
 }
 
 /**
+ * i40e_get_head - Retrieve head from head writeback
+ * @tx_ring: tx ring to fetch head of
+ *
+ * Returns value of Tx ring head based on value stored
+ * in head write-back location
+ **/
+static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
+{
+	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
+
+	return le32_to_cpu(*(volatile __le32 *)head);
+}
+
+/**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @tx_ring: tx ring to clean
  * @budget: how many cleans we're allowed
@@ -180,6 +194,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 {
 	u16 i = tx_ring->next_to_clean;
 	struct i40e_tx_buffer *tx_buf;
+	struct i40e_tx_desc *tx_head;
 	struct i40e_tx_desc *tx_desc;
 	unsigned int total_packets = 0;
 	unsigned int total_bytes = 0;
@@ -188,6 +203,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 	tx_desc = I40E_TX_DESC(tx_ring, i);
 	i -= tx_ring->count;
 
+	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
+
 	do {
 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
 
@@ -198,9 +215,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 		/* prevent any other reads prior to eop_desc */
 		read_barrier_depends();
 
-		/* if the descriptor isn't done, no work yet to do */
-		if (!(eop_desc->cmd_type_offset_bsz &
-		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
+		/* we have caught up to head, no work left to do */
+		if (tx_head == tx_desc)
 			break;
 
 		/* clear next_to_watch to prevent false hangs */
@@ -432,6 +448,10 @@ int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring)
 
 	/* round up to nearest 4K */
 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
+	/* add u32 for head writeback, align after this takes care of
+	 * guaranteeing this is at least one cache line in size
+	 */
+	tx_ring->size += sizeof(u32);
 	tx_ring->size = ALIGN(tx_ring->size, 4096);
 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
 					   &tx_ring->dma, GFP_KERNEL);
@@ -1377,9 +1397,23 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 		tx_bi = &tx_ring->tx_bi[i];
 	}
 
-	tx_desc->cmd_type_offset_bsz =
-		build_ctob(td_cmd, td_offset, size, td_tag) |
-		cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
+	/* Place RS bit on last descriptor of any packet that spans across the
+	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
+	 */
+#define WB_STRIDE 0x3
+	if (((i & WB_STRIDE) != WB_STRIDE) &&
+	    (first <= &tx_ring->tx_bi[i]) &&
+	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
+				    I40E_TXD_QW1_CMD_SHIFT);
+	} else {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TXD_CMD <<
+				    I40E_TXD_QW1_CMD_SHIFT);
+	}
 
 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
 						 tx_ring->queue_index),