author     Jacob Keller <jacob.e.keller@intel.com>  2017-08-29 05:32:42 -0400
committer  Jeff Kirsher <jeffrey.t.kirsher@intel.com>  2017-10-06 11:11:32 -0400
commit     a5340d933e3cd7829a24bacc156dd1e475a1ae2c (patch)
tree       efaf663628170a4cb16edf3f3dae457a39a8df1c
parent     0a3b4f702fb1f76b03530d58af9efc5e10392185 (diff)
i40e: ignore skb->xmit_more when deciding to set RS bit
Since commit 6a7fded776a7 ("i40e: Fix RS bit update in Tx path and disable force WB workaround") we've tried to "optimize" setting the RS bit based around skb->xmit_more. This same logic was refactored in commit 1dc8b538795f ("i40e: Reorder logic for coalescing RS bits"), but was not functionally changed.

Using skb->xmit_more in this way is incorrect, because in certain circumstances we may see a large number of skbs in sequence with xmit_more set. This leads to a performance loss, as the hardware does not write back anything for those packets, which delays how quickly we can respond to the stack's transmit requests.

This significantly impacts UDP performance, especially when layered with multiple devices, such as bonding, VLANs, and vnet setups. It was not noticed until now because it is difficult to create a setup which reproduces the issue. It was discovered in a UDP_STREAM test in a VM, connected using a vnet device to a bridge, which is connected to a bonded pair of X710 ports in active-backup mode with a VLAN. These layered devices seem to compound the number of skbs transmitted at once by the qdisc. Additionally, the problem can be masked by reducing the ITR value.

Since the original commit does not provide strong justification for this RS bit "optimization", revert to the previous behavior of setting the RS bit every 4th packet.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
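To make the before/after behavior concrete: after this revert, the RS (Report Status) bit is requested purely on a per-ring packet counter, and skb->xmit_more only defers the tail-register write. Below is a minimal user-space C sketch of that post-revert logic; WB_STRIDE, the packet_stride counter, and the OR trick mirror the patch, while struct tx_ring_sim, tx_complete_sim(), and the driving loop are hypothetical scaffolding, not i40e code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define WB_STRIDE 4	/* writeback stride, as in the driver */

/* Hypothetical stand-ins for the ring state touched by the patch. */
struct tx_ring_sim {
	uint8_t packet_stride;	/* packets since the last RS descriptor */
	unsigned int tail;	/* simulated tail register */
};

/* Post-revert decision for one transmitted skb.
 * desc_count: descriptors this skb consumed
 * xmit_more:  stack has more packets queued behind this one
 * stopped:    software queue is stopped (ring nearly full)
 */
static void tx_complete_sim(struct tx_ring_sim *ring, unsigned int desc_count,
			    bool xmit_more, bool stopped, unsigned int next_idx)
{
	bool set_rs;

	/* RS every WB_STRIDE packets, ignoring xmit_more; OR-ing in
	 * desc_count means one large multi-descriptor packet also
	 * triggers an immediate writeback.
	 */
	desc_count |= ++ring->packet_stride;
	set_rs = desc_count >= WB_STRIDE;
	if (set_rs)
		ring->packet_stride = 0;

	/* Tail is still deferred on xmit_more unless the queue stopped. */
	if (stopped || !xmit_more)
		ring->tail = next_idx;

	printf("packet done: RS=%d tail=%u\n", set_rs, ring->tail);
}

int main(void)
{
	struct tx_ring_sim ring = { 0, 0 };
	unsigned int idx = 0;
	int i;

	/* Six one-descriptor packets, xmit_more set on all but the last. */
	for (i = 0; i < 6; i++) {
		idx++;
		tx_complete_sim(&ring, 1, i < 5, false, idx);
	}
	return 0;
}

Run as-is, RS fires on the 4th packet even though xmit_more is still set, and the tail write happens only once, when xmit_more drops on the last packet; that is the writeback cadence the revert restores.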
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 34 ++++------------------------------
 1 file changed, 4 insertions(+), 30 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index d9fdf69bbc6e..3bd176606c09 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -3167,38 +3167,12 @@ static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 	/* write last descriptor with EOP bit */
 	td_cmd |= I40E_TX_DESC_CMD_EOP;
 
-	/* We can OR these values together as they both are checked against
-	 * 4 below and at this point desc_count will be used as a boolean value
-	 * after this if/else block.
+	/* We OR these values together to check both against 4 (WB_STRIDE)
+	 * below. This is safe since we don't re-use desc_count afterwards.
 	 */
 	desc_count |= ++tx_ring->packet_stride;
 
-	/* Algorithm to optimize tail and RS bit setting:
-	 * if queue is stopped
-	 *	mark RS bit
-	 *	reset packet counter
-	 * else if xmit_more is supported and is true
-	 *	advance packet counter to 4
-	 *	reset desc_count to 0
-	 *
-	 * if desc_count >= 4
-	 *	mark RS bit
-	 *	reset packet counter
-	 * if desc_count > 0
-	 *	update tail
-	 *
-	 * Note: If there are less than 4 descriptors
-	 * pending and interrupts were disabled the service task will
-	 * trigger a force WB.
-	 */
-	if (netif_xmit_stopped(txring_txq(tx_ring))) {
-		goto do_rs;
-	} else if (skb->xmit_more) {
-		/* set stride to arm on next packet and reset desc_count */
-		tx_ring->packet_stride = WB_STRIDE;
-		desc_count = 0;
-	} else if (desc_count >= WB_STRIDE) {
-do_rs:
+	if (desc_count >= WB_STRIDE) {
 		/* write last descriptor with RS bit set */
 		td_cmd |= I40E_TX_DESC_CMD_RS;
 		tx_ring->packet_stride = 0;
@@ -3219,7 +3193,7 @@ do_rs:
 	first->next_to_watch = tx_desc;
 
 	/* notify HW of packet */
-	if (desc_count) {
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
 		writel(i, tx_ring->tail);
 
 		/* we need this if more than one processor can write to our tail