author     Eran Ben Elisha <eranbe@mellanox.com>    2018-01-16 10:25:06 -0500
committer  Saeed Mahameed <saeedm@mellanox.com>     2018-03-27 20:17:27 -0400
commit     bfc647d52e67dc756c605e9a50d45b71054c2533 (patch)
tree       7b924b7b877f6bd1cfab9efea1be380f56c1346e
parent     c4554fbccaa3306f65954ed0f1dab7abce7889f8 (diff)
net/mlx5e: Move all TX timeout logic to be under state lock
The driver callback for handling a TX timeout needs to access some internal resources (SQ, CQ) in order to decide whether the tx timeout work should be scheduled. These resources might be unavailable if the channels are closed in parallel (ifdown, for example).

The state lock is the mechanism that protects against such races. Move all TX timeout logic into the work, under the state lock.

In addition, move the work from the global workqueue to the mlx5e workqueue to make sure the work is flushed when the device is detached.

Also, move the mlx5e_tx_timeout_work code next to the TX timeout NDO for better code locality.

Fixes: 3947ca185999 ("net/mlx5e: Implement ndo_tx_timeout callback")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
-rw-r--r--   drivers/net/ethernet/mellanox/mlx5/core/en_main.c   61
1 file changed, 34 insertions(+), 27 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5d8eb0a9c0f0..e0b75f52d556 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -177,26 +177,6 @@ static void mlx5e_update_carrier_work(struct work_struct *work)
         mutex_unlock(&priv->state_lock);
 }
 
-static void mlx5e_tx_timeout_work(struct work_struct *work)
-{
-        struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
-                                               tx_timeout_work);
-        int err;
-
-        rtnl_lock();
-        mutex_lock(&priv->state_lock);
-        if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
-                goto unlock;
-        mlx5e_close_locked(priv->netdev);
-        err = mlx5e_open_locked(priv->netdev);
-        if (err)
-                netdev_err(priv->netdev, "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
-                           err);
-unlock:
-        mutex_unlock(&priv->state_lock);
-        rtnl_unlock();
-}
-
 void mlx5e_update_stats(struct mlx5e_priv *priv)
 {
         int i;
@@ -3658,13 +3638,19 @@ static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
         return true;
 }
 
-static void mlx5e_tx_timeout(struct net_device *dev)
+static void mlx5e_tx_timeout_work(struct work_struct *work)
 {
-        struct mlx5e_priv *priv = netdev_priv(dev);
+        struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+                                               tx_timeout_work);
+        struct net_device *dev = priv->netdev;
         bool reopen_channels = false;
-        int i;
+        int i, err;
 
-        netdev_err(dev, "TX timeout detected\n");
+        rtnl_lock();
+        mutex_lock(&priv->state_lock);
+
+        if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+                goto unlock;
 
         for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) {
                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i);
@@ -3672,7 +3658,9 @@ static void mlx5e_tx_timeout(struct net_device *dev)
 
                 if (!netif_xmit_stopped(dev_queue))
                         continue;
-                netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
+
+                netdev_err(dev,
+                           "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
                            i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
                            jiffies_to_usecs(jiffies - dev_queue->trans_start));
 
@@ -3685,8 +3673,27 @@ static void mlx5e_tx_timeout(struct net_device *dev)
                 }
         }
 
-        if (reopen_channels && test_bit(MLX5E_STATE_OPENED, &priv->state))
-                schedule_work(&priv->tx_timeout_work);
+        if (!reopen_channels)
+                goto unlock;
+
+        mlx5e_close_locked(dev);
+        err = mlx5e_open_locked(dev);
+        if (err)
+                netdev_err(priv->netdev,
+                           "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
+                           err);
+
+unlock:
+        mutex_unlock(&priv->state_lock);
+        rtnl_unlock();
+}
+
+static void mlx5e_tx_timeout(struct net_device *dev)
+{
+        struct mlx5e_priv *priv = netdev_priv(dev);
+
+        netdev_err(dev, "TX timeout detected\n");
+        queue_work(priv->wq, &priv->tx_timeout_work);
 }
 
 static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
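
The pattern in this patch generalizes beyond mlx5e: the timeout callback itself only queues deferred work, and the work re-checks the device's "opened" state under the state lock before touching any queue resources, so a parallel ifdown cannot race with the recovery. The following is a minimal, self-contained userspace sketch of that idea, assuming a hypothetical fake_priv structure with a pthread mutex standing in for priv->state_lock; it is not mlx5 code, and every name in it is illustrative only.

/*
 * Userspace sketch (NOT mlx5 code) of the scheme described in the commit
 * message: the timeout handler only queues work, and the work re-checks
 * the "opened" state under the state lock before doing recovery.
 * All names (fake_priv, tx_timeout_pending, ...) are hypothetical.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_priv {
        pthread_mutex_t state_lock;   /* models priv->state_lock          */
        bool opened;                  /* models the MLX5E_STATE_OPENED bit */
        bool tx_timeout_pending;      /* models the queued work item       */
};

/* Runs in "workqueue" context: every check happens under the state lock. */
static void tx_timeout_work(struct fake_priv *priv)
{
        pthread_mutex_lock(&priv->state_lock);
        if (!priv->opened) {
                /* Channels were closed in parallel (ifdown); nothing to do. */
                pthread_mutex_unlock(&priv->state_lock);
                return;
        }
        /* Safe to inspect queues and reopen channels here. */
        printf("recovering channels under state_lock\n");
        pthread_mutex_unlock(&priv->state_lock);
}

/* Models ndo_tx_timeout: do no inspection here, just defer the work. */
static void tx_timeout(struct fake_priv *priv)
{
        fprintf(stderr, "TX timeout detected\n");
        priv->tx_timeout_pending = true;   /* stands in for queue_work() */
}

int main(void)
{
        static struct fake_priv priv = {
                .state_lock = PTHREAD_MUTEX_INITIALIZER,
                .opened = true,
        };

        tx_timeout(&priv);                 /* timeout callback fires      */
        if (priv.tx_timeout_pending)       /* "workqueue" runs the item   */
                tx_timeout_work(&priv);
        return 0;
}

Built with "cc -pthread sketch.c", the recovery message is printed only while opened is true, mirroring the MLX5E_STATE_OPENED check that the real work item now performs under the state lock instead of in the timeout callback.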