aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net
diff options
context:
space:
mode:
author: Mohamad Haj Yahia <mohamad@mellanox.com> 2017-02-23 04:19:36 -0500
committer: Saeed Mahameed <saeedm@mellanox.com> 2017-05-23 09:23:31 -0400
commit: 73dd3a4839c1d27c36d4dcc92e1ff44225ecbeb7 (patch)
tree: 95b44e558e85cb07532eca3ae3f0644d9ae30f59 /drivers/net
parent: b57fe691961cc8f00541f9a435c70df45d41e514 (diff)
net/mlx5: Avoid using pending command interface slots
Currently, when a firmware command gets stuck or takes a long time to complete, the driver command times out, the command slot is freed, and the slot can then be reused for new commands. If the firmware receives a new command on the old, still-busy slot, its behavior is unpredictable and this could be harmful. To fix this, when the driver command times out we return failure but do not free the command slot; instead we wait for the firmware to explicitly respond to that command. Once all the entries are busy, we stop processing new firmware commands. Fixes: 9cba4ebcf374 ('net/mlx5: Fix potential deadlock in command mode change') Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com> Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Diffstat (limited to 'drivers/net')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c41
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c2
3 files changed, 38 insertions, 7 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 5bdaf3d545b2..10d282841f5b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -774,7 +774,7 @@ static void cb_timeout_handler(struct work_struct *work)
774 mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", 774 mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
775 mlx5_command_str(msg_to_opcode(ent->in)), 775 mlx5_command_str(msg_to_opcode(ent->in)),
776 msg_to_opcode(ent->in)); 776 msg_to_opcode(ent->in));
777 mlx5_cmd_comp_handler(dev, 1UL << ent->idx); 777 mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
778} 778}
779 779
780static void cmd_work_handler(struct work_struct *work) 780static void cmd_work_handler(struct work_struct *work)
@@ -804,6 +804,7 @@ static void cmd_work_handler(struct work_struct *work)
804 } 804 }
805 805
806 cmd->ent_arr[ent->idx] = ent; 806 cmd->ent_arr[ent->idx] = ent;
807 set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
807 lay = get_inst(cmd, ent->idx); 808 lay = get_inst(cmd, ent->idx);
808 ent->lay = lay; 809 ent->lay = lay;
809 memset(lay, 0, sizeof(*lay)); 810 memset(lay, 0, sizeof(*lay));
@@ -825,6 +826,20 @@ static void cmd_work_handler(struct work_struct *work)
825 if (ent->callback) 826 if (ent->callback)
826 schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); 827 schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
827 828
829 /* Skip sending command to fw if internal error */
830 if (pci_channel_offline(dev->pdev) ||
831 dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
832 u8 status = 0;
833 u32 drv_synd;
834
835 ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status);
836 MLX5_SET(mbox_out, ent->out, status, status);
837 MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);
838
839 mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
840 return;
841 }
842
828 /* ring doorbell after the descriptor is valid */ 843 /* ring doorbell after the descriptor is valid */
829 mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); 844 mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
830 wmb(); 845 wmb();
@@ -835,7 +850,7 @@ static void cmd_work_handler(struct work_struct *work)
835 poll_timeout(ent); 850 poll_timeout(ent);
836 /* make sure we read the descriptor after ownership is SW */ 851 /* make sure we read the descriptor after ownership is SW */
837 rmb(); 852 rmb();
838 mlx5_cmd_comp_handler(dev, 1UL << ent->idx); 853 mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT));
839 } 854 }
840} 855}
841 856
@@ -879,7 +894,7 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
879 wait_for_completion(&ent->done); 894 wait_for_completion(&ent->done);
880 } else if (!wait_for_completion_timeout(&ent->done, timeout)) { 895 } else if (!wait_for_completion_timeout(&ent->done, timeout)) {
881 ent->ret = -ETIMEDOUT; 896 ent->ret = -ETIMEDOUT;
882 mlx5_cmd_comp_handler(dev, 1UL << ent->idx); 897 mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
883 } 898 }
884 899
885 err = ent->ret; 900 err = ent->ret;
@@ -1375,7 +1390,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
1375 } 1390 }
1376} 1391}
1377 1392
1378void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) 1393void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
1379{ 1394{
1380 struct mlx5_cmd *cmd = &dev->cmd; 1395 struct mlx5_cmd *cmd = &dev->cmd;
1381 struct mlx5_cmd_work_ent *ent; 1396 struct mlx5_cmd_work_ent *ent;
@@ -1395,6 +1410,19 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
1395 struct semaphore *sem; 1410 struct semaphore *sem;
1396 1411
1397 ent = cmd->ent_arr[i]; 1412 ent = cmd->ent_arr[i];
1413
1414 /* if we already completed the command, ignore it */
1415 if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP,
1416 &ent->state)) {
1417 /* only real completion can free the cmd slot */
1418 if (!forced) {
1419 mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
1420 ent->idx);
1421 free_ent(cmd, ent->idx);
1422 }
1423 continue;
1424 }
1425
1398 if (ent->callback) 1426 if (ent->callback)
1399 cancel_delayed_work(&ent->cb_timeout_work); 1427 cancel_delayed_work(&ent->cb_timeout_work);
1400 if (ent->page_queue) 1428 if (ent->page_queue)
@@ -1417,7 +1445,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
1417 mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n", 1445 mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n",
1418 ent->ret, deliv_status_to_str(ent->status), ent->status); 1446 ent->ret, deliv_status_to_str(ent->status), ent->status);
1419 } 1447 }
1420 free_ent(cmd, ent->idx); 1448
1449 /* only real completion will free the entry slot */
1450 if (!forced)
1451 free_ent(cmd, ent->idx);
1421 1452
1422 if (ent->callback) { 1453 if (ent->callback) {
1423 ds = ent->ts2 - ent->ts1; 1454 ds = ent->ts2 - ent->ts1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index ea5d8d37a75c..33eae5ad2fb0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -422,7 +422,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
422 break; 422 break;
423 423
424 case MLX5_EVENT_TYPE_CMD: 424 case MLX5_EVENT_TYPE_CMD:
425 mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector)); 425 mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
426 break; 426 break;
427 427
428 case MLX5_EVENT_TYPE_PORT_CHANGE: 428 case MLX5_EVENT_TYPE_PORT_CHANGE:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index d0515391d33b..44f59b1d6f0f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -90,7 +90,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev)
90 spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); 90 spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
91 91
92 mlx5_core_dbg(dev, "vector 0x%llx\n", vector); 92 mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
93 mlx5_cmd_comp_handler(dev, vector); 93 mlx5_cmd_comp_handler(dev, vector, true);
94 return; 94 return;
95 95
96no_trig: 96no_trig: