author	David S. Miller <davem@davemloft.net>	2015-10-14 22:14:50 -0400
committer	David S. Miller <davem@davemloft.net>	2015-10-14 22:14:50 -0400
commit	f6bba8945e7cff591127be1f6992c3e466cfff4c (patch)
tree	f5dbb21663ed267c5c32606fa4a9ba9a328bed3f
parent	f985c65c908f6b26c30019a83dc5ea295f5fcf62 (diff)
parent	2b3ddf27f48c8061f0676c5a8796008099945280 (diff)
Merge branch 'mlx-next'
Or Gerlitz says:

====================
Mellanox driver update, Oct 14 2015

This series contains two more patches from Eli, a patch from Majd adding
support for PCI error handlers, and a fix from Jack for mlx4 VFs that are
probed without a provisioned MAC address.

The patch set is applied on top of net-next commit bbb300e "Merge branch
'bridge-vlan'".

Changes from V0:
- made the health flag int --> bool to address comment from Dave on patch #1
- fixed sparse warning noted by the 0-day build tests in patch #2
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/en_netdev.c	23
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/fw.c	16
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/main.c	2
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/mlx4.h	2
-rw-r--r--	drivers/net/ethernet/mellanox/mlx5/core/cmd.c	170
-rw-r--r--	drivers/net/ethernet/mellanox/mlx5/core/health.c	123
-rw-r--r--	drivers/net/ethernet/mellanox/mlx5/core/main.c	209
-rw-r--r--	drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h	4
-rw-r--r--	drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c	13
-rw-r--r--	include/linux/mlx4/device.h	1
-rw-r--r--	include/linux/mlx5/device.h	3
-rw-r--r--	include/linux/mlx5/driver.h	28
12 files changed, 562 insertions(+), 32 deletions(-)
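
The centerpiece of the mlx5 changes below is support for the standard PCI error-handling callbacks, registered on the driver through its .err_handler field. As orientation only, here is a minimal, hypothetical sketch of such a callback set; the sketch_* names are placeholders, and the real callbacks added in main.c below (mlx5_pci_err_detected(), mlx5_pci_slot_reset(), mlx5_pci_resume()) additionally move the device into an internal-error state, unload it, and reload it once the firmware shows vital signs again.

#include <linux/pci.h>

/* Sketch only: stub bodies, not the driver's implementation. */
static pci_ers_result_t sketch_err_detected(struct pci_dev *pdev,
					    pci_channel_state_t state)
{
	/* a permanently failed channel cannot be recovered */
	return state == pci_channel_io_perm_failure ?
	       PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
}

static pci_ers_result_t sketch_slot_reset(struct pci_dev *pdev)
{
	/* re-enable DMA and restore config space after the slot reset */
	pci_set_master(pdev);
	pci_restore_state(pdev);
	return PCI_ERS_RESULT_RECOVERED;
}

static void sketch_resume(struct pci_dev *pdev)
{
	/* normal operation may resume from here */
}

static const struct pci_error_handlers sketch_err_handler = {
	.error_detected	= sketch_err_detected,
	.slot_reset	= sketch_slot_reset,
	.resume		= sketch_resume,
};

A pci_driver that points .err_handler at such a structure opts in to the PCI error-recovery flow, which is what the last main.c hunk below does for mlx5_core_driver.
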
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 597d8923c8e1..886e1bc86374 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2816,7 +2816,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	struct mlx4_en_priv *priv;
 	int i;
 	int err;
-	u64 mac_u64;
 
 	dev = alloc_etherdev_mqs(sizeof(struct mlx4_en_priv),
 				 MAX_TX_RINGS, MAX_RX_RINGS);
@@ -2908,17 +2907,17 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	dev->addr_len = ETH_ALEN;
 	mlx4_en_u64_to_mac(dev->dev_addr, mdev->dev->caps.def_mac[priv->port]);
 	if (!is_valid_ether_addr(dev->dev_addr)) {
-		if (mlx4_is_slave(priv->mdev->dev)) {
-			eth_hw_addr_random(dev);
-			en_warn(priv, "Assigned random MAC address %pM\n", dev->dev_addr);
-			mac_u64 = mlx4_mac_to_u64(dev->dev_addr);
-			mdev->dev->caps.def_mac[priv->port] = mac_u64;
-		} else {
-			en_err(priv, "Port: %d, invalid mac burned: %pM, quiting\n",
-			       priv->port, dev->dev_addr);
-			err = -EINVAL;
-			goto out;
-		}
+		en_err(priv, "Port: %d, invalid mac burned: %pM, quiting\n",
+		       priv->port, dev->dev_addr);
+		err = -EINVAL;
+		goto out;
+	} else if (mlx4_is_slave(priv->mdev->dev) &&
+		   (priv->mdev->dev->port_random_macs & 1 << priv->port)) {
+		/* Random MAC was assigned in mlx4_slave_cap
+		 * in mlx4_core module
+		 */
+		dev->addr_assign_type |= NET_ADDR_RANDOM;
+		en_warn(priv, "Assigned random MAC address %pM\n", dev->dev_addr);
 	}
 
 	memcpy(priv->current_mac, dev->dev_addr, sizeof(priv->current_mac));
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index e8ec1dec5789..f13a4d7bbf95 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -2840,3 +2840,19 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val)
 	return -EOPNOTSUPP;
 }
 EXPORT_SYMBOL(set_phv_bit);
+
+void mlx4_replace_zero_macs(struct mlx4_dev *dev)
+{
+	int i;
+	u8 mac_addr[ETH_ALEN];
+
+	dev->port_random_macs = 0;
+	for (i = 1; i <= dev->caps.num_ports; ++i)
+		if (!dev->caps.def_mac[i] &&
+		    dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) {
+			eth_random_addr(mac_addr);
+			dev->port_random_macs |= 1 << i;
+			dev->caps.def_mac[i] = mlx4_mac_to_u64(mac_addr);
+		}
+}
+EXPORT_SYMBOL_GPL(mlx4_replace_zero_macs);
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 006757f80988..bcbdfab1fe19 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -863,6 +863,8 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 		return -ENODEV;
 	}
 
+	mlx4_replace_zero_macs(dev);
+
 	dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
 	dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
 	dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 232b2b55f23b..e1cf9036af22 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1378,6 +1378,8 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work);
 
 void mlx4_init_quotas(struct mlx4_dev *dev);
 
+/* for VFs, replace zero MACs with randomly-generated MACs at driver start */
+void mlx4_replace_zero_macs(struct mlx4_dev *dev);
 int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port);
 /* Returns the VF index of slave */
 int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index c3e54b7e8780..fabfc9e0a948 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -256,8 +256,154 @@ static void dump_buf(void *buf, int size, int data_only, int offset)
 
 enum {
 	MLX5_DRIVER_STATUS_ABORTED = 0xfe,
+	MLX5_DRIVER_SYND = 0xbadd00de,
 };
 
+static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
+				       u32 *synd, u8 *status)
+{
+	*synd = 0;
+	*status = 0;
+
+	switch (op) {
+	case MLX5_CMD_OP_TEARDOWN_HCA:
+	case MLX5_CMD_OP_DISABLE_HCA:
+	case MLX5_CMD_OP_MANAGE_PAGES:
+	case MLX5_CMD_OP_DESTROY_MKEY:
+	case MLX5_CMD_OP_DESTROY_EQ:
+	case MLX5_CMD_OP_DESTROY_CQ:
+	case MLX5_CMD_OP_DESTROY_QP:
+	case MLX5_CMD_OP_DESTROY_PSV:
+	case MLX5_CMD_OP_DESTROY_SRQ:
+	case MLX5_CMD_OP_DESTROY_XRC_SRQ:
+	case MLX5_CMD_OP_DESTROY_DCT:
+	case MLX5_CMD_OP_DEALLOC_Q_COUNTER:
+	case MLX5_CMD_OP_DEALLOC_PD:
+	case MLX5_CMD_OP_DEALLOC_UAR:
+	case MLX5_CMD_OP_DETTACH_FROM_MCG:
+	case MLX5_CMD_OP_DEALLOC_XRCD:
+	case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN:
+	case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT:
+	case MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY:
+	case MLX5_CMD_OP_DESTROY_TIR:
+	case MLX5_CMD_OP_DESTROY_SQ:
+	case MLX5_CMD_OP_DESTROY_RQ:
+	case MLX5_CMD_OP_DESTROY_RMP:
+	case MLX5_CMD_OP_DESTROY_TIS:
+	case MLX5_CMD_OP_DESTROY_RQT:
+	case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
+	case MLX5_CMD_OP_DESTROY_FLOW_GROUP:
+	case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY:
+		return MLX5_CMD_STAT_OK;
+
+	case MLX5_CMD_OP_QUERY_HCA_CAP:
+	case MLX5_CMD_OP_QUERY_ADAPTER:
+	case MLX5_CMD_OP_INIT_HCA:
+	case MLX5_CMD_OP_ENABLE_HCA:
+	case MLX5_CMD_OP_QUERY_PAGES:
+	case MLX5_CMD_OP_SET_HCA_CAP:
+	case MLX5_CMD_OP_QUERY_ISSI:
+	case MLX5_CMD_OP_SET_ISSI:
+	case MLX5_CMD_OP_CREATE_MKEY:
+	case MLX5_CMD_OP_QUERY_MKEY:
+	case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
+	case MLX5_CMD_OP_PAGE_FAULT_RESUME:
+	case MLX5_CMD_OP_CREATE_EQ:
+	case MLX5_CMD_OP_QUERY_EQ:
+	case MLX5_CMD_OP_GEN_EQE:
+	case MLX5_CMD_OP_CREATE_CQ:
+	case MLX5_CMD_OP_QUERY_CQ:
+	case MLX5_CMD_OP_MODIFY_CQ:
+	case MLX5_CMD_OP_CREATE_QP:
+	case MLX5_CMD_OP_RST2INIT_QP:
+	case MLX5_CMD_OP_INIT2RTR_QP:
+	case MLX5_CMD_OP_RTR2RTS_QP:
+	case MLX5_CMD_OP_RTS2RTS_QP:
+	case MLX5_CMD_OP_SQERR2RTS_QP:
+	case MLX5_CMD_OP_2ERR_QP:
+	case MLX5_CMD_OP_2RST_QP:
+	case MLX5_CMD_OP_QUERY_QP:
+	case MLX5_CMD_OP_SQD_RTS_QP:
+	case MLX5_CMD_OP_INIT2INIT_QP:
+	case MLX5_CMD_OP_CREATE_PSV:
+	case MLX5_CMD_OP_CREATE_SRQ:
+	case MLX5_CMD_OP_QUERY_SRQ:
+	case MLX5_CMD_OP_ARM_RQ:
+	case MLX5_CMD_OP_CREATE_XRC_SRQ:
+	case MLX5_CMD_OP_QUERY_XRC_SRQ:
+	case MLX5_CMD_OP_ARM_XRC_SRQ:
+	case MLX5_CMD_OP_CREATE_DCT:
+	case MLX5_CMD_OP_DRAIN_DCT:
+	case MLX5_CMD_OP_QUERY_DCT:
+	case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+	case MLX5_CMD_OP_QUERY_VPORT_STATE:
+	case MLX5_CMD_OP_MODIFY_VPORT_STATE:
+	case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
+	case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT:
+	case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
+	case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
+	case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
+	case MLX5_CMD_OP_SET_ROCE_ADDRESS:
+	case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+	case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT:
+	case MLX5_CMD_OP_QUERY_HCA_VPORT_GID:
+	case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY:
+	case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
+	case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+	case MLX5_CMD_OP_QUERY_Q_COUNTER:
+	case MLX5_CMD_OP_ALLOC_PD:
+	case MLX5_CMD_OP_ALLOC_UAR:
+	case MLX5_CMD_OP_CONFIG_INT_MODERATION:
+	case MLX5_CMD_OP_ACCESS_REG:
+	case MLX5_CMD_OP_ATTACH_TO_MCG:
+	case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
+	case MLX5_CMD_OP_MAD_IFC:
+	case MLX5_CMD_OP_QUERY_MAD_DEMUX:
+	case MLX5_CMD_OP_SET_MAD_DEMUX:
+	case MLX5_CMD_OP_NOP:
+	case MLX5_CMD_OP_ALLOC_XRCD:
+	case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+	case MLX5_CMD_OP_QUERY_CONG_STATUS:
+	case MLX5_CMD_OP_MODIFY_CONG_STATUS:
+	case MLX5_CMD_OP_QUERY_CONG_PARAMS:
+	case MLX5_CMD_OP_MODIFY_CONG_PARAMS:
+	case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+	case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+	case MLX5_CMD_OP_CREATE_TIR:
+	case MLX5_CMD_OP_MODIFY_TIR:
+	case MLX5_CMD_OP_QUERY_TIR:
+	case MLX5_CMD_OP_CREATE_SQ:
+	case MLX5_CMD_OP_MODIFY_SQ:
+	case MLX5_CMD_OP_QUERY_SQ:
+	case MLX5_CMD_OP_CREATE_RQ:
+	case MLX5_CMD_OP_MODIFY_RQ:
+	case MLX5_CMD_OP_QUERY_RQ:
+	case MLX5_CMD_OP_CREATE_RMP:
+	case MLX5_CMD_OP_MODIFY_RMP:
+	case MLX5_CMD_OP_QUERY_RMP:
+	case MLX5_CMD_OP_CREATE_TIS:
+	case MLX5_CMD_OP_MODIFY_TIS:
+	case MLX5_CMD_OP_QUERY_TIS:
+	case MLX5_CMD_OP_CREATE_RQT:
+	case MLX5_CMD_OP_MODIFY_RQT:
+	case MLX5_CMD_OP_QUERY_RQT:
+	case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+	case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+		*status = MLX5_DRIVER_STATUS_ABORTED;
+		*synd = MLX5_DRIVER_SYND;
+		return -EIO;
+	default:
+		mlx5_core_err(dev, "Unknown FW command (%d)\n", op);
+		return -EINVAL;
+	}
+}
+
 const char *mlx5_command_str(int command)
 {
 	switch (command) {
@@ -592,6 +738,16 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 	return err;
 }
 
+static __be32 *get_synd_ptr(struct mlx5_outbox_hdr *out)
+{
+	return &out->syndrome;
+}
+
+static u8 *get_status_ptr(struct mlx5_outbox_hdr *out)
+{
+	return &out->status;
+}
+
 /* Notes:
  * 1. Callback functions may not sleep
  * 2. page queue commands do not support asynchrous completion
@@ -1200,6 +1356,11 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
 	return msg;
 }
 
+static u16 opcode_from_in(struct mlx5_inbox_hdr *in)
+{
+	return be16_to_cpu(in->opcode);
+}
+
 static int is_manage_pages(struct mlx5_inbox_hdr *in)
 {
 	return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES;
@@ -1214,6 +1375,15 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 	gfp_t gfp;
 	int err;
 	u8 status = 0;
+	u32 drv_synd;
+
+	if (pci_channel_offline(dev->pdev) ||
+	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		err = mlx5_internal_err_ret_value(dev, opcode_from_in(in), &drv_synd, &status);
+		*get_synd_ptr(out) = cpu_to_be32(drv_synd);
+		*get_status_ptr(out) = status;
+		return err;
+	}
 
 	pages_queue = is_manage_pages(in);
 	gfp = callback ? GFP_ATOMIC : GFP_KERNEL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 9b81e1ceb8de..f5deb642d0d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -34,6 +34,7 @@
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/vmalloc.h>
+#include <linux/hardirq.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
@@ -57,6 +58,91 @@ enum {
 	MLX5_HEALTH_SYNDR_HIGH_TEMP = 0x10
 };
 
+enum {
+	MLX5_NIC_IFC_FULL		= 0,
+	MLX5_NIC_IFC_DISABLED		= 1,
+	MLX5_NIC_IFC_NO_DRAM_NIC	= 2
+};
+
+static u8 get_nic_interface(struct mlx5_core_dev *dev)
+{
+	return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3;
+}
+
+static void trigger_cmd_completions(struct mlx5_core_dev *dev)
+{
+	unsigned long flags;
+	u64 vector;
+
+	/* wait for pending handlers to complete */
+	synchronize_irq(dev->priv.msix_arr[MLX5_EQ_VEC_CMD].vector);
+	spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
+	vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
+	if (!vector)
+		goto no_trig;
+
+	vector |= MLX5_TRIGGERED_CMD_COMP;
+	spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+
+	mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
+	mlx5_cmd_comp_handler(dev, vector);
+	return;
+
+no_trig:
+	spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+}
+
+static int in_fatal(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+	struct health_buffer __iomem *h = health->health;
+
+	if (get_nic_interface(dev) == MLX5_NIC_IFC_DISABLED)
+		return 1;
+
+	if (ioread32be(&h->fw_ver) == 0xffffffff)
+		return 1;
+
+	return 0;
+}
+
+void mlx5_enter_error_state(struct mlx5_core_dev *dev)
+{
+	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+		return;
+
+	mlx5_core_err(dev, "start\n");
+	if (pci_channel_offline(dev->pdev) || in_fatal(dev))
+		dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+
+	mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0);
+	mlx5_core_err(dev, "end\n");
+}
+
+static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
+{
+	u8 nic_interface = get_nic_interface(dev);
+
+	switch (nic_interface) {
+	case MLX5_NIC_IFC_FULL:
+		mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n");
+		break;
+
+	case MLX5_NIC_IFC_DISABLED:
+		mlx5_core_warn(dev, "starting teardown\n");
+		break;
+
+	case MLX5_NIC_IFC_NO_DRAM_NIC:
+		mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
+		break;
+	default:
+		mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n",
+			       nic_interface);
+	}
+
+	mlx5_disable_device(dev);
+}
+
 static void health_care(struct work_struct *work)
 {
 	struct mlx5_core_health *health;
@@ -67,6 +153,7 @@ static void health_care(struct work_struct *work)
 	priv = container_of(health, struct mlx5_priv, health);
 	dev = container_of(priv, struct mlx5_core_dev, priv);
 	mlx5_core_warn(dev, "handling bad device here\n");
+	mlx5_handle_bad_state(dev);
 }
 
 static const char *hsynd_str(u8 synd)
@@ -122,6 +209,10 @@ static void print_health_info(struct mlx5_core_dev *dev)
 	u32 fw;
 	int i;
 
+	/* If the syndrom is 0, the device is OK and no need to print buffer */
+	if (!ioread8(&h->synd))
+		return;
+
 	for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
 		dev_err(&dev->pdev->dev, "assert_var[%d] 0x%08x\n", i, ioread32be(h->assert_var + i));
 
@@ -136,13 +227,29 @@ static void print_health_info(struct mlx5_core_dev *dev)
 	dev_err(&dev->pdev->dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
 }
 
+static unsigned long get_next_poll_jiffies(void)
+{
+	unsigned long next;
+
+	get_random_bytes(&next, sizeof(next));
+	next %= HZ;
+	next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
+
+	return next;
+}
+
 static void poll_health(unsigned long data)
 {
 	struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data;
 	struct mlx5_core_health *health = &dev->priv.health;
-	unsigned long next;
 	u32 count;
 
+	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		trigger_cmd_completions(dev);
+		mod_timer(&health->timer, get_next_poll_jiffies());
+		return;
+	}
+
 	count = ioread32be(health->health_counter);
 	if (count == health->prev)
 		++health->miss_counter;
@@ -151,14 +258,16 @@ static void poll_health(unsigned long data)
 
 	health->prev = count;
 	if (health->miss_counter == MAX_MISSES) {
-		mlx5_core_err(dev, "device's health compromised\n");
+		dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n");
 		print_health_info(dev);
-		queue_work(health->wq, &health->work);
 	} else {
-		get_random_bytes(&next, sizeof(next));
-		next %= HZ;
-		next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
-		mod_timer(&health->timer, next);
+		mod_timer(&health->timer, get_next_poll_jiffies());
+	}
+
+	if (in_fatal(dev) && !health->sick) {
+		health->sick = true;
+		print_health_info(dev);
+		queue_work(health->wq, &health->work);
 	}
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index b6edc58766ad..2388aec208fa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -39,12 +39,14 @@
 #include <linux/slab.h>
 #include <linux/io-mapping.h>
 #include <linux/interrupt.h>
+#include <linux/delay.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/srq.h>
 #include <linux/debugfs.h>
 #include <linux/kmod.h>
+#include <linux/delay.h>
 #include <linux/mlx5/mlx5_ifc.h>
 #include "mlx5_core.h"
 
@@ -151,6 +153,25 @@ static struct mlx5_profile profile[] = {
 	},
 };
 
+#define FW_INIT_TIMEOUT_MILI	2000
+#define FW_INIT_WAIT_MS		2
+
+static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
+{
+	unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
+	int err = 0;
+
+	while (fw_initializing(dev)) {
+		if (time_after(jiffies, end)) {
+			err = -EBUSY;
+			break;
+		}
+		msleep(FW_INIT_WAIT_MS);
+	}
+
+	return err;
+}
+
 static int set_dma_caps(struct pci_dev *pdev)
 {
 	int err;
@@ -181,6 +202,34 @@ static int set_dma_caps(struct pci_dev *pdev)
 	return err;
 }
 
+static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
+{
+	struct pci_dev *pdev = dev->pdev;
+	int err = 0;
+
+	mutex_lock(&dev->pci_status_mutex);
+	if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
+		err = pci_enable_device(pdev);
+		if (!err)
+			dev->pci_status = MLX5_PCI_STATUS_ENABLED;
+	}
+	mutex_unlock(&dev->pci_status_mutex);
+
+	return err;
+}
+
+static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
+{
+	struct pci_dev *pdev = dev->pdev;
+
+	mutex_lock(&dev->pci_status_mutex);
+	if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
+		pci_disable_device(pdev);
+		dev->pci_status = MLX5_PCI_STATUS_DISABLED;
+	}
+	mutex_unlock(&dev->pci_status_mutex);
+}
+
 static int request_bar(struct pci_dev *pdev)
 {
 	int err = 0;
@@ -807,7 +856,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 	if (!priv->dbg_root)
 		return -ENOMEM;
 
-	err = pci_enable_device(pdev);
+	err = mlx5_pci_enable_device(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
 		goto err_dbg;
@@ -841,7 +890,7 @@ err_clr_master:
 	pci_clear_master(dev->pdev);
 	release_bar(dev->pdev);
 err_disable:
-	pci_disable_device(dev->pdev);
+	mlx5_pci_disable_device(dev);
 
 err_dbg:
 	debugfs_remove(priv->dbg_root);
@@ -853,7 +902,7 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 	iounmap(dev->iseg);
 	pci_clear_master(dev->pdev);
 	release_bar(dev->pdev);
-	pci_disable_device(dev->pdev);
+	mlx5_pci_disable_device(dev);
 	debugfs_remove(priv->dbg_root);
 }
 
@@ -863,13 +912,32 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 	struct pci_dev *pdev = dev->pdev;
 	int err;
 
+	mutex_lock(&dev->intf_state_mutex);
+	if (dev->interface_state == MLX5_INTERFACE_STATE_UP) {
+		dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n",
+			 __func__);
+		goto out;
+	}
+
 	dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
 		 fw_rev_min(dev), fw_rev_sub(dev));
 
+	/* on load removing any previous indication of internal error, device is
+	 * up
+	 */
+	dev->state = MLX5_DEVICE_STATE_UP;
+
 	err = mlx5_cmd_init(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
-		return err;
+		goto out_err;
+	}
+
+	err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
+	if (err) {
+		dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n",
+			FW_INIT_TIMEOUT_MILI);
+		goto out_err;
 	}
 
 	mlx5_pagealloc_init(dev);
@@ -994,6 +1062,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 	if (err)
 		pr_info("failed request module on %s\n", MLX5_IB_MOD);
 
+	dev->interface_state = MLX5_INTERFACE_STATE_UP;
+out:
+	mutex_unlock(&dev->intf_state_mutex);
+
 	return 0;
 
 err_reg_dev:
@@ -1024,7 +1096,7 @@ err_stop_poll:
 	mlx5_stop_health_poll(dev);
 	if (mlx5_cmd_teardown_hca(dev)) {
 		dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
-		return err;
+		goto out_err;
 	}
 
 err_pagealloc_stop:
@@ -1040,13 +1112,23 @@ err_pagealloc_cleanup:
 	mlx5_pagealloc_cleanup(dev);
 	mlx5_cmd_cleanup(dev);
 
+out_err:
+	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+	mutex_unlock(&dev->intf_state_mutex);
+
 	return err;
 }
 
 static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 {
-	int err;
+	int err = 0;
 
+	mutex_lock(&dev->intf_state_mutex);
+	if (dev->interface_state == MLX5_INTERFACE_STATE_DOWN) {
+		dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
+			 __func__);
+		goto out;
+	}
 	mlx5_unregister_device(dev);
 	mlx5_cleanup_mr_table(dev);
 	mlx5_cleanup_srq_table(dev);
@@ -1072,10 +1154,12 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 	mlx5_cmd_cleanup(dev);
 
 out:
+	dev->interface_state = MLX5_INTERFACE_STATE_DOWN;
+	mutex_unlock(&dev->intf_state_mutex);
 	return err;
 }
 
-static void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
-			    unsigned long param)
+void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
+		     unsigned long param)
 {
 	struct mlx5_priv *priv = &dev->priv;
@@ -1125,6 +1209,8 @@ static int init_one(struct pci_dev *pdev,
 
 	INIT_LIST_HEAD(&priv->ctx_list);
 	spin_lock_init(&priv->ctx_lock);
+	mutex_init(&dev->pci_status_mutex);
+	mutex_init(&dev->intf_state_mutex);
 	err = mlx5_pci_init(dev, priv);
 	if (err) {
 		dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
@@ -1172,6 +1258,112 @@ static void remove_one(struct pci_dev *pdev)
 	kfree(dev);
 }
 
+static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
+					      pci_channel_state_t state)
+{
+	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+	struct mlx5_priv *priv = &dev->priv;
+
+	dev_info(&pdev->dev, "%s was called\n", __func__);
+	mlx5_enter_error_state(dev);
+	mlx5_unload_one(dev, priv);
+	mlx5_pci_disable_device(dev);
+	return state == pci_channel_io_perm_failure ?
+		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
+{
+	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+	int err = 0;
+
+	dev_info(&pdev->dev, "%s was called\n", __func__);
+
+	err = mlx5_pci_enable_device(dev);
+	if (err) {
+		dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
+			, __func__, err);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+	pci_set_master(pdev);
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+
+	return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
+}
+
+void mlx5_disable_device(struct mlx5_core_dev *dev)
+{
+	mlx5_pci_err_detected(dev->pdev, 0);
+}
+
+/* wait for the device to show vital signs. For now we check
+ * that we can read the device ID and that the health buffer
+ * shows a non zero value which is different than 0xffffffff
+ */
+static void wait_vital(struct pci_dev *pdev)
+{
+	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+	struct mlx5_core_health *health = &dev->priv.health;
+	const int niter = 100;
+	u32 count;
+	u16 did;
+	int i;
+
+	/* Wait for firmware to be ready after reset */
+	msleep(1000);
+	for (i = 0; i < niter; i++) {
+		if (pci_read_config_word(pdev, 2, &did)) {
+			dev_warn(&pdev->dev, "failed reading config word\n");
+			break;
+		}
+		if (did == pdev->device) {
+			dev_info(&pdev->dev, "device ID correctly read after %d iterations\n", i);
+			break;
+		}
+		msleep(50);
+	}
+	if (i == niter)
+		dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);
+
+	for (i = 0; i < niter; i++) {
+		count = ioread32be(health->health_counter);
+		if (count && count != 0xffffffff) {
+			dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
+			break;
+		}
+		msleep(50);
+	}
+
+	if (i == niter)
+		dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);
+}
+
+static void mlx5_pci_resume(struct pci_dev *pdev)
+{
+	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+	struct mlx5_priv *priv = &dev->priv;
+	int err;
+
+	dev_info(&pdev->dev, "%s was called\n", __func__);
+
+	pci_save_state(pdev);
+	wait_vital(pdev);
+
+	err = mlx5_load_one(dev, priv);
+	if (err)
+		dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
+			, __func__, err);
+	else
+		dev_info(&pdev->dev, "%s: device recovered\n", __func__);
+}
+
+static const struct pci_error_handlers mlx5_err_handler = {
+	.error_detected = mlx5_pci_err_detected,
+	.slot_reset	= mlx5_pci_slot_reset,
+	.resume		= mlx5_pci_resume
+};
+
 static const struct pci_device_id mlx5_core_pci_table[] = {
 	{ PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */
 	{ PCI_VDEVICE(MELLANOX, 0x1012) }, /* Connect-IB VF */
@@ -1188,7 +1380,8 @@ static struct pci_driver mlx5_core_driver = {
 	.name		= DRIVER_NAME,
 	.id_table	= mlx5_core_pci_table,
 	.probe		= init_one,
-	.remove		= remove_one
+	.remove		= remove_one,
+	.err_handler	= &mlx5_err_handler
 };
 
 static int __init init(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 30c0be721b08..cee5b7a839bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -86,6 +86,10 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
 int mlx5_query_board_id(struct mlx5_core_dev *dev);
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
 int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
+void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
+		     unsigned long param);
+void mlx5_enter_error_state(struct mlx5_core_dev *dev);
+void mlx5_disable_device(struct mlx5_core_dev *dev);
 
 void mlx5e_init(void);
 void mlx5e_cleanup(void);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 76432a510ac2..1cda5d268ec9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -493,15 +493,20 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
 	struct fw_page *fwp;
 	struct rb_node *p;
 	int nclaimed = 0;
-	int err;
+	int err = 0;
 
 	do {
 		p = rb_first(&dev->priv.page_root);
 		if (p) {
 			fwp = rb_entry(p, struct fw_page, rb_node);
-			err = reclaim_pages(dev, fwp->func_id,
-					    optimal_reclaimed_pages(),
-					    &nclaimed);
+			if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+				free_4k(dev, fwp->addr);
+				nclaimed = 1;
+			} else {
+				err = reclaim_pages(dev, fwp->func_id,
+						    optimal_reclaimed_pages(),
+						    &nclaimed);
+			}
 			if (err) {
 				mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
 					       err);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index baad4cb8e9b0..5a8677bafe04 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -833,6 +833,7 @@ struct mlx4_dev {
 	struct mlx4_quotas	quotas;
 	struct radix_tree_root	qp_table_tree;
 	u8			rev_id;
+	u8			port_random_macs;
 	char			board_id[MLX4_BOARD_ID_LEN];
 	int			numa_node;
 	int			oper_log_mgm_entry_size;
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 2a0b95662548..0b473cbfa7ef 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -439,7 +439,8 @@ struct mlx5_init_seg {
 	__be32			cmdq_addr_h;
 	__be32			cmdq_addr_l_sz;
 	__be32			cmd_dbell;
-	__be32			rsvd1[121];
+	__be32			rsvd1[120];
+	__be32			initializing;
 	struct health_buffer	health;
 	__be32			rsvd2[884];
 	__be32			health_counter;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 41a32873f608..5c857f2a20d7 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -393,6 +393,7 @@ struct mlx5_core_health {
 	struct timer_list		timer;
 	u32				prev;
 	int				miss_counter;
+	bool				sick;
 	struct workqueue_struct	       *wq;
 	struct work_struct		work;
 };
@@ -486,8 +487,26 @@ struct mlx5_priv {
 	spinlock_t		ctx_lock;
 };
 
+enum mlx5_device_state {
+	MLX5_DEVICE_STATE_UP,
+	MLX5_DEVICE_STATE_INTERNAL_ERROR,
+};
+
+enum mlx5_interface_state {
+	MLX5_INTERFACE_STATE_DOWN,
+	MLX5_INTERFACE_STATE_UP,
+};
+
+enum mlx5_pci_status {
+	MLX5_PCI_STATUS_DISABLED,
+	MLX5_PCI_STATUS_ENABLED,
+};
+
 struct mlx5_core_dev {
 	struct pci_dev	       *pdev;
+	/* sync pci state */
+	struct mutex		pci_status_mutex;
+	enum mlx5_pci_status	pci_status;
 	u8			rev_id;
 	char			board_id[MLX5_BOARD_ID_LEN];
 	struct mlx5_cmd		cmd;
@@ -496,6 +515,10 @@ struct mlx5_core_dev {
 	u32 hca_caps_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
 	phys_addr_t		iseg_base;
 	struct mlx5_init_seg __iomem *iseg;
+	enum mlx5_device_state	state;
+	/* sync interface state */
+	struct mutex		intf_state_mutex;
+	enum mlx5_interface_state interface_state;
 	void			(*event) (struct mlx5_core_dev *dev,
 					  enum mlx5_dev_event event,
 					  unsigned long param);
@@ -803,6 +826,11 @@ void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common);
 int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
 			struct mlx5_odp_caps *odp_caps);
 
+static inline int fw_initializing(struct mlx5_core_dev *dev)
+{
+	return ioread32be(&dev->iseg->initializing) >> 31;
+}
+
 static inline u32 mlx5_mkey_to_idx(u32 mkey)
 {
 	return mkey >> 8;