author     David S. Miller <davem@davemloft.net>  2015-10-14 22:14:50 -0400
committer  David S. Miller <davem@davemloft.net>  2015-10-14 22:14:50 -0400
commit     f6bba8945e7cff591127be1f6992c3e466cfff4c (patch)
tree       f5dbb21663ed267c5c32606fa4a9ba9a328bed3f
parent     f985c65c908f6b26c30019a83dc5ea295f5fcf62 (diff)
parent     2b3ddf27f48c8061f0676c5a8796008099945280 (diff)
Merge branch 'mlx-next'
Or Gerlitz says:
====================
Mellanox driver update, Oct 14 2015
This series contains two more patches from Eli, a patch from Majd
adding support for PCI error handlers, and a fix from Jack for mlx4 VFs
that are probed without a provisioned MAC address.
The patch set applies on top of net-next commit bbb300e ("Merge branch 'bridge-vlan'").
Changes from V0:
- changed the health flag from int to bool to address Dave's comment on patch #1
- fixed a sparse warning reported by the 0-day build tests in patch #2
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c      |  23
 drivers/net/ethernet/mellanox/mlx4/fw.c             |  16
 drivers/net/ethernet/mellanox/mlx4/main.c           |   2
 drivers/net/ethernet/mellanox/mlx4/mlx4.h           |   2
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c       | 170
 drivers/net/ethernet/mellanox/mlx5/core/health.c    | 123
 drivers/net/ethernet/mellanox/mlx5/core/main.c      | 209
 drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h |   4
 drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c |  13
 include/linux/mlx4/device.h                         |   1
 include/linux/mlx5/device.h                         |   3
 include/linux/mlx5/driver.h                         |  28
 12 files changed, 562 insertions(+), 32 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 597d8923c8e1..886e1bc86374 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2816,7 +2816,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
         struct mlx4_en_priv *priv;
         int i;
         int err;
-        u64 mac_u64;
 
         dev = alloc_etherdev_mqs(sizeof(struct mlx4_en_priv),
                                  MAX_TX_RINGS, MAX_RX_RINGS);
@@ -2908,17 +2907,17 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
         dev->addr_len = ETH_ALEN;
         mlx4_en_u64_to_mac(dev->dev_addr, mdev->dev->caps.def_mac[priv->port]);
         if (!is_valid_ether_addr(dev->dev_addr)) {
-                if (mlx4_is_slave(priv->mdev->dev)) {
-                        eth_hw_addr_random(dev);
-                        en_warn(priv, "Assigned random MAC address %pM\n", dev->dev_addr);
-                        mac_u64 = mlx4_mac_to_u64(dev->dev_addr);
-                        mdev->dev->caps.def_mac[priv->port] = mac_u64;
-                } else {
-                        en_err(priv, "Port: %d, invalid mac burned: %pM, quiting\n",
-                               priv->port, dev->dev_addr);
-                        err = -EINVAL;
-                        goto out;
-                }
+                en_err(priv, "Port: %d, invalid mac burned: %pM, quiting\n",
+                       priv->port, dev->dev_addr);
+                err = -EINVAL;
+                goto out;
+        } else if (mlx4_is_slave(priv->mdev->dev) &&
+                   (priv->mdev->dev->port_random_macs & 1 << priv->port)) {
+                /* Random MAC was assigned in mlx4_slave_cap
+                 * in mlx4_core module
+                 */
+                dev->addr_assign_type |= NET_ADDR_RANDOM;
+                en_warn(priv, "Assigned random MAC address %pM\n", dev->dev_addr);
         }
 
         memcpy(priv->current_mac, dev->dev_addr, sizeof(priv->current_mac));
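
With the hunk above, a VF probed without a provisioned MAC no longer fails with -EINVAL; it keeps the random address generated by mlx4_core and marks it NET_ADDR_RANDOM. As a hedged illustration (not part of this series), the assignment type can be checked from user space via the sysfs addr_assign_type attribute; the interface name below is only an example:

/* check_mac_origin.c - print whether an interface's MAC was randomly assigned.
 * Reads /sys/class/net/<ifname>/addr_assign_type; 1 == NET_ADDR_RANDOM.
 * Build: cc -o check_mac_origin check_mac_origin.c
 */
#include <stdio.h>

int main(int argc, char **argv)
{
        const char *ifname = argc > 1 ? argv[1] : "eth0";  /* example name */
        char path[256];
        FILE *f;
        int type;

        snprintf(path, sizeof(path), "/sys/class/net/%s/addr_assign_type", ifname);
        f = fopen(path, "r");
        if (!f || fscanf(f, "%d", &type) != 1) {
                perror(path);
                return 1;
        }
        fclose(f);

        /* 0 = permanent (burned-in), 1 = random, 2 = stolen, 3 = set by user */
        printf("%s: addr_assign_type=%d (%s)\n", ifname, type,
               type == 1 ? "randomly generated" : "not random");
        return 0;
}

A value of 1 (NET_ADDR_RANDOM) indicates the VF came up through the random-MAC path introduced here.
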
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index e8ec1dec5789..f13a4d7bbf95 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -2840,3 +2840,19 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val)
         return -EOPNOTSUPP;
 }
 EXPORT_SYMBOL(set_phv_bit);
+
+void mlx4_replace_zero_macs(struct mlx4_dev *dev)
+{
+        int i;
+        u8 mac_addr[ETH_ALEN];
+
+        dev->port_random_macs = 0;
+        for (i = 1; i <= dev->caps.num_ports; ++i)
+                if (!dev->caps.def_mac[i] &&
+                    dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) {
+                        eth_random_addr(mac_addr);
+                        dev->port_random_macs |= 1 << i;
+                        dev->caps.def_mac[i] = mlx4_mac_to_u64(mac_addr);
+                }
+}
+EXPORT_SYMBOL_GPL(mlx4_replace_zero_macs);
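
mlx4_replace_zero_macs() relies on eth_random_addr(), which fills the address with random bytes, clears the multicast bit and sets the locally administered bit. The stand-alone sketch below mirrors that construction in user space purely for illustration; it is not code from this series:

/* random_laa_mac.c - build a random, locally administered unicast MAC,
 * mirroring what the kernel's eth_random_addr() does.
 * Build: cc -o random_laa_mac random_laa_mac.c
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int main(void)
{
        unsigned char mac[6];
        int i;

        srand((unsigned int)time(NULL));        /* crude seed; the kernel uses its own RNG */
        for (i = 0; i < 6; i++)
                mac[i] = (unsigned char)(rand() & 0xff);

        mac[0] &= 0xfe;         /* clear multicast bit -> unicast */
        mac[0] |= 0x02;         /* set locally administered bit */

        printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
               mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
        return 0;
}
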
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 006757f80988..bcbdfab1fe19 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -863,6 +863,8 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
                 return -ENODEV;
         }
 
+        mlx4_replace_zero_macs(dev);
+
         dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
         dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
         dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 232b2b55f23b..e1cf9036af22 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1378,6 +1378,8 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work);
 
 void mlx4_init_quotas(struct mlx4_dev *dev);
 
+/* for VFs, replace zero MACs with randomly-generated MACs at driver start */
+void mlx4_replace_zero_macs(struct mlx4_dev *dev);
 int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port);
 /* Returns the VF index of slave */
 int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index c3e54b7e8780..fabfc9e0a948 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -256,8 +256,154 @@ static void dump_buf(void *buf, int size, int data_only, int offset)
 
 enum {
         MLX5_DRIVER_STATUS_ABORTED = 0xfe,
+        MLX5_DRIVER_SYND = 0xbadd00de,
 };
 
+static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
+                                       u32 *synd, u8 *status)
+{
+        *synd = 0;
+        *status = 0;
+
+        switch (op) {
+        case MLX5_CMD_OP_TEARDOWN_HCA:
+        case MLX5_CMD_OP_DISABLE_HCA:
+        case MLX5_CMD_OP_MANAGE_PAGES:
+        case MLX5_CMD_OP_DESTROY_MKEY:
+        case MLX5_CMD_OP_DESTROY_EQ:
+        case MLX5_CMD_OP_DESTROY_CQ:
+        case MLX5_CMD_OP_DESTROY_QP:
+        case MLX5_CMD_OP_DESTROY_PSV:
+        case MLX5_CMD_OP_DESTROY_SRQ:
+        case MLX5_CMD_OP_DESTROY_XRC_SRQ:
+        case MLX5_CMD_OP_DESTROY_DCT:
+        case MLX5_CMD_OP_DEALLOC_Q_COUNTER:
+        case MLX5_CMD_OP_DEALLOC_PD:
+        case MLX5_CMD_OP_DEALLOC_UAR:
+        case MLX5_CMD_OP_DETTACH_FROM_MCG:
+        case MLX5_CMD_OP_DEALLOC_XRCD:
+        case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN:
+        case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT:
+        case MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY:
+        case MLX5_CMD_OP_DESTROY_TIR:
+        case MLX5_CMD_OP_DESTROY_SQ:
+        case MLX5_CMD_OP_DESTROY_RQ:
+        case MLX5_CMD_OP_DESTROY_RMP:
+        case MLX5_CMD_OP_DESTROY_TIS:
+        case MLX5_CMD_OP_DESTROY_RQT:
+        case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
+        case MLX5_CMD_OP_DESTROY_FLOW_GROUP:
+        case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY:
+                return MLX5_CMD_STAT_OK;
+
+        case MLX5_CMD_OP_QUERY_HCA_CAP:
+        case MLX5_CMD_OP_QUERY_ADAPTER:
+        case MLX5_CMD_OP_INIT_HCA:
+        case MLX5_CMD_OP_ENABLE_HCA:
+        case MLX5_CMD_OP_QUERY_PAGES:
+        case MLX5_CMD_OP_SET_HCA_CAP:
+        case MLX5_CMD_OP_QUERY_ISSI:
+        case MLX5_CMD_OP_SET_ISSI:
+        case MLX5_CMD_OP_CREATE_MKEY:
+        case MLX5_CMD_OP_QUERY_MKEY:
+        case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
+        case MLX5_CMD_OP_PAGE_FAULT_RESUME:
+        case MLX5_CMD_OP_CREATE_EQ:
+        case MLX5_CMD_OP_QUERY_EQ:
+        case MLX5_CMD_OP_GEN_EQE:
+        case MLX5_CMD_OP_CREATE_CQ:
+        case MLX5_CMD_OP_QUERY_CQ:
+        case MLX5_CMD_OP_MODIFY_CQ:
+        case MLX5_CMD_OP_CREATE_QP:
+        case MLX5_CMD_OP_RST2INIT_QP:
+        case MLX5_CMD_OP_INIT2RTR_QP:
+        case MLX5_CMD_OP_RTR2RTS_QP:
+        case MLX5_CMD_OP_RTS2RTS_QP:
+        case MLX5_CMD_OP_SQERR2RTS_QP:
+        case MLX5_CMD_OP_2ERR_QP:
+        case MLX5_CMD_OP_2RST_QP:
+        case MLX5_CMD_OP_QUERY_QP:
+        case MLX5_CMD_OP_SQD_RTS_QP:
+        case MLX5_CMD_OP_INIT2INIT_QP:
+        case MLX5_CMD_OP_CREATE_PSV:
+        case MLX5_CMD_OP_CREATE_SRQ:
+        case MLX5_CMD_OP_QUERY_SRQ:
+        case MLX5_CMD_OP_ARM_RQ:
+        case MLX5_CMD_OP_CREATE_XRC_SRQ:
+        case MLX5_CMD_OP_QUERY_XRC_SRQ:
+        case MLX5_CMD_OP_ARM_XRC_SRQ:
+        case MLX5_CMD_OP_CREATE_DCT:
+        case MLX5_CMD_OP_DRAIN_DCT:
+        case MLX5_CMD_OP_QUERY_DCT:
+        case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+        case MLX5_CMD_OP_QUERY_VPORT_STATE:
+        case MLX5_CMD_OP_MODIFY_VPORT_STATE:
+        case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
+        case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT:
+        case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
+        case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
+        case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
+        case MLX5_CMD_OP_SET_ROCE_ADDRESS:
+        case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+        case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT:
+        case MLX5_CMD_OP_QUERY_HCA_VPORT_GID:
+        case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY:
+        case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
+        case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+        case MLX5_CMD_OP_QUERY_Q_COUNTER:
+        case MLX5_CMD_OP_ALLOC_PD:
+        case MLX5_CMD_OP_ALLOC_UAR:
+        case MLX5_CMD_OP_CONFIG_INT_MODERATION:
+        case MLX5_CMD_OP_ACCESS_REG:
+        case MLX5_CMD_OP_ATTACH_TO_MCG:
+        case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
+        case MLX5_CMD_OP_MAD_IFC:
+        case MLX5_CMD_OP_QUERY_MAD_DEMUX:
+        case MLX5_CMD_OP_SET_MAD_DEMUX:
+        case MLX5_CMD_OP_NOP:
+        case MLX5_CMD_OP_ALLOC_XRCD:
+        case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+        case MLX5_CMD_OP_QUERY_CONG_STATUS:
+        case MLX5_CMD_OP_MODIFY_CONG_STATUS:
+        case MLX5_CMD_OP_QUERY_CONG_PARAMS:
+        case MLX5_CMD_OP_MODIFY_CONG_PARAMS:
+        case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+        case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+        case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+        case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+        case MLX5_CMD_OP_CREATE_TIR:
+        case MLX5_CMD_OP_MODIFY_TIR:
+        case MLX5_CMD_OP_QUERY_TIR:
+        case MLX5_CMD_OP_CREATE_SQ:
+        case MLX5_CMD_OP_MODIFY_SQ:
+        case MLX5_CMD_OP_QUERY_SQ:
+        case MLX5_CMD_OP_CREATE_RQ:
+        case MLX5_CMD_OP_MODIFY_RQ:
+        case MLX5_CMD_OP_QUERY_RQ:
+        case MLX5_CMD_OP_CREATE_RMP:
+        case MLX5_CMD_OP_MODIFY_RMP:
+        case MLX5_CMD_OP_QUERY_RMP:
+        case MLX5_CMD_OP_CREATE_TIS:
+        case MLX5_CMD_OP_MODIFY_TIS:
+        case MLX5_CMD_OP_QUERY_TIS:
+        case MLX5_CMD_OP_CREATE_RQT:
+        case MLX5_CMD_OP_MODIFY_RQT:
+        case MLX5_CMD_OP_QUERY_RQT:
+        case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+        case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+        case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+        case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+        case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+                *status = MLX5_DRIVER_STATUS_ABORTED;
+                *synd = MLX5_DRIVER_SYND;
+                return -EIO;
+        default:
+                mlx5_core_err(dev, "Unknown FW command (%d)\n", op);
+                return -EINVAL;
+        }
+}
+
 const char *mlx5_command_str(int command)
 {
         switch (command) {
@@ -592,6 +738,16 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
         return err;
 }
 
+static __be32 *get_synd_ptr(struct mlx5_outbox_hdr *out)
+{
+        return &out->syndrome;
+}
+
+static u8 *get_status_ptr(struct mlx5_outbox_hdr *out)
+{
+        return &out->status;
+}
+
 /* Notes:
  * 1. Callback functions may not sleep
  * 2. page queue commands do not support asynchrous completion
@@ -1200,6 +1356,11 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
         return msg;
 }
 
+static u16 opcode_from_in(struct mlx5_inbox_hdr *in)
+{
+        return be16_to_cpu(in->opcode);
+}
+
 static int is_manage_pages(struct mlx5_inbox_hdr *in)
 {
         return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES;
@@ -1214,6 +1375,15 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
         gfp_t gfp;
         int err;
         u8 status = 0;
+        u32 drv_synd;
+
+        if (pci_channel_offline(dev->pdev) ||
+            dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+                err = mlx5_internal_err_ret_value(dev, opcode_from_in(in), &drv_synd, &status);
+                *get_synd_ptr(out) = cpu_to_be32(drv_synd);
+                *get_status_ptr(out) = status;
+                return err;
+        }
 
         pages_queue = is_manage_pages(in);
         gfp = callback ? GFP_ATOMIC : GFP_KERNEL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 9b81e1ceb8de..f5deb642d0d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -34,6 +34,7 @@
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/vmalloc.h>
+#include <linux/hardirq.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
@@ -57,6 +58,91 @@ enum {
         MLX5_HEALTH_SYNDR_HIGH_TEMP = 0x10
 };
 
+enum {
+        MLX5_NIC_IFC_FULL = 0,
+        MLX5_NIC_IFC_DISABLED = 1,
+        MLX5_NIC_IFC_NO_DRAM_NIC = 2
+};
+
+static u8 get_nic_interface(struct mlx5_core_dev *dev)
+{
+        return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3;
+}
+
+static void trigger_cmd_completions(struct mlx5_core_dev *dev)
+{
+        unsigned long flags;
+        u64 vector;
+
+        /* wait for pending handlers to complete */
+        synchronize_irq(dev->priv.msix_arr[MLX5_EQ_VEC_CMD].vector);
+        spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
+        vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
+        if (!vector)
+                goto no_trig;
+
+        vector |= MLX5_TRIGGERED_CMD_COMP;
+        spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+
+        mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
+        mlx5_cmd_comp_handler(dev, vector);
+        return;
+
+no_trig:
+        spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+}
+
+static int in_fatal(struct mlx5_core_dev *dev)
+{
+        struct mlx5_core_health *health = &dev->priv.health;
+        struct health_buffer __iomem *h = health->health;
+
+        if (get_nic_interface(dev) == MLX5_NIC_IFC_DISABLED)
+                return 1;
+
+        if (ioread32be(&h->fw_ver) == 0xffffffff)
+                return 1;
+
+        return 0;
+}
+
+void mlx5_enter_error_state(struct mlx5_core_dev *dev)
+{
+        if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+                return;
+
+        mlx5_core_err(dev, "start\n");
+        if (pci_channel_offline(dev->pdev) || in_fatal(dev))
+                dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+
+        mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0);
+        mlx5_core_err(dev, "end\n");
+}
+
+static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
+{
+        u8 nic_interface = get_nic_interface(dev);
+
+        switch (nic_interface) {
+        case MLX5_NIC_IFC_FULL:
+                mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n");
+                break;
+
+        case MLX5_NIC_IFC_DISABLED:
+                mlx5_core_warn(dev, "starting teardown\n");
+                break;
+
+        case MLX5_NIC_IFC_NO_DRAM_NIC:
+                mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
+                break;
+        default:
+                mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n",
+                               nic_interface);
+        }
+
+        mlx5_disable_device(dev);
+}
+
 static void health_care(struct work_struct *work)
 {
         struct mlx5_core_health *health;
@@ -67,6 +153,7 @@ static void health_care(struct work_struct *work)
         priv = container_of(health, struct mlx5_priv, health);
         dev = container_of(priv, struct mlx5_core_dev, priv);
         mlx5_core_warn(dev, "handling bad device here\n");
+        mlx5_handle_bad_state(dev);
 }
 
 static const char *hsynd_str(u8 synd)
@@ -122,6 +209,10 @@ static void print_health_info(struct mlx5_core_dev *dev)
         u32 fw;
         int i;
 
+        /* If the syndrom is 0, the device is OK and no need to print buffer */
+        if (!ioread8(&h->synd))
+                return;
+
         for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
                 dev_err(&dev->pdev->dev, "assert_var[%d] 0x%08x\n", i, ioread32be(h->assert_var + i));
 
@@ -136,13 +227,29 @@ static void print_health_info(struct mlx5_core_dev *dev)
         dev_err(&dev->pdev->dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
 }
 
+static unsigned long get_next_poll_jiffies(void)
+{
+        unsigned long next;
+
+        get_random_bytes(&next, sizeof(next));
+        next %= HZ;
+        next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
+
+        return next;
+}
+
 static void poll_health(unsigned long data)
 {
         struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data;
         struct mlx5_core_health *health = &dev->priv.health;
-        unsigned long next;
         u32 count;
 
+        if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+                trigger_cmd_completions(dev);
+                mod_timer(&health->timer, get_next_poll_jiffies());
+                return;
+        }
+
         count = ioread32be(health->health_counter);
         if (count == health->prev)
                 ++health->miss_counter;
@@ -151,14 +258,16 @@ static void poll_health(unsigned long data)
 
         health->prev = count;
         if (health->miss_counter == MAX_MISSES) {
-                mlx5_core_err(dev, "device's health compromised\n");
+                dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n");
                 print_health_info(dev);
-                queue_work(health->wq, &health->work);
         } else {
-                get_random_bytes(&next, sizeof(next));
-                next %= HZ;
-                next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
-                mod_timer(&health->timer, next);
+                mod_timer(&health->timer, get_next_poll_jiffies());
+        }
+
+        if (in_fatal(dev) && !health->sick) {
+                health->sick = true;
+                print_health_info(dev);
+                queue_work(health->wq, &health->work);
         }
 }
 
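
get_next_poll_jiffies() re-arms the health timer at the base poll interval plus a random jitter of up to one second (HZ jiffies), which keeps multiple devices from polling in lockstep. The user-space sketch below illustrates the same pattern under the assumption of a 2-second base interval; it is an analogue for illustration, not driver code:

/* jittered_poll.c - compute the next poll deadline as now + interval + jitter,
 * analogous to get_next_poll_jiffies() above (interval value is an example).
 * Build: cc -o jittered_poll jittered_poll.c
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define POLL_INTERVAL_MS 2000   /* example base interval */

static struct timespec next_poll_deadline(void)
{
        struct timespec now;
        long jitter_ms = rand() % 1000;         /* up to one extra second */
        long total_ms = POLL_INTERVAL_MS + jitter_ms;

        clock_gettime(CLOCK_MONOTONIC, &now);
        now.tv_sec += total_ms / 1000;
        now.tv_nsec += (total_ms % 1000) * 1000000L;
        if (now.tv_nsec >= 1000000000L) {
                now.tv_sec++;
                now.tv_nsec -= 1000000000L;
        }
        return now;
}

int main(void)
{
        srand((unsigned int)time(NULL));
        struct timespec t = next_poll_deadline();

        printf("next poll at %ld.%09ld (monotonic)\n", (long)t.tv_sec, t.tv_nsec);
        return 0;
}
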
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index b6edc58766ad..2388aec208fa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -39,12 +39,14 @@
 #include <linux/slab.h>
 #include <linux/io-mapping.h>
 #include <linux/interrupt.h>
+#include <linux/delay.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/srq.h>
 #include <linux/debugfs.h>
 #include <linux/kmod.h>
+#include <linux/delay.h>
 #include <linux/mlx5/mlx5_ifc.h>
 #include "mlx5_core.h"
 
@@ -151,6 +153,25 @@ static struct mlx5_profile profile[] = {
         },
 };
 
+#define FW_INIT_TIMEOUT_MILI 2000
+#define FW_INIT_WAIT_MS 2
+
+static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
+{
+        unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
+        int err = 0;
+
+        while (fw_initializing(dev)) {
+                if (time_after(jiffies, end)) {
+                        err = -EBUSY;
+                        break;
+                }
+                msleep(FW_INIT_WAIT_MS);
+        }
+
+        return err;
+}
+
 static int set_dma_caps(struct pci_dev *pdev)
 {
         int err;
@@ -181,6 +202,34 @@ static int set_dma_caps(struct pci_dev *pdev)
         return err;
 }
 
+static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
+{
+        struct pci_dev *pdev = dev->pdev;
+        int err = 0;
+
+        mutex_lock(&dev->pci_status_mutex);
+        if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
+                err = pci_enable_device(pdev);
+                if (!err)
+                        dev->pci_status = MLX5_PCI_STATUS_ENABLED;
+        }
+        mutex_unlock(&dev->pci_status_mutex);
+
+        return err;
+}
+
+static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
+{
+        struct pci_dev *pdev = dev->pdev;
+
+        mutex_lock(&dev->pci_status_mutex);
+        if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
+                pci_disable_device(pdev);
+                dev->pci_status = MLX5_PCI_STATUS_DISABLED;
+        }
+        mutex_unlock(&dev->pci_status_mutex);
+}
+
 static int request_bar(struct pci_dev *pdev)
 {
         int err = 0;
@@ -807,7 +856,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
         if (!priv->dbg_root)
                 return -ENOMEM;
 
-        err = pci_enable_device(pdev);
+        err = mlx5_pci_enable_device(dev);
         if (err) {
                 dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
                 goto err_dbg;
@@ -841,7 +890,7 @@ err_clr_master:
         pci_clear_master(dev->pdev);
         release_bar(dev->pdev);
 err_disable:
-        pci_disable_device(dev->pdev);
+        mlx5_pci_disable_device(dev);
 
 err_dbg:
         debugfs_remove(priv->dbg_root);
@@ -853,7 +902,7 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
         iounmap(dev->iseg);
         pci_clear_master(dev->pdev);
         release_bar(dev->pdev);
-        pci_disable_device(dev->pdev);
+        mlx5_pci_disable_device(dev);
         debugfs_remove(priv->dbg_root);
 }
 
@@ -863,13 +912,32 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
         struct pci_dev *pdev = dev->pdev;
         int err;
 
+        mutex_lock(&dev->intf_state_mutex);
+        if (dev->interface_state == MLX5_INTERFACE_STATE_UP) {
+                dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n",
+                         __func__);
+                goto out;
+        }
+
         dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
                  fw_rev_min(dev), fw_rev_sub(dev));
 
+        /* on load removing any previous indication of internal error, device is
+         * up
+         */
+        dev->state = MLX5_DEVICE_STATE_UP;
+
         err = mlx5_cmd_init(dev);
         if (err) {
                 dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
-                return err;
+                goto out_err;
+        }
+
+        err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
+        if (err) {
+                dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n",
+                        FW_INIT_TIMEOUT_MILI);
+                goto out_err;
         }
 
         mlx5_pagealloc_init(dev);
@@ -994,6 +1062,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
         if (err)
                 pr_info("failed request module on %s\n", MLX5_IB_MOD);
 
+        dev->interface_state = MLX5_INTERFACE_STATE_UP;
+out:
+        mutex_unlock(&dev->intf_state_mutex);
+
         return 0;
 
 err_reg_dev:
@@ -1024,7 +1096,7 @@ err_stop_poll:
         mlx5_stop_health_poll(dev);
         if (mlx5_cmd_teardown_hca(dev)) {
                 dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
-                return err;
+                goto out_err;
         }
 
 err_pagealloc_stop:
@@ -1040,13 +1112,23 @@ err_pagealloc_cleanup:
         mlx5_pagealloc_cleanup(dev);
         mlx5_cmd_cleanup(dev);
 
+out_err:
+        dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+        mutex_unlock(&dev->intf_state_mutex);
+
         return err;
 }
 
 static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 {
-        int err;
+        int err = 0;
 
+        mutex_lock(&dev->intf_state_mutex);
+        if (dev->interface_state == MLX5_INTERFACE_STATE_DOWN) {
+                dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
+                         __func__);
+                goto out;
+        }
         mlx5_unregister_device(dev);
         mlx5_cleanup_mr_table(dev);
         mlx5_cleanup_srq_table(dev);
@@ -1072,10 +1154,12 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
         mlx5_cmd_cleanup(dev);
 
 out:
+        dev->interface_state = MLX5_INTERFACE_STATE_DOWN;
+        mutex_unlock(&dev->intf_state_mutex);
         return err;
 }
 
-static void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
+void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
                      unsigned long param)
 {
         struct mlx5_priv *priv = &dev->priv;
@@ -1125,6 +1209,8 @@ static int init_one(struct pci_dev *pdev,
 
         INIT_LIST_HEAD(&priv->ctx_list);
         spin_lock_init(&priv->ctx_lock);
+        mutex_init(&dev->pci_status_mutex);
+        mutex_init(&dev->intf_state_mutex);
         err = mlx5_pci_init(dev, priv);
         if (err) {
                 dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
@@ -1172,6 +1258,112 @@ static void remove_one(struct pci_dev *pdev)
         kfree(dev);
 }
 
+static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
+                                              pci_channel_state_t state)
+{
+        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+        struct mlx5_priv *priv = &dev->priv;
+
+        dev_info(&pdev->dev, "%s was called\n", __func__);
+        mlx5_enter_error_state(dev);
+        mlx5_unload_one(dev, priv);
+        mlx5_pci_disable_device(dev);
+        return state == pci_channel_io_perm_failure ?
+                PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
+{
+        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+        int err = 0;
+
+        dev_info(&pdev->dev, "%s was called\n", __func__);
+
+        err = mlx5_pci_enable_device(dev);
+        if (err) {
+                dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
+                        , __func__, err);
+                return PCI_ERS_RESULT_DISCONNECT;
+        }
+        pci_set_master(pdev);
+        pci_set_power_state(pdev, PCI_D0);
+        pci_restore_state(pdev);
+
+        return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
+}
+
+void mlx5_disable_device(struct mlx5_core_dev *dev)
+{
+        mlx5_pci_err_detected(dev->pdev, 0);
+}
+
+/* wait for the device to show vital signs. For now we check
+ * that we can read the device ID and that the health buffer
+ * shows a non zero value which is different than 0xffffffff
+ */
+static void wait_vital(struct pci_dev *pdev)
+{
+        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+        struct mlx5_core_health *health = &dev->priv.health;
+        const int niter = 100;
+        u32 count;
+        u16 did;
+        int i;
+
+        /* Wait for firmware to be ready after reset */
+        msleep(1000);
+        for (i = 0; i < niter; i++) {
+                if (pci_read_config_word(pdev, 2, &did)) {
+                        dev_warn(&pdev->dev, "failed reading config word\n");
+                        break;
+                }
+                if (did == pdev->device) {
+                        dev_info(&pdev->dev, "device ID correctly read after %d iterations\n", i);
+                        break;
+                }
+                msleep(50);
+        }
+        if (i == niter)
+                dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);
+
+        for (i = 0; i < niter; i++) {
+                count = ioread32be(health->health_counter);
+                if (count && count != 0xffffffff) {
+                        dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
+                        break;
+                }
+                msleep(50);
+        }
+
+        if (i == niter)
+                dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);
+}
+
+static void mlx5_pci_resume(struct pci_dev *pdev)
+{
+        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+        struct mlx5_priv *priv = &dev->priv;
+        int err;
+
+        dev_info(&pdev->dev, "%s was called\n", __func__);
+
+        pci_save_state(pdev);
+        wait_vital(pdev);
+
+        err = mlx5_load_one(dev, priv);
+        if (err)
+                dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
+                        , __func__, err);
+        else
+                dev_info(&pdev->dev, "%s: device recovered\n", __func__);
+}
+
+static const struct pci_error_handlers mlx5_err_handler = {
+        .error_detected = mlx5_pci_err_detected,
+        .slot_reset     = mlx5_pci_slot_reset,
+        .resume         = mlx5_pci_resume
+};
+
 static const struct pci_device_id mlx5_core_pci_table[] = {
         { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */
         { PCI_VDEVICE(MELLANOX, 0x1012) }, /* Connect-IB VF */
@@ -1188,7 +1380,8 @@ static struct pci_driver mlx5_core_driver = {
         .name           = DRIVER_NAME,
         .id_table       = mlx5_core_pci_table,
         .probe          = init_one,
-        .remove         = remove_one
+        .remove         = remove_one,
+        .err_handler    = &mlx5_err_handler
 };
 
 static int __init init(void)
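
wait_fw_init() above is a bounded busy-poll: it re-reads the firmware's initializing bit every FW_INIT_WAIT_MS milliseconds and gives up with -EBUSY after FW_INIT_TIMEOUT_MILI milliseconds. A self-contained user-space analogue of that poll-with-deadline pattern is sketched below; is_still_initializing() is a made-up stand-in for fw_initializing():

/* poll_until_ready.c - bounded polling loop in the style of wait_fw_init():
 * poll a readiness predicate every few milliseconds until it succeeds or a
 * deadline passes. The fake is_still_initializing() is a stand-in.
 * Build: cc -o poll_until_ready poll_until_ready.c
 */
#include <errno.h>
#include <stdio.h>
#include <time.h>

#define INIT_TIMEOUT_MS 2000    /* mirrors FW_INIT_TIMEOUT_MILI */
#define INIT_WAIT_MS    2       /* mirrors FW_INIT_WAIT_MS */

static int is_still_initializing(void)
{
        static int polls_left = 100;    /* pretend init finishes after 100 polls */
        return polls_left-- > 0;
}

static long now_ms(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000L + ts.tv_nsec / 1000000L;
}

static int wait_until_ready(long timeout_ms)
{
        long deadline = now_ms() + timeout_ms;
        struct timespec delay = { 0, INIT_WAIT_MS * 1000000L };

        while (is_still_initializing()) {
                if (now_ms() > deadline)
                        return -EBUSY;  /* same error wait_fw_init() returns */
                nanosleep(&delay, NULL);
        }
        return 0;
}

int main(void)
{
        int err = wait_until_ready(INIT_TIMEOUT_MS);

        printf(err ? "timed out waiting for init\n" : "device ready\n");
        return err ? 1 : 0;
}
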
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 30c0be721b08..cee5b7a839bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -86,6 +86,10 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
 int mlx5_query_board_id(struct mlx5_core_dev *dev);
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
 int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
+void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
+                     unsigned long param);
+void mlx5_enter_error_state(struct mlx5_core_dev *dev);
+void mlx5_disable_device(struct mlx5_core_dev *dev);
 
 void mlx5e_init(void);
 void mlx5e_cleanup(void);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 76432a510ac2..1cda5d268ec9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -493,15 +493,20 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
         struct fw_page *fwp;
         struct rb_node *p;
         int nclaimed = 0;
-        int err;
+        int err = 0;
 
         do {
                 p = rb_first(&dev->priv.page_root);
                 if (p) {
                         fwp = rb_entry(p, struct fw_page, rb_node);
-                        err = reclaim_pages(dev, fwp->func_id,
-                                            optimal_reclaimed_pages(),
-                                            &nclaimed);
+                        if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+                                free_4k(dev, fwp->addr);
+                                nclaimed = 1;
+                        } else {
+                                err = reclaim_pages(dev, fwp->func_id,
+                                                    optimal_reclaimed_pages(),
+                                                    &nclaimed);
+                        }
                         if (err) {
                                 mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
                                                err);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index baad4cb8e9b0..5a8677bafe04 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -833,6 +833,7 @@ struct mlx4_dev {
         struct mlx4_quotas quotas;
         struct radix_tree_root qp_table_tree;
         u8 rev_id;
+        u8 port_random_macs;
         char board_id[MLX4_BOARD_ID_LEN];
         int numa_node;
         int oper_log_mgm_entry_size;
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 2a0b95662548..0b473cbfa7ef 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -439,7 +439,8 @@ struct mlx5_init_seg {
         __be32 cmdq_addr_h;
         __be32 cmdq_addr_l_sz;
         __be32 cmd_dbell;
-        __be32 rsvd1[121];
+        __be32 rsvd1[120];
+        __be32 initializing;
         struct health_buffer health;
         __be32 rsvd2[884];
         __be32 health_counter;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 41a32873f608..5c857f2a20d7 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -393,6 +393,7 @@ struct mlx5_core_health {
         struct timer_list timer;
         u32 prev;
         int miss_counter;
+        bool sick;
         struct workqueue_struct *wq;
         struct work_struct work;
 };
@@ -486,8 +487,26 @@ struct mlx5_priv {
         spinlock_t ctx_lock;
 };
 
+enum mlx5_device_state {
+        MLX5_DEVICE_STATE_UP,
+        MLX5_DEVICE_STATE_INTERNAL_ERROR,
+};
+
+enum mlx5_interface_state {
+        MLX5_INTERFACE_STATE_DOWN,
+        MLX5_INTERFACE_STATE_UP,
+};
+
+enum mlx5_pci_status {
+        MLX5_PCI_STATUS_DISABLED,
+        MLX5_PCI_STATUS_ENABLED,
+};
+
 struct mlx5_core_dev {
         struct pci_dev *pdev;
+        /* sync pci state */
+        struct mutex pci_status_mutex;
+        enum mlx5_pci_status pci_status;
         u8 rev_id;
         char board_id[MLX5_BOARD_ID_LEN];
         struct mlx5_cmd cmd;
@@ -496,6 +515,10 @@ struct mlx5_core_dev {
         u32 hca_caps_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
         phys_addr_t iseg_base;
         struct mlx5_init_seg __iomem *iseg;
+        enum mlx5_device_state state;
+        /* sync interface state */
+        struct mutex intf_state_mutex;
+        enum mlx5_interface_state interface_state;
         void (*event) (struct mlx5_core_dev *dev,
                        enum mlx5_dev_event event,
                        unsigned long param);
@@ -803,6 +826,11 @@ void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common);
 int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
                         struct mlx5_odp_caps *odp_caps);
 
+static inline int fw_initializing(struct mlx5_core_dev *dev)
+{
+        return ioread32be(&dev->iseg->initializing) >> 31;
+}
+
 static inline u32 mlx5_mkey_to_idx(u32 mkey)
 {
         return mkey >> 8;