author		Matan Barak <matanb@mellanox.com>	2014-11-13 07:45:32 -0500
committer	David S. Miller <davem@davemloft.net>	2014-11-13 15:16:21 -0500
commit		7ae0e400cd9396c41fe596d35dcc34feaa89a04f (patch)
tree		0cc4b53c1d8ad12d2b1624dcedc5a2e0c8b6e950
parent		e8c4265bea8437f5583d0c2f272058200ebc10ff (diff)
net/mlx4_core: Flexible (asymmetric) allocation of EQs and MSI-X vectors for PF/VFs
Previously, the driver queried the firmware in order to get the number of supported EQs. Under SRIOV, since this was done before the driver notified the firmware how many VFs it actually needs, the firmware had to take into account a worst case scenario and always allocated four EQs per VF, where one was used for events while the others were used for completions.

Now, when the firmware supports the asymmetric allocation scheme, denoted by exposing num_sys_eqs > 0 (--> MLX4_DEV_CAP_FLAG2_SYS_EQS), we use the QUERY_FUNC command to query the firmware before enabling SRIOV. Thus we can get more EQs and MSI-X vectors per function.

Moreover, when running in the new firmware/driver mode, the limitation that the number of EQs should be a power of two is lifted.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
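For illustration only, here is a minimal standalone sketch of the per-function MSI-X request math this patch switches to (compare the mlx4_enable_msi_x hunk in drivers/net/ethernet/mellanox/mlx4/main.c below). All numbers and the MSIX_LEGACY_SZ value are hypothetical placeholders; in the driver the EQ counts come from QUERY_FUNC/QUERY_DEV_CAP and the CPU count from num_online_cpus().

/* Illustrative sketch, not part of the patch: per-function MSI-X request
 * under the asymmetric EQ scheme.  All values are made-up placeholders.
 */
#include <stdio.h>

#define MSIX_LEGACY_SZ 4		/* assumed value, illustration only */

static int min_int(int a, int b)
{
	return a < b ? a : b;
}

int main(void)
{
	int num_ports = 2;		/* hypothetical dual-port HCA */
	int online_cpus = 16;		/* hypothetical host */
	int num_eqs = 42;		/* per-function EQs; need not be a power of two */
	int reserved_eqs = 4;

	/* New scheme: one completion vector per port per CPU plus the legacy
	 * vectors, clamped to the EQs this function actually owns.
	 */
	int nreq = num_ports * online_cpus + MSIX_LEGACY_SZ;

	nreq = min_int(num_eqs - reserved_eqs, nreq);
	printf("MSI-X vectors requested: %d\n", nreq);
	return 0;
}

Under the old scheme the initial request was additionally capped per port (MAX_MSIX_P_PORT) and globally (MAX_MSIX), and the firmware always set aside four EQs per VF; here the request is limited by the per-function EQ count the firmware actually reports.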
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c            |   3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/eq.c      |   8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/fw.c      |  52
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/fw.h      |   2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/main.c    | 144
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/profile.c |  19
-rw-r--r--  include/linux/mlx4/device.h                  |   4
7 files changed, 190 insertions(+), 42 deletions(-)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 8b72cf392b34..0c3375524a64 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1975,8 +1975,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 	    dev->caps.num_ports > dev->caps.comp_pool)
 		return;
 
-	eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/
-					dev->caps.num_ports);
+	eq_per_port = dev->caps.comp_pool / dev->caps.num_ports;
 
 	/* Init eq table */
 	added_eqs = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 49290a405903..d68b264cee4d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -1123,8 +1123,12 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
 		goto err_out_free;
 	}
 
-	err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs,
-			       dev->caps.num_eqs - 1, dev->caps.reserved_eqs, 0);
+	err = mlx4_bitmap_init(&priv->eq_table.bitmap,
+			       roundup_pow_of_two(dev->caps.num_eqs),
+			       dev->caps.num_eqs - 1,
+			       dev->caps.reserved_eqs,
+			       roundup_pow_of_two(dev->caps.num_eqs) -
+			       dev->caps.num_eqs);
 	if (err)
 		goto err_out_free;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index b3bbeb97da14..d2f594fadfbf 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -142,7 +142,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
142 [13] = "Large cache line (>64B) EQE stride support", 142 [13] = "Large cache line (>64B) EQE stride support",
143 [14] = "Ethernet protocol control support", 143 [14] = "Ethernet protocol control support",
144 [15] = "Ethernet Backplane autoneg support", 144 [15] = "Ethernet Backplane autoneg support",
145 [16] = "CONFIG DEV support" 145 [16] = "CONFIG DEV support",
146 [17] = "Asymmetric EQs support"
146 }; 147 };
147 int i; 148 int i;
148 149
@@ -200,7 +201,6 @@ int mlx4_QUERY_FUNC(struct mlx4_dev *dev, struct mlx4_func *func, int slave)
 	outbox = mailbox->buf;
 
 	in_modifier = slave;
-	mlx4_dbg(dev, "%s for VF %d\n", __func__, in_modifier);
 
 	err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, 0,
 			   MLX4_CMD_QUERY_FUNC,
@@ -243,6 +243,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 	u8	field, port;
 	u32	size, proxy_qp, qkey;
 	int	err = 0;
+	struct mlx4_func func;
 
 #define QUERY_FUNC_CAP_FLAGS_OFFSET		0x0
 #define QUERY_FUNC_CAP_NUM_PORTS_OFFSET		0x1
@@ -287,6 +288,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 #define QUERY_FUNC_CAP_VF_ENABLE_QP0		0x08
 
 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80
+#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31)
 
 	if (vhcr->op_modifier == 1) {
 		struct mlx4_active_ports actv_ports =
@@ -365,11 +367,24 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 	size = dev->caps.num_cqs;
 	MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP);
 
-	size = dev->caps.num_eqs;
-	MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
-
-	size = dev->caps.reserved_eqs;
-	MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
+	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) ||
+	    mlx4_QUERY_FUNC(dev, &func, slave)) {
+		size = vhcr->in_modifier &
+		       QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS ?
+			dev->caps.num_eqs :
+			rounddown_pow_of_two(dev->caps.num_eqs);
+		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
+		size = dev->caps.reserved_eqs;
+		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
+	} else {
+		size = vhcr->in_modifier &
+		       QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS ?
+			func.max_eq :
+			rounddown_pow_of_two(func.max_eq);
+		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
+		size = func.rsvd_eqs;
+		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
+	}
 
 	size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave];
 	MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
@@ -399,14 +414,17 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port,
 	u8	field, op_modifier;
 	u32	size, qkey;
 	int	err = 0, quotas = 0;
+	u32	in_modifier;
 
 	op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */
+	in_modifier = op_modifier ? gen_or_port :
+		QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS;
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
 		return PTR_ERR(mailbox);
 
-	err = mlx4_cmd_box(dev, 0, mailbox->dma, gen_or_port, op_modifier,
+	err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, op_modifier,
 			   MLX4_CMD_QUERY_FUNC_CAP,
 			   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 	if (err)
@@ -578,6 +596,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET		0x21
 #define QUERY_DEV_CAP_RSVD_MRW_OFFSET		0x22
 #define QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET	0x23
+#define QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET		0x26
 #define QUERY_DEV_CAP_MAX_AV_OFFSET		0x27
 #define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET		0x29
 #define QUERY_DEV_CAP_MAX_RES_QP_OFFSET		0x2b
@@ -678,6 +697,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev_cap->reserved_mrws = 1 << (field & 0xf);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET);
 	dev_cap->max_mtt_seg = 1 << (field & 0x3f);
+	MLX4_GET(size, outbox, QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET);
+	dev_cap->num_sys_eqs = size & 0xfff;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET);
 	dev_cap->max_requester_per_qp = 1 << (field & 0x3f);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET);
@@ -905,8 +926,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	 * we can't use any EQs whose doorbell falls on that page,
 	 * even if the EQ itself isn't reserved.
 	 */
-	dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4,
-				    dev_cap->reserved_eqs);
+	if (dev_cap->num_sys_eqs == 0)
+		dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4,
+					    dev_cap->reserved_eqs);
+	else
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SYS_EQS;
 
 	mlx4_dbg(dev, "Max ICM size %lld MB\n",
 		 (unsigned long long) dev_cap->max_icm_sz >> 20);
@@ -916,8 +940,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		 dev_cap->max_srqs, dev_cap->reserved_srqs, dev_cap->srq_entry_sz);
 	mlx4_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n",
 		 dev_cap->max_cqs, dev_cap->reserved_cqs, dev_cap->cqc_entry_sz);
-	mlx4_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n",
-		 dev_cap->max_eqs, dev_cap->reserved_eqs, dev_cap->eqc_entry_sz);
+	mlx4_dbg(dev, "Num sys EQs: %d, max EQs: %d, reserved EQs: %d, entry size: %d\n",
+		 dev_cap->num_sys_eqs, dev_cap->max_eqs, dev_cap->reserved_eqs,
+		 dev_cap->eqc_entry_sz);
 	mlx4_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n",
 		 dev_cap->reserved_mrws, dev_cap->reserved_mtts);
 	mlx4_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n",
@@ -1463,6 +1488,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 #define INIT_HCA_AUXC_BASE_OFFSET	(INIT_HCA_QPC_OFFSET + 0x50)
 #define INIT_HCA_EQC_BASE_OFFSET	(INIT_HCA_QPC_OFFSET + 0x60)
 #define INIT_HCA_LOG_EQ_OFFSET		(INIT_HCA_QPC_OFFSET + 0x67)
+#define INIT_HCA_NUM_SYS_EQS_OFFSET	(INIT_HCA_QPC_OFFSET + 0x6a)
 #define INIT_HCA_RDMARC_BASE_OFFSET	(INIT_HCA_QPC_OFFSET + 0x70)
 #define INIT_HCA_LOG_RD_OFFSET		(INIT_HCA_QPC_OFFSET + 0x77)
 #define INIT_HCA_MCAST_OFFSET		0x0c0
@@ -1566,6 +1592,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	MLX4_PUT(inbox, param->auxc_base,     INIT_HCA_AUXC_BASE_OFFSET);
 	MLX4_PUT(inbox, param->eqc_base,      INIT_HCA_EQC_BASE_OFFSET);
 	MLX4_PUT(inbox, param->log_num_eqs,   INIT_HCA_LOG_EQ_OFFSET);
+	MLX4_PUT(inbox, param->num_sys_eqs,   INIT_HCA_NUM_SYS_EQS_OFFSET);
 	MLX4_PUT(inbox, param->rdmarc_base,   INIT_HCA_RDMARC_BASE_OFFSET);
 	MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET);
 
@@ -1676,6 +1703,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 	MLX4_GET(param->auxc_base,    outbox, INIT_HCA_AUXC_BASE_OFFSET);
 	MLX4_GET(param->eqc_base,     outbox, INIT_HCA_EQC_BASE_OFFSET);
 	MLX4_GET(param->log_num_eqs,  outbox, INIT_HCA_LOG_EQ_OFFSET);
+	MLX4_GET(param->num_sys_eqs,  outbox, INIT_HCA_NUM_SYS_EQS_OFFSET);
 	MLX4_GET(param->rdmarc_base,  outbox, INIT_HCA_RDMARC_BASE_OFFSET);
 	MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 48c11b5e73e7..475215ee370f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -56,6 +56,7 @@ struct mlx4_dev_cap {
 	int max_mpts;
 	int reserved_eqs;
 	int max_eqs;
+	int num_sys_eqs;
 	int reserved_mtts;
 	int max_mrw_sz;
 	int reserved_mrws;
@@ -180,6 +181,7 @@ struct mlx4_init_hca_param {
 	u8  log_num_srqs;
 	u8  log_num_cqs;
 	u8  log_num_eqs;
+	u16 num_sys_eqs;
 	u8  log_rd_per_qp;
 	u8  log_mc_table_sz;
 	u8  log_mpt_sz;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 43047b2a2aac..ebb279060a25 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -197,6 +197,29 @@ static void mlx4_set_port_mask(struct mlx4_dev *dev)
 		dev->caps.port_mask[i] = dev->caps.port_type[i];
 }
 
+enum {
+	MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0,
+};
+
+static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
+{
+	int err = 0;
+	struct mlx4_func func;
+
+	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
+		err = mlx4_QUERY_FUNC(dev, &func, 0);
+		if (err) {
+			mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
+			return err;
+		}
+		dev_cap->max_eqs = func.max_eq;
+		dev_cap->reserved_eqs = func.rsvd_eqs;
+		dev_cap->reserved_uars = func.rsvd_uars;
+		err |= MLX4_QUERY_FUNC_NUM_SYS_EQS;
+	}
+	return err;
+}
+
 static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
 {
 	struct mlx4_caps *dev_cap = &dev->caps;
@@ -261,7 +284,10 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	}
 
 	dev->caps.num_ports	     = dev_cap->num_ports;
-	dev->phys_caps.num_phys_eqs  = MLX4_MAX_EQ_NUM;
+	dev->caps.num_sys_eqs	     = dev_cap->num_sys_eqs;
+	dev->phys_caps.num_phys_eqs  = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
+				       dev->caps.num_sys_eqs :
+				       MLX4_MAX_EQ_NUM;
 	for (i = 1; i <= dev->caps.num_ports; ++i) {
 		dev->caps.vl_cap[i]	    = dev_cap->max_vl[i];
 		dev->caps.ib_mtu_cap[i]	    = dev_cap->ib_mtu[i];
@@ -1130,8 +1156,7 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
 	if (err)
 		goto err_srq;
 
-	num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
-		  dev->caps.num_eqs;
+	num_eqs = dev->phys_caps.num_phys_eqs;
 	err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
 				  cmpt_base +
 				  ((u64) (MLX4_CMPT_TYPE_EQ *
@@ -1193,8 +1218,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
 	}
 
 
-	num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
-		  dev->caps.num_eqs;
+	num_eqs = dev->phys_caps.num_phys_eqs;
 	err = mlx4_init_icm_table(dev, &priv->eq_table.table,
 				  init_hca->eqc_base, dev_cap->eqc_entry_sz,
 				  num_eqs, num_eqs, 0, 0);
@@ -1719,6 +1743,19 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
1719 mlx4_err(dev, "INIT_HCA command failed, aborting\n"); 1743 mlx4_err(dev, "INIT_HCA command failed, aborting\n");
1720 goto err_free_icm; 1744 goto err_free_icm;
1721 } 1745 }
1746
1747 if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
1748 err = mlx4_query_func(dev, &dev_cap);
1749 if (err < 0) {
1750 mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
1751 goto err_stop_fw;
1752 } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
1753 dev->caps.num_eqs = dev_cap.max_eqs;
1754 dev->caps.reserved_eqs = dev_cap.reserved_eqs;
1755 dev->caps.reserved_uars = dev_cap.reserved_uars;
1756 }
1757 }
1758
1722 /* 1759 /*
1723 * If TS is supported by FW 1760 * If TS is supported by FW
1724 * read HCA frequency by QUERY_HCA command 1761 * read HCA frequency by QUERY_HCA command
@@ -2085,12 +2122,11 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct msix_entry *entries;
-	int nreq = min_t(int, dev->caps.num_ports *
-			 min_t(int, num_online_cpus() + 1,
-			       MAX_MSIX_P_PORT) + MSIX_LEGACY_SZ, MAX_MSIX);
 	int i;
 
 	if (msi_x) {
+		int nreq = dev->caps.num_ports * num_online_cpus() + MSIX_LEGACY_SZ;
+
 		nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
 			     nreq);
 
@@ -2345,6 +2381,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
 	int err;
 	int port;
 	int i;
+	struct mlx4_dev_cap *dev_cap = NULL;
 	int existing_vfs = 0;
 
 	dev = &priv->dev;
@@ -2381,15 +2418,6 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
 		}
 	}
 
-	if (total_vfs) {
-		existing_vfs = pci_num_vf(pdev);
-		dev->flags = MLX4_FLAG_MASTER;
-		dev->flags = mlx4_enable_sriov(dev, pdev, total_vfs,
-					       existing_vfs);
-		if (!SRIOV_VALID_STATE(dev->flags))
-			goto err_sriov;
-	}
-
 	atomic_set(&priv->opreq_count, 0);
 	INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
 
@@ -2403,6 +2431,12 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
2403 mlx4_err(dev, "Failed to reset HCA, aborting\n"); 2431 mlx4_err(dev, "Failed to reset HCA, aborting\n");
2404 goto err_sriov; 2432 goto err_sriov;
2405 } 2433 }
2434
2435 if (total_vfs) {
2436 existing_vfs = pci_num_vf(pdev);
2437 dev->flags = MLX4_FLAG_MASTER;
2438 dev->num_vfs = total_vfs;
2439 }
2406 } 2440 }
2407 2441
2408slave_start: 2442slave_start:
@@ -2416,9 +2450,10 @@ slave_start:
 	 * before posting commands. Also, init num_slaves before calling
 	 * mlx4_init_hca */
 	if (mlx4_is_mfunc(dev)) {
-		if (mlx4_is_master(dev))
+		if (mlx4_is_master(dev)) {
 			dev->num_slaves = MLX4_MAX_NUM_SLAVES;
-		else {
+
+		} else {
 			dev->num_slaves = 0;
 			err = mlx4_multi_func_init(dev);
 			if (err) {
@@ -2434,6 +2469,52 @@ slave_start:
 		goto err_mfunc;
 	}
 
+	if (mlx4_is_master(dev)) {
+		if (!dev_cap) {
+			dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
+
+			if (!dev_cap) {
+				err = -ENOMEM;
+				goto err_fw;
+			}
+
+			err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
+			if (err) {
+				mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+				goto err_fw;
+			}
+
+			if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
+				u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
+								  existing_vfs);
+
+				mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
+				dev->flags = dev_flags;
+				if (!SRIOV_VALID_STATE(dev->flags)) {
+					mlx4_err(dev, "Invalid SRIOV state\n");
+					goto err_sriov;
+				}
+				err = mlx4_reset(dev);
+				if (err) {
+					mlx4_err(dev, "Failed to reset HCA, aborting.\n");
+					goto err_sriov;
+				}
+				goto slave_start;
+			}
+		} else {
+			/* Legacy mode FW requires SRIOV to be enabled before
+			 * doing QUERY_DEV_CAP, since max_eq's value is different if
+			 * SRIOV is enabled.
+			 */
+			memset(dev_cap, 0, sizeof(*dev_cap));
+			err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
+			if (err) {
+				mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+				goto err_fw;
+			}
+		}
+	}
+
 	err = mlx4_init_hca(dev);
 	if (err) {
 		if (err == -EACCES) {
@@ -2457,6 +2538,30 @@ slave_start:
 		goto err_fw;
 	}
 
+	if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
+		u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, existing_vfs);
+
+		if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) {
+			mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR);
+			dev->flags = dev_flags;
+			err = mlx4_cmd_init(dev);
+			if (err) {
+				/* Only VHCR is cleaned up, so could still
+				 * send FW commands
+				 */
+				mlx4_err(dev, "Failed to init VHCR command interface, aborting\n");
+				goto err_close;
+			}
+		} else {
+			dev->flags = dev_flags;
+		}
+
+		if (!SRIOV_VALID_STATE(dev->flags)) {
+			mlx4_err(dev, "Invalid SRIOV state\n");
+			goto err_close;
+		}
+	}
+
 	/* check if the device is functioning at its maximum possible speed.
 	 * No return code for this call, just warn the user in case of PCI
 	 * express device capabilities are under-satisfied by the bus.
@@ -2631,6 +2736,7 @@ err_sriov:
 	if (!mlx4_is_slave(dev))
 		mlx4_free_ownership(dev);
 
+	kfree(dev_cap);
 	return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/profile.c b/drivers/net/ethernet/mellanox/mlx4/profile.c
index 14089d9e1667..2bf437aafc53 100644
--- a/drivers/net/ethernet/mellanox/mlx4/profile.c
+++ b/drivers/net/ethernet/mellanox/mlx4/profile.c
@@ -126,8 +126,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
 	profile[MLX4_RES_AUXC].num    = request->num_qp;
 	profile[MLX4_RES_SRQ].num     = request->num_srq;
 	profile[MLX4_RES_CQ].num      = request->num_cq;
-	profile[MLX4_RES_EQ].num      = mlx4_is_mfunc(dev) ?
-					dev->phys_caps.num_phys_eqs :
+	profile[MLX4_RES_EQ].num      = mlx4_is_mfunc(dev) ? dev->phys_caps.num_phys_eqs :
 					min_t(unsigned, dev_cap->max_eqs, MAX_MSIX);
 	profile[MLX4_RES_DMPT].num    = request->num_mpt;
 	profile[MLX4_RES_CMPT].num    = MLX4_NUM_CMPTS;
@@ -216,10 +215,18 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
 			init_hca->log_num_cqs = profile[i].log_num;
 			break;
 		case MLX4_RES_EQ:
-			dev->caps.num_eqs     = roundup_pow_of_two(min_t(unsigned, dev_cap->max_eqs,
-									 MAX_MSIX));
-			init_hca->eqc_base    = profile[i].start;
-			init_hca->log_num_eqs = ilog2(dev->caps.num_eqs);
+			if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
+				init_hca->log_num_eqs = 0x1f;
+				init_hca->eqc_base    = profile[i].start;
+				init_hca->num_sys_eqs = dev_cap->num_sys_eqs;
+			} else {
+				dev->caps.num_eqs     = roundup_pow_of_two(
+						min_t(unsigned,
+						      dev_cap->max_eqs,
+						      MAX_MSIX));
+				init_hca->eqc_base    = profile[i].start;
+				init_hca->log_num_eqs = ilog2(dev->caps.num_eqs);
+			}
 			break;
 		case MLX4_RES_DMPT:
 			dev->caps.num_mpts	= profile[i].num;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 3d9bff00f24a..1c560eb870ad 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -189,7 +189,8 @@ enum {
 	MLX4_DEV_CAP_FLAG2_EQE_STRIDE		= 1LL <<  13,
 	MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL	= 1LL <<  14,
 	MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP	= 1LL <<  15,
-	MLX4_DEV_CAP_FLAG2_CONFIG_DEV		= 1LL <<  16
+	MLX4_DEV_CAP_FLAG2_CONFIG_DEV		= 1LL <<  16,
+	MLX4_DEV_CAP_FLAG2_SYS_EQS		= 1LL <<  17
 };
 
 enum {
@@ -443,6 +444,7 @@ struct mlx4_caps {
 	int			num_cqs;
 	int			max_cqes;
 	int			reserved_cqs;
+	int			num_sys_eqs;
 	int			num_eqs;
 	int			reserved_eqs;
 	int			num_comp_vectors;