author		Jack Morgenstein <jackm@dev.mellanox.co.il>	2014-03-12 06:00:37 -0400
committer	David S. Miller <davem@davemloft.net>	2014-03-12 15:57:12 -0400
commit		6ee51a4e866bbb0921180b457ed16cd172859346 (patch)
tree		c207c933969c0a492cc9806dec1e0c77dc0f4cf0
parent		36f6fdb74997c3f456f1c6a9803c009c49a30053 (diff)
mlx4: Adjust QP1 multiplexing for RoCE/SRIOV
This requires the following modifications:
1. Fix build_mlx4_header to properly fill in the ETH fields
2. Adjust mux and demux QP1 flow to support RoCE

This commit still assumes only one GID per slave for RoCE.
The commit enabling multiple GIDs is a subsequent commit, and
is done separately because of its complexity.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
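As a rough, standalone illustration of the mux/demux direction described above — mapping the destination GID of an incoming RoCE CM MAD to the slave that owns it, and back — the sketch below models the per-port GID-to-slave table that the patch adds (see mlx4_get_slave_from_roce_gid()/mlx4_get_roce_gid_from_slave() in the diff). It is plain userspace C with made-up table sizes and sample values, not driver code.

	/*
	 * Standalone sketch (not kernel code): a per-port RoCE GID table in
	 * which the array index doubles as the slave (VF) id, mirroring the
	 * idea behind the helpers added by this patch. Sizes and the demo
	 * values in main() are illustrative assumptions.
	 */
	#include <stdio.h>
	#include <string.h>

	#define MAX_PORTS 2
	#define MAX_GIDS  128   /* mirrors MLX4_ROCE_MAX_GIDS below */

	static unsigned char roce_gids[MAX_PORTS][MAX_GIDS][16];

	/* demux: GID -> slave, linear scan; the matching index is the slave id */
	static int gid_to_slave(int port, const unsigned char *gid, int *slave)
	{
		int i;

		for (i = 0; i < MAX_GIDS; i++) {
			if (!memcmp(roce_gids[port - 1][i], gid, 16)) {
				*slave = i;
				return 0;
			}
		}
		return -1; /* no slave owns this GID */
	}

	/* mux: slave -> GID, direct lookup, used when tunneling a MAD to that slave */
	static int slave_to_gid(int port, int slave, unsigned char *gid)
	{
		if (slave < 0 || slave >= MAX_GIDS)
			return -1;
		memcpy(gid, roce_gids[port - 1][slave], 16);
		return 0;
	}

	int main(void)
	{
		unsigned char gid[16] = { 0xfe, 0x80, 0, 0, 0, 0, 0, 0,
					  0x02, 0x02, 0xc9, 0xff, 0xfe, 0x01, 0x02, 0x03 };
		unsigned char back[16];
		int slave = -1;

		memcpy(roce_gids[0][3], gid, 16);     /* pretend slave 3 owns this GID */
		if (!gid_to_slave(1, gid, &slave))
			printf("GID belongs to slave %d\n", slave);
		if (!slave_to_gid(1, slave, back))
			printf("round-trip matches: %d\n", !memcmp(back, gid, 16));
		return 0;
	}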
-rw-r--r--  drivers/infiniband/hw/mlx4/cm.c            |  8
-rw-r--r--  drivers/infiniband/hw/mlx4/mad.c           | 50
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c            | 29
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mlx4.h  |  5
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/port.c  | 34
-rw-r--r--  include/linux/mlx4/device.h                |  4
6 files changed, 110 insertions(+), 20 deletions(-)
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index d1f5f1dd77b0..b8d911543783 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -315,7 +315,7 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
 }
 
 int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
-							     struct ib_mad *mad)
+			     struct ib_mad *mad)
 {
 	u32 pv_cm_id;
 	struct id_map_entry *id;
@@ -323,6 +323,9 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
 	if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) {
 		union ib_gid gid;
 
+		if (!slave)
+			return 0;
+
 		gid = gid_from_req_msg(ibdev, mad);
 		*slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
 		if (*slave < 0) {
@@ -341,7 +344,8 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
 		return -ENOENT;
 	}
 
-	*slave = id->slave_id;
+	if (slave)
+		*slave = id->slave_id;
 	set_remote_comm_id(mad, id->sl_cm_id);
 
 	if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index f2a3f48107e7..c2e9879a5a34 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -467,6 +467,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 	int ret = 0;
 	u16 tun_pkey_ix;
 	u16 cached_pkey;
+	u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
 
 	if (dest_qpt > IB_QPT_GSI)
 		return -EINVAL;
@@ -509,6 +510,12 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 	 * The driver will set the force loopback bit in post_send */
 	memset(&attr, 0, sizeof attr);
 	attr.port_num = port;
+	if (is_eth) {
+		ret = mlx4_get_roce_gid_from_slave(dev->dev, port, slave, attr.grh.dgid.raw);
+		if (ret)
+			return ret;
+		attr.ah_flags = IB_AH_GRH;
+	}
 	ah = ib_create_ah(tun_ctx->pd, &attr);
 	if (IS_ERR(ah))
 		return -ENOMEM;
@@ -580,6 +587,41 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
 	int err;
 	int slave;
 	u8 *slave_id;
+	int is_eth = 0;
+
+	if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
+		is_eth = 0;
+	else
+		is_eth = 1;
+
+	if (is_eth) {
+		if (!(wc->wc_flags & IB_WC_GRH)) {
+			mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
+			return -EINVAL;
+		}
+		if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) {
+			mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
+			return -EINVAL;
+		}
+		if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) {
+			mlx4_ib_warn(ibdev, "failed matching grh\n");
+			return -ENOENT;
+		}
+		if (slave >= dev->dev->caps.sqp_demux) {
+			mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
+				     slave, dev->dev->caps.sqp_demux);
+			return -ENOENT;
+		}
+
+		if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad))
+			return 0;
+
+		err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
+		if (err)
+			pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+				 slave, err);
+		return 0;
+	}
 
 	/* Initially assume that this mad is for us */
 	slave = mlx4_master_func_num(dev->dev);
@@ -1260,12 +1302,8 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
 	memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
 	ah.ibah.device = ctx->ib_dev;
 	mlx4_ib_query_ah(&ah.ibah, &ah_attr);
-	if ((ah_attr.ah_flags & IB_AH_GRH) &&
-	    (ah_attr.grh.sgid_index != slave)) {
-		mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n",
-			     slave, ah_attr.grh.sgid_index);
-		return;
-	}
+	if (ah_attr.ah_flags & IB_AH_GRH)
+		ah_attr.grh.sgid_index = slave;
 
 	mlx4_ib_send_to_wire(dev, slave, ctx->port,
 			     is_proxy_qp0(dev, wc->src_qp, slave) ?
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index d8f4d1fe8494..c6ef2e7e3045 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1842,9 +1842,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
 {
 	struct ib_device *ib_dev = sqp->qp.ibqp.device;
 	struct mlx4_wqe_mlx_seg *mlx = wqe;
+	struct mlx4_wqe_ctrl_seg *ctrl = wqe;
 	struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
 	struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
-	struct net_device *ndev;
 	union ib_gid sgid;
 	u16 pkey;
 	int send_size;
@@ -1868,12 +1868,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
 			/* When multi-function is enabled, the ib_core gid
 			 * indexes don't necessarily match the hw ones, so
 			 * we must use our own cache */
-			sgid.global.subnet_prefix =
-				to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
-				subnet_prefix;
-			sgid.global.interface_id =
-				to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
-				guid_cache[ah->av.ib.gid_index];
+			err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev,
+							   be32_to_cpu(ah->av.ib.port_pd) >> 24,
+							   ah->av.ib.gid_index, &sgid.raw[0]);
+			if (err)
+				return err;
 		} else {
 			err = ib_get_cached_gid(ib_dev,
 						be32_to_cpu(ah->av.ib.port_pd) >> 24,
@@ -1902,6 +1901,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
 		sqp->ud_header.grh.flow_label    =
 			ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
 		sqp->ud_header.grh.hop_limit     = ah->av.ib.hop_limit;
+		if (is_eth)
+			memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
+		else {
 		if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
 			/* When multi-function is enabled, the ib_core gid
 			 * indexes don't necessarily match the hw ones, so
@@ -1917,6 +1919,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
 					  be32_to_cpu(ah->av.ib.port_pd) >> 24,
 					  ah->av.ib.gid_index,
 					  &sqp->ud_header.grh.source_gid);
+		}
 		memcpy(sqp->ud_header.grh.destination_gid.raw,
 			ah->av.ib.dgid, 16);
 	}
@@ -1948,17 +1951,19 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
 	}
 
 	if (is_eth) {
-		u8 *smac;
+		u8 smac[6];
+		struct in6_addr in6;
+
 		u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
 
 		mlx->sched_prio = cpu_to_be16(pcp);
 
 		memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
 		/* FIXME: cache smac value? */
-		ndev = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1];
-		if (!ndev)
-			return -ENODEV;
-		smac = ndev->dev_addr;
+		memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
+		memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
+		memcpy(&in6, sgid.raw, sizeof(in6));
+		rdma_get_ll_mac(&in6, smac);
 		memcpy(sqp->ud_header.eth.smac_h, smac, 6);
 		if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
 			mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 7aec6c833973..da829f4ef938 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -788,6 +788,10 @@ enum {
 	MLX4_USE_RR	= 1,
 };
 
+struct mlx4_roce_gid_entry {
+	u8 raw[16];
+};
+
 struct mlx4_priv {
 	struct mlx4_dev		dev;
 
@@ -834,6 +838,7 @@ struct mlx4_priv {
 	int			fs_hash_mode;
 	u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
 	__be64			slave_node_guids[MLX4_MFUNC_MAX];
+	struct mlx4_roce_gid_entry roce_gids[MLX4_MAX_PORTS][MLX4_ROCE_MAX_GIDS];
 
 	atomic_t		opreq_count;
 	struct work_struct	opreq_task;
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index a58bcbf1b806..9c063d6122b3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -927,3 +927,37 @@ void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap)
 	*stats_bitmap |= MLX4_STATS_ERROR_COUNTERS_MASK;
 }
 EXPORT_SYMBOL(mlx4_set_stats_bitmap);
+
+int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int i, found_ix = -1;
+
+	if (!mlx4_is_mfunc(dev))
+		return -EINVAL;
+
+	for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) {
+		if (!memcmp(priv->roce_gids[port - 1][i].raw, gid, 16)) {
+			found_ix = i;
+			break;
+		}
+	}
+
+	if (found_ix >= 0)
+		*slave_id = found_ix;
+
+	return (found_ix >= 0) ? 0 : -EINVAL;
+}
+EXPORT_SYMBOL(mlx4_get_slave_from_roce_gid);
+
+int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, u8 *gid)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	if (!mlx4_is_master(dev))
+		return -EINVAL;
+
+	memcpy(gid, priv->roce_gids[port - 1][slave_id].raw, 16);
+	return 0;
+}
+EXPORT_SYMBOL(mlx4_get_roce_gid_from_slave);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 5edd2c68274d..fbe6cda00ba7 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -48,6 +48,8 @@
 #define MSIX_LEGACY_SZ		4
 #define MIN_MSIX_P_PORT		5
 
+#define MLX4_ROCE_MAX_GIDS	128
+
 enum {
 	MLX4_FLAG_MSI_X		= 1 << 0,
 	MLX4_FLAG_OLD_PORT_CMDS	= 1 << 1,
@@ -1182,6 +1184,8 @@ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int
 
 void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid);
 __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave);
+int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id);
+int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, u8 *gid);
 
 int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn,
 				      u32 max_range_qpn);
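For reference, the smac handling introduced in build_mlx_header() above derives the QP1 source MAC from the source GID instead of from the port's netdev. The standalone sketch below (plain userspace C with sample values, not driver code) shows the EUI-64-to-MAC reverse mapping this relies on, which is essentially what rdma_get_ll_mac() performs on the sgid.

	/*
	 * Standalone sketch: recover the EUI-48 MAC embedded in an
	 * EUI-64-based RoCE GID. Demo values are illustrative.
	 */
	#include <stdio.h>

	static void gid_to_mac(const unsigned char gid[16], unsigned char mac[6])
	{
		mac[0] = gid[8] ^ 0x02;  /* undo the universal/local bit flip */
		mac[1] = gid[9];
		mac[2] = gid[10];
		/* gid[11] == 0xff and gid[12] == 0xfe are the EUI-64 filler bytes */
		mac[3] = gid[13];
		mac[4] = gid[14];
		mac[5] = gid[15];
	}

	int main(void)
	{
		const unsigned char gid[16] = { 0xfe, 0x80, 0, 0, 0, 0, 0, 0,
						0x02, 0x02, 0xc9, 0xff, 0xfe, 0x01, 0x02, 0x03 };
		unsigned char mac[6];
		int i;

		gid_to_mac(gid, mac);
		for (i = 0; i < 6; i++)
			printf("%02x%s", mac[i], i < 5 ? ":" : "\n");  /* 00:02:c9:01:02:03 */
		return 0;
	}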