diff options
author | Eli Cohen <eli@mellanox.co.il> | 2010-10-25 00:08:52 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2010-10-25 13:20:39 -0400 |
commit | fa417f7b520ee60b39f7e23528d2030af30a07d1 (patch) | |
tree | 8eca6ed53c985616e990b57b99a21714ed011534 /drivers/infiniband/hw/mlx4/qp.c | |
parent | 7ac870ed7d2316587ec06747c28cb9f44f67997d (diff) |
IB/mlx4: Add support for IBoE
Add support for IBoE to mlx4_ib. The bulk of the code is handling the
new address vector fields; mlx4 needs the MAC address of a remote node
to include it in a WQE (for datagrams) or in the QP context (for
connected QPs). Address resolution is done by assuming all unicast
GIDs are link-local IPv6 addresses.
Multicast group attach/detach needs to update the NIC's multicast
filters; but since attaching a QP to a multicast group can be done
before the QP is bound to a port, for IBoE we need to keep track of
all multicast groups that a QP is attached to before it transitions
from INIT to RTR (since it does not have a port in the INIT state).
Signed-off-by: Eli Cohen <eli@mellanox.co.il>
[ Many things cleaned up and otherwise monkeyed with; hope I didn't
introduce too many bugs. - Roland ]
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/mlx4/qp.c')
-rw-r--r-- | drivers/infiniband/hw/mlx4/qp.c | 130 |
1 files changed, 105 insertions, 25 deletions
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index bb1277c8fbf0..17f60fe6e5b6 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c | |||
@@ -33,6 +33,7 @@ | |||
33 | 33 | ||
34 | #include <linux/log2.h> | 34 | #include <linux/log2.h> |
35 | #include <linux/slab.h> | 35 | #include <linux/slab.h> |
36 | #include <linux/netdevice.h> | ||
36 | 37 | ||
37 | #include <rdma/ib_cache.h> | 38 | #include <rdma/ib_cache.h> |
38 | #include <rdma/ib_pack.h> | 39 | #include <rdma/ib_pack.h> |
@@ -48,17 +49,25 @@ enum { | |||
48 | 49 | ||
49 | enum { | 50 | enum { |
50 | MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83, | 51 | MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83, |
51 | MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f | 52 | MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f, |
53 | MLX4_IB_LINK_TYPE_IB = 0, | ||
54 | MLX4_IB_LINK_TYPE_ETH = 1 | ||
52 | }; | 55 | }; |
53 | 56 | ||
54 | enum { | 57 | enum { |
55 | /* | 58 | /* |
56 | * Largest possible UD header: send with GRH and immediate data. | 59 | * Largest possible UD header: send with GRH and immediate |
60 | * data plus 14 bytes for an Ethernet header. (LRH would only | ||
61 | * use 8 bytes, so Ethernet is the biggest case) | ||
57 | */ | 62 | */ |
58 | MLX4_IB_UD_HEADER_SIZE = 72, | 63 | MLX4_IB_UD_HEADER_SIZE = 78, |
59 | MLX4_IB_LSO_HEADER_SPARE = 128, | 64 | MLX4_IB_LSO_HEADER_SPARE = 128, |
60 | }; | 65 | }; |
61 | 66 | ||
67 | enum { | ||
68 | MLX4_IB_IBOE_ETHERTYPE = 0x8915 | ||
69 | }; | ||
70 | |||
62 | struct mlx4_ib_sqp { | 71 | struct mlx4_ib_sqp { |
63 | struct mlx4_ib_qp qp; | 72 | struct mlx4_ib_qp qp; |
64 | int pkey_index; | 73 | int pkey_index; |
@@ -462,6 +471,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, | |||
462 | mutex_init(&qp->mutex); | 471 | mutex_init(&qp->mutex); |
463 | spin_lock_init(&qp->sq.lock); | 472 | spin_lock_init(&qp->sq.lock); |
464 | spin_lock_init(&qp->rq.lock); | 473 | spin_lock_init(&qp->rq.lock); |
474 | INIT_LIST_HEAD(&qp->gid_list); | ||
465 | 475 | ||
466 | qp->state = IB_QPS_RESET; | 476 | qp->state = IB_QPS_RESET; |
467 | if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) | 477 | if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) |
@@ -649,6 +659,16 @@ static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *re | |||
649 | } | 659 | } |
650 | } | 660 | } |
651 | 661 | ||
662 | static void del_gid_entries(struct mlx4_ib_qp *qp) | ||
663 | { | ||
664 | struct mlx4_ib_gid_entry *ge, *tmp; | ||
665 | |||
666 | list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { | ||
667 | list_del(&ge->list); | ||
668 | kfree(ge); | ||
669 | } | ||
670 | } | ||
671 | |||
652 | static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, | 672 | static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, |
653 | int is_user) | 673 | int is_user) |
654 | { | 674 | { |
@@ -695,6 +715,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, | |||
695 | if (!qp->ibqp.srq) | 715 | if (!qp->ibqp.srq) |
696 | mlx4_db_free(dev->dev, &qp->db); | 716 | mlx4_db_free(dev->dev, &qp->db); |
697 | } | 717 | } |
718 | |||
719 | del_gid_entries(qp); | ||
698 | } | 720 | } |
699 | 721 | ||
700 | struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, | 722 | struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, |
@@ -852,6 +874,12 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port) | |||
852 | static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, | 874 | static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, |
853 | struct mlx4_qp_path *path, u8 port) | 875 | struct mlx4_qp_path *path, u8 port) |
854 | { | 876 | { |
877 | int err; | ||
878 | int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) == | ||
879 | IB_LINK_LAYER_ETHERNET; | ||
880 | u8 mac[6]; | ||
881 | int is_mcast; | ||
882 | |||
855 | path->grh_mylmc = ah->src_path_bits & 0x7f; | 883 | path->grh_mylmc = ah->src_path_bits & 0x7f; |
856 | path->rlid = cpu_to_be16(ah->dlid); | 884 | path->rlid = cpu_to_be16(ah->dlid); |
857 | if (ah->static_rate) { | 885 | if (ah->static_rate) { |
@@ -882,9 +910,35 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, | |||
882 | path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | | 910 | path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | |
883 | ((port - 1) << 6) | ((ah->sl & 0xf) << 2); | 911 | ((port - 1) << 6) | ((ah->sl & 0xf) << 2); |
884 | 912 | ||
913 | if (is_eth) { | ||
914 | if (!(ah->ah_flags & IB_AH_GRH)) | ||
915 | return -1; | ||
916 | |||
917 | err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port); | ||
918 | if (err) | ||
919 | return err; | ||
920 | |||
921 | memcpy(path->dmac, mac, 6); | ||
922 | path->ackto = MLX4_IB_LINK_TYPE_ETH; | ||
923 | /* use index 0 into MAC table for IBoE */ | ||
924 | path->grh_mylmc &= 0x80; | ||
925 | } | ||
926 | |||
885 | return 0; | 927 | return 0; |
886 | } | 928 | } |
887 | 929 | ||
930 | static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) | ||
931 | { | ||
932 | struct mlx4_ib_gid_entry *ge, *tmp; | ||
933 | |||
934 | list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { | ||
935 | if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) { | ||
936 | ge->added = 1; | ||
937 | ge->port = qp->port; | ||
938 | } | ||
939 | } | ||
940 | } | ||
941 | |||
888 | static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, | 942 | static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, |
889 | const struct ib_qp_attr *attr, int attr_mask, | 943 | const struct ib_qp_attr *attr, int attr_mask, |
890 | enum ib_qp_state cur_state, enum ib_qp_state new_state) | 944 | enum ib_qp_state cur_state, enum ib_qp_state new_state) |
@@ -980,7 +1034,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, | |||
980 | } | 1034 | } |
981 | 1035 | ||
982 | if (attr_mask & IB_QP_TIMEOUT) { | 1036 | if (attr_mask & IB_QP_TIMEOUT) { |
983 | context->pri_path.ackto = attr->timeout << 3; | 1037 | context->pri_path.ackto |= attr->timeout << 3; |
984 | optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT; | 1038 | optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT; |
985 | } | 1039 | } |
986 | 1040 | ||
@@ -1118,8 +1172,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, | |||
1118 | qp->atomic_rd_en = attr->qp_access_flags; | 1172 | qp->atomic_rd_en = attr->qp_access_flags; |
1119 | if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) | 1173 | if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) |
1120 | qp->resp_depth = attr->max_dest_rd_atomic; | 1174 | qp->resp_depth = attr->max_dest_rd_atomic; |
1121 | if (attr_mask & IB_QP_PORT) | 1175 | if (attr_mask & IB_QP_PORT) { |
1122 | qp->port = attr->port_num; | 1176 | qp->port = attr->port_num; |
1177 | update_mcg_macs(dev, qp); | ||
1178 | } | ||
1123 | if (attr_mask & IB_QP_ALT_PATH) | 1179 | if (attr_mask & IB_QP_ALT_PATH) |
1124 | qp->alt_port = attr->alt_port_num; | 1180 | qp->alt_port = attr->alt_port_num; |
1125 | 1181 | ||
@@ -1226,35 +1282,45 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, | |||
1226 | int header_size; | 1282 | int header_size; |
1227 | int spc; | 1283 | int spc; |
1228 | int i; | 1284 | int i; |
1285 | int is_eth; | ||
1286 | int is_grh; | ||
1229 | 1287 | ||
1230 | send_size = 0; | 1288 | send_size = 0; |
1231 | for (i = 0; i < wr->num_sge; ++i) | 1289 | for (i = 0; i < wr->num_sge; ++i) |
1232 | send_size += wr->sg_list[i].length; | 1290 | send_size += wr->sg_list[i].length; |
1233 | 1291 | ||
1234 | ib_ud_header_init(send_size, 1, 0, mlx4_ib_ah_grh_present(ah), 0, &sqp->ud_header); | 1292 | is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; |
1293 | is_grh = mlx4_ib_ah_grh_present(ah); | ||
1294 | ib_ud_header_init(send_size, !is_eth, is_eth, is_grh, 0, &sqp->ud_header); | ||
1295 | |||
1296 | if (!is_eth) { | ||
1297 | sqp->ud_header.lrh.service_level = | ||
1298 | be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; | ||
1299 | sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid; | ||
1300 | sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); | ||
1301 | } | ||
1235 | 1302 | ||
1236 | sqp->ud_header.lrh.service_level = | 1303 | if (is_grh) { |
1237 | be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28; | ||
1238 | sqp->ud_header.lrh.destination_lid = ah->av.dlid; | ||
1239 | sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f); | ||
1240 | if (mlx4_ib_ah_grh_present(ah)) { | ||
1241 | sqp->ud_header.grh.traffic_class = | 1304 | sqp->ud_header.grh.traffic_class = |
1242 | (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff; | 1305 | (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff; |
1243 | sqp->ud_header.grh.flow_label = | 1306 | sqp->ud_header.grh.flow_label = |
1244 | ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff); | 1307 | ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); |
1245 | sqp->ud_header.grh.hop_limit = ah->av.hop_limit; | 1308 | sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; |
1246 | ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24, | 1309 | ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, |
1247 | ah->av.gid_index, &sqp->ud_header.grh.source_gid); | 1310 | ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid); |
1248 | memcpy(sqp->ud_header.grh.destination_gid.raw, | 1311 | memcpy(sqp->ud_header.grh.destination_gid.raw, |
1249 | ah->av.dgid, 16); | 1312 | ah->av.ib.dgid, 16); |
1250 | } | 1313 | } |
1251 | 1314 | ||
1252 | mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); | 1315 | mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); |
1253 | mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | | 1316 | |
1254 | (sqp->ud_header.lrh.destination_lid == | 1317 | if (!is_eth) { |
1255 | IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | | 1318 | mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | |
1256 | (sqp->ud_header.lrh.service_level << 8)); | 1319 | (sqp->ud_header.lrh.destination_lid == |
1257 | mlx->rlid = sqp->ud_header.lrh.destination_lid; | 1320 | IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | |
1321 | (sqp->ud_header.lrh.service_level << 8)); | ||
1322 | mlx->rlid = sqp->ud_header.lrh.destination_lid; | ||
1323 | } | ||
1258 | 1324 | ||
1259 | switch (wr->opcode) { | 1325 | switch (wr->opcode) { |
1260 | case IB_WR_SEND: | 1326 | case IB_WR_SEND: |
@@ -1270,9 +1336,21 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, | |||
1270 | return -EINVAL; | 1336 | return -EINVAL; |
1271 | } | 1337 | } |
1272 | 1338 | ||
1273 | sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; | 1339 | if (is_eth) { |
1274 | if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) | 1340 | u8 *smac; |
1275 | sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; | 1341 | |
1342 | memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); | ||
1343 | /* FIXME: cache smac value? */ | ||
1344 | smac = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]->dev_addr; | ||
1345 | memcpy(sqp->ud_header.eth.smac_h, smac, 6); | ||
1346 | if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) | ||
1347 | mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); | ||
1348 | sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); | ||
1349 | } else { | ||
1350 | sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; | ||
1351 | if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) | ||
1352 | sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; | ||
1353 | } | ||
1276 | sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); | 1354 | sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); |
1277 | if (!sqp->qp.ibqp.qp_num) | 1355 | if (!sqp->qp.ibqp.qp_num) |
1278 | ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); | 1356 | ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); |
@@ -1434,6 +1512,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, | |||
1434 | memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); | 1512 | memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); |
1435 | dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); | 1513 | dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); |
1436 | dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); | 1514 | dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); |
1515 | dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; | ||
1516 | memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); | ||
1437 | } | 1517 | } |
1438 | 1518 | ||
1439 | static void set_mlx_icrc_seg(void *dseg) | 1519 | static void set_mlx_icrc_seg(void *dseg) |