aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/mlx4/qp.c
diff options
context:
space:
mode:
authorEli Cohen <eli@mellanox.co.il>2010-10-25 00:08:52 -0400
committerRoland Dreier <rolandd@cisco.com>2010-10-25 13:20:39 -0400
commitfa417f7b520ee60b39f7e23528d2030af30a07d1 (patch)
tree8eca6ed53c985616e990b57b99a21714ed011534 /drivers/infiniband/hw/mlx4/qp.c
parent7ac870ed7d2316587ec06747c28cb9f44f67997d (diff)
IB/mlx4: Add support for IBoE
Add support for IBoE to mlx4_ib. The bulk of the code is handling the new address vector fields; mlx4 needs the MAC address of a remote node to include it in a WQE (for datagrams) or in the QP context (for connected QPs). Address resolution is done by assuming all unicast GIDs are link-local IPv6 addresses. Multicast group attach/detach needs to update the NIC's multicast filters; but since attaching a QP to a multicast group can be done before the QP is bound to a port, for IBoE we need to keep track of all multicast groups that a QP is attached to before it transitions from INIT to RTR (since it does not have a port in the INIT state). Signed-off-by: Eli Cohen <eli@mellanox.co.il> [ Many things cleaned up and otherwise monkeyed with; hope I didn't introduce too many bugs. - Roland ] Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/mlx4/qp.c')
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c130
1 files changed, 105 insertions, 25 deletions
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index bb1277c8fbf0..17f60fe6e5b6 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -33,6 +33,7 @@
33 33
34#include <linux/log2.h> 34#include <linux/log2.h>
35#include <linux/slab.h> 35#include <linux/slab.h>
36#include <linux/netdevice.h>
36 37
37#include <rdma/ib_cache.h> 38#include <rdma/ib_cache.h>
38#include <rdma/ib_pack.h> 39#include <rdma/ib_pack.h>
@@ -48,17 +49,25 @@ enum {
48 49
49enum { 50enum {
50 MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83, 51 MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83,
51 MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f 52 MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
53 MLX4_IB_LINK_TYPE_IB = 0,
54 MLX4_IB_LINK_TYPE_ETH = 1
52}; 55};
53 56
54enum { 57enum {
55 /* 58 /*
56 * Largest possible UD header: send with GRH and immediate data. 59 * Largest possible UD header: send with GRH and immediate
60 * data plus 14 bytes for an Ethernet header. (LRH would only
61 * use 8 bytes, so Ethernet is the biggest case)
57 */ 62 */
58 MLX4_IB_UD_HEADER_SIZE = 72, 63 MLX4_IB_UD_HEADER_SIZE = 78,
59 MLX4_IB_LSO_HEADER_SPARE = 128, 64 MLX4_IB_LSO_HEADER_SPARE = 128,
60}; 65};
61 66
67enum {
68 MLX4_IB_IBOE_ETHERTYPE = 0x8915
69};
70
62struct mlx4_ib_sqp { 71struct mlx4_ib_sqp {
63 struct mlx4_ib_qp qp; 72 struct mlx4_ib_qp qp;
64 int pkey_index; 73 int pkey_index;
@@ -462,6 +471,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
462 mutex_init(&qp->mutex); 471 mutex_init(&qp->mutex);
463 spin_lock_init(&qp->sq.lock); 472 spin_lock_init(&qp->sq.lock);
464 spin_lock_init(&qp->rq.lock); 473 spin_lock_init(&qp->rq.lock);
474 INIT_LIST_HEAD(&qp->gid_list);
465 475
466 qp->state = IB_QPS_RESET; 476 qp->state = IB_QPS_RESET;
467 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) 477 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
@@ -649,6 +659,16 @@ static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *re
649 } 659 }
650} 660}
651 661
662static void del_gid_entries(struct mlx4_ib_qp *qp)
663{
664 struct mlx4_ib_gid_entry *ge, *tmp;
665
666 list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
667 list_del(&ge->list);
668 kfree(ge);
669 }
670}
671
652static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, 672static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
653 int is_user) 673 int is_user)
654{ 674{
@@ -695,6 +715,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
695 if (!qp->ibqp.srq) 715 if (!qp->ibqp.srq)
696 mlx4_db_free(dev->dev, &qp->db); 716 mlx4_db_free(dev->dev, &qp->db);
697 } 717 }
718
719 del_gid_entries(qp);
698} 720}
699 721
700struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, 722struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
@@ -852,6 +874,12 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
852static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, 874static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
853 struct mlx4_qp_path *path, u8 port) 875 struct mlx4_qp_path *path, u8 port)
854{ 876{
877 int err;
878 int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
879 IB_LINK_LAYER_ETHERNET;
880 u8 mac[6];
881 int is_mcast;
882
855 path->grh_mylmc = ah->src_path_bits & 0x7f; 883 path->grh_mylmc = ah->src_path_bits & 0x7f;
856 path->rlid = cpu_to_be16(ah->dlid); 884 path->rlid = cpu_to_be16(ah->dlid);
857 if (ah->static_rate) { 885 if (ah->static_rate) {
@@ -882,9 +910,35 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
882 path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | 910 path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
883 ((port - 1) << 6) | ((ah->sl & 0xf) << 2); 911 ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
884 912
913 if (is_eth) {
914 if (!(ah->ah_flags & IB_AH_GRH))
915 return -1;
916
917 err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port);
918 if (err)
919 return err;
920
921 memcpy(path->dmac, mac, 6);
922 path->ackto = MLX4_IB_LINK_TYPE_ETH;
923 /* use index 0 into MAC table for IBoE */
924 path->grh_mylmc &= 0x80;
925 }
926
885 return 0; 927 return 0;
886} 928}
887 929
930static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
931{
932 struct mlx4_ib_gid_entry *ge, *tmp;
933
934 list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
935 if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) {
936 ge->added = 1;
937 ge->port = qp->port;
938 }
939 }
940}
941
888static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, 942static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
889 const struct ib_qp_attr *attr, int attr_mask, 943 const struct ib_qp_attr *attr, int attr_mask,
890 enum ib_qp_state cur_state, enum ib_qp_state new_state) 944 enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -980,7 +1034,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
980 } 1034 }
981 1035
982 if (attr_mask & IB_QP_TIMEOUT) { 1036 if (attr_mask & IB_QP_TIMEOUT) {
983 context->pri_path.ackto = attr->timeout << 3; 1037 context->pri_path.ackto |= attr->timeout << 3;
984 optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT; 1038 optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
985 } 1039 }
986 1040
@@ -1118,8 +1172,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1118 qp->atomic_rd_en = attr->qp_access_flags; 1172 qp->atomic_rd_en = attr->qp_access_flags;
1119 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) 1173 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1120 qp->resp_depth = attr->max_dest_rd_atomic; 1174 qp->resp_depth = attr->max_dest_rd_atomic;
1121 if (attr_mask & IB_QP_PORT) 1175 if (attr_mask & IB_QP_PORT) {
1122 qp->port = attr->port_num; 1176 qp->port = attr->port_num;
1177 update_mcg_macs(dev, qp);
1178 }
1123 if (attr_mask & IB_QP_ALT_PATH) 1179 if (attr_mask & IB_QP_ALT_PATH)
1124 qp->alt_port = attr->alt_port_num; 1180 qp->alt_port = attr->alt_port_num;
1125 1181
@@ -1226,35 +1282,45 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1226 int header_size; 1282 int header_size;
1227 int spc; 1283 int spc;
1228 int i; 1284 int i;
1285 int is_eth;
1286 int is_grh;
1229 1287
1230 send_size = 0; 1288 send_size = 0;
1231 for (i = 0; i < wr->num_sge; ++i) 1289 for (i = 0; i < wr->num_sge; ++i)
1232 send_size += wr->sg_list[i].length; 1290 send_size += wr->sg_list[i].length;
1233 1291
1234 ib_ud_header_init(send_size, 1, 0, mlx4_ib_ah_grh_present(ah), 0, &sqp->ud_header); 1292 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
1293 is_grh = mlx4_ib_ah_grh_present(ah);
1294 ib_ud_header_init(send_size, !is_eth, is_eth, is_grh, 0, &sqp->ud_header);
1295
1296 if (!is_eth) {
1297 sqp->ud_header.lrh.service_level =
1298 be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
1299 sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid;
1300 sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
1301 }
1235 1302
1236 sqp->ud_header.lrh.service_level = 1303 if (is_grh) {
1237 be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
1238 sqp->ud_header.lrh.destination_lid = ah->av.dlid;
1239 sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f);
1240 if (mlx4_ib_ah_grh_present(ah)) {
1241 sqp->ud_header.grh.traffic_class = 1304 sqp->ud_header.grh.traffic_class =
1242 (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff; 1305 (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
1243 sqp->ud_header.grh.flow_label = 1306 sqp->ud_header.grh.flow_label =
1244 ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff); 1307 ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
1245 sqp->ud_header.grh.hop_limit = ah->av.hop_limit; 1308 sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
1246 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24, 1309 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24,
1247 ah->av.gid_index, &sqp->ud_header.grh.source_gid); 1310 ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid);
1248 memcpy(sqp->ud_header.grh.destination_gid.raw, 1311 memcpy(sqp->ud_header.grh.destination_gid.raw,
1249 ah->av.dgid, 16); 1312 ah->av.ib.dgid, 16);
1250 } 1313 }
1251 1314
1252 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); 1315 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
1253 mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | 1316
1254 (sqp->ud_header.lrh.destination_lid == 1317 if (!is_eth) {
1255 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | 1318 mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
1256 (sqp->ud_header.lrh.service_level << 8)); 1319 (sqp->ud_header.lrh.destination_lid ==
1257 mlx->rlid = sqp->ud_header.lrh.destination_lid; 1320 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
1321 (sqp->ud_header.lrh.service_level << 8));
1322 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1323 }
1258 1324
1259 switch (wr->opcode) { 1325 switch (wr->opcode) {
1260 case IB_WR_SEND: 1326 case IB_WR_SEND:
@@ -1270,9 +1336,21 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1270 return -EINVAL; 1336 return -EINVAL;
1271 } 1337 }
1272 1338
1273 sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; 1339 if (is_eth) {
1274 if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) 1340 u8 *smac;
1275 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; 1341
1342 memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
1343 /* FIXME: cache smac value? */
1344 smac = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]->dev_addr;
1345 memcpy(sqp->ud_header.eth.smac_h, smac, 6);
1346 if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
1347 mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
1348 sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
1349 } else {
1350 sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
1351 if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
1352 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
1353 }
1276 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); 1354 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
1277 if (!sqp->qp.ibqp.qp_num) 1355 if (!sqp->qp.ibqp.qp_num)
1278 ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); 1356 ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
@@ -1434,6 +1512,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
1434 memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); 1512 memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
1435 dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); 1513 dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
1436 dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); 1514 dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
1515 dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
1516 memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
1437} 1517}
1438 1518
1439static void set_mlx_icrc_seg(void *dseg) 1519static void set_mlx_icrc_seg(void *dseg)