author	Eli Cohen <eli@mellanox.co.il>	2010-10-25 00:08:52 -0400
committer	Roland Dreier <rolandd@cisco.com>	2010-10-25 13:20:39 -0400
commit	fa417f7b520ee60b39f7e23528d2030af30a07d1 (patch)
tree	8eca6ed53c985616e990b57b99a21714ed011534
parent	7ac870ed7d2316587ec06747c28cb9f44f67997d (diff)
IB/mlx4: Add support for IBoE
Add support for IBoE to mlx4_ib. The bulk of the code is handling the new address vector fields; mlx4 needs the MAC address of a remote node to include it in a WQE (for datagrams) or in the QP context (for connected QPs). Address resolution is done by assuming all unicast GIDs are link-local IPv6 addresses.

Multicast group attach/detach needs to update the NIC's multicast filters; but since attaching a QP to a multicast group can be done before the QP is bound to a port, for IBoE we need to keep track of all multicast groups that a QP is attached to before it transitions from INIT to RTR (since it does not have a port in the INIT state).

Signed-off-by: Eli Cohen <eli@mellanox.co.il>

[ Many things cleaned up and otherwise monkeyed with; hope I didn't introduce too many bugs. - Roland ]

Signed-off-by: Roland Dreier <rolandd@cisco.com>
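As an illustration of the address-resolution assumption above (this sketch is not part of the patch): a unicast IBoE GID is just the fe80::/64 link-local prefix plus a modified EUI-64 interface ID derived from the port's MAC, so the peer MAC can be recovered directly from the destination GID with no lookup. The user-space sketch below mirrors the construction done by mlx4_addrconf_ifid_eui48() in the patch and the reverse step performed by rdma_get_ll_mac(); the helper names in the sketch itself are made up for the example.

/*
 * Illustrative sketch only: MAC <-> link-local-GID mapping used by IBoE.
 * Helper names are hypothetical; the byte manipulation follows the patch.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* MAC -> 8-byte interface ID (modified EUI-64), as mlx4_addrconf_ifid_eui48() does */
static void mac_to_eui64(uint8_t *eui, const uint8_t *mac)
{
	memcpy(eui, mac, 3);
	memcpy(eui + 5, mac + 3, 3);
	eui[3] = 0xFF;
	eui[4] = 0xFE;
	eui[0] ^= 2;		/* flip the universal/local bit */
}

/* link-local GID (fe80::/64 prefix + EUI-64) -> MAC, the reverse mapping */
static void gid_to_mac(uint8_t *mac, const uint8_t *gid /* 16 bytes */)
{
	const uint8_t *eui = gid + 8;

	memcpy(mac, eui, 3);
	memcpy(mac + 3, eui + 5, 3);
	mac[0] ^= 2;
}

int main(void)
{
	uint8_t mac[6] = { 0x00, 0x02, 0xc9, 0x12, 0x34, 0x56 };
	uint8_t gid[16] = { 0xfe, 0x80 };	/* fe80::/64 prefix, rest zero */
	uint8_t back[6];

	mac_to_eui64(gid + 8, mac);		/* build the interface-ID half */
	gid_to_mac(back, gid);			/* recover the MAC from the GID */

	printf("round trip %s\n", memcmp(mac, back, 6) ? "failed" : "ok");
	return 0;
}

Because every unicast GID the driver publishes has this shape, mlx4_ib_resolve_grh() in the patch can fill the WQE or QP-context MAC straight from the destination GID and simply rejects unicast GIDs that are not link-local; multicast GIDs are instead mapped to multicast MACs for the NIC filters.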
-rw-r--r--	drivers/infiniband/hw/mlx4/ah.c	153
-rw-r--r--	drivers/infiniband/hw/mlx4/mad.c	32
-rw-r--r--	drivers/infiniband/hw/mlx4/main.c	448
-rw-r--r--	drivers/infiniband/hw/mlx4/mlx4_ib.h	32
-rw-r--r--	drivers/infiniband/hw/mlx4/qp.c	130
-rw-r--r--	include/linux/mlx4/device.h	27
6 files changed, 714 insertions(+), 108 deletions(-)
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 11a236f8d88..3bf3544c0aa 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -30,66 +30,153 @@
30 * SOFTWARE. 30 * SOFTWARE.
31 */ 31 */
32 32
33#include <rdma/ib_addr.h>
34
33#include <linux/slab.h> 35#include <linux/slab.h>
36#include <linux/inet.h>
37#include <linux/string.h>
34 38
35#include "mlx4_ib.h" 39#include "mlx4_ib.h"
36 40
37struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) 41int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
42 u8 *mac, int *is_mcast, u8 port)
38{ 43{
39 struct mlx4_dev *dev = to_mdev(pd->device)->dev; 44 struct in6_addr in6;
40 struct mlx4_ib_ah *ah;
41 45
42 ah = kmalloc(sizeof *ah, GFP_ATOMIC); 46 *is_mcast = 0;
43 if (!ah)
44 return ERR_PTR(-ENOMEM);
45 47
46 memset(&ah->av, 0, sizeof ah->av); 48 memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6);
49 if (rdma_link_local_addr(&in6))
50 rdma_get_ll_mac(&in6, mac);
51 else if (rdma_is_multicast_addr(&in6)) {
52 rdma_get_mcast_mac(&in6, mac);
53 *is_mcast = 1;
54 } else
55 return -EINVAL;
47 56
48 ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); 57 return 0;
49 ah->av.g_slid = ah_attr->src_path_bits; 58}
50 ah->av.dlid = cpu_to_be16(ah_attr->dlid); 59
51 if (ah_attr->static_rate) { 60static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
52 ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; 61 struct mlx4_ib_ah *ah)
53 while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && 62{
54 !(1 << ah->av.stat_rate & dev->caps.stat_rate_support)) 63 struct mlx4_dev *dev = to_mdev(pd->device)->dev;
55 --ah->av.stat_rate; 64
56 } 65 ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
57 ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); 66 ah->av.ib.g_slid = ah_attr->src_path_bits;
58 if (ah_attr->ah_flags & IB_AH_GRH) { 67 if (ah_attr->ah_flags & IB_AH_GRH) {
59 ah->av.g_slid |= 0x80; 68 ah->av.ib.g_slid |= 0x80;
60 ah->av.gid_index = ah_attr->grh.sgid_index; 69 ah->av.ib.gid_index = ah_attr->grh.sgid_index;
61 ah->av.hop_limit = ah_attr->grh.hop_limit; 70 ah->av.ib.hop_limit = ah_attr->grh.hop_limit;
62 ah->av.sl_tclass_flowlabel |= 71 ah->av.ib.sl_tclass_flowlabel |=
63 cpu_to_be32((ah_attr->grh.traffic_class << 20) | 72 cpu_to_be32((ah_attr->grh.traffic_class << 20) |
64 ah_attr->grh.flow_label); 73 ah_attr->grh.flow_label);
65 memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16); 74 memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16);
75 }
76
77 ah->av.ib.dlid = cpu_to_be16(ah_attr->dlid);
78 if (ah_attr->static_rate) {
79 ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
80 while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
81 !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
82 --ah->av.ib.stat_rate;
66 } 83 }
84 ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
67 85
68 return &ah->ibah; 86 return &ah->ibah;
69} 87}
70 88
89static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
90 struct mlx4_ib_ah *ah)
91{
92 struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
93 struct mlx4_dev *dev = ibdev->dev;
94 u8 mac[6];
95 int err;
96 int is_mcast;
97
98 err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num);
99 if (err)
100 return ERR_PTR(err);
101
102 memcpy(ah->av.eth.mac, mac, 6);
103 ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
104 ah->av.eth.gid_index = ah_attr->grh.sgid_index;
105 if (ah_attr->static_rate) {
106 ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
107 while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
108 !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support))
109 --ah->av.eth.stat_rate;
110 }
111
112 /*
113 * HW requires multicast LID so we just choose one.
114 */
115 if (is_mcast)
116 ah->av.ib.dlid = cpu_to_be16(0xc000);
117
118 memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
119 ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
120
121 return &ah->ibah;
122}
123
124struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
125{
126 struct mlx4_ib_ah *ah;
127 struct ib_ah *ret;
128
129 ah = kzalloc(sizeof *ah, GFP_ATOMIC);
130 if (!ah)
131 return ERR_PTR(-ENOMEM);
132
133 if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) {
134 if (!(ah_attr->ah_flags & IB_AH_GRH)) {
135 ret = ERR_PTR(-EINVAL);
136 } else {
137 /*
138 * TBD: need to handle the case when we get
139 * called in an atomic context and there we
140 * might sleep. We don't expect this
141 * currently since we're working with link
142 * local addresses which we can translate
143 * without going to sleep.
144 */
145 ret = create_iboe_ah(pd, ah_attr, ah);
146 }
147
148 if (IS_ERR(ret))
149 kfree(ah);
150
151 return ret;
152 } else
153 return create_ib_ah(pd, ah_attr, ah); /* never fails */
154}
155
71int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) 156int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
72{ 157{
73 struct mlx4_ib_ah *ah = to_mah(ibah); 158 struct mlx4_ib_ah *ah = to_mah(ibah);
159 enum rdma_link_layer ll;
74 160
75 memset(ah_attr, 0, sizeof *ah_attr); 161 memset(ah_attr, 0, sizeof *ah_attr);
76 ah_attr->dlid = be16_to_cpu(ah->av.dlid); 162 ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
77 ah_attr->sl = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28; 163 ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
78 ah_attr->port_num = be32_to_cpu(ah->av.port_pd) >> 24; 164 ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num);
79 if (ah->av.stat_rate) 165 ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0;
80 ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET; 166 if (ah->av.ib.stat_rate)
81 ah_attr->src_path_bits = ah->av.g_slid & 0x7F; 167 ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;
168 ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F;
82 169
83 if (mlx4_ib_ah_grh_present(ah)) { 170 if (mlx4_ib_ah_grh_present(ah)) {
84 ah_attr->ah_flags = IB_AH_GRH; 171 ah_attr->ah_flags = IB_AH_GRH;
85 172
86 ah_attr->grh.traffic_class = 173 ah_attr->grh.traffic_class =
87 be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20; 174 be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20;
88 ah_attr->grh.flow_label = 175 ah_attr->grh.flow_label =
89 be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff; 176 be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff;
90 ah_attr->grh.hop_limit = ah->av.hop_limit; 177 ah_attr->grh.hop_limit = ah->av.ib.hop_limit;
91 ah_attr->grh.sgid_index = ah->av.gid_index; 178 ah_attr->grh.sgid_index = ah->av.ib.gid_index;
92 memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16); 179 memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16);
93 } 180 }
94 181
95 return 0; 182 return 0;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index f38d5b11892..c9a8dd63b9e 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -311,19 +311,25 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
311 struct ib_mad_agent *agent; 311 struct ib_mad_agent *agent;
312 int p, q; 312 int p, q;
313 int ret; 313 int ret;
314 enum rdma_link_layer ll;
314 315
315 for (p = 0; p < dev->num_ports; ++p) 316 for (p = 0; p < dev->num_ports; ++p) {
317 ll = rdma_port_get_link_layer(&dev->ib_dev, p + 1);
316 for (q = 0; q <= 1; ++q) { 318 for (q = 0; q <= 1; ++q) {
317 agent = ib_register_mad_agent(&dev->ib_dev, p + 1, 319 if (ll == IB_LINK_LAYER_INFINIBAND) {
318 q ? IB_QPT_GSI : IB_QPT_SMI, 320 agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
319 NULL, 0, send_handler, 321 q ? IB_QPT_GSI : IB_QPT_SMI,
320 NULL, NULL); 322 NULL, 0, send_handler,
321 if (IS_ERR(agent)) { 323 NULL, NULL);
322 ret = PTR_ERR(agent); 324 if (IS_ERR(agent)) {
323 goto err; 325 ret = PTR_ERR(agent);
324 } 326 goto err;
325 dev->send_agent[p][q] = agent; 327 }
328 dev->send_agent[p][q] = agent;
329 } else
330 dev->send_agent[p][q] = NULL;
326 } 331 }
332 }
327 333
328 return 0; 334 return 0;
329 335
@@ -344,8 +350,10 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
344 for (p = 0; p < dev->num_ports; ++p) { 350 for (p = 0; p < dev->num_ports; ++p) {
345 for (q = 0; q <= 1; ++q) { 351 for (q = 0; q <= 1; ++q) {
346 agent = dev->send_agent[p][q]; 352 agent = dev->send_agent[p][q];
347 dev->send_agent[p][q] = NULL; 353 if (agent) {
348 ib_unregister_mad_agent(agent); 354 dev->send_agent[p][q] = NULL;
355 ib_unregister_mad_agent(agent);
356 }
349 } 357 }
350 358
351 if (dev->sm_ah[p]) 359 if (dev->sm_ah[p])
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 4e94e360e43..e65db73fc27 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -35,9 +35,13 @@
35#include <linux/init.h> 35#include <linux/init.h>
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <linux/errno.h> 37#include <linux/errno.h>
38#include <linux/netdevice.h>
39#include <linux/inetdevice.h>
40#include <linux/rtnetlink.h>
38 41
39#include <rdma/ib_smi.h> 42#include <rdma/ib_smi.h>
40#include <rdma/ib_user_verbs.h> 43#include <rdma/ib_user_verbs.h>
44#include <rdma/ib_addr.h>
41 45
42#include <linux/mlx4/driver.h> 46#include <linux/mlx4/driver.h>
43#include <linux/mlx4/cmd.h> 47#include <linux/mlx4/cmd.h>
@@ -58,6 +62,15 @@ static const char mlx4_ib_version[] =
58 DRV_NAME ": Mellanox ConnectX InfiniBand driver v" 62 DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
59 DRV_VERSION " (" DRV_RELDATE ")\n"; 63 DRV_VERSION " (" DRV_RELDATE ")\n";
60 64
65struct update_gid_work {
66 struct work_struct work;
67 union ib_gid gids[128];
68 struct mlx4_ib_dev *dev;
69 int port;
70};
71
72static struct workqueue_struct *wq;
73
61static void init_query_mad(struct ib_smp *mad) 74static void init_query_mad(struct ib_smp *mad)
62{ 75{
63 mad->base_version = 1; 76 mad->base_version = 1;
@@ -154,28 +167,19 @@ out:
154 return err; 167 return err;
155} 168}
156 169
157static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, 170static enum rdma_link_layer
158 struct ib_port_attr *props) 171mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
159{ 172{
160 struct ib_smp *in_mad = NULL; 173 struct mlx4_dev *dev = to_mdev(device)->dev;
161 struct ib_smp *out_mad = NULL;
162 int err = -ENOMEM;
163 174
164 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 175 return dev->caps.port_mask & (1 << (port_num - 1)) ?
165 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); 176 IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
166 if (!in_mad || !out_mad) 177}
167 goto out;
168
169 memset(props, 0, sizeof *props);
170
171 init_query_mad(in_mad);
172 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
173 in_mad->attr_mod = cpu_to_be32(port);
174
175 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
176 if (err)
177 goto out;
178 178
179static int ib_link_query_port(struct ib_device *ibdev, u8 port,
180 struct ib_port_attr *props,
181 struct ib_smp *out_mad)
182{
179 props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); 183 props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
180 props->lmc = out_mad->data[34] & 0x7; 184 props->lmc = out_mad->data[34] & 0x7;
181 props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); 185 props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
@@ -196,6 +200,80 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
196 props->max_vl_num = out_mad->data[37] >> 4; 200 props->max_vl_num = out_mad->data[37] >> 4;
197 props->init_type_reply = out_mad->data[41] >> 4; 201 props->init_type_reply = out_mad->data[41] >> 4;
198 202
203 return 0;
204}
205
206static u8 state_to_phys_state(enum ib_port_state state)
207{
208 return state == IB_PORT_ACTIVE ? 5 : 3;
209}
210
211static int eth_link_query_port(struct ib_device *ibdev, u8 port,
212 struct ib_port_attr *props,
213 struct ib_smp *out_mad)
214{
215 struct mlx4_ib_iboe *iboe = &to_mdev(ibdev)->iboe;
216 struct net_device *ndev;
217 enum ib_mtu tmp;
218
219 props->active_width = IB_WIDTH_4X;
220 props->active_speed = 4;
221 props->port_cap_flags = IB_PORT_CM_SUP;
222 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
223 props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
224 props->pkey_tbl_len = 1;
225 props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
226 props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
227 props->max_mtu = IB_MTU_2048;
228 props->subnet_timeout = 0;
229 props->max_vl_num = out_mad->data[37] >> 4;
230 props->init_type_reply = 0;
231 props->state = IB_PORT_DOWN;
232 props->phys_state = state_to_phys_state(props->state);
233 props->active_mtu = IB_MTU_256;
234 spin_lock(&iboe->lock);
235 ndev = iboe->netdevs[port - 1];
236 if (!ndev)
237 goto out;
238
239 tmp = iboe_get_mtu(ndev->mtu);
240 props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
241
242 props->state = netif_running(ndev) && netif_oper_up(ndev) ?
243 IB_PORT_ACTIVE : IB_PORT_DOWN;
244 props->phys_state = state_to_phys_state(props->state);
245
246out:
247 spin_unlock(&iboe->lock);
248 return 0;
249}
250
251static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
252 struct ib_port_attr *props)
253{
254 struct ib_smp *in_mad = NULL;
255 struct ib_smp *out_mad = NULL;
256 int err = -ENOMEM;
257
258 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
259 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
260 if (!in_mad || !out_mad)
261 goto out;
262
263 memset(props, 0, sizeof *props);
264
265 init_query_mad(in_mad);
266 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
267 in_mad->attr_mod = cpu_to_be32(port);
268
269 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
270 if (err)
271 goto out;
272
273 err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
274 ib_link_query_port(ibdev, port, props, out_mad) :
275 eth_link_query_port(ibdev, port, props, out_mad);
276
199out: 277out:
200 kfree(in_mad); 278 kfree(in_mad);
201 kfree(out_mad); 279 kfree(out_mad);
@@ -203,8 +281,8 @@ out:
203 return err; 281 return err;
204} 282}
205 283
206static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, 284static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
207 union ib_gid *gid) 285 union ib_gid *gid)
208{ 286{
209 struct ib_smp *in_mad = NULL; 287 struct ib_smp *in_mad = NULL;
210 struct ib_smp *out_mad = NULL; 288 struct ib_smp *out_mad = NULL;
@@ -241,6 +319,25 @@ out:
241 return err; 319 return err;
242} 320}
243 321
322static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index,
323 union ib_gid *gid)
324{
325 struct mlx4_ib_dev *dev = to_mdev(ibdev);
326
327 *gid = dev->iboe.gid_table[port - 1][index];
328
329 return 0;
330}
331
332static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
333 union ib_gid *gid)
334{
335 if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
336 return __mlx4_ib_query_gid(ibdev, port, index, gid);
337 else
338 return iboe_query_gid(ibdev, port, index, gid);
339}
340
244static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, 341static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
245 u16 *pkey) 342 u16 *pkey)
246{ 343{
@@ -289,6 +386,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
289{ 386{
290 struct mlx4_cmd_mailbox *mailbox; 387 struct mlx4_cmd_mailbox *mailbox;
291 int err; 388 int err;
389 u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
292 390
293 mailbox = mlx4_alloc_cmd_mailbox(dev->dev); 391 mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
294 if (IS_ERR(mailbox)) 392 if (IS_ERR(mailbox))
@@ -304,7 +402,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
304 ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask); 402 ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
305 } 403 }
306 404
307 err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, 405 err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
308 MLX4_CMD_TIME_CLASS_B); 406 MLX4_CMD_TIME_CLASS_B);
309 407
310 mlx4_free_cmd_mailbox(dev->dev, mailbox); 408 mlx4_free_cmd_mailbox(dev->dev, mailbox);
@@ -447,18 +545,132 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
447 return 0; 545 return 0;
448} 546}
449 547
548static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
549{
550 struct mlx4_ib_qp *mqp = to_mqp(ibqp);
551 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
552 struct mlx4_ib_gid_entry *ge;
553
554 ge = kzalloc(sizeof *ge, GFP_KERNEL);
555 if (!ge)
556 return -ENOMEM;
557
558 ge->gid = *gid;
559 if (mlx4_ib_add_mc(mdev, mqp, gid)) {
560 ge->port = mqp->port;
561 ge->added = 1;
562 }
563
564 mutex_lock(&mqp->mutex);
565 list_add_tail(&ge->list, &mqp->gid_list);
566 mutex_unlock(&mqp->mutex);
567
568 return 0;
569}
570
571int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
572 union ib_gid *gid)
573{
574 u8 mac[6];
575 struct net_device *ndev;
576 int ret = 0;
577
578 if (!mqp->port)
579 return 0;
580
581 spin_lock(&mdev->iboe.lock);
582 ndev = mdev->iboe.netdevs[mqp->port - 1];
583 if (ndev)
584 dev_hold(ndev);
585 spin_unlock(&mdev->iboe.lock);
586
587 if (ndev) {
588 rdma_get_mcast_mac((struct in6_addr *)gid, mac);
589 rtnl_lock();
590 dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac);
591 ret = 1;
592 rtnl_unlock();
593 dev_put(ndev);
594 }
595
596 return ret;
597}
598
450static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 599static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
451{ 600{
452 return mlx4_multicast_attach(to_mdev(ibqp->device)->dev, 601 int err;
453 &to_mqp(ibqp)->mqp, gid->raw, 602 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
454 !!(to_mqp(ibqp)->flags & 603 struct mlx4_ib_qp *mqp = to_mqp(ibqp);
455 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)); 604
605 err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, !!(mqp->flags &
606 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK));
607 if (err)
608 return err;
609
610 err = add_gid_entry(ibqp, gid);
611 if (err)
612 goto err_add;
613
614 return 0;
615
616err_add:
617 mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw);
618 return err;
619}
620
621static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
622{
623 struct mlx4_ib_gid_entry *ge;
624 struct mlx4_ib_gid_entry *tmp;
625 struct mlx4_ib_gid_entry *ret = NULL;
626
627 list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
628 if (!memcmp(raw, ge->gid.raw, 16)) {
629 ret = ge;
630 break;
631 }
632 }
633
634 return ret;
456} 635}
457 636
458static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 637static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
459{ 638{
460 return mlx4_multicast_detach(to_mdev(ibqp->device)->dev, 639 int err;
461 &to_mqp(ibqp)->mqp, gid->raw); 640 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
641 struct mlx4_ib_qp *mqp = to_mqp(ibqp);
642 u8 mac[6];
643 struct net_device *ndev;
644 struct mlx4_ib_gid_entry *ge;
645
646 err = mlx4_multicast_detach(mdev->dev,
647 &mqp->mqp, gid->raw);
648 if (err)
649 return err;
650
651 mutex_lock(&mqp->mutex);
652 ge = find_gid_entry(mqp, gid->raw);
653 if (ge) {
654 spin_lock(&mdev->iboe.lock);
655 ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
656 if (ndev)
657 dev_hold(ndev);
658 spin_unlock(&mdev->iboe.lock);
659 rdma_get_mcast_mac((struct in6_addr *)gid, mac);
660 if (ndev) {
661 rtnl_lock();
662 dev_mc_del(mdev->iboe.netdevs[ge->port - 1], mac);
663 rtnl_unlock();
664 dev_put(ndev);
665 }
666 list_del(&ge->list);
667 kfree(ge);
668 } else
669 printk(KERN_WARNING "could not find mgid entry\n");
670
671 mutex_unlock(&mqp->mutex);
672
673 return 0;
462} 674}
463 675
464static int init_node_data(struct mlx4_ib_dev *dev) 676static int init_node_data(struct mlx4_ib_dev *dev)
@@ -543,15 +755,143 @@ static struct device_attribute *mlx4_class_attributes[] = {
543 &dev_attr_board_id 755 &dev_attr_board_id
544}; 756};
545 757
758static void mlx4_addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
759{
760 memcpy(eui, dev->dev_addr, 3);
761 memcpy(eui + 5, dev->dev_addr + 3, 3);
762 eui[3] = 0xFF;
763 eui[4] = 0xFE;
764 eui[0] ^= 2;
765}
766
767static void update_gids_task(struct work_struct *work)
768{
769 struct update_gid_work *gw = container_of(work, struct update_gid_work, work);
770 struct mlx4_cmd_mailbox *mailbox;
771 union ib_gid *gids;
772 int err;
773 struct mlx4_dev *dev = gw->dev->dev;
774 struct ib_event event;
775
776 mailbox = mlx4_alloc_cmd_mailbox(dev);
777 if (IS_ERR(mailbox)) {
778 printk(KERN_WARNING "update gid table failed %ld\n", PTR_ERR(mailbox));
779 return;
780 }
781
782 gids = mailbox->buf;
783 memcpy(gids, gw->gids, sizeof gw->gids);
784
785 err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
786 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B);
787 if (err)
788 printk(KERN_WARNING "set port command failed\n");
789 else {
790 memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids);
791 event.device = &gw->dev->ib_dev;
792 event.element.port_num = gw->port;
793 event.event = IB_EVENT_LID_CHANGE;
794 ib_dispatch_event(&event);
795 }
796
797 mlx4_free_cmd_mailbox(dev, mailbox);
798 kfree(gw);
799}
800
801static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
802{
803 struct net_device *ndev = dev->iboe.netdevs[port - 1];
804 struct update_gid_work *work;
805
806 work = kzalloc(sizeof *work, GFP_ATOMIC);
807 if (!work)
808 return -ENOMEM;
809
810 if (!clear) {
811 mlx4_addrconf_ifid_eui48(&work->gids[0].raw[8], ndev);
812 work->gids[0].global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
813 }
814
815 INIT_WORK(&work->work, update_gids_task);
816 work->port = port;
817 work->dev = dev;
818 queue_work(wq, &work->work);
819
820 return 0;
821}
822
823static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event)
824{
825 switch (event) {
826 case NETDEV_UP:
827 update_ipv6_gids(dev, port, 0);
828 break;
829
830 case NETDEV_DOWN:
831 update_ipv6_gids(dev, port, 1);
832 dev->iboe.netdevs[port - 1] = NULL;
833 }
834}
835
836static void netdev_added(struct mlx4_ib_dev *dev, int port)
837{
838 update_ipv6_gids(dev, port, 0);
839}
840
841static void netdev_removed(struct mlx4_ib_dev *dev, int port)
842{
843 update_ipv6_gids(dev, port, 1);
844}
845
846static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event,
847 void *ptr)
848{
849 struct net_device *dev = ptr;
850 struct mlx4_ib_dev *ibdev;
851 struct net_device *oldnd;
852 struct mlx4_ib_iboe *iboe;
853 int port;
854
855 if (!net_eq(dev_net(dev), &init_net))
856 return NOTIFY_DONE;
857
858 ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
859 iboe = &ibdev->iboe;
860
861 spin_lock(&iboe->lock);
862 mlx4_foreach_ib_transport_port(port, ibdev->dev) {
863 oldnd = iboe->netdevs[port - 1];
864 iboe->netdevs[port - 1] =
865 mlx4_get_protocol_dev(ibdev->dev, MLX4_PROTOCOL_EN, port);
866 if (oldnd != iboe->netdevs[port - 1]) {
867 if (iboe->netdevs[port - 1])
868 netdev_added(ibdev, port);
869 else
870 netdev_removed(ibdev, port);
871 }
872 }
873
874 if (dev == iboe->netdevs[0])
875 handle_en_event(ibdev, 1, event);
876 else if (dev == iboe->netdevs[1])
877 handle_en_event(ibdev, 2, event);
878
879 spin_unlock(&iboe->lock);
880
881 return NOTIFY_DONE;
882}
883
546static void *mlx4_ib_add(struct mlx4_dev *dev) 884static void *mlx4_ib_add(struct mlx4_dev *dev)
547{ 885{
548 struct mlx4_ib_dev *ibdev; 886 struct mlx4_ib_dev *ibdev;
549 int num_ports = 0; 887 int num_ports = 0;
550 int i; 888 int i;
889 int err;
890 struct mlx4_ib_iboe *iboe;
551 891
552 printk_once(KERN_INFO "%s", mlx4_ib_version); 892 printk_once(KERN_INFO "%s", mlx4_ib_version);
553 893
554 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) 894 mlx4_foreach_ib_transport_port(i, dev)
555 num_ports++; 895 num_ports++;
556 896
557 /* No point in registering a device with no ports... */ 897 /* No point in registering a device with no ports... */
@@ -564,6 +904,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
564 return NULL; 904 return NULL;
565 } 905 }
566 906
907 iboe = &ibdev->iboe;
908
567 if (mlx4_pd_alloc(dev, &ibdev->priv_pdn)) 909 if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
568 goto err_dealloc; 910 goto err_dealloc;
569 911
@@ -612,6 +954,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
612 954
613 ibdev->ib_dev.query_device = mlx4_ib_query_device; 955 ibdev->ib_dev.query_device = mlx4_ib_query_device;
614 ibdev->ib_dev.query_port = mlx4_ib_query_port; 956 ibdev->ib_dev.query_port = mlx4_ib_query_port;
957 ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer;
615 ibdev->ib_dev.query_gid = mlx4_ib_query_gid; 958 ibdev->ib_dev.query_gid = mlx4_ib_query_gid;
616 ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey; 959 ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey;
617 ibdev->ib_dev.modify_device = mlx4_ib_modify_device; 960 ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
@@ -656,6 +999,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
656 ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; 999 ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
657 ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; 1000 ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
658 1001
1002 spin_lock_init(&iboe->lock);
1003
659 if (init_node_data(ibdev)) 1004 if (init_node_data(ibdev))
660 goto err_map; 1005 goto err_map;
661 1006
@@ -668,16 +1013,28 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
668 if (mlx4_ib_mad_init(ibdev)) 1013 if (mlx4_ib_mad_init(ibdev))
669 goto err_reg; 1014 goto err_reg;
670 1015
1016 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
1017 iboe->nb.notifier_call = mlx4_ib_netdev_event;
1018 err = register_netdevice_notifier(&iboe->nb);
1019 if (err)
1020 goto err_reg;
1021 }
1022
671 for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) { 1023 for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) {
672 if (device_create_file(&ibdev->ib_dev.dev, 1024 if (device_create_file(&ibdev->ib_dev.dev,
673 mlx4_class_attributes[i])) 1025 mlx4_class_attributes[i]))
674 goto err_reg; 1026 goto err_notif;
675 } 1027 }
676 1028
677 ibdev->ib_active = true; 1029 ibdev->ib_active = true;
678 1030
679 return ibdev; 1031 return ibdev;
680 1032
1033err_notif:
1034 if (unregister_netdevice_notifier(&ibdev->iboe.nb))
1035 printk(KERN_WARNING "failure unregistering notifier\n");
1036 flush_workqueue(wq);
1037
681err_reg: 1038err_reg:
682 ib_unregister_device(&ibdev->ib_dev); 1039 ib_unregister_device(&ibdev->ib_dev);
683 1040
@@ -703,11 +1060,16 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
703 1060
704 mlx4_ib_mad_cleanup(ibdev); 1061 mlx4_ib_mad_cleanup(ibdev);
705 ib_unregister_device(&ibdev->ib_dev); 1062 ib_unregister_device(&ibdev->ib_dev);
1063 if (ibdev->iboe.nb.notifier_call) {
1064 if (unregister_netdevice_notifier(&ibdev->iboe.nb))
1065 printk(KERN_WARNING "failure unregistering notifier\n");
1066 ibdev->iboe.nb.notifier_call = NULL;
1067 }
1068 iounmap(ibdev->uar_map);
706 1069
707 for (p = 1; p <= ibdev->num_ports; ++p) 1070 mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
708 mlx4_CLOSE_PORT(dev, p); 1071 mlx4_CLOSE_PORT(dev, p);
709 1072
710 iounmap(ibdev->uar_map);
711 mlx4_uar_free(dev, &ibdev->priv_uar); 1073 mlx4_uar_free(dev, &ibdev->priv_uar);
712 mlx4_pd_free(dev, ibdev->priv_pdn); 1074 mlx4_pd_free(dev, ibdev->priv_pdn);
713 ib_dealloc_device(&ibdev->ib_dev); 1075 ib_dealloc_device(&ibdev->ib_dev);
@@ -747,19 +1109,33 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
747} 1109}
748 1110
749static struct mlx4_interface mlx4_ib_interface = { 1111static struct mlx4_interface mlx4_ib_interface = {
750 .add = mlx4_ib_add, 1112 .add = mlx4_ib_add,
751 .remove = mlx4_ib_remove, 1113 .remove = mlx4_ib_remove,
752 .event = mlx4_ib_event 1114 .event = mlx4_ib_event,
1115 .protocol = MLX4_PROTOCOL_IB
753}; 1116};
754 1117
755static int __init mlx4_ib_init(void) 1118static int __init mlx4_ib_init(void)
756{ 1119{
757 return mlx4_register_interface(&mlx4_ib_interface); 1120 int err;
1121
1122 wq = create_singlethread_workqueue("mlx4_ib");
1123 if (!wq)
1124 return -ENOMEM;
1125
1126 err = mlx4_register_interface(&mlx4_ib_interface);
1127 if (err) {
1128 destroy_workqueue(wq);
1129 return err;
1130 }
1131
1132 return 0;
758} 1133}
759 1134
760static void __exit mlx4_ib_cleanup(void) 1135static void __exit mlx4_ib_cleanup(void)
761{ 1136{
762 mlx4_unregister_interface(&mlx4_ib_interface); 1137 mlx4_unregister_interface(&mlx4_ib_interface);
1138 destroy_workqueue(wq);
763} 1139}
764 1140
765module_init(mlx4_ib_init); 1141module_init(mlx4_ib_init);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 3486d7675e5..2a322f21049 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -112,6 +112,13 @@ enum mlx4_ib_qp_flags {
112 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1, 112 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
113}; 113};
114 114
115struct mlx4_ib_gid_entry {
116 struct list_head list;
117 union ib_gid gid;
118 int added;
119 u8 port;
120};
121
115struct mlx4_ib_qp { 122struct mlx4_ib_qp {
116 struct ib_qp ibqp; 123 struct ib_qp ibqp;
117 struct mlx4_qp mqp; 124 struct mlx4_qp mqp;
@@ -138,6 +145,8 @@ struct mlx4_ib_qp {
138 u8 resp_depth; 145 u8 resp_depth;
139 u8 sq_no_prefetch; 146 u8 sq_no_prefetch;
140 u8 state; 147 u8 state;
148 int mlx_type;
149 struct list_head gid_list;
141}; 150};
142 151
143struct mlx4_ib_srq { 152struct mlx4_ib_srq {
@@ -157,7 +166,14 @@ struct mlx4_ib_srq {
157 166
158struct mlx4_ib_ah { 167struct mlx4_ib_ah {
159 struct ib_ah ibah; 168 struct ib_ah ibah;
160 struct mlx4_av av; 169 union mlx4_ext_av av;
170};
171
172struct mlx4_ib_iboe {
173 spinlock_t lock;
174 struct net_device *netdevs[MLX4_MAX_PORTS];
175 struct notifier_block nb;
176 union ib_gid gid_table[MLX4_MAX_PORTS][128];
161}; 177};
162 178
163struct mlx4_ib_dev { 179struct mlx4_ib_dev {
@@ -176,6 +192,7 @@ struct mlx4_ib_dev {
176 192
177 struct mutex cap_mask_mutex; 193 struct mutex cap_mask_mutex;
178 bool ib_active; 194 bool ib_active;
195 struct mlx4_ib_iboe iboe;
179}; 196};
180 197
181static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) 198static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
@@ -314,9 +331,20 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
314int mlx4_ib_unmap_fmr(struct list_head *fmr_list); 331int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
315int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr); 332int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
316 333
334int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
335 u8 *mac, int *is_mcast, u8 port);
336
317static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) 337static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
318{ 338{
319 return !!(ah->av.g_slid & 0x80); 339 u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3;
340
341 if (rdma_port_get_link_layer(ah->ibah.device, port) == IB_LINK_LAYER_ETHERNET)
342 return 1;
343
344 return !!(ah->av.ib.g_slid & 0x80);
320} 345}
321 346
347int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
348 union ib_gid *gid);
349
322#endif /* MLX4_IB_H */ 350#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index bb1277c8fbf..17f60fe6e5b 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -33,6 +33,7 @@
33 33
34#include <linux/log2.h> 34#include <linux/log2.h>
35#include <linux/slab.h> 35#include <linux/slab.h>
36#include <linux/netdevice.h>
36 37
37#include <rdma/ib_cache.h> 38#include <rdma/ib_cache.h>
38#include <rdma/ib_pack.h> 39#include <rdma/ib_pack.h>
@@ -48,17 +49,25 @@ enum {
48 49
49enum { 50enum {
50 MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83, 51 MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83,
51 MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f 52 MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
53 MLX4_IB_LINK_TYPE_IB = 0,
54 MLX4_IB_LINK_TYPE_ETH = 1
52}; 55};
53 56
54enum { 57enum {
55 /* 58 /*
56 * Largest possible UD header: send with GRH and immediate data. 59 * Largest possible UD header: send with GRH and immediate
60 * data plus 14 bytes for an Ethernet header. (LRH would only
61 * use 8 bytes, so Ethernet is the biggest case)
57 */ 62 */
58 MLX4_IB_UD_HEADER_SIZE = 72, 63 MLX4_IB_UD_HEADER_SIZE = 78,
59 MLX4_IB_LSO_HEADER_SPARE = 128, 64 MLX4_IB_LSO_HEADER_SPARE = 128,
60}; 65};
61 66
67enum {
68 MLX4_IB_IBOE_ETHERTYPE = 0x8915
69};
70
62struct mlx4_ib_sqp { 71struct mlx4_ib_sqp {
63 struct mlx4_ib_qp qp; 72 struct mlx4_ib_qp qp;
64 int pkey_index; 73 int pkey_index;
@@ -462,6 +471,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
462 mutex_init(&qp->mutex); 471 mutex_init(&qp->mutex);
463 spin_lock_init(&qp->sq.lock); 472 spin_lock_init(&qp->sq.lock);
464 spin_lock_init(&qp->rq.lock); 473 spin_lock_init(&qp->rq.lock);
474 INIT_LIST_HEAD(&qp->gid_list);
465 475
466 qp->state = IB_QPS_RESET; 476 qp->state = IB_QPS_RESET;
467 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) 477 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
@@ -649,6 +659,16 @@ static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *re
649 } 659 }
650} 660}
651 661
662static void del_gid_entries(struct mlx4_ib_qp *qp)
663{
664 struct mlx4_ib_gid_entry *ge, *tmp;
665
666 list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
667 list_del(&ge->list);
668 kfree(ge);
669 }
670}
671
652static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, 672static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
653 int is_user) 673 int is_user)
654{ 674{
@@ -695,6 +715,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
695 if (!qp->ibqp.srq) 715 if (!qp->ibqp.srq)
696 mlx4_db_free(dev->dev, &qp->db); 716 mlx4_db_free(dev->dev, &qp->db);
697 } 717 }
718
719 del_gid_entries(qp);
698} 720}
699 721
700struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, 722struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
@@ -852,6 +874,12 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
852static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, 874static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
853 struct mlx4_qp_path *path, u8 port) 875 struct mlx4_qp_path *path, u8 port)
854{ 876{
877 int err;
878 int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
879 IB_LINK_LAYER_ETHERNET;
880 u8 mac[6];
881 int is_mcast;
882
855 path->grh_mylmc = ah->src_path_bits & 0x7f; 883 path->grh_mylmc = ah->src_path_bits & 0x7f;
856 path->rlid = cpu_to_be16(ah->dlid); 884 path->rlid = cpu_to_be16(ah->dlid);
857 if (ah->static_rate) { 885 if (ah->static_rate) {
@@ -882,9 +910,35 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
882 path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | 910 path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
883 ((port - 1) << 6) | ((ah->sl & 0xf) << 2); 911 ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
884 912
913 if (is_eth) {
914 if (!(ah->ah_flags & IB_AH_GRH))
915 return -1;
916
917 err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port);
918 if (err)
919 return err;
920
921 memcpy(path->dmac, mac, 6);
922 path->ackto = MLX4_IB_LINK_TYPE_ETH;
923 /* use index 0 into MAC table for IBoE */
924 path->grh_mylmc &= 0x80;
925 }
926
885 return 0; 927 return 0;
886} 928}
887 929
930static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
931{
932 struct mlx4_ib_gid_entry *ge, *tmp;
933
934 list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
935 if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) {
936 ge->added = 1;
937 ge->port = qp->port;
938 }
939 }
940}
941
888static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, 942static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
889 const struct ib_qp_attr *attr, int attr_mask, 943 const struct ib_qp_attr *attr, int attr_mask,
890 enum ib_qp_state cur_state, enum ib_qp_state new_state) 944 enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -980,7 +1034,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
980 } 1034 }
981 1035
982 if (attr_mask & IB_QP_TIMEOUT) { 1036 if (attr_mask & IB_QP_TIMEOUT) {
983 context->pri_path.ackto = attr->timeout << 3; 1037 context->pri_path.ackto |= attr->timeout << 3;
984 optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT; 1038 optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
985 } 1039 }
986 1040
@@ -1118,8 +1172,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1118 qp->atomic_rd_en = attr->qp_access_flags; 1172 qp->atomic_rd_en = attr->qp_access_flags;
1119 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) 1173 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1120 qp->resp_depth = attr->max_dest_rd_atomic; 1174 qp->resp_depth = attr->max_dest_rd_atomic;
1121 if (attr_mask & IB_QP_PORT) 1175 if (attr_mask & IB_QP_PORT) {
1122 qp->port = attr->port_num; 1176 qp->port = attr->port_num;
1177 update_mcg_macs(dev, qp);
1178 }
1123 if (attr_mask & IB_QP_ALT_PATH) 1179 if (attr_mask & IB_QP_ALT_PATH)
1124 qp->alt_port = attr->alt_port_num; 1180 qp->alt_port = attr->alt_port_num;
1125 1181
@@ -1226,35 +1282,45 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1226 int header_size; 1282 int header_size;
1227 int spc; 1283 int spc;
1228 int i; 1284 int i;
1285 int is_eth;
1286 int is_grh;
1229 1287
1230 send_size = 0; 1288 send_size = 0;
1231 for (i = 0; i < wr->num_sge; ++i) 1289 for (i = 0; i < wr->num_sge; ++i)
1232 send_size += wr->sg_list[i].length; 1290 send_size += wr->sg_list[i].length;
1233 1291
1234 ib_ud_header_init(send_size, 1, 0, mlx4_ib_ah_grh_present(ah), 0, &sqp->ud_header); 1292 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
1293 is_grh = mlx4_ib_ah_grh_present(ah);
1294 ib_ud_header_init(send_size, !is_eth, is_eth, is_grh, 0, &sqp->ud_header);
1295
1296 if (!is_eth) {
1297 sqp->ud_header.lrh.service_level =
1298 be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
1299 sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid;
1300 sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
1301 }
1235 1302
1236 sqp->ud_header.lrh.service_level = 1303 if (is_grh) {
1237 be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
1238 sqp->ud_header.lrh.destination_lid = ah->av.dlid;
1239 sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f);
1240 if (mlx4_ib_ah_grh_present(ah)) {
1241 sqp->ud_header.grh.traffic_class = 1304 sqp->ud_header.grh.traffic_class =
1242 (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff; 1305 (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
1243 sqp->ud_header.grh.flow_label = 1306 sqp->ud_header.grh.flow_label =
1244 ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff); 1307 ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
1245 sqp->ud_header.grh.hop_limit = ah->av.hop_limit; 1308 sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
1246 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24, 1309 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24,
1247 ah->av.gid_index, &sqp->ud_header.grh.source_gid); 1310 ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid);
1248 memcpy(sqp->ud_header.grh.destination_gid.raw, 1311 memcpy(sqp->ud_header.grh.destination_gid.raw,
1249 ah->av.dgid, 16); 1312 ah->av.ib.dgid, 16);
1250 } 1313 }
1251 1314
1252 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); 1315 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
1253 mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | 1316
1254 (sqp->ud_header.lrh.destination_lid == 1317 if (!is_eth) {
1255 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | 1318 mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
1256 (sqp->ud_header.lrh.service_level << 8)); 1319 (sqp->ud_header.lrh.destination_lid ==
1257 mlx->rlid = sqp->ud_header.lrh.destination_lid; 1320 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
1321 (sqp->ud_header.lrh.service_level << 8));
1322 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1323 }
1258 1324
1259 switch (wr->opcode) { 1325 switch (wr->opcode) {
1260 case IB_WR_SEND: 1326 case IB_WR_SEND:
@@ -1270,9 +1336,21 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1270 return -EINVAL; 1336 return -EINVAL;
1271 } 1337 }
1272 1338
1273 sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; 1339 if (is_eth) {
1274 if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) 1340 u8 *smac;
1275 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; 1341
1342 memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
1343 /* FIXME: cache smac value? */
1344 smac = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]->dev_addr;
1345 memcpy(sqp->ud_header.eth.smac_h, smac, 6);
1346 if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
1347 mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
1348 sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
1349 } else {
1350 sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
1351 if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
1352 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
1353 }
1276 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); 1354 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
1277 if (!sqp->qp.ibqp.qp_num) 1355 if (!sqp->qp.ibqp.qp_num)
1278 ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); 1356 ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
@@ -1434,6 +1512,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
1434 memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); 1512 memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
1435 dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); 1513 dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
1436 dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); 1514 dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
1515 dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
1516 memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
1437} 1517}
1438 1518
1439static void set_mlx_icrc_seg(void *dseg) 1519static void set_mlx_icrc_seg(void *dseg)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 47e163ad3d1..ca5645c43f6 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -374,6 +374,27 @@ struct mlx4_av {
374 u8 dgid[16]; 374 u8 dgid[16];
375}; 375};
376 376
377struct mlx4_eth_av {
378 __be32 port_pd;
379 u8 reserved1;
380 u8 smac_idx;
381 u16 reserved2;
382 u8 reserved3;
383 u8 gid_index;
384 u8 stat_rate;
385 u8 hop_limit;
386 __be32 sl_tclass_flowlabel;
387 u8 dgid[16];
388 u32 reserved4[2];
389 __be16 vlan;
390 u8 mac[6];
391};
392
393union mlx4_ext_av {
394 struct mlx4_av ib;
395 struct mlx4_eth_av eth;
396};
397
377struct mlx4_dev { 398struct mlx4_dev {
378 struct pci_dev *pdev; 399 struct pci_dev *pdev;
379 unsigned long flags; 400 unsigned long flags;
@@ -402,6 +423,12 @@ struct mlx4_init_port_param {
402 if (((type) == MLX4_PORT_TYPE_IB ? (dev)->caps.port_mask : \ 423 if (((type) == MLX4_PORT_TYPE_IB ? (dev)->caps.port_mask : \
403 ~(dev)->caps.port_mask) & 1 << ((port) - 1)) 424 ~(dev)->caps.port_mask) & 1 << ((port) - 1))
404 425
426#define mlx4_foreach_ib_transport_port(port, dev) \
427 for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
428 if (((dev)->caps.port_mask & 1 << ((port) - 1)) || \
429 ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
430
431
405int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, 432int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
406 struct mlx4_buf *buf); 433 struct mlx4_buf *buf);
407void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); 434void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);