aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/infiniband/core/cache.c43
-rw-r--r--drivers/infiniband/core/cma.c5
-rw-r--r--drivers/infiniband/core/device.c16
-rw-r--r--drivers/infiniband/core/ucm.c1
-rw-r--r--drivers/infiniband/core/ucma.c1
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c62
-rw-r--r--drivers/infiniband/hw/mlx4/Makefile2
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c688
-rw-r--r--drivers/infiniband/hw/mlx4/cm.c437
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c31
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c1573
-rw-r--r--drivers/infiniband/hw/mlx4/main.c273
-rw-r--r--drivers/infiniband/hw/mlx4/mcg.c1254
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h341
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c660
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c794
-rw-r--r--drivers/infiniband/hw/qib/qib_common.h14
-rw-r--r--drivers/infiniband/hw/qib/qib_driver.c3
-rw-r--r--drivers/infiniband/hw/qib/qib_keys.c5
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c19
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cmd.c242
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/eq.c245
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c246
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c171
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4.h59
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/port.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/qp.c100
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/resource_tracker.c222
-rw-r--r--include/linux/mlx4/device.h69
-rw-r--r--include/linux/mlx4/driver.h2
-rw-r--r--include/linux/mlx4/qp.h3
-rw-r--r--include/rdma/ib_cache.h16
-rw-r--r--include/rdma/ib_verbs.h3
37 files changed, 7241 insertions, 387 deletions
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 9353992f9ee..80f6cf2449f 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -167,6 +167,7 @@ int ib_find_cached_pkey(struct ib_device *device,
167 unsigned long flags; 167 unsigned long flags;
168 int i; 168 int i;
169 int ret = -ENOENT; 169 int ret = -ENOENT;
170 int partial_ix = -1;
170 171
171 if (port_num < start_port(device) || port_num > end_port(device)) 172 if (port_num < start_port(device) || port_num > end_port(device))
172 return -EINVAL; 173 return -EINVAL;
@@ -179,6 +180,46 @@ int ib_find_cached_pkey(struct ib_device *device,
179 180
180 for (i = 0; i < cache->table_len; ++i) 181 for (i = 0; i < cache->table_len; ++i)
181 if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) { 182 if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
183 if (cache->table[i] & 0x8000) {
184 *index = i;
185 ret = 0;
186 break;
187 } else
188 partial_ix = i;
189 }
190
191 if (ret && partial_ix >= 0) {
192 *index = partial_ix;
193 ret = 0;
194 }
195
196 read_unlock_irqrestore(&device->cache.lock, flags);
197
198 return ret;
199}
200EXPORT_SYMBOL(ib_find_cached_pkey);
201
202int ib_find_exact_cached_pkey(struct ib_device *device,
203 u8 port_num,
204 u16 pkey,
205 u16 *index)
206{
207 struct ib_pkey_cache *cache;
208 unsigned long flags;
209 int i;
210 int ret = -ENOENT;
211
212 if (port_num < start_port(device) || port_num > end_port(device))
213 return -EINVAL;
214
215 read_lock_irqsave(&device->cache.lock, flags);
216
217 cache = device->cache.pkey_cache[port_num - start_port(device)];
218
219 *index = -1;
220
221 for (i = 0; i < cache->table_len; ++i)
222 if (cache->table[i] == pkey) {
182 *index = i; 223 *index = i;
183 ret = 0; 224 ret = 0;
184 break; 225 break;
@@ -188,7 +229,7 @@ int ib_find_cached_pkey(struct ib_device *device,
188 229
189 return ret; 230 return ret;
190} 231}
191EXPORT_SYMBOL(ib_find_cached_pkey); 232EXPORT_SYMBOL(ib_find_exact_cached_pkey);
192 233
193int ib_get_cached_lmc(struct ib_device *device, 234int ib_get_cached_lmc(struct ib_device *device,
194 u8 port_num, 235 u8 port_num,
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 7172559ce0c..26b37603dcf 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -3058,7 +3058,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
3058 3058
3059 if (id_priv->id.ps == RDMA_PS_IPOIB) 3059 if (id_priv->id.ps == RDMA_PS_IPOIB)
3060 comp_mask |= IB_SA_MCMEMBER_REC_RATE | 3060 comp_mask |= IB_SA_MCMEMBER_REC_RATE |
3061 IB_SA_MCMEMBER_REC_RATE_SELECTOR; 3061 IB_SA_MCMEMBER_REC_RATE_SELECTOR |
3062 IB_SA_MCMEMBER_REC_MTU_SELECTOR |
3063 IB_SA_MCMEMBER_REC_MTU |
3064 IB_SA_MCMEMBER_REC_HOP_LIMIT;
3062 3065
3063 mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, 3066 mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
3064 id_priv->id.port_num, &rec, 3067 id_priv->id.port_num, &rec,
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index e711de400a0..18c1ece765f 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -707,18 +707,28 @@ int ib_find_pkey(struct ib_device *device,
707{ 707{
708 int ret, i; 708 int ret, i;
709 u16 tmp_pkey; 709 u16 tmp_pkey;
710 int partial_ix = -1;
710 711
711 for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) { 712 for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) {
712 ret = ib_query_pkey(device, port_num, i, &tmp_pkey); 713 ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
713 if (ret) 714 if (ret)
714 return ret; 715 return ret;
715
716 if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) { 716 if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
717 *index = i; 717 /* if there is full-member pkey take it.*/
718 return 0; 718 if (tmp_pkey & 0x8000) {
719 *index = i;
720 return 0;
721 }
722 if (partial_ix < 0)
723 partial_ix = i;
719 } 724 }
720 } 725 }
721 726
727 /*no full-member, if exists take the limited*/
728 if (partial_ix >= 0) {
729 *index = partial_ix;
730 return 0;
731 }
722 return -ENOENT; 732 return -ENOENT;
723} 733}
724EXPORT_SYMBOL(ib_find_pkey); 734EXPORT_SYMBOL(ib_find_pkey);
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 06f08713f48..49b15ac1987 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -397,7 +397,6 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
397 struct ib_ucm_event_get cmd; 397 struct ib_ucm_event_get cmd;
398 struct ib_ucm_event *uevent; 398 struct ib_ucm_event *uevent;
399 int result = 0; 399 int result = 0;
400 DEFINE_WAIT(wait);
401 400
402 if (out_len < sizeof(struct ib_ucm_event_resp)) 401 if (out_len < sizeof(struct ib_ucm_event_resp))
403 return -ENOSPC; 402 return -ENOSPC;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 055ed59838d..7972bae2e9b 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -310,7 +310,6 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
310 struct rdma_ucm_get_event cmd; 310 struct rdma_ucm_get_event cmd;
311 struct ucma_event *uevent; 311 struct ucma_event *uevent;
312 int ret = 0; 312 int ret = 0;
313 DEFINE_WAIT(wait);
314 313
315 if (out_len < sizeof uevent->resp) 314 if (out_len < sizeof uevent->resp)
316 return -ENOSPC; 315 return -ENOSPC;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 45aedf1d933..e2bf9c68cfc 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -137,19 +137,25 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
137 return -ENOMEM; 137 return -ENOMEM;
138 138
139 wq->rq.qid = c4iw_get_qpid(rdev, uctx); 139 wq->rq.qid = c4iw_get_qpid(rdev, uctx);
140 if (!wq->rq.qid) 140 if (!wq->rq.qid) {
141 goto err1; 141 ret = -ENOMEM;
142 goto free_sq_qid;
143 }
142 144
143 if (!user) { 145 if (!user) {
144 wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq, 146 wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq,
145 GFP_KERNEL); 147 GFP_KERNEL);
146 if (!wq->sq.sw_sq) 148 if (!wq->sq.sw_sq) {
147 goto err2; 149 ret = -ENOMEM;
150 goto free_rq_qid;
151 }
148 152
149 wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq, 153 wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq,
150 GFP_KERNEL); 154 GFP_KERNEL);
151 if (!wq->rq.sw_rq) 155 if (!wq->rq.sw_rq) {
152 goto err3; 156 ret = -ENOMEM;
157 goto free_sw_sq;
158 }
153 } 159 }
154 160
155 /* 161 /*
@@ -157,15 +163,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
157 */ 163 */
158 wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size); 164 wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size);
159 wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); 165 wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
160 if (!wq->rq.rqt_hwaddr) 166 if (!wq->rq.rqt_hwaddr) {
161 goto err4; 167 ret = -ENOMEM;
168 goto free_sw_rq;
169 }
162 170
163 if (user) { 171 if (user) {
164 if (alloc_oc_sq(rdev, &wq->sq) && alloc_host_sq(rdev, &wq->sq)) 172 ret = alloc_oc_sq(rdev, &wq->sq);
165 goto err5; 173 if (ret)
174 goto free_hwaddr;
175
176 ret = alloc_host_sq(rdev, &wq->sq);
177 if (ret)
178 goto free_sq;
166 } else 179 } else
167 if (alloc_host_sq(rdev, &wq->sq)) 180 ret = alloc_host_sq(rdev, &wq->sq);
168 goto err5; 181 if (ret)
182 goto free_hwaddr;
169 memset(wq->sq.queue, 0, wq->sq.memsize); 183 memset(wq->sq.queue, 0, wq->sq.memsize);
170 dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); 184 dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
171 185
@@ -173,7 +187,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
173 wq->rq.memsize, &(wq->rq.dma_addr), 187 wq->rq.memsize, &(wq->rq.dma_addr),
174 GFP_KERNEL); 188 GFP_KERNEL);
175 if (!wq->rq.queue) 189 if (!wq->rq.queue)
176 goto err6; 190 goto free_sq;
177 PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", 191 PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
178 __func__, wq->sq.queue, 192 __func__, wq->sq.queue,
179 (unsigned long long)virt_to_phys(wq->sq.queue), 193 (unsigned long long)virt_to_phys(wq->sq.queue),
@@ -201,7 +215,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
201 skb = alloc_skb(wr_len, GFP_KERNEL); 215 skb = alloc_skb(wr_len, GFP_KERNEL);
202 if (!skb) { 216 if (!skb) {
203 ret = -ENOMEM; 217 ret = -ENOMEM;
204 goto err7; 218 goto free_dma;
205 } 219 }
206 set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); 220 set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
207 221
@@ -266,33 +280,33 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
266 280
267 ret = c4iw_ofld_send(rdev, skb); 281 ret = c4iw_ofld_send(rdev, skb);
268 if (ret) 282 if (ret)
269 goto err7; 283 goto free_dma;
270 ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__); 284 ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__);
271 if (ret) 285 if (ret)
272 goto err7; 286 goto free_dma;
273 287
274 PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n", 288 PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n",
275 __func__, wq->sq.qid, wq->rq.qid, wq->db, 289 __func__, wq->sq.qid, wq->rq.qid, wq->db,
276 (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb); 290 (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb);
277 291
278 return 0; 292 return 0;
279err7: 293free_dma:
280 dma_free_coherent(&(rdev->lldi.pdev->dev), 294 dma_free_coherent(&(rdev->lldi.pdev->dev),
281 wq->rq.memsize, wq->rq.queue, 295 wq->rq.memsize, wq->rq.queue,
282 dma_unmap_addr(&wq->rq, mapping)); 296 dma_unmap_addr(&wq->rq, mapping));
283err6: 297free_sq:
284 dealloc_sq(rdev, &wq->sq); 298 dealloc_sq(rdev, &wq->sq);
285err5: 299free_hwaddr:
286 c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); 300 c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
287err4: 301free_sw_rq:
288 kfree(wq->rq.sw_rq); 302 kfree(wq->rq.sw_rq);
289err3: 303free_sw_sq:
290 kfree(wq->sq.sw_sq); 304 kfree(wq->sq.sw_sq);
291err2: 305free_rq_qid:
292 c4iw_put_qpid(rdev, wq->rq.qid, uctx); 306 c4iw_put_qpid(rdev, wq->rq.qid, uctx);
293err1: 307free_sq_qid:
294 c4iw_put_qpid(rdev, wq->sq.qid, uctx); 308 c4iw_put_qpid(rdev, wq->sq.qid, uctx);
295 return -ENOMEM; 309 return ret;
296} 310}
297 311
298static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, 312static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
diff --git a/drivers/infiniband/hw/mlx4/Makefile b/drivers/infiniband/hw/mlx4/Makefile
index 70f09c7826d..f4213b3a8fe 100644
--- a/drivers/infiniband/hw/mlx4/Makefile
+++ b/drivers/infiniband/hw/mlx4/Makefile
@@ -1,3 +1,3 @@
1obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o 1obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
2 2
3mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o 3mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o mcg.o cm.o alias_GUID.o sysfs.o
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
new file mode 100644
index 00000000000..0fcd5cd6f3e
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -0,0 +1,688 @@
1/*
2 * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32 /***********************************************************/
33/* This file supports the handling of the Alias GUID feature. */
34/***********************************************************/
35#include <rdma/ib_mad.h>
36#include <rdma/ib_smi.h>
37#include <rdma/ib_cache.h>
38#include <rdma/ib_sa.h>
39#include <rdma/ib_pack.h>
40#include <linux/mlx4/cmd.h>
41#include <linux/module.h>
42#include <linux/init.h>
43#include <linux/errno.h>
44#include <rdma/ib_user_verbs.h>
45#include <linux/delay.h>
46#include "mlx4_ib.h"
47
48/*
49The driver keeps the current state of all guids, as they are in the HW.
50Whenever we receive an smp mad GUIDInfo record, the data will be cached.
51*/
52
53struct mlx4_alias_guid_work_context {
54 u8 port;
55 struct mlx4_ib_dev *dev ;
56 struct ib_sa_query *sa_query;
57 struct completion done;
58 int query_id;
59 struct list_head list;
60 int block_num;
61};
62
63struct mlx4_next_alias_guid_work {
64 u8 port;
65 u8 block_num;
66 struct mlx4_sriov_alias_guid_info_rec_det rec_det;
67};
68
69
70void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
71 u8 port_num, u8 *p_data)
72{
73 int i;
74 u64 guid_indexes;
75 int slave_id;
76 int port_index = port_num - 1;
77
78 if (!mlx4_is_master(dev->dev))
79 return;
80
81 guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
82 ports_guid[port_num - 1].
83 all_rec_per_port[block_num].guid_indexes);
84 pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
85
86 for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
87 /* The location of the specific index starts from bit number 4
88 * until bit num 11 */
89 if (test_bit(i + 4, (unsigned long *)&guid_indexes)) {
90 slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
91 if (slave_id >= dev->dev->num_slaves) {
92 pr_debug("The last slave: %d\n", slave_id);
93 return;
94 }
95
96 /* cache the guid: */
97 memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id],
98 &p_data[i * GUID_REC_SIZE],
99 GUID_REC_SIZE);
100 } else
101 pr_debug("Guid number: %d in block: %d"
102 " was not updated\n", i, block_num);
103 }
104}
105
106static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index)
107{
108 if (index >= NUM_ALIAS_GUID_PER_PORT) {
109 pr_err("%s: ERROR: asked for index:%d\n", __func__, index);
110 return (__force __be64) ((u64) 0xFFFFFFFFFFFFFFFFUL);
111 }
112 return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index];
113}
114
115
116ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
117{
118 return IB_SA_COMP_MASK(4 + index);
119}
120
121/*
122 * Whenever new GUID is set/unset (guid table change) create event and
123 * notify the relevant slave (master also should be notified).
124 * If the GUID value is not as we have in the cache the slave will not be
125 * updated; in this case it waits for the smp_snoop or the port management
126 * event to call the function and to update the slave.
127 * block_number - the index of the block (16 blocks available)
128 * port_number - 1 or 2
129 */
130void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
131 int block_num, u8 port_num,
132 u8 *p_data)
133{
134 int i;
135 u64 guid_indexes;
136 int slave_id;
137 enum slave_port_state new_state;
138 enum slave_port_state prev_state;
139 __be64 tmp_cur_ag, form_cache_ag;
140 enum slave_port_gen_event gen_event;
141
142 if (!mlx4_is_master(dev->dev))
143 return;
144
145 guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
146 ports_guid[port_num - 1].
147 all_rec_per_port[block_num].guid_indexes);
148 pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
149
150 /*calculate the slaves and notify them*/
151 for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
152 /* the location of the specific index runs from bits 4..11 */
153 if (!(test_bit(i + 4, (unsigned long *)&guid_indexes)))
154 continue;
155
156 slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
157 if (slave_id >= dev->dev->num_slaves)
158 return;
159 tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
160 form_cache_ag = get_cached_alias_guid(dev, port_num,
161 (NUM_ALIAS_GUID_IN_REC * block_num) + i);
162 /*
163 * Check if guid is not the same as in the cache,
164 * If it is different, wait for the snoop_smp or the port mgmt
165 * change event to update the slave on its port state change
166 */
167 if (tmp_cur_ag != form_cache_ag)
168 continue;
169 mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
170
171 /*2 cases: Valid GUID, and Invalid Guid*/
172
173 if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
174 prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num);
175 new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
176 MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
177 &gen_event);
178 pr_debug("slave: %d, port: %d prev_port_state: %d,"
179 " new_port_state: %d, gen_event: %d\n",
180 slave_id, port_num, prev_state, new_state, gen_event);
181 if (gen_event == SLAVE_PORT_GEN_EVENT_UP) {
182 pr_debug("sending PORT_UP event to slave: %d, port: %d\n",
183 slave_id, port_num);
184 mlx4_gen_port_state_change_eqe(dev->dev, slave_id,
185 port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE);
186 }
187 } else { /* request to invalidate GUID */
188 set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
189 MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
190 &gen_event);
191 pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
192 slave_id, port_num);
193 mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
194 MLX4_PORT_CHANGE_SUBTYPE_DOWN);
195 }
196 }
197}
198
199static void aliasguid_query_handler(int status,
200 struct ib_sa_guidinfo_rec *guid_rec,
201 void *context)
202{
203 struct mlx4_ib_dev *dev;
204 struct mlx4_alias_guid_work_context *cb_ctx = context;
205 u8 port_index ;
206 int i;
207 struct mlx4_sriov_alias_guid_info_rec_det *rec;
208 unsigned long flags, flags1;
209
210 if (!context)
211 return;
212
213 dev = cb_ctx->dev;
214 port_index = cb_ctx->port - 1;
215 rec = &dev->sriov.alias_guid.ports_guid[port_index].
216 all_rec_per_port[cb_ctx->block_num];
217
218 if (status) {
219 rec->status = MLX4_GUID_INFO_STATUS_IDLE;
220 pr_debug("(port: %d) failed: status = %d\n",
221 cb_ctx->port, status);
222 goto out;
223 }
224
225 if (guid_rec->block_num != cb_ctx->block_num) {
226 pr_err("block num mismatch: %d != %d\n",
227 cb_ctx->block_num, guid_rec->block_num);
228 goto out;
229 }
230
231 pr_debug("lid/port: %d/%d, block_num: %d\n",
232 be16_to_cpu(guid_rec->lid), cb_ctx->port,
233 guid_rec->block_num);
234
235 rec = &dev->sriov.alias_guid.ports_guid[port_index].
236 all_rec_per_port[guid_rec->block_num];
237
238 rec->status = MLX4_GUID_INFO_STATUS_SET;
239 rec->method = MLX4_GUID_INFO_RECORD_SET;
240
241 for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
242 __be64 tmp_cur_ag;
243 tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
244 /* check if the SM didn't assign one of the records.
245 * if it didn't, if it was not sysadmin request:
246 * ask the SM to give a new GUID, (instead of the driver request).
247 */
248 if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
249 mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
250 "block_num: %d was declined by SM, "
251 "ownership by %d (0 = driver, 1=sysAdmin,"
252 " 2=None)\n", __func__, i,
253 guid_rec->block_num, rec->ownership);
254 if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
255 /* if it is driver assign, asks for new GUID from SM*/
256 *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
257 MLX4_NOT_SET_GUID;
258
259 /* Mark the record as not assigned, and let it
260 * be sent again in the next work sched.*/
261 rec->status = MLX4_GUID_INFO_STATUS_IDLE;
262 rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
263 }
264 } else {
265 /* properly assigned record. */
266 /* We save the GUID we just got from the SM in the
267 * admin_guid in order to be persistent, and in the
268 * request from the sm the process will ask for the same GUID */
269 if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN &&
270 tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) {
271 /* the sysadmin assignment failed.*/
272 mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
273 " admin guid after SysAdmin "
274 "configuration. "
275 "Record num %d in block_num:%d "
276 "was declined by SM, "
277 "new val(0x%llx) was kept\n",
278 __func__, i,
279 guid_rec->block_num,
280 be64_to_cpu(*(__be64 *) &
281 rec->all_recs[i * GUID_REC_SIZE]));
282 } else {
283 memcpy(&rec->all_recs[i * GUID_REC_SIZE],
284 &guid_rec->guid_info_list[i * GUID_REC_SIZE],
285 GUID_REC_SIZE);
286 }
287 }
288 }
289 /*
290 The func is call here to close the cases when the
291 sm doesn't send smp, so in the sa response the driver
292 notifies the slave.
293 */
294 mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num,
295 cb_ctx->port,
296 guid_rec->guid_info_list);
297out:
298 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
299 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
300 if (!dev->sriov.is_going_down)
301 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
302 &dev->sriov.alias_guid.ports_guid[port_index].
303 alias_guid_work, 0);
304 if (cb_ctx->sa_query) {
305 list_del(&cb_ctx->list);
306 kfree(cb_ctx);
307 } else
308 complete(&cb_ctx->done);
309 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
310 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
311}
312
313static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
314{
315 int i;
316 u64 cur_admin_val;
317 ib_sa_comp_mask comp_mask = 0;
318
319 dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
320 = MLX4_GUID_INFO_STATUS_IDLE;
321 dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method
322 = MLX4_GUID_INFO_RECORD_SET;
323
324 /* calculate the comp_mask for that record.*/
325 for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
326 cur_admin_val =
327 *(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
328 all_rec_per_port[index].all_recs[GUID_REC_SIZE * i];
329 /*
330 check the admin value: if it's for delete (~00LL) or
331 it is the first guid of the first record (hw guid) or
332 the records is not in ownership of the sysadmin and the sm doesn't
333 need to assign GUIDs, then don't put it up for assignment.
334 */
335 if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
336 (!index && !i) ||
337 MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid.
338 ports_guid[port - 1].all_rec_per_port[index].ownership)
339 continue;
340 comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
341 }
342 dev->sriov.alias_guid.ports_guid[port - 1].
343 all_rec_per_port[index].guid_indexes = comp_mask;
344}
345
346static int set_guid_rec(struct ib_device *ibdev,
347 u8 port, int index,
348 struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
349{
350 int err;
351 struct mlx4_ib_dev *dev = to_mdev(ibdev);
352 struct ib_sa_guidinfo_rec guid_info_rec;
353 ib_sa_comp_mask comp_mask;
354 struct ib_port_attr attr;
355 struct mlx4_alias_guid_work_context *callback_context;
356 unsigned long resched_delay, flags, flags1;
357 struct list_head *head =
358 &dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
359
360 err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
361 if (err) {
362 pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
363 err, port);
364 return err;
365 }
366 /*check the port was configured by the sm, otherwise no need to send */
367 if (attr.state != IB_PORT_ACTIVE) {
368 pr_debug("port %d not active...rescheduling\n", port);
369 resched_delay = 5 * HZ;
370 err = -EAGAIN;
371 goto new_schedule;
372 }
373
374 callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL);
375 if (!callback_context) {
376 err = -ENOMEM;
377 resched_delay = HZ * 5;
378 goto new_schedule;
379 }
380 callback_context->port = port;
381 callback_context->dev = dev;
382 callback_context->block_num = index;
383
384 memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
385
386 guid_info_rec.lid = cpu_to_be16(attr.lid);
387 guid_info_rec.block_num = index;
388
389 memcpy(guid_info_rec.guid_info_list, rec_det->all_recs,
390 GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC);
391 comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM |
392 rec_det->guid_indexes;
393
394 init_completion(&callback_context->done);
395 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
396 list_add_tail(&callback_context->list, head);
397 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
398
399 callback_context->query_id =
400 ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
401 ibdev, port, &guid_info_rec,
402 comp_mask, rec_det->method, 1000,
403 GFP_KERNEL, aliasguid_query_handler,
404 callback_context,
405 &callback_context->sa_query);
406 if (callback_context->query_id < 0) {
407 pr_debug("ib_sa_guid_info_rec_query failed, query_id: "
408 "%d. will reschedule to the next 1 sec.\n",
409 callback_context->query_id);
410 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
411 list_del(&callback_context->list);
412 kfree(callback_context);
413 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
414 resched_delay = 1 * HZ;
415 err = -EAGAIN;
416 goto new_schedule;
417 }
418 err = 0;
419 goto out;
420
421new_schedule:
422 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
423 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
424 invalidate_guid_record(dev, port, index);
425 if (!dev->sriov.is_going_down) {
426 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
427 &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
428 resched_delay);
429 }
430 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
431 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
432
433out:
434 return err;
435}
436
437void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
438{
439 int i;
440 unsigned long flags, flags1;
441
442 pr_debug("port %d\n", port);
443
444 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
445 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
446 for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
447 invalidate_guid_record(dev, port, i);
448
449 if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) {
450 /*
451 make sure no work waits in the queue, if the work is already
452 queued(not on the timer) the cancel will fail. That is not a problem
453 because we just want the work started.
454 */
455 __cancel_delayed_work(&dev->sriov.alias_guid.
456 ports_guid[port - 1].alias_guid_work);
457 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
458 &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
459 0);
460 }
461 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
462 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
463}
464
465/* The function returns the next record that was
466 * not configured (or failed to be configured) */
467static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
468 struct mlx4_next_alias_guid_work *rec)
469{
470 int j;
471 unsigned long flags;
472
473 for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
474 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
475 if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status ==
476 MLX4_GUID_INFO_STATUS_IDLE) {
477 memcpy(&rec->rec_det,
478 &dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j],
479 sizeof (struct mlx4_sriov_alias_guid_info_rec_det));
480 rec->port = port;
481 rec->block_num = j;
482 dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status =
483 MLX4_GUID_INFO_STATUS_PENDING;
484 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
485 return 0;
486 }
487 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
488 }
489 return -ENOENT;
490}
491
492static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port,
493 int rec_index,
494 struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
495{
496 dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes =
497 rec_det->guid_indexes;
498 memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs,
499 rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
500 dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status =
501 rec_det->status;
502}
503
504static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port)
505{
506 int j;
507 struct mlx4_sriov_alias_guid_info_rec_det rec_det ;
508
509 for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) {
510 memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
511 rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) |
512 IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 |
513 IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 |
514 IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 |
515 IB_SA_GUIDINFO_REC_GID7;
516 rec_det.status = MLX4_GUID_INFO_STATUS_IDLE;
517 set_administratively_guid_record(dev, port, j, &rec_det);
518 }
519}
520
521static void alias_guid_work(struct work_struct *work)
522{
523 struct delayed_work *delay = to_delayed_work(work);
524 int ret = 0;
525 struct mlx4_next_alias_guid_work *rec;
526 struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port =
527 container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det,
528 alias_guid_work);
529 struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent;
530 struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid,
531 struct mlx4_ib_sriov,
532 alias_guid);
533 struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov);
534
535 rec = kzalloc(sizeof *rec, GFP_KERNEL);
536 if (!rec) {
537 pr_err("alias_guid_work: No Memory\n");
538 return;
539 }
540
541 pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1);
542 ret = get_next_record_to_update(dev, sriov_alias_port->port, rec);
543 if (ret) {
544 pr_debug("No more records to update.\n");
545 goto out;
546 }
547
548 set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num,
549 &rec->rec_det);
550
551out:
552 kfree(rec);
553}
554
555
556void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
557{
558 unsigned long flags, flags1;
559
560 if (!mlx4_is_master(dev->dev))
561 return;
562 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
563 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
564 if (!dev->sriov.is_going_down) {
565 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
566 &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
567 }
568 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
569 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
570}
571
572void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev)
573{
574 int i;
575 struct mlx4_ib_sriov *sriov = &dev->sriov;
576 struct mlx4_alias_guid_work_context *cb_ctx;
577 struct mlx4_sriov_alias_guid_port_rec_det *det;
578 struct ib_sa_query *sa_query;
579 unsigned long flags;
580
581 for (i = 0 ; i < dev->num_ports; i++) {
582 cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work);
583 det = &sriov->alias_guid.ports_guid[i];
584 spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
585 while (!list_empty(&det->cb_list)) {
586 cb_ctx = list_entry(det->cb_list.next,
587 struct mlx4_alias_guid_work_context,
588 list);
589 sa_query = cb_ctx->sa_query;
590 cb_ctx->sa_query = NULL;
591 list_del(&cb_ctx->list);
592 spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
593 ib_sa_cancel_query(cb_ctx->query_id, sa_query);
594 wait_for_completion(&cb_ctx->done);
595 kfree(cb_ctx);
596 spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
597 }
598 spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
599 }
600 for (i = 0 ; i < dev->num_ports; i++) {
601 flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
602 destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
603 }
604 ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
605 kfree(dev->sriov.alias_guid.sa_client);
606}
607
608int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
609{
610 char alias_wq_name[15];
611 int ret = 0;
612 int i, j, k;
613 union ib_gid gid;
614
615 if (!mlx4_is_master(dev->dev))
616 return 0;
617 dev->sriov.alias_guid.sa_client =
618 kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL);
619 if (!dev->sriov.alias_guid.sa_client)
620 return -ENOMEM;
621
622 ib_sa_register_client(dev->sriov.alias_guid.sa_client);
623
624 spin_lock_init(&dev->sriov.alias_guid.ag_work_lock);
625
626 for (i = 1; i <= dev->num_ports; ++i) {
627 if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) {
628 ret = -EFAULT;
629 goto err_unregister;
630 }
631 }
632
633 for (i = 0 ; i < dev->num_ports; i++) {
634 memset(&dev->sriov.alias_guid.ports_guid[i], 0,
635 sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
636 /*Check if the SM doesn't need to assign the GUIDs*/
637 for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
638 if (mlx4_ib_sm_guid_assign) {
639 dev->sriov.alias_guid.ports_guid[i].
640 all_rec_per_port[j].
641 ownership = MLX4_GUID_DRIVER_ASSIGN;
642 continue;
643 }
644 dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j].
645 ownership = MLX4_GUID_NONE_ASSIGN;
646 /*mark each val as it was deleted,
647 till the sysAdmin will give it valid val*/
648 for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
649 *(__be64 *)&dev->sriov.alias_guid.ports_guid[i].
650 all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] =
651 cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
652 }
653 }
654 INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
655 /*prepare the records, set them to be allocated by sm*/
656 for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
657 invalidate_guid_record(dev, i + 1, j);
658
659 dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
660 dev->sriov.alias_guid.ports_guid[i].port = i;
661 if (mlx4_ib_sm_guid_assign)
662 set_all_slaves_guids(dev, i);
663
664 snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
665 dev->sriov.alias_guid.ports_guid[i].wq =
666 create_singlethread_workqueue(alias_wq_name);
667 if (!dev->sriov.alias_guid.ports_guid[i].wq) {
668 ret = -ENOMEM;
669 goto err_thread;
670 }
671 INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work,
672 alias_guid_work);
673 }
674 return 0;
675
676err_thread:
677 for (--i; i >= 0; i--) {
678 destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
679 dev->sriov.alias_guid.ports_guid[i].wq = NULL;
680 }
681
682err_unregister:
683 ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
684 kfree(dev->sriov.alias_guid.sa_client);
685 dev->sriov.alias_guid.sa_client = NULL;
686 pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret);
687 return ret;
688}
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
new file mode 100644
index 00000000000..e25e4dafb8a
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -0,0 +1,437 @@
1/*
2 * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_mad.h>
34
35#include <linux/mlx4/cmd.h>
36#include <linux/rbtree.h>
37#include <linux/idr.h>
38#include <rdma/ib_cm.h>
39
40#include "mlx4_ib.h"
41
42#define CM_CLEANUP_CACHE_TIMEOUT (5 * HZ)
43
44struct id_map_entry {
45 struct rb_node node;
46
47 u32 sl_cm_id;
48 u32 pv_cm_id;
49 int slave_id;
50 int scheduled_delete;
51 struct mlx4_ib_dev *dev;
52
53 struct list_head list;
54 struct delayed_work timeout;
55};
56
57struct cm_generic_msg {
58 struct ib_mad_hdr hdr;
59
60 __be32 local_comm_id;
61 __be32 remote_comm_id;
62};
63
64struct cm_req_msg {
65 unsigned char unused[0x60];
66 union ib_gid primary_path_sgid;
67};
68
69
70static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
71{
72 struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
73 msg->local_comm_id = cpu_to_be32(cm_id);
74}
75
76static u32 get_local_comm_id(struct ib_mad *mad)
77{
78 struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
79
80 return be32_to_cpu(msg->local_comm_id);
81}
82
83static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
84{
85 struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
86 msg->remote_comm_id = cpu_to_be32(cm_id);
87}
88
89static u32 get_remote_comm_id(struct ib_mad *mad)
90{
91 struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
92
93 return be32_to_cpu(msg->remote_comm_id);
94}
95
96static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
97{
98 struct cm_req_msg *msg = (struct cm_req_msg *)mad;
99
100 return msg->primary_path_sgid;
101}
102
103/* Lock should be taken before called */
104static struct id_map_entry *
105id_map_find_by_sl_id(struct ib_device *ibdev, u32 slave_id, u32 sl_cm_id)
106{
107 struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
108 struct rb_node *node = sl_id_map->rb_node;
109
110 while (node) {
111 struct id_map_entry *id_map_entry =
112 rb_entry(node, struct id_map_entry, node);
113
114 if (id_map_entry->sl_cm_id > sl_cm_id)
115 node = node->rb_left;
116 else if (id_map_entry->sl_cm_id < sl_cm_id)
117 node = node->rb_right;
118 else if (id_map_entry->slave_id > slave_id)
119 node = node->rb_left;
120 else if (id_map_entry->slave_id < slave_id)
121 node = node->rb_right;
122 else
123 return id_map_entry;
124 }
125 return NULL;
126}
127
128static void id_map_ent_timeout(struct work_struct *work)
129{
130 struct delayed_work *delay = to_delayed_work(work);
131 struct id_map_entry *ent = container_of(delay, struct id_map_entry, timeout);
132 struct id_map_entry *db_ent, *found_ent;
133 struct mlx4_ib_dev *dev = ent->dev;
134 struct mlx4_ib_sriov *sriov = &dev->sriov;
135 struct rb_root *sl_id_map = &sriov->sl_id_map;
136 int pv_id = (int) ent->pv_cm_id;
137
138 spin_lock(&sriov->id_map_lock);
139 db_ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_id);
140 if (!db_ent)
141 goto out;
142 found_ent = id_map_find_by_sl_id(&dev->ib_dev, ent->slave_id, ent->sl_cm_id);
143 if (found_ent && found_ent == ent)
144 rb_erase(&found_ent->node, sl_id_map);
145 idr_remove(&sriov->pv_id_table, pv_id);
146
147out:
148 list_del(&ent->list);
149 spin_unlock(&sriov->id_map_lock);
150 kfree(ent);
151}
152
153static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id)
154{
155 struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
156 struct rb_root *sl_id_map = &sriov->sl_id_map;
157 struct id_map_entry *ent, *found_ent;
158
159 spin_lock(&sriov->id_map_lock);
160 ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_cm_id);
161 if (!ent)
162 goto out;
163 found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id);
164 if (found_ent && found_ent == ent)
165 rb_erase(&found_ent->node, sl_id_map);
166 idr_remove(&sriov->pv_id_table, pv_cm_id);
167out:
168 spin_unlock(&sriov->id_map_lock);
169}
170
171static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
172{
173 struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
174 struct rb_node **link = &sl_id_map->rb_node, *parent = NULL;
175 struct id_map_entry *ent;
176 int slave_id = new->slave_id;
177 int sl_cm_id = new->sl_cm_id;
178
179 ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id);
180 if (ent) {
181 pr_debug("overriding existing sl_id_map entry (cm_id = %x)\n",
182 sl_cm_id);
183
184 rb_replace_node(&ent->node, &new->node, sl_id_map);
185 return;
186 }
187
188 /* Go to the bottom of the tree */
189 while (*link) {
190 parent = *link;
191 ent = rb_entry(parent, struct id_map_entry, node);
192
193 if (ent->sl_cm_id > sl_cm_id || (ent->sl_cm_id == sl_cm_id && ent->slave_id > slave_id))
194 link = &(*link)->rb_left;
195 else
196 link = &(*link)->rb_right;
197 }
198
199 rb_link_node(&new->node, parent, link);
200 rb_insert_color(&new->node, sl_id_map);
201}
202
203static struct id_map_entry *
204id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
205{
206 int ret, id;
207 static int next_id;
208 struct id_map_entry *ent;
209 struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
210
211 ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL);
212 if (!ent) {
213 mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n");
214 return ERR_PTR(-ENOMEM);
215 }
216
217 ent->sl_cm_id = sl_cm_id;
218 ent->slave_id = slave_id;
219 ent->scheduled_delete = 0;
220 ent->dev = to_mdev(ibdev);
221 INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout);
222
223 do {
224 spin_lock(&to_mdev(ibdev)->sriov.id_map_lock);
225 ret = idr_get_new_above(&sriov->pv_id_table, ent,
226 next_id, &id);
227 if (!ret) {
228 next_id = ((unsigned) id + 1) & MAX_ID_MASK;
229 ent->pv_cm_id = (u32)id;
230 sl_id_map_add(ibdev, ent);
231 }
232
233 spin_unlock(&sriov->id_map_lock);
234 } while (ret == -EAGAIN && idr_pre_get(&sriov->pv_id_table, GFP_KERNEL));
235 /*the function idr_get_new_above can return -ENOSPC, so don't insert in that case.*/
236 if (!ret) {
237 spin_lock(&sriov->id_map_lock);
238 list_add_tail(&ent->list, &sriov->cm_list);
239 spin_unlock(&sriov->id_map_lock);
240 return ent;
241 }
242 /*error flow*/
243 kfree(ent);
244 mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret);
245 return ERR_PTR(-ENOMEM);
246}
247
248static struct id_map_entry *
249id_map_get(struct ib_device *ibdev, int *pv_cm_id, int sl_cm_id, int slave_id)
250{
251 struct id_map_entry *ent;
252 struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
253
254 spin_lock(&sriov->id_map_lock);
255 if (*pv_cm_id == -1) {
256 ent = id_map_find_by_sl_id(ibdev, sl_cm_id, slave_id);
257 if (ent)
258 *pv_cm_id = (int) ent->pv_cm_id;
259 } else
260 ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, *pv_cm_id);
261 spin_unlock(&sriov->id_map_lock);
262
263 return ent;
264}
265
266static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
267{
268 struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
269 unsigned long flags;
270
271 spin_lock_irqsave(&sriov->going_down_lock, flags);
272 spin_lock(&sriov->id_map_lock);
273 /*make sure that there is no schedule inside the scheduled work.*/
274 if (!sriov->is_going_down) {
275 id->scheduled_delete = 1;
276 schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
277 }
278 spin_unlock(&sriov->id_map_lock);
279 spin_unlock_irqrestore(&sriov->going_down_lock, flags);
280}
281
282int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
283 struct ib_mad *mad)
284{
285 struct id_map_entry *id;
286 u32 sl_cm_id;
287 int pv_cm_id = -1;
288
289 sl_cm_id = get_local_comm_id(mad);
290
291 if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
292 mad->mad_hdr.attr_id == CM_REP_ATTR_ID) {
293 id = id_map_alloc(ibdev, slave_id, sl_cm_id);
294 if (IS_ERR(id)) {
295 mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
296 __func__, slave_id, sl_cm_id);
297 return PTR_ERR(id);
298 }
299 } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) {
300 return 0;
301 } else {
302 id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
303 }
304
305 if (!id) {
306 pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n",
307 slave_id, sl_cm_id);
308 return -EINVAL;
309 }
310
311 set_local_comm_id(mad, id->pv_cm_id);
312
313 if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
314 schedule_delayed(ibdev, id);
315 else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID)
316 id_map_find_del(ibdev, pv_cm_id);
317
318 return 0;
319}
320
321int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
322 struct ib_mad *mad)
323{
324 u32 pv_cm_id;
325 struct id_map_entry *id;
326
327 if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) {
328 union ib_gid gid;
329
330 gid = gid_from_req_msg(ibdev, mad);
331 *slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
332 if (*slave < 0) {
333 mlx4_ib_warn(ibdev, "failed matching slave_id by gid (0x%llx)\n",
334 gid.global.interface_id);
335 return -ENOENT;
336 }
337 return 0;
338 }
339
340 pv_cm_id = get_remote_comm_id(mad);
341 id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1);
342
343 if (!id) {
344 pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id);
345 return -ENOENT;
346 }
347
348 *slave = id->slave_id;
349 set_remote_comm_id(mad, id->sl_cm_id);
350
351 if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
352 schedule_delayed(ibdev, id);
353 else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
354 mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) {
355 id_map_find_del(ibdev, (int) pv_cm_id);
356 }
357
358 return 0;
359}
360
361void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev)
362{
363 spin_lock_init(&dev->sriov.id_map_lock);
364 INIT_LIST_HEAD(&dev->sriov.cm_list);
365 dev->sriov.sl_id_map = RB_ROOT;
366 idr_init(&dev->sriov.pv_id_table);
367 idr_pre_get(&dev->sriov.pv_id_table, GFP_KERNEL);
368}
369
370/* slave = -1 ==> all slaves */
371/* TBD -- call paravirt clean for single slave. Need for slave RESET event */
372void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
373{
374 struct mlx4_ib_sriov *sriov = &dev->sriov;
375 struct rb_root *sl_id_map = &sriov->sl_id_map;
376 struct list_head lh;
377 struct rb_node *nd;
378 int need_flush = 1;
379 struct id_map_entry *map, *tmp_map;
380 /* cancel all delayed work queue entries */
381 INIT_LIST_HEAD(&lh);
382 spin_lock(&sriov->id_map_lock);
383 list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
384 if (slave < 0 || slave == map->slave_id) {
385 if (map->scheduled_delete)
386 need_flush &= !!cancel_delayed_work(&map->timeout);
387 }
388 }
389
390 spin_unlock(&sriov->id_map_lock);
391
392 if (!need_flush)
393 flush_scheduled_work(); /* make sure all timers were flushed */
394
395 /* now, remove all leftover entries from databases*/
396 spin_lock(&sriov->id_map_lock);
397 if (slave < 0) {
398 while (rb_first(sl_id_map)) {
399 struct id_map_entry *ent =
400 rb_entry(rb_first(sl_id_map),
401 struct id_map_entry, node);
402
403 rb_erase(&ent->node, sl_id_map);
404 idr_remove(&sriov->pv_id_table, (int) ent->pv_cm_id);
405 }
406 list_splice_init(&dev->sriov.cm_list, &lh);
407 } else {
408 /* first, move nodes belonging to slave to db remove list */
409 nd = rb_first(sl_id_map);
410 while (nd) {
411 struct id_map_entry *ent =
412 rb_entry(nd, struct id_map_entry, node);
413 nd = rb_next(nd);
414 if (ent->slave_id == slave)
415 list_move_tail(&ent->list, &lh);
416 }
417 /* remove those nodes from databases */
418 list_for_each_entry_safe(map, tmp_map, &lh, list) {
419 rb_erase(&map->node, sl_id_map);
420 idr_remove(&sriov->pv_id_table, (int) map->pv_cm_id);
421 }
422
423 /* add remaining nodes from cm_list */
424 list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
425 if (slave == map->slave_id)
426 list_move_tail(&map->list, &lh);
427 }
428 }
429
430 spin_unlock(&sriov->id_map_lock);
431
432 /* free any map entries left behind due to cancel_delayed_work above */
433 list_for_each_entry_safe(map, tmp_map, &lh, list) {
434 list_del(&map->list);
435 kfree(map);
436 }
437}
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 6d4ef71cbcd..c9eb6a6815c 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -547,6 +547,26 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
547 checksum == cpu_to_be16(0xffff); 547 checksum == cpu_to_be16(0xffff);
548} 548}
549 549
550static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
551 unsigned tail, struct mlx4_cqe *cqe)
552{
553 struct mlx4_ib_proxy_sqp_hdr *hdr;
554
555 ib_dma_sync_single_for_cpu(qp->ibqp.device,
556 qp->sqp_proxy_rcv[tail].map,
557 sizeof (struct mlx4_ib_proxy_sqp_hdr),
558 DMA_FROM_DEVICE);
559 hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr);
560 wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index);
561 wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
562 wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
563 wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF;
564 wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0;
565 wc->dlid_path_bits = 0;
566
567 return 0;
568}
569
550static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, 570static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
551 struct mlx4_ib_qp **cur_qp, 571 struct mlx4_ib_qp **cur_qp,
552 struct ib_wc *wc) 572 struct ib_wc *wc)
@@ -559,6 +579,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
559 int is_error; 579 int is_error;
560 u32 g_mlpath_rqpn; 580 u32 g_mlpath_rqpn;
561 u16 wqe_ctr; 581 u16 wqe_ctr;
582 unsigned tail = 0;
562 583
563repoll: 584repoll:
564 cqe = next_cqe_sw(cq); 585 cqe = next_cqe_sw(cq);
@@ -634,7 +655,8 @@ repoll:
634 mlx4_ib_free_srq_wqe(srq, wqe_ctr); 655 mlx4_ib_free_srq_wqe(srq, wqe_ctr);
635 } else { 656 } else {
636 wq = &(*cur_qp)->rq; 657 wq = &(*cur_qp)->rq;
637 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; 658 tail = wq->tail & (wq->wqe_cnt - 1);
659 wc->wr_id = wq->wrid[tail];
638 ++wq->tail; 660 ++wq->tail;
639 } 661 }
640 662
@@ -717,6 +739,13 @@ repoll:
717 break; 739 break;
718 } 740 }
719 741
742 if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
743 if ((*cur_qp)->mlx4_ib_qp_type &
744 (MLX4_IB_QPT_PROXY_SMI_OWNER |
745 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
746 return use_tunnel_data(*cur_qp, cq, wc, tail, cqe);
747 }
748
720 wc->slid = be16_to_cpu(cqe->rlid); 749 wc->slid = be16_to_cpu(cqe->rlid);
721 g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); 750 g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
722 wc->src_qp = g_mlpath_rqpn & 0xffffff; 751 wc->src_qp = g_mlpath_rqpn & 0xffffff;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 9c2ae7efd00..21a794152d1 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -32,7 +32,10 @@
32 32
33#include <rdma/ib_mad.h> 33#include <rdma/ib_mad.h>
34#include <rdma/ib_smi.h> 34#include <rdma/ib_smi.h>
35#include <rdma/ib_sa.h>
36#include <rdma/ib_cache.h>
35 37
38#include <linux/random.h>
36#include <linux/mlx4/cmd.h> 39#include <linux/mlx4/cmd.h>
37#include <linux/gfp.h> 40#include <linux/gfp.h>
38#include <rdma/ib_pma.h> 41#include <rdma/ib_pma.h>
@@ -44,7 +47,62 @@ enum {
44 MLX4_IB_VENDOR_CLASS2 = 0xa 47 MLX4_IB_VENDOR_CLASS2 = 0xa
45}; 48};
46 49
47int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, 50#define MLX4_TUN_SEND_WRID_SHIFT 34
51#define MLX4_TUN_QPN_SHIFT 32
52#define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT)
53#define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT)
54
55#define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1)
56#define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3)
57
58 /* Port mgmt change event handling */
59
60#define GET_BLK_PTR_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.block_ptr)
61#define GET_MASK_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.tbl_entries_mask)
62#define NUM_IDX_IN_PKEY_TBL_BLK 32
63#define GUID_TBL_ENTRY_SIZE 8 /* size in bytes */
64#define GUID_TBL_BLK_NUM_ENTRIES 8
65#define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES)
66
67struct mlx4_mad_rcv_buf {
68 struct ib_grh grh;
69 u8 payload[256];
70} __packed;
71
72struct mlx4_mad_snd_buf {
73 u8 payload[256];
74} __packed;
75
76struct mlx4_tunnel_mad {
77 struct ib_grh grh;
78 struct mlx4_ib_tunnel_header hdr;
79 struct ib_mad mad;
80} __packed;
81
82struct mlx4_rcv_tunnel_mad {
83 struct mlx4_rcv_tunnel_hdr hdr;
84 struct ib_grh grh;
85 struct ib_mad mad;
86} __packed;
87
88static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num);
89static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num);
90static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
91 int block, u32 change_bitmap);
92
93__be64 mlx4_ib_gen_node_guid(void)
94{
95#define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40))
96 return cpu_to_be64(NODE_GUID_HI | random32());
97}
98
99__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
100{
101 return cpu_to_be64(atomic_inc_return(&ctx->tid)) |
102 cpu_to_be64(0xff00000000000000LL);
103}
104
105int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
48 int port, struct ib_wc *in_wc, struct ib_grh *in_grh, 106 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
49 void *in_mad, void *response_mad) 107 void *in_mad, void *response_mad)
50{ 108{
@@ -71,10 +129,13 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
71 * Key check traps can't be generated unless we have in_wc to 129 * Key check traps can't be generated unless we have in_wc to
72 * tell us where to send the trap. 130 * tell us where to send the trap.
73 */ 131 */
74 if (ignore_mkey || !in_wc) 132 if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_MKEY) || !in_wc)
75 op_modifier |= 0x1; 133 op_modifier |= 0x1;
76 if (ignore_bkey || !in_wc) 134 if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_BKEY) || !in_wc)
77 op_modifier |= 0x2; 135 op_modifier |= 0x2;
136 if (mlx4_is_mfunc(dev->dev) &&
137 (mad_ifc_flags & MLX4_MAD_IFC_NET_VIEW || in_wc))
138 op_modifier |= 0x8;
78 139
79 if (in_wc) { 140 if (in_wc) {
80 struct { 141 struct {
@@ -107,10 +168,10 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
107 in_modifier |= in_wc->slid << 16; 168 in_modifier |= in_wc->slid << 16;
108 } 169 }
109 170
110 err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, 171 err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier,
111 in_modifier, op_modifier, 172 mlx4_is_master(dev->dev) ? (op_modifier & ~0x8) : op_modifier,
112 MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, 173 MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
113 MLX4_CMD_NATIVE); 174 (op_modifier & 0x8) ? MLX4_CMD_NATIVE : MLX4_CMD_WRAPPED);
114 175
115 if (!err) 176 if (!err)
116 memcpy(response_mad, outmailbox->buf, 256); 177 memcpy(response_mad, outmailbox->buf, 256);
@@ -156,6 +217,10 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad,
156{ 217{
157 struct ib_port_info *pinfo; 218 struct ib_port_info *pinfo;
158 u16 lid; 219 u16 lid;
220 __be16 *base;
221 u32 bn, pkey_change_bitmap;
222 int i;
223
159 224
160 struct mlx4_ib_dev *dev = to_mdev(ibdev); 225 struct mlx4_ib_dev *dev = to_mdev(ibdev);
161 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || 226 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
@@ -171,17 +236,46 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad,
171 pinfo->neighbormtu_mastersmsl & 0xf); 236 pinfo->neighbormtu_mastersmsl & 0xf);
172 237
173 if (pinfo->clientrereg_resv_subnetto & 0x80) 238 if (pinfo->clientrereg_resv_subnetto & 0x80)
174 mlx4_ib_dispatch_event(dev, port_num, 239 handle_client_rereg_event(dev, port_num);
175 IB_EVENT_CLIENT_REREGISTER);
176 240
177 if (prev_lid != lid) 241 if (prev_lid != lid)
178 mlx4_ib_dispatch_event(dev, port_num, 242 handle_lid_change_event(dev, port_num);
179 IB_EVENT_LID_CHANGE);
180 break; 243 break;
181 244
182 case IB_SMP_ATTR_PKEY_TABLE: 245 case IB_SMP_ATTR_PKEY_TABLE:
183 mlx4_ib_dispatch_event(dev, port_num, 246 if (!mlx4_is_mfunc(dev->dev)) {
184 IB_EVENT_PKEY_CHANGE); 247 mlx4_ib_dispatch_event(dev, port_num,
248 IB_EVENT_PKEY_CHANGE);
249 break;
250 }
251
252 /* at this point, we are running in the master.
253 * Slaves do not receive SMPs.
254 */
255 bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod) & 0xFFFF;
256 base = (__be16 *) &(((struct ib_smp *)mad)->data[0]);
257 pkey_change_bitmap = 0;
258 for (i = 0; i < 32; i++) {
259 pr_debug("PKEY[%d] = x%x\n",
260 i + bn*32, be16_to_cpu(base[i]));
261 if (be16_to_cpu(base[i]) !=
262 dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32]) {
263 pkey_change_bitmap |= (1 << i);
264 dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32] =
265 be16_to_cpu(base[i]);
266 }
267 }
268 pr_debug("PKEY Change event: port=%d, "
269 "block=0x%x, change_bitmap=0x%x\n",
270 port_num, bn, pkey_change_bitmap);
271
272 if (pkey_change_bitmap) {
273 mlx4_ib_dispatch_event(dev, port_num,
274 IB_EVENT_PKEY_CHANGE);
275 if (!dev->sriov.is_going_down)
276 __propagate_pkey_ev(dev, port_num, bn,
277 pkey_change_bitmap);
278 }
185 break; 279 break;
186 280
187 case IB_SMP_ATTR_GUID_INFO: 281 case IB_SMP_ATTR_GUID_INFO:
@@ -189,12 +283,56 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad,
189 if (!mlx4_is_master(dev->dev)) 283 if (!mlx4_is_master(dev->dev))
190 mlx4_ib_dispatch_event(dev, port_num, 284 mlx4_ib_dispatch_event(dev, port_num,
191 IB_EVENT_GID_CHANGE); 285 IB_EVENT_GID_CHANGE);
286 /*if master, notify relevant slaves*/
287 if (mlx4_is_master(dev->dev) &&
288 !dev->sriov.is_going_down) {
289 bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod);
290 mlx4_ib_update_cache_on_guid_change(dev, bn, port_num,
291 (u8 *)(&((struct ib_smp *)mad)->data));
292 mlx4_ib_notify_slaves_on_guid_change(dev, bn, port_num,
293 (u8 *)(&((struct ib_smp *)mad)->data));
294 }
192 break; 295 break;
296
193 default: 297 default:
194 break; 298 break;
195 } 299 }
196} 300}
197 301
302static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
303 int block, u32 change_bitmap)
304{
305 int i, ix, slave, err;
306 int have_event = 0;
307
308 for (slave = 0; slave < dev->dev->caps.sqp_demux; slave++) {
309 if (slave == mlx4_master_func_num(dev->dev))
310 continue;
311 if (!mlx4_is_slave_active(dev->dev, slave))
312 continue;
313
314 have_event = 0;
315 for (i = 0; i < 32; i++) {
316 if (!(change_bitmap & (1 << i)))
317 continue;
318 for (ix = 0;
319 ix < dev->dev->caps.pkey_table_len[port_num]; ix++) {
320 if (dev->pkeys.virt2phys_pkey[slave][port_num - 1]
321 [ix] == i + 32 * block) {
322 err = mlx4_gen_pkey_eqe(dev->dev, slave, port_num);
323 pr_debug("propagate_pkey_ev: slave %d,"
324 " port %d, ix %d (%d)\n",
325 slave, port_num, ix, err);
326 have_event = 1;
327 break;
328 }
329 }
330 if (have_event)
331 break;
332 }
333 }
334}
335
198static void node_desc_override(struct ib_device *dev, 336static void node_desc_override(struct ib_device *dev,
199 struct ib_mad *mad) 337 struct ib_mad *mad)
200{ 338{
@@ -242,6 +380,268 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma
242 } 380 }
243} 381}
244 382
383static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave,
384 struct ib_sa_mad *sa_mad)
385{
386 int ret = 0;
387
388 /* dispatch to different sa handlers */
389 switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
390 case IB_SA_ATTR_MC_MEMBER_REC:
391 ret = mlx4_ib_mcg_demux_handler(ibdev, port, slave, sa_mad);
392 break;
393 default:
394 break;
395 }
396 return ret;
397}
398
399int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
400{
401 struct mlx4_ib_dev *dev = to_mdev(ibdev);
402 int i;
403
404 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
405 if (dev->sriov.demux[port - 1].guid_cache[i] == guid)
406 return i;
407 }
408 return -1;
409}
410
411
412static int get_pkey_phys_indices(struct mlx4_ib_dev *ibdev, u8 port, u8 ph_pkey_ix,
413 u8 *full_pk_ix, u8 *partial_pk_ix,
414 int *is_full_member)
415{
416 u16 search_pkey;
417 int fm;
418 int err = 0;
419 u16 pk;
420
421 err = ib_get_cached_pkey(&ibdev->ib_dev, port, ph_pkey_ix, &search_pkey);
422 if (err)
423 return err;
424
425 fm = (search_pkey & 0x8000) ? 1 : 0;
426 if (fm) {
427 *full_pk_ix = ph_pkey_ix;
428 search_pkey &= 0x7FFF;
429 } else {
430 *partial_pk_ix = ph_pkey_ix;
431 search_pkey |= 0x8000;
432 }
433
434 if (ib_find_exact_cached_pkey(&ibdev->ib_dev, port, search_pkey, &pk))
435 pk = 0xFFFF;
436
437 if (fm)
438 *partial_pk_ix = (pk & 0xFF);
439 else
440 *full_pk_ix = (pk & 0xFF);
441
442 *is_full_member = fm;
443 return err;
444}
445
446int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
447 enum ib_qp_type dest_qpt, struct ib_wc *wc,
448 struct ib_grh *grh, struct ib_mad *mad)
449{
450 struct ib_sge list;
451 struct ib_send_wr wr, *bad_wr;
452 struct mlx4_ib_demux_pv_ctx *tun_ctx;
453 struct mlx4_ib_demux_pv_qp *tun_qp;
454 struct mlx4_rcv_tunnel_mad *tun_mad;
455 struct ib_ah_attr attr;
456 struct ib_ah *ah;
457 struct ib_qp *src_qp = NULL;
458 unsigned tun_tx_ix = 0;
459 int dqpn;
460 int ret = 0;
461 int i;
462 int is_full_member = 0;
463 u16 tun_pkey_ix;
464 u8 ph_pkey_ix, full_pk_ix = 0, partial_pk_ix = 0;
465
466 if (dest_qpt > IB_QPT_GSI)
467 return -EINVAL;
468
469 tun_ctx = dev->sriov.demux[port-1].tun[slave];
470
471 /* check if proxy qp created */
472 if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
473 return -EAGAIN;
474
475 /* QP0 forwarding only for Dom0 */
476 if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave))
477 return -EINVAL;
478
479 if (!dest_qpt)
480 tun_qp = &tun_ctx->qp[0];
481 else
482 tun_qp = &tun_ctx->qp[1];
483
484 /* compute pkey index for slave */
485 /* get physical pkey -- virtualized Dom0 pkey to phys*/
486 if (dest_qpt) {
487 ph_pkey_ix =
488 dev->pkeys.virt2phys_pkey[mlx4_master_func_num(dev->dev)][port - 1][wc->pkey_index];
489
490 /* now, translate this to the slave pkey index */
491 ret = get_pkey_phys_indices(dev, port, ph_pkey_ix, &full_pk_ix,
492 &partial_pk_ix, &is_full_member);
493 if (ret)
494 return -EINVAL;
495
496 for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) {
497 if ((dev->pkeys.virt2phys_pkey[slave][port - 1][i] == full_pk_ix) ||
498 (is_full_member &&
499 (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == partial_pk_ix)))
500 break;
501 }
502 if (i == dev->dev->caps.pkey_table_len[port])
503 return -EINVAL;
504 tun_pkey_ix = i;
505 } else
506 tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
507
508 dqpn = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave + port + (dest_qpt * 2) - 1;
509
510 /* get tunnel tx data buf for slave */
511 src_qp = tun_qp->qp;
512
513 /* create ah. Just need an empty one with the port num for the post send.
514 * The driver will set the force loopback bit in post_send */
515 memset(&attr, 0, sizeof attr);
516 attr.port_num = port;
517 ah = ib_create_ah(tun_ctx->pd, &attr);
518 if (IS_ERR(ah))
519 return -ENOMEM;
520
521 /* allocate tunnel tx buf after pass failure returns */
522 spin_lock(&tun_qp->tx_lock);
523 if (tun_qp->tx_ix_head - tun_qp->tx_ix_tail >=
524 (MLX4_NUM_TUNNEL_BUFS - 1))
525 ret = -EAGAIN;
526 else
527 tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
528 spin_unlock(&tun_qp->tx_lock);
529 if (ret)
530 goto out;
531
532 tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
533 if (tun_qp->tx_ring[tun_tx_ix].ah)
534 ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
535 tun_qp->tx_ring[tun_tx_ix].ah = ah;
536 ib_dma_sync_single_for_cpu(&dev->ib_dev,
537 tun_qp->tx_ring[tun_tx_ix].buf.map,
538 sizeof (struct mlx4_rcv_tunnel_mad),
539 DMA_TO_DEVICE);
540
541 /* copy over to tunnel buffer */
542 if (grh)
543 memcpy(&tun_mad->grh, grh, sizeof *grh);
544 memcpy(&tun_mad->mad, mad, sizeof *mad);
545
546 /* adjust tunnel data */
547 tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
548 tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
549 tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
550 tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
551 tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
552
553 ib_dma_sync_single_for_device(&dev->ib_dev,
554 tun_qp->tx_ring[tun_tx_ix].buf.map,
555 sizeof (struct mlx4_rcv_tunnel_mad),
556 DMA_TO_DEVICE);
557
558 list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map;
559 list.length = sizeof (struct mlx4_rcv_tunnel_mad);
560 list.lkey = tun_ctx->mr->lkey;
561
562 wr.wr.ud.ah = ah;
563 wr.wr.ud.port_num = port;
564 wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
565 wr.wr.ud.remote_qpn = dqpn;
566 wr.next = NULL;
567 wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
568 wr.sg_list = &list;
569 wr.num_sge = 1;
570 wr.opcode = IB_WR_SEND;
571 wr.send_flags = IB_SEND_SIGNALED;
572
573 ret = ib_post_send(src_qp, &wr, &bad_wr);
574out:
575 if (ret)
576 ib_destroy_ah(ah);
577 return ret;
578}
579
580static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
581 struct ib_wc *wc, struct ib_grh *grh,
582 struct ib_mad *mad)
583{
584 struct mlx4_ib_dev *dev = to_mdev(ibdev);
585 int err;
586 int slave;
587 u8 *slave_id;
588
589 /* Initially assume that this mad is for us */
590 slave = mlx4_master_func_num(dev->dev);
591
592 /* See if the slave id is encoded in a response mad */
593 if (mad->mad_hdr.method & 0x80) {
594 slave_id = (u8 *) &mad->mad_hdr.tid;
595 slave = *slave_id;
596 if (slave != 255) /*255 indicates the dom0*/
597 *slave_id = 0; /* remap tid */
598 }
599
600 /* If a grh is present, we demux according to it */
601 if (wc->wc_flags & IB_WC_GRH) {
602 slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id);
603 if (slave < 0) {
604 mlx4_ib_warn(ibdev, "failed matching grh\n");
605 return -ENOENT;
606 }
607 }
608 /* Class-specific handling */
609 switch (mad->mad_hdr.mgmt_class) {
610 case IB_MGMT_CLASS_SUBN_ADM:
611 if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
612 (struct ib_sa_mad *) mad))
613 return 0;
614 break;
615 case IB_MGMT_CLASS_CM:
616 if (mlx4_ib_demux_cm_handler(ibdev, port, &slave, mad))
617 return 0;
618 break;
619 case IB_MGMT_CLASS_DEVICE_MGMT:
620 if (mad->mad_hdr.method != IB_MGMT_METHOD_GET_RESP)
621 return 0;
622 break;
623 default:
624 /* Drop unsupported classes for slaves in tunnel mode */
625 if (slave != mlx4_master_func_num(dev->dev)) {
626 pr_debug("dropping unsupported ingress mad from class:%d "
627 "for slave:%d\n", mad->mad_hdr.mgmt_class, slave);
628 return 0;
629 }
630 }
631 /*make sure that no slave==255 was not handled yet.*/
632 if (slave >= dev->dev->caps.sqp_demux) {
633 mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
634 slave, dev->dev->caps.sqp_demux);
635 return -ENOENT;
636 }
637
638 err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
639 if (err)
640 pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
641 slave, err);
642 return 0;
643}
644
245static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, 645static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
246 struct ib_wc *in_wc, struct ib_grh *in_grh, 646 struct ib_wc *in_wc, struct ib_grh *in_grh,
247 struct ib_mad *in_mad, struct ib_mad *out_mad) 647 struct ib_mad *in_mad, struct ib_mad *out_mad)
@@ -306,8 +706,9 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
306 prev_lid = pattr.lid; 706 prev_lid = pattr.lid;
307 707
308 err = mlx4_MAD_IFC(to_mdev(ibdev), 708 err = mlx4_MAD_IFC(to_mdev(ibdev),
309 mad_flags & IB_MAD_IGNORE_MKEY, 709 (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) |
310 mad_flags & IB_MAD_IGNORE_BKEY, 710 (mad_flags & IB_MAD_IGNORE_BKEY ? MLX4_MAD_IFC_IGNORE_BKEY : 0) |
711 MLX4_MAD_IFC_NET_VIEW,
311 port_num, in_wc, in_grh, in_mad, out_mad); 712 port_num, in_wc, in_grh, in_mad, out_mad);
312 if (err) 713 if (err)
313 return IB_MAD_RESULT_FAILURE; 714 return IB_MAD_RESULT_FAILURE;
@@ -315,7 +716,9 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
315 if (!out_mad->mad_hdr.status) { 716 if (!out_mad->mad_hdr.status) {
316 if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)) 717 if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV))
317 smp_snoop(ibdev, port_num, in_mad, prev_lid); 718 smp_snoop(ibdev, port_num, in_mad, prev_lid);
318 node_desc_override(ibdev, out_mad); 719 /* slaves get node desc from FW */
720 if (!mlx4_is_slave(to_mdev(ibdev)->dev))
721 node_desc_override(ibdev, out_mad);
319 } 722 }
320 723
321 /* set return bit in status of directed route responses */ 724 /* set return bit in status of directed route responses */
@@ -398,6 +801,8 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
398static void send_handler(struct ib_mad_agent *agent, 801static void send_handler(struct ib_mad_agent *agent,
399 struct ib_mad_send_wc *mad_send_wc) 802 struct ib_mad_send_wc *mad_send_wc)
400{ 803{
804 if (mad_send_wc->send_buf->context[0])
805 ib_destroy_ah(mad_send_wc->send_buf->context[0]);
401 ib_free_send_mad(mad_send_wc->send_buf); 806 ib_free_send_mad(mad_send_wc->send_buf);
402} 807}
403 808
@@ -456,6 +861,90 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
456 } 861 }
457} 862}
458 863
864static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num)
865{
866 mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_LID_CHANGE);
867
868 if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
869 mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
870 MLX4_EQ_PORT_INFO_LID_CHANGE_MASK);
871}
872
873static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num)
874{
875 /* re-configure the alias-guid and mcg's */
876 if (mlx4_is_master(dev->dev)) {
877 mlx4_ib_invalidate_all_guid_record(dev, port_num);
878
879 if (!dev->sriov.is_going_down) {
880 mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0);
881 mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
882 MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK);
883 }
884 }
885 mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER);
886}
887
888static void propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
889 struct mlx4_eqe *eqe)
890{
891 __propagate_pkey_ev(dev, port_num, GET_BLK_PTR_FROM_EQE(eqe),
892 GET_MASK_FROM_EQE(eqe));
893}
894
895static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num,
896 u32 guid_tbl_blk_num, u32 change_bitmap)
897{
898 struct ib_smp *in_mad = NULL;
899 struct ib_smp *out_mad = NULL;
900 u16 i;
901
902 if (!mlx4_is_mfunc(dev->dev) || !mlx4_is_master(dev->dev))
903 return;
904
905 in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
906 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
907 if (!in_mad || !out_mad) {
908 mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n");
909 goto out;
910 }
911
912 guid_tbl_blk_num *= 4;
913
914 for (i = 0; i < 4; i++) {
915 if (change_bitmap && (!((change_bitmap >> (8 * i)) & 0xff)))
916 continue;
917 memset(in_mad, 0, sizeof *in_mad);
918 memset(out_mad, 0, sizeof *out_mad);
919
920 in_mad->base_version = 1;
921 in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
922 in_mad->class_version = 1;
923 in_mad->method = IB_MGMT_METHOD_GET;
924 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
925 in_mad->attr_mod = cpu_to_be32(guid_tbl_blk_num + i);
926
927 if (mlx4_MAD_IFC(dev,
928 MLX4_MAD_IFC_IGNORE_KEYS | MLX4_MAD_IFC_NET_VIEW,
929 port_num, NULL, NULL, in_mad, out_mad)) {
930 mlx4_ib_warn(&dev->ib_dev, "Failed in get GUID INFO MAD_IFC\n");
931 goto out;
932 }
933
934 mlx4_ib_update_cache_on_guid_change(dev, guid_tbl_blk_num + i,
935 port_num,
936 (u8 *)(&((struct ib_smp *)out_mad)->data));
937 mlx4_ib_notify_slaves_on_guid_change(dev, guid_tbl_blk_num + i,
938 port_num,
939 (u8 *)(&((struct ib_smp *)out_mad)->data));
940 }
941
942out:
943 kfree(in_mad);
944 kfree(out_mad);
945 return;
946}
947
459void handle_port_mgmt_change_event(struct work_struct *work) 948void handle_port_mgmt_change_event(struct work_struct *work)
460{ 949{
461 struct ib_event_work *ew = container_of(work, struct ib_event_work, work); 950 struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
@@ -463,6 +952,8 @@ void handle_port_mgmt_change_event(struct work_struct *work)
463 struct mlx4_eqe *eqe = &(ew->ib_eqe); 952 struct mlx4_eqe *eqe = &(ew->ib_eqe);
464 u8 port = eqe->event.port_mgmt_change.port; 953 u8 port = eqe->event.port_mgmt_change.port;
465 u32 changed_attr; 954 u32 changed_attr;
955 u32 tbl_block;
956 u32 change_bitmap;
466 957
467 switch (eqe->subtype) { 958 switch (eqe->subtype) {
468 case MLX4_DEV_PMC_SUBTYPE_PORT_INFO: 959 case MLX4_DEV_PMC_SUBTYPE_PORT_INFO:
@@ -478,24 +969,36 @@ void handle_port_mgmt_change_event(struct work_struct *work)
478 969
479 /* Check if it is a lid change event */ 970 /* Check if it is a lid change event */
480 if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK) 971 if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK)
481 mlx4_ib_dispatch_event(dev, port, IB_EVENT_LID_CHANGE); 972 handle_lid_change_event(dev, port);
482 973
483 /* Generate GUID changed event */ 974 /* Generate GUID changed event */
484 if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) 975 if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
485 mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); 976 mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
977 /*if master, notify all slaves*/
978 if (mlx4_is_master(dev->dev))
979 mlx4_gen_slaves_port_mgt_ev(dev->dev, port,
980 MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK);
981 }
486 982
487 if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK) 983 if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK)
488 mlx4_ib_dispatch_event(dev, port, 984 handle_client_rereg_event(dev, port);
489 IB_EVENT_CLIENT_REREGISTER);
490 break; 985 break;
491 986
492 case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE: 987 case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE:
493 mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE); 988 mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE);
989 if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
990 propagate_pkey_ev(dev, port, eqe);
494 break; 991 break;
495 case MLX4_DEV_PMC_SUBTYPE_GUID_INFO: 992 case MLX4_DEV_PMC_SUBTYPE_GUID_INFO:
496 /* paravirtualized master's guid is guid 0 -- does not change */ 993 /* paravirtualized master's guid is guid 0 -- does not change */
497 if (!mlx4_is_master(dev->dev)) 994 if (!mlx4_is_master(dev->dev))
498 mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); 995 mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
996 /*if master, notify relevant slaves*/
997 else if (!dev->sriov.is_going_down) {
998 tbl_block = GET_BLK_PTR_FROM_EQE(eqe);
999 change_bitmap = GET_MASK_FROM_EQE(eqe);
1000 handle_slaves_guid_change(dev, port, tbl_block, change_bitmap);
1001 }
499 break; 1002 break;
500 default: 1003 default:
501 pr_warn("Unsupported subtype 0x%x for " 1004 pr_warn("Unsupported subtype 0x%x for "
@@ -516,3 +1019,1035 @@ void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
516 1019
517 ib_dispatch_event(&event); 1020 ib_dispatch_event(&event);
518} 1021}
1022
1023static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg)
1024{
1025 unsigned long flags;
1026 struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
1027 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1028 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
1029 if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
1030 queue_work(ctx->wq, &ctx->work);
1031 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
1032}
1033
1034static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
1035 struct mlx4_ib_demux_pv_qp *tun_qp,
1036 int index)
1037{
1038 struct ib_sge sg_list;
1039 struct ib_recv_wr recv_wr, *bad_recv_wr;
1040 int size;
1041
1042 size = (tun_qp->qp->qp_type == IB_QPT_UD) ?
1043 sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf);
1044
1045 sg_list.addr = tun_qp->ring[index].map;
1046 sg_list.length = size;
1047 sg_list.lkey = ctx->mr->lkey;
1048
1049 recv_wr.next = NULL;
1050 recv_wr.sg_list = &sg_list;
1051 recv_wr.num_sge = 1;
1052 recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV |
1053 MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt);
1054 ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map,
1055 size, DMA_FROM_DEVICE);
1056 return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr);
1057}
1058
1059static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
1060 int slave, struct ib_sa_mad *sa_mad)
1061{
1062 int ret = 0;
1063
1064 /* dispatch to different sa handlers */
1065 switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
1066 case IB_SA_ATTR_MC_MEMBER_REC:
1067 ret = mlx4_ib_mcg_multiplex_handler(ibdev, port, slave, sa_mad);
1068 break;
1069 default:
1070 break;
1071 }
1072 return ret;
1073}
1074
1075static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
1076{
1077 int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
1078
1079 return (qpn >= proxy_start && qpn <= proxy_start + 1);
1080}
1081
1082
1083int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
1084 enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
1085 u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad)
1086{
1087 struct ib_sge list;
1088 struct ib_send_wr wr, *bad_wr;
1089 struct mlx4_ib_demux_pv_ctx *sqp_ctx;
1090 struct mlx4_ib_demux_pv_qp *sqp;
1091 struct mlx4_mad_snd_buf *sqp_mad;
1092 struct ib_ah *ah;
1093 struct ib_qp *send_qp = NULL;
1094 unsigned wire_tx_ix = 0;
1095 int ret = 0;
1096 u16 wire_pkey_ix;
1097 int src_qpnum;
1098 u8 sgid_index;
1099
1100
1101 sqp_ctx = dev->sriov.sqps[port-1];
1102
1103 /* check if proxy qp created */
1104 if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
1105 return -EAGAIN;
1106
1107 /* QP0 forwarding only for Dom0 */
1108 if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave))
1109 return -EINVAL;
1110
1111 if (dest_qpt == IB_QPT_SMI) {
1112 src_qpnum = 0;
1113 sqp = &sqp_ctx->qp[0];
1114 wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
1115 } else {
1116 src_qpnum = 1;
1117 sqp = &sqp_ctx->qp[1];
1118 wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][pkey_index];
1119 }
1120
1121 send_qp = sqp->qp;
1122
1123 /* create ah */
1124 sgid_index = attr->grh.sgid_index;
1125 attr->grh.sgid_index = 0;
1126 ah = ib_create_ah(sqp_ctx->pd, attr);
1127 if (IS_ERR(ah))
1128 return -ENOMEM;
1129 attr->grh.sgid_index = sgid_index;
1130 to_mah(ah)->av.ib.gid_index = sgid_index;
1131 /* get rid of force-loopback bit */
1132 to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
1133 spin_lock(&sqp->tx_lock);
1134 if (sqp->tx_ix_head - sqp->tx_ix_tail >=
1135 (MLX4_NUM_TUNNEL_BUFS - 1))
1136 ret = -EAGAIN;
1137 else
1138 wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
1139 spin_unlock(&sqp->tx_lock);
1140 if (ret)
1141 goto out;
1142
1143 sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
1144 if (sqp->tx_ring[wire_tx_ix].ah)
1145 ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
1146 sqp->tx_ring[wire_tx_ix].ah = ah;
1147 ib_dma_sync_single_for_cpu(&dev->ib_dev,
1148 sqp->tx_ring[wire_tx_ix].buf.map,
1149 sizeof (struct mlx4_mad_snd_buf),
1150 DMA_TO_DEVICE);
1151
1152 memcpy(&sqp_mad->payload, mad, sizeof *mad);
1153
1154 ib_dma_sync_single_for_device(&dev->ib_dev,
1155 sqp->tx_ring[wire_tx_ix].buf.map,
1156 sizeof (struct mlx4_mad_snd_buf),
1157 DMA_TO_DEVICE);
1158
1159 list.addr = sqp->tx_ring[wire_tx_ix].buf.map;
1160 list.length = sizeof (struct mlx4_mad_snd_buf);
1161 list.lkey = sqp_ctx->mr->lkey;
1162
1163 wr.wr.ud.ah = ah;
1164 wr.wr.ud.port_num = port;
1165 wr.wr.ud.pkey_index = wire_pkey_ix;
1166 wr.wr.ud.remote_qkey = qkey;
1167 wr.wr.ud.remote_qpn = remote_qpn;
1168 wr.next = NULL;
1169 wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
1170 wr.sg_list = &list;
1171 wr.num_sge = 1;
1172 wr.opcode = IB_WR_SEND;
1173 wr.send_flags = IB_SEND_SIGNALED;
1174
1175 ret = ib_post_send(send_qp, &wr, &bad_wr);
1176out:
1177 if (ret)
1178 ib_destroy_ah(ah);
1179 return ret;
1180}
1181
1182static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
1183{
1184 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1185 struct mlx4_ib_demux_pv_qp *tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc->wr_id)];
1186 int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1);
1187 struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr;
1188 struct mlx4_ib_ah ah;
1189 struct ib_ah_attr ah_attr;
1190 u8 *slave_id;
1191 int slave;
1192
1193 /* Get slave that sent this packet */
1194 if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
1195 wc->src_qp >= dev->dev->phys_caps.base_proxy_sqpn + 8 * MLX4_MFUNC_MAX ||
1196 (wc->src_qp & 0x1) != ctx->port - 1 ||
1197 wc->src_qp & 0x4) {
1198 mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp);
1199 return;
1200 }
1201 slave = ((wc->src_qp & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8;
1202 if (slave != ctx->slave) {
1203 mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
1204 "belongs to another slave\n", wc->src_qp);
1205 return;
1206 }
1207 if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) {
1208 mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
1209 "non-master trying to send QP0 packets\n", wc->src_qp);
1210 return;
1211 }
1212
1213 /* Map transaction ID */
1214 ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
1215 sizeof (struct mlx4_tunnel_mad),
1216 DMA_FROM_DEVICE);
1217 switch (tunnel->mad.mad_hdr.method) {
1218 case IB_MGMT_METHOD_SET:
1219 case IB_MGMT_METHOD_GET:
1220 case IB_MGMT_METHOD_REPORT:
1221 case IB_SA_METHOD_GET_TABLE:
1222 case IB_SA_METHOD_DELETE:
1223 case IB_SA_METHOD_GET_MULTI:
1224 case IB_SA_METHOD_GET_TRACE_TBL:
1225 slave_id = (u8 *) &tunnel->mad.mad_hdr.tid;
1226 if (*slave_id) {
1227 mlx4_ib_warn(ctx->ib_dev, "egress mad has non-null tid msb:%d "
1228 "class:%d slave:%d\n", *slave_id,
1229 tunnel->mad.mad_hdr.mgmt_class, slave);
1230 return;
1231 } else
1232 *slave_id = slave;
1233 default:
1234 /* nothing */;
1235 }
1236
1237 /* Class-specific handling */
1238 switch (tunnel->mad.mad_hdr.mgmt_class) {
1239 case IB_MGMT_CLASS_SUBN_ADM:
1240 if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
1241 (struct ib_sa_mad *) &tunnel->mad))
1242 return;
1243 break;
1244 case IB_MGMT_CLASS_CM:
1245 if (mlx4_ib_multiplex_cm_handler(ctx->ib_dev, ctx->port, slave,
1246 (struct ib_mad *) &tunnel->mad))
1247 return;
1248 break;
1249 case IB_MGMT_CLASS_DEVICE_MGMT:
1250 if (tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_GET &&
1251 tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_SET)
1252 return;
1253 break;
1254 default:
1255 /* Drop unsupported classes for slaves in tunnel mode */
1256 if (slave != mlx4_master_func_num(dev->dev)) {
1257 mlx4_ib_warn(ctx->ib_dev, "dropping unsupported egress mad from class:%d "
1258 "for slave:%d\n", tunnel->mad.mad_hdr.mgmt_class, slave);
1259 return;
1260 }
1261 }
1262
1263 /* We are using standard ib_core services to send the mad, so generate a
1264 * stadard address handle by decoding the tunnelled mlx4_ah fields */
1265 memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
1266 ah.ibah.device = ctx->ib_dev;
1267 mlx4_ib_query_ah(&ah.ibah, &ah_attr);
1268 if ((ah_attr.ah_flags & IB_AH_GRH) &&
1269 (ah_attr.grh.sgid_index != slave)) {
1270 mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n",
1271 slave, ah_attr.grh.sgid_index);
1272 return;
1273 }
1274
1275 mlx4_ib_send_to_wire(dev, slave, ctx->port,
1276 is_proxy_qp0(dev, wc->src_qp, slave) ?
1277 IB_QPT_SMI : IB_QPT_GSI,
1278 be16_to_cpu(tunnel->hdr.pkey_index),
1279 be32_to_cpu(tunnel->hdr.remote_qpn),
1280 be32_to_cpu(tunnel->hdr.qkey),
1281 &ah_attr, &tunnel->mad);
1282}
1283
1284static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
1285 enum ib_qp_type qp_type, int is_tun)
1286{
1287 int i;
1288 struct mlx4_ib_demux_pv_qp *tun_qp;
1289 int rx_buf_size, tx_buf_size;
1290
1291 if (qp_type > IB_QPT_GSI)
1292 return -EINVAL;
1293
1294 tun_qp = &ctx->qp[qp_type];
1295
1296 tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS,
1297 GFP_KERNEL);
1298 if (!tun_qp->ring)
1299 return -ENOMEM;
1300
1301 tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
1302 sizeof (struct mlx4_ib_tun_tx_buf),
1303 GFP_KERNEL);
1304 if (!tun_qp->tx_ring) {
1305 kfree(tun_qp->ring);
1306 tun_qp->ring = NULL;
1307 return -ENOMEM;
1308 }
1309
1310 if (is_tun) {
1311 rx_buf_size = sizeof (struct mlx4_tunnel_mad);
1312 tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
1313 } else {
1314 rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
1315 tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
1316 }
1317
1318 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1319 tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
1320 if (!tun_qp->ring[i].addr)
1321 goto err;
1322 tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev,
1323 tun_qp->ring[i].addr,
1324 rx_buf_size,
1325 DMA_FROM_DEVICE);
1326 }
1327
1328 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1329 tun_qp->tx_ring[i].buf.addr =
1330 kmalloc(tx_buf_size, GFP_KERNEL);
1331 if (!tun_qp->tx_ring[i].buf.addr)
1332 goto tx_err;
1333 tun_qp->tx_ring[i].buf.map =
1334 ib_dma_map_single(ctx->ib_dev,
1335 tun_qp->tx_ring[i].buf.addr,
1336 tx_buf_size,
1337 DMA_TO_DEVICE);
1338 tun_qp->tx_ring[i].ah = NULL;
1339 }
1340 spin_lock_init(&tun_qp->tx_lock);
1341 tun_qp->tx_ix_head = 0;
1342 tun_qp->tx_ix_tail = 0;
1343 tun_qp->proxy_qpt = qp_type;
1344
1345 return 0;
1346
1347tx_err:
1348 while (i > 0) {
1349 --i;
1350 ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
1351 tx_buf_size, DMA_TO_DEVICE);
1352 kfree(tun_qp->tx_ring[i].buf.addr);
1353 }
1354 kfree(tun_qp->tx_ring);
1355 tun_qp->tx_ring = NULL;
1356 i = MLX4_NUM_TUNNEL_BUFS;
1357err:
1358 while (i > 0) {
1359 --i;
1360 ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
1361 rx_buf_size, DMA_FROM_DEVICE);
1362 kfree(tun_qp->ring[i].addr);
1363 }
1364 kfree(tun_qp->ring);
1365 tun_qp->ring = NULL;
1366 return -ENOMEM;
1367}
1368
1369static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
1370 enum ib_qp_type qp_type, int is_tun)
1371{
1372 int i;
1373 struct mlx4_ib_demux_pv_qp *tun_qp;
1374 int rx_buf_size, tx_buf_size;
1375
1376 if (qp_type > IB_QPT_GSI)
1377 return;
1378
1379 tun_qp = &ctx->qp[qp_type];
1380 if (is_tun) {
1381 rx_buf_size = sizeof (struct mlx4_tunnel_mad);
1382 tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
1383 } else {
1384 rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
1385 tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
1386 }
1387
1388
1389 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1390 ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
1391 rx_buf_size, DMA_FROM_DEVICE);
1392 kfree(tun_qp->ring[i].addr);
1393 }
1394
1395 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1396 ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
1397 tx_buf_size, DMA_TO_DEVICE);
1398 kfree(tun_qp->tx_ring[i].buf.addr);
1399 if (tun_qp->tx_ring[i].ah)
1400 ib_destroy_ah(tun_qp->tx_ring[i].ah);
1401 }
1402 kfree(tun_qp->tx_ring);
1403 kfree(tun_qp->ring);
1404}
1405
1406static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
1407{
1408 struct mlx4_ib_demux_pv_ctx *ctx;
1409 struct mlx4_ib_demux_pv_qp *tun_qp;
1410 struct ib_wc wc;
1411 int ret;
1412 ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
1413 ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
1414
1415 while (ib_poll_cq(ctx->cq, 1, &wc) == 1) {
1416 tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
1417 if (wc.status == IB_WC_SUCCESS) {
1418 switch (wc.opcode) {
1419 case IB_WC_RECV:
1420 mlx4_ib_multiplex_mad(ctx, &wc);
1421 ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp,
1422 wc.wr_id &
1423 (MLX4_NUM_TUNNEL_BUFS - 1));
1424 if (ret)
1425 pr_err("Failed reposting tunnel "
1426 "buf:%lld\n", wc.wr_id);
1427 break;
1428 case IB_WC_SEND:
1429 pr_debug("received tunnel send completion:"
1430 "wrid=0x%llx, status=0x%x\n",
1431 wc.wr_id, wc.status);
1432 ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
1433 (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1434 tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1435 = NULL;
1436 spin_lock(&tun_qp->tx_lock);
1437 tun_qp->tx_ix_tail++;
1438 spin_unlock(&tun_qp->tx_lock);
1439
1440 break;
1441 default:
1442 break;
1443 }
1444 } else {
1445 pr_debug("mlx4_ib: completion error in tunnel: %d."
1446 " status = %d, wrid = 0x%llx\n",
1447 ctx->slave, wc.status, wc.wr_id);
1448 if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
1449 ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
1450 (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1451 tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1452 = NULL;
1453 spin_lock(&tun_qp->tx_lock);
1454 tun_qp->tx_ix_tail++;
1455 spin_unlock(&tun_qp->tx_lock);
1456 }
1457 }
1458 }
1459}
1460
1461static void pv_qp_event_handler(struct ib_event *event, void *qp_context)
1462{
1463 struct mlx4_ib_demux_pv_ctx *sqp = qp_context;
1464
1465 /* It's worse than that! He's dead, Jim! */
1466 pr_err("Fatal error (%d) on a MAD QP on port %d\n",
1467 event->event, sqp->port);
1468}
1469
1470static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
1471 enum ib_qp_type qp_type, int create_tun)
1472{
1473 int i, ret;
1474 struct mlx4_ib_demux_pv_qp *tun_qp;
1475 struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
1476 struct ib_qp_attr attr;
1477 int qp_attr_mask_INIT;
1478
1479 if (qp_type > IB_QPT_GSI)
1480 return -EINVAL;
1481
1482 tun_qp = &ctx->qp[qp_type];
1483
1484 memset(&qp_init_attr, 0, sizeof qp_init_attr);
1485 qp_init_attr.init_attr.send_cq = ctx->cq;
1486 qp_init_attr.init_attr.recv_cq = ctx->cq;
1487 qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
1488 qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
1489 qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
1490 qp_init_attr.init_attr.cap.max_send_sge = 1;
1491 qp_init_attr.init_attr.cap.max_recv_sge = 1;
1492 if (create_tun) {
1493 qp_init_attr.init_attr.qp_type = IB_QPT_UD;
1494 qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_TUNNEL_QP;
1495 qp_init_attr.port = ctx->port;
1496 qp_init_attr.slave = ctx->slave;
1497 qp_init_attr.proxy_qp_type = qp_type;
1498 qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX |
1499 IB_QP_QKEY | IB_QP_PORT;
1500 } else {
1501 qp_init_attr.init_attr.qp_type = qp_type;
1502 qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_SQP;
1503 qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY;
1504 }
1505 qp_init_attr.init_attr.port_num = ctx->port;
1506 qp_init_attr.init_attr.qp_context = ctx;
1507 qp_init_attr.init_attr.event_handler = pv_qp_event_handler;
1508 tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr);
1509 if (IS_ERR(tun_qp->qp)) {
1510 ret = PTR_ERR(tun_qp->qp);
1511 tun_qp->qp = NULL;
1512 pr_err("Couldn't create %s QP (%d)\n",
1513 create_tun ? "tunnel" : "special", ret);
1514 return ret;
1515 }
1516
1517 memset(&attr, 0, sizeof attr);
1518 attr.qp_state = IB_QPS_INIT;
1519 attr.pkey_index =
1520 to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
1521 attr.qkey = IB_QP1_QKEY;
1522 attr.port_num = ctx->port;
1523 ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT);
1524 if (ret) {
1525 pr_err("Couldn't change %s qp state to INIT (%d)\n",
1526 create_tun ? "tunnel" : "special", ret);
1527 goto err_qp;
1528 }
1529 attr.qp_state = IB_QPS_RTR;
1530 ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE);
1531 if (ret) {
1532 pr_err("Couldn't change %s qp state to RTR (%d)\n",
1533 create_tun ? "tunnel" : "special", ret);
1534 goto err_qp;
1535 }
1536 attr.qp_state = IB_QPS_RTS;
1537 attr.sq_psn = 0;
1538 ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
1539 if (ret) {
1540 pr_err("Couldn't change %s qp state to RTS (%d)\n",
1541 create_tun ? "tunnel" : "special", ret);
1542 goto err_qp;
1543 }
1544
1545 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1546 ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
1547 if (ret) {
1548 pr_err(" mlx4_ib_post_pv_buf error"
1549 " (err = %d, i = %d)\n", ret, i);
1550 goto err_qp;
1551 }
1552 }
1553 return 0;
1554
1555err_qp:
1556 ib_destroy_qp(tun_qp->qp);
1557 tun_qp->qp = NULL;
1558 return ret;
1559}
1560
1561/*
1562 * IB MAD completion callback for real SQPs
1563 */
1564static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
1565{
1566 struct mlx4_ib_demux_pv_ctx *ctx;
1567 struct mlx4_ib_demux_pv_qp *sqp;
1568 struct ib_wc wc;
1569 struct ib_grh *grh;
1570 struct ib_mad *mad;
1571
1572 ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
1573 ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
1574
1575 while (mlx4_ib_poll_cq(ctx->cq, 1, &wc) == 1) {
1576 sqp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
1577 if (wc.status == IB_WC_SUCCESS) {
1578 switch (wc.opcode) {
1579 case IB_WC_SEND:
1580 ib_destroy_ah(sqp->tx_ring[wc.wr_id &
1581 (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1582 sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1583 = NULL;
1584 spin_lock(&sqp->tx_lock);
1585 sqp->tx_ix_tail++;
1586 spin_unlock(&sqp->tx_lock);
1587 break;
1588 case IB_WC_RECV:
1589 mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
1590 (sqp->ring[wc.wr_id &
1591 (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
1592 grh = &(((struct mlx4_mad_rcv_buf *)
1593 (sqp->ring[wc.wr_id &
1594 (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
1595 mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
1596 if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
1597 (MLX4_NUM_TUNNEL_BUFS - 1)))
1598 pr_err("Failed reposting SQP "
1599 "buf:%lld\n", wc.wr_id);
1600 break;
1601 default:
1602 BUG_ON(1);
1603 break;
1604 }
1605 } else {
1606 pr_debug("mlx4_ib: completion error in tunnel: %d."
1607 " status = %d, wrid = 0x%llx\n",
1608 ctx->slave, wc.status, wc.wr_id);
1609 if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
1610 ib_destroy_ah(sqp->tx_ring[wc.wr_id &
1611 (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1612 sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1613 = NULL;
1614 spin_lock(&sqp->tx_lock);
1615 sqp->tx_ix_tail++;
1616 spin_unlock(&sqp->tx_lock);
1617 }
1618 }
1619 }
1620}
1621
1622static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port,
1623 struct mlx4_ib_demux_pv_ctx **ret_ctx)
1624{
1625 struct mlx4_ib_demux_pv_ctx *ctx;
1626
1627 *ret_ctx = NULL;
1628 ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL);
1629 if (!ctx) {
1630 pr_err("failed allocating pv resource context "
1631 "for port %d, slave %d\n", port, slave);
1632 return -ENOMEM;
1633 }
1634
1635 ctx->ib_dev = &dev->ib_dev;
1636 ctx->port = port;
1637 ctx->slave = slave;
1638 *ret_ctx = ctx;
1639 return 0;
1640}
1641
1642static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port)
1643{
1644 if (dev->sriov.demux[port - 1].tun[slave]) {
1645 kfree(dev->sriov.demux[port - 1].tun[slave]);
1646 dev->sriov.demux[port - 1].tun[slave] = NULL;
1647 }
1648}
1649
1650static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
1651 int create_tun, struct mlx4_ib_demux_pv_ctx *ctx)
1652{
1653 int ret, cq_size;
1654
1655 if (ctx->state != DEMUX_PV_STATE_DOWN)
1656 return -EEXIST;
1657
1658 ctx->state = DEMUX_PV_STATE_STARTING;
1659 /* have QP0 only on port owner, and only if link layer is IB */
1660 if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) &&
1661 rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND)
1662 ctx->has_smi = 1;
1663
1664 if (ctx->has_smi) {
1665 ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun);
1666 if (ret) {
1667 pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret);
1668 goto err_out;
1669 }
1670 }
1671
1672 ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun);
1673 if (ret) {
1674 pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret);
1675 goto err_out_qp0;
1676 }
1677
1678 cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
1679 if (ctx->has_smi)
1680 cq_size *= 2;
1681
1682 ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler,
1683 NULL, ctx, cq_size, 0);
1684 if (IS_ERR(ctx->cq)) {
1685 ret = PTR_ERR(ctx->cq);
1686 pr_err("Couldn't create tunnel CQ (%d)\n", ret);
1687 goto err_buf;
1688 }
1689
1690 ctx->pd = ib_alloc_pd(ctx->ib_dev);
1691 if (IS_ERR(ctx->pd)) {
1692 ret = PTR_ERR(ctx->pd);
1693 pr_err("Couldn't create tunnel PD (%d)\n", ret);
1694 goto err_cq;
1695 }
1696
1697 ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_LOCAL_WRITE);
1698 if (IS_ERR(ctx->mr)) {
1699 ret = PTR_ERR(ctx->mr);
1700 pr_err("Couldn't get tunnel DMA MR (%d)\n", ret);
1701 goto err_pd;
1702 }
1703
1704 if (ctx->has_smi) {
1705 ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun);
1706 if (ret) {
1707 pr_err("Couldn't create %s QP0 (%d)\n",
1708 create_tun ? "tunnel for" : "", ret);
1709 goto err_mr;
1710 }
1711 }
1712
1713 ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun);
1714 if (ret) {
1715 pr_err("Couldn't create %s QP1 (%d)\n",
1716 create_tun ? "tunnel for" : "", ret);
1717 goto err_qp0;
1718 }
1719
1720 if (create_tun)
1721 INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker);
1722 else
1723 INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);
1724
1725 ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;
1726
1727 ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
1728 if (ret) {
1729 pr_err("Couldn't arm tunnel cq (%d)\n", ret);
1730 goto err_wq;
1731 }
1732 ctx->state = DEMUX_PV_STATE_ACTIVE;
1733 return 0;
1734
1735err_wq:
1736 ctx->wq = NULL;
1737 ib_destroy_qp(ctx->qp[1].qp);
1738 ctx->qp[1].qp = NULL;
1739
1740
1741err_qp0:
1742 if (ctx->has_smi)
1743 ib_destroy_qp(ctx->qp[0].qp);
1744 ctx->qp[0].qp = NULL;
1745
1746err_mr:
1747 ib_dereg_mr(ctx->mr);
1748 ctx->mr = NULL;
1749
1750err_pd:
1751 ib_dealloc_pd(ctx->pd);
1752 ctx->pd = NULL;
1753
1754err_cq:
1755 ib_destroy_cq(ctx->cq);
1756 ctx->cq = NULL;
1757
1758err_buf:
1759 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun);
1760
1761err_out_qp0:
1762 if (ctx->has_smi)
1763 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun);
1764err_out:
1765 ctx->state = DEMUX_PV_STATE_DOWN;
1766 return ret;
1767}
1768
1769static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port,
1770 struct mlx4_ib_demux_pv_ctx *ctx, int flush)
1771{
1772 if (!ctx)
1773 return;
1774 if (ctx->state > DEMUX_PV_STATE_DOWN) {
1775 ctx->state = DEMUX_PV_STATE_DOWNING;
1776 if (flush)
1777 flush_workqueue(ctx->wq);
1778 if (ctx->has_smi) {
1779 ib_destroy_qp(ctx->qp[0].qp);
1780 ctx->qp[0].qp = NULL;
1781 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1);
1782 }
1783 ib_destroy_qp(ctx->qp[1].qp);
1784 ctx->qp[1].qp = NULL;
1785 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1);
1786 ib_dereg_mr(ctx->mr);
1787 ctx->mr = NULL;
1788 ib_dealloc_pd(ctx->pd);
1789 ctx->pd = NULL;
1790 ib_destroy_cq(ctx->cq);
1791 ctx->cq = NULL;
1792 ctx->state = DEMUX_PV_STATE_DOWN;
1793 }
1794}
1795
1796static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave,
1797 int port, int do_init)
1798{
1799 int ret = 0;
1800
1801 if (!do_init) {
1802 clean_vf_mcast(&dev->sriov.demux[port - 1], slave);
1803 /* for master, destroy real sqp resources */
1804 if (slave == mlx4_master_func_num(dev->dev))
1805 destroy_pv_resources(dev, slave, port,
1806 dev->sriov.sqps[port - 1], 1);
1807 /* destroy the tunnel qp resources */
1808 destroy_pv_resources(dev, slave, port,
1809 dev->sriov.demux[port - 1].tun[slave], 1);
1810 return 0;
1811 }
1812
1813 /* create the tunnel qp resources */
1814 ret = create_pv_resources(&dev->ib_dev, slave, port, 1,
1815 dev->sriov.demux[port - 1].tun[slave]);
1816
1817 /* for master, create the real sqp resources */
1818 if (!ret && slave == mlx4_master_func_num(dev->dev))
1819 ret = create_pv_resources(&dev->ib_dev, slave, port, 0,
1820 dev->sriov.sqps[port - 1]);
1821 return ret;
1822}
1823
1824void mlx4_ib_tunnels_update_work(struct work_struct *work)
1825{
1826 struct mlx4_ib_demux_work *dmxw;
1827
1828 dmxw = container_of(work, struct mlx4_ib_demux_work, work);
1829 mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port,
1830 dmxw->do_init);
1831 kfree(dmxw);
1832 return;
1833}
1834
1835static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
1836 struct mlx4_ib_demux_ctx *ctx,
1837 int port)
1838{
1839 char name[12];
1840 int ret = 0;
1841 int i;
1842
1843 ctx->tun = kcalloc(dev->dev->caps.sqp_demux,
1844 sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL);
1845 if (!ctx->tun)
1846 return -ENOMEM;
1847
1848 ctx->dev = dev;
1849 ctx->port = port;
1850 ctx->ib_dev = &dev->ib_dev;
1851
1852 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
1853 ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
1854 if (ret) {
1855 ret = -ENOMEM;
1856 goto err_mcg;
1857 }
1858 }
1859
1860 ret = mlx4_ib_mcg_port_init(ctx);
1861 if (ret) {
1862 pr_err("Failed initializing mcg para-virt (%d)\n", ret);
1863 goto err_mcg;
1864 }
1865
1866 snprintf(name, sizeof name, "mlx4_ibt%d", port);
1867 ctx->wq = create_singlethread_workqueue(name);
1868 if (!ctx->wq) {
1869 pr_err("Failed to create tunnelling WQ for port %d\n", port);
1870 ret = -ENOMEM;
1871 goto err_wq;
1872 }
1873
1874 snprintf(name, sizeof name, "mlx4_ibud%d", port);
1875 ctx->ud_wq = create_singlethread_workqueue(name);
1876 if (!ctx->ud_wq) {
1877 pr_err("Failed to create up/down WQ for port %d\n", port);
1878 ret = -ENOMEM;
1879 goto err_udwq;
1880 }
1881
1882 return 0;
1883
1884err_udwq:
1885 destroy_workqueue(ctx->wq);
1886 ctx->wq = NULL;
1887
1888err_wq:
1889 mlx4_ib_mcg_port_cleanup(ctx, 1);
1890err_mcg:
1891 for (i = 0; i < dev->dev->caps.sqp_demux; i++)
1892 free_pv_object(dev, i, port);
1893 kfree(ctx->tun);
1894 ctx->tun = NULL;
1895 return ret;
1896}
1897
1898static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx)
1899{
1900 if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) {
1901 sqp_ctx->state = DEMUX_PV_STATE_DOWNING;
1902 flush_workqueue(sqp_ctx->wq);
1903 if (sqp_ctx->has_smi) {
1904 ib_destroy_qp(sqp_ctx->qp[0].qp);
1905 sqp_ctx->qp[0].qp = NULL;
1906 mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0);
1907 }
1908 ib_destroy_qp(sqp_ctx->qp[1].qp);
1909 sqp_ctx->qp[1].qp = NULL;
1910 mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0);
1911 ib_dereg_mr(sqp_ctx->mr);
1912 sqp_ctx->mr = NULL;
1913 ib_dealloc_pd(sqp_ctx->pd);
1914 sqp_ctx->pd = NULL;
1915 ib_destroy_cq(sqp_ctx->cq);
1916 sqp_ctx->cq = NULL;
1917 sqp_ctx->state = DEMUX_PV_STATE_DOWN;
1918 }
1919}
1920
1921static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
1922{
1923 int i;
1924 if (ctx) {
1925 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1926 mlx4_ib_mcg_port_cleanup(ctx, 1);
1927 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
1928 if (!ctx->tun[i])
1929 continue;
1930 if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN)
1931 ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
1932 }
1933 flush_workqueue(ctx->wq);
1934 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
1935 destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
1936 free_pv_object(dev, i, ctx->port);
1937 }
1938 kfree(ctx->tun);
1939 destroy_workqueue(ctx->ud_wq);
1940 destroy_workqueue(ctx->wq);
1941 }
1942}
1943
1944static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init)
1945{
1946 int i;
1947
1948 if (!mlx4_is_master(dev->dev))
1949 return;
1950 /* initialize or tear down tunnel QPs for the master */
1951 for (i = 0; i < dev->dev->caps.num_ports; i++)
1952 mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init);
1953 return;
1954}
1955
1956int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
1957{
1958 int i = 0;
1959 int err;
1960
1961 if (!mlx4_is_mfunc(dev->dev))
1962 return 0;
1963
1964 dev->sriov.is_going_down = 0;
1965 spin_lock_init(&dev->sriov.going_down_lock);
1966 mlx4_ib_cm_paravirt_init(dev);
1967
1968 mlx4_ib_warn(&dev->ib_dev, "multi-function enabled\n");
1969
1970 if (mlx4_is_slave(dev->dev)) {
1971 mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n");
1972 return 0;
1973 }
1974
1975 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
1976 if (i == mlx4_master_func_num(dev->dev))
1977 mlx4_put_slave_node_guid(dev->dev, i, dev->ib_dev.node_guid);
1978 else
1979 mlx4_put_slave_node_guid(dev->dev, i, mlx4_ib_gen_node_guid());
1980 }
1981
1982 err = mlx4_ib_init_alias_guid_service(dev);
1983 if (err) {
1984 mlx4_ib_warn(&dev->ib_dev, "Failed init alias guid process.\n");
1985 goto paravirt_err;
1986 }
1987 err = mlx4_ib_device_register_sysfs(dev);
1988 if (err) {
1989 mlx4_ib_warn(&dev->ib_dev, "Failed to register sysfs\n");
1990 goto sysfs_err;
1991 }
1992
1993 mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n",
1994 dev->dev->caps.sqp_demux);
1995 for (i = 0; i < dev->num_ports; i++) {
1996 union ib_gid gid;
1997 err = __mlx4_ib_query_gid(&dev->ib_dev, i + 1, 0, &gid, 1);
1998 if (err)
1999 goto demux_err;
2000 dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
2001 err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
2002 &dev->sriov.sqps[i]);
2003 if (err)
2004 goto demux_err;
2005 err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1);
2006 if (err)
2007 goto demux_err;
2008 }
2009 mlx4_ib_master_tunnels(dev, 1);
2010 return 0;
2011
2012demux_err:
2013 while (i > 0) {
2014 free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
2015 mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
2016 --i;
2017 }
2018 mlx4_ib_device_unregister_sysfs(dev);
2019
2020sysfs_err:
2021 mlx4_ib_destroy_alias_guid_service(dev);
2022
2023paravirt_err:
2024 mlx4_ib_cm_paravirt_clean(dev, -1);
2025
2026 return err;
2027}
2028
2029void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev)
2030{
2031 int i;
2032 unsigned long flags;
2033
2034 if (!mlx4_is_mfunc(dev->dev))
2035 return;
2036
2037 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
2038 dev->sriov.is_going_down = 1;
2039 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
2040 if (mlx4_is_master(dev->dev)) {
2041 for (i = 0; i < dev->num_ports; i++) {
2042 flush_workqueue(dev->sriov.demux[i].ud_wq);
2043 mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]);
2044 kfree(dev->sriov.sqps[i]);
2045 dev->sriov.sqps[i] = NULL;
2046 mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
2047 }
2048
2049 mlx4_ib_cm_paravirt_clean(dev, -1);
2050 mlx4_ib_destroy_alias_guid_service(dev);
2051 mlx4_ib_device_unregister_sysfs(dev);
2052 }
2053}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index cc05579ebce..718ec6b2bad 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -59,6 +59,10 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
59MODULE_LICENSE("Dual BSD/GPL"); 59MODULE_LICENSE("Dual BSD/GPL");
60MODULE_VERSION(DRV_VERSION); 60MODULE_VERSION(DRV_VERSION);
61 61
62int mlx4_ib_sm_guid_assign = 1;
63module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
64MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
65
62static const char mlx4_ib_version[] = 66static const char mlx4_ib_version[] =
63 DRV_NAME ": Mellanox ConnectX InfiniBand driver v" 67 DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
64 DRV_VERSION " (" DRV_RELDATE ")\n"; 68 DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -70,6 +74,8 @@ struct update_gid_work {
70 int port; 74 int port;
71}; 75};
72 76
77static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
78
73static struct workqueue_struct *wq; 79static struct workqueue_struct *wq;
74 80
75static void init_query_mad(struct ib_smp *mad) 81static void init_query_mad(struct ib_smp *mad)
@@ -98,7 +104,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
98 init_query_mad(in_mad); 104 init_query_mad(in_mad);
99 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; 105 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
100 106
101 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad); 107 err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
108 1, NULL, NULL, in_mad, out_mad);
102 if (err) 109 if (err)
103 goto out; 110 goto out;
104 111
@@ -133,7 +140,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
133 140
134 props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 141 props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
135 0xffffff; 142 0xffffff;
136 props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); 143 props->vendor_part_id = dev->dev->pdev->device;
137 props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); 144 props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
138 memcpy(&props->sys_image_guid, out_mad->data + 4, 8); 145 memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
139 146
@@ -182,11 +189,12 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
182} 189}
183 190
184static int ib_link_query_port(struct ib_device *ibdev, u8 port, 191static int ib_link_query_port(struct ib_device *ibdev, u8 port,
185 struct ib_port_attr *props) 192 struct ib_port_attr *props, int netw_view)
186{ 193{
187 struct ib_smp *in_mad = NULL; 194 struct ib_smp *in_mad = NULL;
188 struct ib_smp *out_mad = NULL; 195 struct ib_smp *out_mad = NULL;
189 int ext_active_speed; 196 int ext_active_speed;
197 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
190 int err = -ENOMEM; 198 int err = -ENOMEM;
191 199
192 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 200 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -198,7 +206,10 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
198 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; 206 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
199 in_mad->attr_mod = cpu_to_be32(port); 207 in_mad->attr_mod = cpu_to_be32(port);
200 208
201 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, 209 if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
210 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
211
212 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
202 in_mad, out_mad); 213 in_mad, out_mad);
203 if (err) 214 if (err)
204 goto out; 215 goto out;
@@ -211,7 +222,10 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
211 props->state = out_mad->data[32] & 0xf; 222 props->state = out_mad->data[32] & 0xf;
212 props->phys_state = out_mad->data[33] >> 4; 223 props->phys_state = out_mad->data[33] >> 4;
213 props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); 224 props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
214 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; 225 if (netw_view)
226 props->gid_tbl_len = out_mad->data[50];
227 else
228 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
215 props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; 229 props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
216 props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port]; 230 props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
217 props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); 231 props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
@@ -244,7 +258,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
244 in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO; 258 in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
245 in_mad->attr_mod = cpu_to_be32(port); 259 in_mad->attr_mod = cpu_to_be32(port);
246 260
247 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, 261 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
248 NULL, NULL, in_mad, out_mad); 262 NULL, NULL, in_mad, out_mad);
249 if (err) 263 if (err)
250 goto out; 264 goto out;
@@ -270,7 +284,7 @@ static u8 state_to_phys_state(enum ib_port_state state)
270} 284}
271 285
272static int eth_link_query_port(struct ib_device *ibdev, u8 port, 286static int eth_link_query_port(struct ib_device *ibdev, u8 port,
273 struct ib_port_attr *props) 287 struct ib_port_attr *props, int netw_view)
274{ 288{
275 289
276 struct mlx4_ib_dev *mdev = to_mdev(ibdev); 290 struct mlx4_ib_dev *mdev = to_mdev(ibdev);
@@ -320,26 +334,36 @@ out:
320 return err; 334 return err;
321} 335}
322 336
323static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, 337int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
324 struct ib_port_attr *props) 338 struct ib_port_attr *props, int netw_view)
325{ 339{
326 int err; 340 int err;
327 341
328 memset(props, 0, sizeof *props); 342 memset(props, 0, sizeof *props);
329 343
330 err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? 344 err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
331 ib_link_query_port(ibdev, port, props) : 345 ib_link_query_port(ibdev, port, props, netw_view) :
332 eth_link_query_port(ibdev, port, props); 346 eth_link_query_port(ibdev, port, props, netw_view);
333 347
334 return err; 348 return err;
335} 349}
336 350
337static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, 351static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
338 union ib_gid *gid) 352 struct ib_port_attr *props)
353{
354 /* returns host view */
355 return __mlx4_ib_query_port(ibdev, port, props, 0);
356}
357
358int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
359 union ib_gid *gid, int netw_view)
339{ 360{
340 struct ib_smp *in_mad = NULL; 361 struct ib_smp *in_mad = NULL;
341 struct ib_smp *out_mad = NULL; 362 struct ib_smp *out_mad = NULL;
342 int err = -ENOMEM; 363 int err = -ENOMEM;
364 struct mlx4_ib_dev *dev = to_mdev(ibdev);
365 int clear = 0;
366 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
343 367
344 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 368 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
345 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); 369 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -350,23 +374,38 @@ static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
350 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; 374 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
351 in_mad->attr_mod = cpu_to_be32(port); 375 in_mad->attr_mod = cpu_to_be32(port);
352 376
353 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); 377 if (mlx4_is_mfunc(dev->dev) && netw_view)
378 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
379
380 err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
354 if (err) 381 if (err)
355 goto out; 382 goto out;
356 383
357 memcpy(gid->raw, out_mad->data + 8, 8); 384 memcpy(gid->raw, out_mad->data + 8, 8);
358 385
386 if (mlx4_is_mfunc(dev->dev) && !netw_view) {
387 if (index) {
388 /* For any index > 0, return the null guid */
389 err = 0;
390 clear = 1;
391 goto out;
392 }
393 }
394
359 init_query_mad(in_mad); 395 init_query_mad(in_mad);
360 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; 396 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
361 in_mad->attr_mod = cpu_to_be32(index / 8); 397 in_mad->attr_mod = cpu_to_be32(index / 8);
362 398
363 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); 399 err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
400 NULL, NULL, in_mad, out_mad);
364 if (err) 401 if (err)
365 goto out; 402 goto out;
366 403
367 memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); 404 memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
368 405
369out: 406out:
407 if (clear)
408 memset(gid->raw + 8, 0, 8);
370 kfree(in_mad); 409 kfree(in_mad);
371 kfree(out_mad); 410 kfree(out_mad);
372 return err; 411 return err;
@@ -386,16 +425,17 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
386 union ib_gid *gid) 425 union ib_gid *gid)
387{ 426{
388 if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) 427 if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
389 return __mlx4_ib_query_gid(ibdev, port, index, gid); 428 return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
390 else 429 else
391 return iboe_query_gid(ibdev, port, index, gid); 430 return iboe_query_gid(ibdev, port, index, gid);
392} 431}
393 432
394static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, 433int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
395 u16 *pkey) 434 u16 *pkey, int netw_view)
396{ 435{
397 struct ib_smp *in_mad = NULL; 436 struct ib_smp *in_mad = NULL;
398 struct ib_smp *out_mad = NULL; 437 struct ib_smp *out_mad = NULL;
438 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
399 int err = -ENOMEM; 439 int err = -ENOMEM;
400 440
401 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 441 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -407,7 +447,11 @@ static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
407 in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; 447 in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
408 in_mad->attr_mod = cpu_to_be32(index / 32); 448 in_mad->attr_mod = cpu_to_be32(index / 32);
409 449
410 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); 450 if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
451 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
452
453 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
454 in_mad, out_mad);
411 if (err) 455 if (err)
412 goto out; 456 goto out;
413 457
@@ -419,6 +463,11 @@ out:
419 return err; 463 return err;
420} 464}
421 465
466static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
467{
468 return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
469}
470
422static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, 471static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
423 struct ib_device_modify *props) 472 struct ib_device_modify *props)
424{ 473{
@@ -431,6 +480,9 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
431 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) 480 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
432 return 0; 481 return 0;
433 482
483 if (mlx4_is_slave(to_mdev(ibdev)->dev))
484 return -EOPNOTSUPP;
485
434 spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags); 486 spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
435 memcpy(ibdev->node_desc, props->node_desc, 64); 487 memcpy(ibdev->node_desc, props->node_desc, 64);
436 spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags); 488 spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
@@ -446,7 +498,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
446 memset(mailbox->buf, 0, 256); 498 memset(mailbox->buf, 0, 256);
447 memcpy(mailbox->buf, props->node_desc, 64); 499 memcpy(mailbox->buf, props->node_desc, 64);
448 mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, 500 mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
449 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); 501 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
450 502
451 mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox); 503 mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
452 504
@@ -849,6 +901,7 @@ static int init_node_data(struct mlx4_ib_dev *dev)
849{ 901{
850 struct ib_smp *in_mad = NULL; 902 struct ib_smp *in_mad = NULL;
851 struct ib_smp *out_mad = NULL; 903 struct ib_smp *out_mad = NULL;
904 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
852 int err = -ENOMEM; 905 int err = -ENOMEM;
853 906
854 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 907 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -858,8 +911,10 @@ static int init_node_data(struct mlx4_ib_dev *dev)
858 911
859 init_query_mad(in_mad); 912 init_query_mad(in_mad);
860 in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; 913 in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
914 if (mlx4_is_master(dev->dev))
915 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
861 916
862 err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); 917 err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
863 if (err) 918 if (err)
864 goto out; 919 goto out;
865 920
@@ -867,10 +922,11 @@ static int init_node_data(struct mlx4_ib_dev *dev)
867 922
868 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; 923 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
869 924
870 err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); 925 err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
871 if (err) 926 if (err)
872 goto out; 927 goto out;
873 928
929 dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
874 memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); 930 memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
875 931
876out: 932out:
@@ -959,7 +1015,7 @@ static void update_gids_task(struct work_struct *work)
959 1015
960 err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, 1016 err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
961 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, 1017 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
962 MLX4_CMD_NATIVE); 1018 MLX4_CMD_WRAPPED);
963 if (err) 1019 if (err)
964 pr_warn("set port command failed\n"); 1020 pr_warn("set port command failed\n");
965 else { 1021 else {
@@ -1121,6 +1177,38 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event
1121 return NOTIFY_DONE; 1177 return NOTIFY_DONE;
1122} 1178}
1123 1179
1180static void init_pkeys(struct mlx4_ib_dev *ibdev)
1181{
1182 int port;
1183 int slave;
1184 int i;
1185
1186 if (mlx4_is_master(ibdev->dev)) {
1187 for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) {
1188 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
1189 for (i = 0;
1190 i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
1191 ++i) {
1192 ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
1193 /* master has the identity virt2phys pkey mapping */
1194 (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
1195 ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
1196 mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
1197 ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
1198 }
1199 }
1200 }
1201 /* initialize pkey cache */
1202 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
1203 for (i = 0;
1204 i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
1205 ++i)
1206 ibdev->pkeys.phys_pkey_cache[port-1][i] =
1207 (i) ? 0 : 0xFFFF;
1208 }
1209 }
1210}
1211
1124static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) 1212static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
1125{ 1213{
1126 char name[32]; 1214 char name[32];
@@ -1207,11 +1295,15 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1207 1295
1208 pr_info_once("%s", mlx4_ib_version); 1296 pr_info_once("%s", mlx4_ib_version);
1209 1297
1210 if (mlx4_is_mfunc(dev)) { 1298 mlx4_foreach_non_ib_transport_port(i, dev)
1211 pr_warn("IB not yet supported in SRIOV\n"); 1299 num_ports++;
1300
1301 if (mlx4_is_mfunc(dev) && num_ports) {
1302 dev_err(&dev->pdev->dev, "RoCE is not supported over SRIOV as yet\n");
1212 return NULL; 1303 return NULL;
1213 } 1304 }
1214 1305
1306 num_ports = 0;
1215 mlx4_foreach_ib_transport_port(i, dev) 1307 mlx4_foreach_ib_transport_port(i, dev)
1216 num_ports++; 1308 num_ports++;
1217 1309
@@ -1318,10 +1410,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1318 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; 1410 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
1319 ibdev->ib_dev.process_mad = mlx4_ib_process_mad; 1411 ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
1320 1412
1321 ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; 1413 if (!mlx4_is_slave(ibdev->dev)) {
1322 ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; 1414 ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
1323 ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; 1415 ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
1324 ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; 1416 ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
1417 ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
1418 }
1325 1419
1326 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { 1420 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
1327 ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; 1421 ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
@@ -1357,11 +1451,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1357 if (mlx4_ib_mad_init(ibdev)) 1451 if (mlx4_ib_mad_init(ibdev))
1358 goto err_reg; 1452 goto err_reg;
1359 1453
1454 if (mlx4_ib_init_sriov(ibdev))
1455 goto err_mad;
1456
1360 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) { 1457 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
1361 iboe->nb.notifier_call = mlx4_ib_netdev_event; 1458 iboe->nb.notifier_call = mlx4_ib_netdev_event;
1362 err = register_netdevice_notifier(&iboe->nb); 1459 err = register_netdevice_notifier(&iboe->nb);
1363 if (err) 1460 if (err)
1364 goto err_reg; 1461 goto err_sriov;
1365 } 1462 }
1366 1463
1367 for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { 1464 for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
@@ -1372,6 +1469,18 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1372 1469
1373 ibdev->ib_active = true; 1470 ibdev->ib_active = true;
1374 1471
1472 if (mlx4_is_mfunc(ibdev->dev))
1473 init_pkeys(ibdev);
1474
1475 /* create paravirt contexts for any VFs which are active */
1476 if (mlx4_is_master(ibdev->dev)) {
1477 for (j = 0; j < MLX4_MFUNC_MAX; j++) {
1478 if (j == mlx4_master_func_num(ibdev->dev))
1479 continue;
1480 if (mlx4_is_slave_active(ibdev->dev, j))
1481 do_slave_init(ibdev, j, 1);
1482 }
1483 }
1375 return ibdev; 1484 return ibdev;
1376 1485
1377err_notif: 1486err_notif:
@@ -1379,6 +1488,12 @@ err_notif:
1379 pr_warn("failure unregistering notifier\n"); 1488 pr_warn("failure unregistering notifier\n");
1380 flush_workqueue(wq); 1489 flush_workqueue(wq);
1381 1490
1491err_sriov:
1492 mlx4_ib_close_sriov(ibdev);
1493
1494err_mad:
1495 mlx4_ib_mad_cleanup(ibdev);
1496
1382err_reg: 1497err_reg:
1383 ib_unregister_device(&ibdev->ib_dev); 1498 ib_unregister_device(&ibdev->ib_dev);
1384 1499
@@ -1407,6 +1522,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
1407 struct mlx4_ib_dev *ibdev = ibdev_ptr; 1522 struct mlx4_ib_dev *ibdev = ibdev_ptr;
1408 int p; 1523 int p;
1409 1524
1525 mlx4_ib_close_sriov(ibdev);
1410 mlx4_ib_mad_cleanup(ibdev); 1526 mlx4_ib_mad_cleanup(ibdev);
1411 ib_unregister_device(&ibdev->ib_dev); 1527 ib_unregister_device(&ibdev->ib_dev);
1412 if (ibdev->iboe.nb.notifier_call) { 1528 if (ibdev->iboe.nb.notifier_call) {
@@ -1428,6 +1544,51 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
1428 ib_dealloc_device(&ibdev->ib_dev); 1544 ib_dealloc_device(&ibdev->ib_dev);
1429} 1545}
1430 1546
1547static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
1548{
1549 struct mlx4_ib_demux_work **dm = NULL;
1550 struct mlx4_dev *dev = ibdev->dev;
1551 int i;
1552 unsigned long flags;
1553
1554 if (!mlx4_is_master(dev))
1555 return;
1556
1557 dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC);
1558 if (!dm) {
1559 pr_err("failed to allocate memory for tunneling qp update\n");
1560 goto out;
1561 }
1562
1563 for (i = 0; i < dev->caps.num_ports; i++) {
1564 dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
1565 if (!dm[i]) {
1566 pr_err("failed to allocate memory for tunneling qp update work struct\n");
1567 for (i = 0; i < dev->caps.num_ports; i++) {
1568 if (dm[i])
1569 kfree(dm[i]);
1570 }
1571 goto out;
1572 }
1573 }
1574 /* initialize or tear down tunnel QPs for the slave */
1575 for (i = 0; i < dev->caps.num_ports; i++) {
1576 INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
1577 dm[i]->port = i + 1;
1578 dm[i]->slave = slave;
1579 dm[i]->do_init = do_init;
1580 dm[i]->dev = ibdev;
1581 spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
1582 if (!ibdev->sriov.is_going_down)
1583 queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
1584 spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
1585 }
1586out:
1587 if (dm)
1588 kfree(dm);
1589 return;
1590}
1591
1431static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, 1592static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1432 enum mlx4_dev_event event, unsigned long param) 1593 enum mlx4_dev_event event, unsigned long param)
1433{ 1594{
@@ -1435,22 +1596,28 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1435 struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); 1596 struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
1436 struct mlx4_eqe *eqe = NULL; 1597 struct mlx4_eqe *eqe = NULL;
1437 struct ib_event_work *ew; 1598 struct ib_event_work *ew;
1438 int port = 0; 1599 int p = 0;
1439 1600
1440 if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) 1601 if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
1441 eqe = (struct mlx4_eqe *)param; 1602 eqe = (struct mlx4_eqe *)param;
1442 else 1603 else
1443 port = (u8)param; 1604 p = (int) param;
1444
1445 if (port > ibdev->num_ports)
1446 return;
1447 1605
1448 switch (event) { 1606 switch (event) {
1449 case MLX4_DEV_EVENT_PORT_UP: 1607 case MLX4_DEV_EVENT_PORT_UP:
1608 if (p > ibdev->num_ports)
1609 return;
1610 if (mlx4_is_master(dev) &&
1611 rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
1612 IB_LINK_LAYER_INFINIBAND) {
1613 mlx4_ib_invalidate_all_guid_record(ibdev, p);
1614 }
1450 ibev.event = IB_EVENT_PORT_ACTIVE; 1615 ibev.event = IB_EVENT_PORT_ACTIVE;
1451 break; 1616 break;
1452 1617
1453 case MLX4_DEV_EVENT_PORT_DOWN: 1618 case MLX4_DEV_EVENT_PORT_DOWN:
1619 if (p > ibdev->num_ports)
1620 return;
1454 ibev.event = IB_EVENT_PORT_ERR; 1621 ibev.event = IB_EVENT_PORT_ERR;
1455 break; 1622 break;
1456 1623
@@ -1469,7 +1636,21 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1469 INIT_WORK(&ew->work, handle_port_mgmt_change_event); 1636 INIT_WORK(&ew->work, handle_port_mgmt_change_event);
1470 memcpy(&ew->ib_eqe, eqe, sizeof *eqe); 1637 memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
1471 ew->ib_dev = ibdev; 1638 ew->ib_dev = ibdev;
1472 handle_port_mgmt_change_event(&ew->work); 1639 /* need to queue only for port owner, which uses GEN_EQE */
1640 if (mlx4_is_master(dev))
1641 queue_work(wq, &ew->work);
1642 else
1643 handle_port_mgmt_change_event(&ew->work);
1644 return;
1645
1646 case MLX4_DEV_EVENT_SLAVE_INIT:
1647 /* here, p is the slave id */
1648 do_slave_init(ibdev, p, 1);
1649 return;
1650
1651 case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
1652 /* here, p is the slave id */
1653 do_slave_init(ibdev, p, 0);
1473 return; 1654 return;
1474 1655
1475 default: 1656 default:
@@ -1477,7 +1658,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1477 } 1658 }
1478 1659
1479 ibev.device = ibdev_ptr; 1660 ibev.device = ibdev_ptr;
1480 ibev.element.port_num = port; 1661 ibev.element.port_num = (u8) p;
1481 1662
1482 ib_dispatch_event(&ibev); 1663 ib_dispatch_event(&ibev);
1483} 1664}
@@ -1497,18 +1678,28 @@ static int __init mlx4_ib_init(void)
1497 if (!wq) 1678 if (!wq)
1498 return -ENOMEM; 1679 return -ENOMEM;
1499 1680
1681 err = mlx4_ib_mcg_init();
1682 if (err)
1683 goto clean_wq;
1684
1500 err = mlx4_register_interface(&mlx4_ib_interface); 1685 err = mlx4_register_interface(&mlx4_ib_interface);
1501 if (err) { 1686 if (err)
1502 destroy_workqueue(wq); 1687 goto clean_mcg;
1503 return err;
1504 }
1505 1688
1506 return 0; 1689 return 0;
1690
1691clean_mcg:
1692 mlx4_ib_mcg_destroy();
1693
1694clean_wq:
1695 destroy_workqueue(wq);
1696 return err;
1507} 1697}
1508 1698
1509static void __exit mlx4_ib_cleanup(void) 1699static void __exit mlx4_ib_cleanup(void)
1510{ 1700{
1511 mlx4_unregister_interface(&mlx4_ib_interface); 1701 mlx4_unregister_interface(&mlx4_ib_interface);
1702 mlx4_ib_mcg_destroy();
1512 destroy_workqueue(wq); 1703 destroy_workqueue(wq);
1513} 1704}
1514 1705
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
new file mode 100644
index 00000000000..3c3b54c3fdd
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -0,0 +1,1254 @@
1/*
2 * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_mad.h>
34#include <rdma/ib_smi.h>
35#include <rdma/ib_cache.h>
36#include <rdma/ib_sa.h>
37
38#include <linux/mlx4/cmd.h>
39#include <linux/rbtree.h>
40#include <linux/delay.h>
41
42#include "mlx4_ib.h"
43
/* size of the per-group func[] membership array */
#define MAX_VFS 80
#define MAX_PEND_REQS_PER_FUNC 4
/* delay before the per-group timeout work fires after a MAD is sent */
#define MAD_TIMEOUT_MS 2000

/* logging helpers; the *_group variants prefix the group's MGID string */
#define mcg_warn(fmt, arg...)	pr_warn("MCG WARNING: " fmt, ##arg)
#define mcg_error(fmt, arg...)	pr_err(fmt, ##arg)
/* every use of the macro argument is parenthesised so any pointer
 * expression can be passed safely */
#define mcg_warn_group(group, format, arg...) \
	pr_warn("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\
	(group)->name, (group)->demux->port, ## arg)

#define mcg_error_group(group, format, arg...) \
	pr_err("  %16s: " format, (group)->name, ## arg)
56
57
/* Never written in the code visible here; as a static object it starts out
 * all-zero and serves as the "zero MGID" value groups are compared against. */
58static union ib_gid mgid0;
59
/* NOTE(review): not referenced in this part of the file - presumably the
 * workqueue used for cleanup work; confirm against the rest of mcg.c. */
60static struct workqueue_struct *clean_wq;
61
/* Membership of a single function (slave) in a multicast group. */
62enum mcast_state {
63 MCAST_NOT_MEMBER = 0,
64 MCAST_MEMBER,
65};
66
/* State of a group with respect to its outstanding request on the wire. */
67enum mcast_group_state {
/* nothing outstanding; pending requests may be processed */
68 MCAST_IDLE,
/* set after send_join_to_wire() succeeded */
69 MCAST_JOIN_SENT,
/* set after send_leave_to_wire() succeeded */
70 MCAST_LEAVE_SENT,
/* response_sa_mad holds a reply for the work handler to consume */
71 MCAST_RESP_READY
72};
73
/* Per-function (slave) bookkeeping inside a multicast group. */
74struct mcast_member {
75 enum mcast_state state;
/* join-state bits this function currently holds in the group */
76 uint8_t join_state;
/* decremented each time one of this function's requests is dequeued */
77 int num_pend_reqs;
/* NOTE(review): presumably the list that mcast_req.func_list links into -
 * confirm against the code that queues requests */
78 struct list_head pending;
79};
80
/* Layout of an MCMemberRecord as overlaid on the data area of an SA MAD
 * (pointers to ib_sa_mad.data are cast to this type throughout the file). */
81struct ib_sa_mcmember_data {
82 union ib_gid mgid;
83 union ib_gid port_gid;
84 __be32 qkey;
85 __be16 mlid;
/* top two bits: selector, low six bits: value (see check_selector()) */
86 u8 mtusel_mtu;
87 u8 tclass;
88 __be16 pkey;
/* same selector/value packing as mtusel_mtu */
89 u8 ratesel_rate;
/* same selector/value packing as mtusel_mtu */
90 u8 lifetmsel_lifetm;
/* per the masks in cmp_rec(): 0xf0000000 SL, 0x0fffff00 flow label,
 * 0x000000ff hop limit */
91 __be32 sl_flowlabel_hoplimit;
/* high nibble: scope, low nibble: join state */
92 u8 scope_join_state;
93 u8 proxy_join;
94 u8 reserved[2];
95};
96
/* One multicast group as tracked for a single port. */
97struct mcast_group {
/* current record for the group; overwritten from SM responses */
98 struct ib_sa_mcmember_data rec;
/* linkage in the per-port rb-tree (ctx->mcg_table), keyed by rec.mgid */
99 struct rb_node node;
/* linkage in ctx->mcg_mgid0_list while the group's MGID is still zero */
100 struct list_head mgid0_list;
101 struct mlx4_ib_demux_ctx *demux;
/* per-function membership, indexed by function (slave) number */
102 struct mcast_member func[MAX_VFS];
103 struct mutex lock;
/* runs mlx4_ib_mcg_work_handler() */
104 struct work_struct work;
/* queued mcast_req entries, linked through mcast_req.group_list */
105 struct list_head pending_list;
/* number of functions holding each of the three low join-state bits */
106 int members[3];
107 enum mcast_group_state state;
/* state saved before a send attempt, restored if the send fails */
108 enum mcast_group_state prev_state;
109 struct ib_sa_mad response_sa_mad;
/* TID of the last MAD sent to the wire, checked against responses */
110 __be64 last_req_tid;
111
112 char name[33]; /* MGID string */
113 struct device_attribute dentry;
114
115 /* refcount is the reference count for the following:
116 1. Each queued request
117 2. Each invocation of the worker thread
118 3. Membership of the port at the SA
119 */
120 atomic_t refcount;
121
122 /* delayed work to clean pending SM request */
123 struct delayed_work timeout_work;
124 struct list_head cleanup_list;
125};
126
/* A join/leave request queued on a group on behalf of one function. */
127struct mcast_req {
/* requesting function (slave); used as the index into group->func[] */
128 int func;
/* the request MAD */
129 struct ib_sa_mad sa_mad;
/* linkage in group->pending_list */
130 struct list_head group_list;
/* NOTE(review): presumably linkage in the requesting function's pending
 * list - confirm against the code that queues requests */
131 struct list_head func_list;
132 struct mcast_group *group;
/* when set, handle_leave_req() drops all of the function's join bits and
 * sends no reply to the slave */
133 int clean;
134};
135
136
/* Decrement @ref and warn if it unexpectedly drops to zero.
 * NOTE: the expansion refers to a variable named `group` that is not a
 * macro parameter, so it can only be used where such a variable (a
 * struct mcast_group pointer) is in scope. */
137#define safe_atomic_dec(ref) \
138 do {\
139 if (atomic_dec_and_test(ref)) \
140 mcg_warn_group(group, "did not expect to reach zero\n"); \
141 } while (0)
142
143static const char *get_state_string(enum mcast_group_state state)
144{
145 switch (state) {
146 case MCAST_IDLE:
147 return "MCAST_IDLE";
148 case MCAST_JOIN_SENT:
149 return "MCAST_JOIN_SENT";
150 case MCAST_LEAVE_SENT:
151 return "MCAST_LEAVE_SENT";
152 case MCAST_RESP_READY:
153 return "MCAST_RESP_READY";
154 }
155 return "Invalid State";
156}
157
158static struct mcast_group *mcast_find(struct mlx4_ib_demux_ctx *ctx,
159 union ib_gid *mgid)
160{
161 struct rb_node *node = ctx->mcg_table.rb_node;
162 struct mcast_group *group;
163 int ret;
164
165 while (node) {
166 group = rb_entry(node, struct mcast_group, node);
167 ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
168 if (!ret)
169 return group;
170
171 if (ret < 0)
172 node = node->rb_left;
173 else
174 node = node->rb_right;
175 }
176 return NULL;
177}
178
179static struct mcast_group *mcast_insert(struct mlx4_ib_demux_ctx *ctx,
180 struct mcast_group *group)
181{
182 struct rb_node **link = &ctx->mcg_table.rb_node;
183 struct rb_node *parent = NULL;
184 struct mcast_group *cur_group;
185 int ret;
186
187 while (*link) {
188 parent = *link;
189 cur_group = rb_entry(parent, struct mcast_group, node);
190
191 ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
192 sizeof group->rec.mgid);
193 if (ret < 0)
194 link = &(*link)->rb_left;
195 else if (ret > 0)
196 link = &(*link)->rb_right;
197 else
198 return cur_group;
199 }
200 rb_link_node(&group->node, parent, link);
201 rb_insert_color(&group->node, &ctx->mcg_table);
202 return NULL;
203}
204
205static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
206{
207 struct mlx4_ib_dev *dev = ctx->dev;
208 struct ib_ah_attr ah_attr;
209
210 spin_lock(&dev->sm_lock);
211 if (!dev->sm_ah[ctx->port - 1]) {
212 /* port is not yet Active, sm_ah not ready */
213 spin_unlock(&dev->sm_lock);
214 return -EAGAIN;
215 }
216 mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
217 spin_unlock(&dev->sm_lock);
218 return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port,
219 IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, mad);
220}
221
222static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
223 struct ib_mad *mad)
224{
225 struct mlx4_ib_dev *dev = ctx->dev;
226 struct ib_mad_agent *agent = dev->send_agent[ctx->port - 1][1];
227 struct ib_wc wc;
228 struct ib_ah_attr ah_attr;
229
230 /* Our agent might not yet be registered when mads start to arrive */
231 if (!agent)
232 return -EAGAIN;
233
234 ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
235
236 wc.pkey_index = 0;
237 wc.sl = 0;
238 wc.dlid_path_bits = 0;
239 wc.port_num = ctx->port;
240 wc.slid = ah_attr.dlid; /* opensm lid */
241 wc.src_qp = 1;
242 return mlx4_ib_send_to_slave(dev, slave, ctx->port, IB_QPT_GSI, &wc, NULL, mad);
243}
244
245static int send_join_to_wire(struct mcast_group *group, struct ib_sa_mad *sa_mad)
246{
247 struct ib_sa_mad mad;
248 struct ib_sa_mcmember_data *sa_mad_data = (struct ib_sa_mcmember_data *)&mad.data;
249 int ret;
250
251 /* we rely on a mad request as arrived from a VF */
252 memcpy(&mad, sa_mad, sizeof mad);
253
254 /* fix port GID to be the real one (slave 0) */
255 sa_mad_data->port_gid.global.interface_id = group->demux->guid_cache[0];
256
257 /* assign our own TID */
258 mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
259 group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
260
261 ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
262 /* set timeout handler */
263 if (!ret) {
264 /* calls mlx4_ib_mcg_timeout_handler */
265 queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
266 msecs_to_jiffies(MAD_TIMEOUT_MS));
267 }
268
269 return ret;
270}
271
272static int send_leave_to_wire(struct mcast_group *group, u8 join_state)
273{
274 struct ib_sa_mad mad;
275 struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
276 int ret;
277
278 memset(&mad, 0, sizeof mad);
279 mad.mad_hdr.base_version = 1;
280 mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
281 mad.mad_hdr.class_version = 2;
282 mad.mad_hdr.method = IB_SA_METHOD_DELETE;
283 mad.mad_hdr.status = cpu_to_be16(0);
284 mad.mad_hdr.class_specific = cpu_to_be16(0);
285 mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
286 group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
287 mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
288 mad.mad_hdr.attr_mod = cpu_to_be32(0);
289 mad.sa_hdr.sm_key = 0x0;
290 mad.sa_hdr.attr_offset = cpu_to_be16(7);
291 mad.sa_hdr.comp_mask = IB_SA_MCMEMBER_REC_MGID |
292 IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE;
293
294 *sa_data = group->rec;
295 sa_data->scope_join_state = join_state;
296
297 ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
298 if (ret)
299 group->state = MCAST_IDLE;
300
301 /* set timeout handler */
302 if (!ret) {
303 /* calls mlx4_ib_mcg_timeout_handler */
304 queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
305 msecs_to_jiffies(MAD_TIMEOUT_MS));
306 }
307
308 return ret;
309}
310
311static int send_reply_to_slave(int slave, struct mcast_group *group,
312 struct ib_sa_mad *req_sa_mad, u16 status)
313{
314 struct ib_sa_mad mad;
315 struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
316 struct ib_sa_mcmember_data *req_sa_data = (struct ib_sa_mcmember_data *)&req_sa_mad->data;
317 int ret;
318
319 memset(&mad, 0, sizeof mad);
320 mad.mad_hdr.base_version = 1;
321 mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
322 mad.mad_hdr.class_version = 2;
323 mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
324 mad.mad_hdr.status = cpu_to_be16(status);
325 mad.mad_hdr.class_specific = cpu_to_be16(0);
326 mad.mad_hdr.tid = req_sa_mad->mad_hdr.tid;
327 *(u8 *)&mad.mad_hdr.tid = 0; /* resetting tid to 0 */
328 mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
329 mad.mad_hdr.attr_mod = cpu_to_be32(0);
330 mad.sa_hdr.sm_key = req_sa_mad->sa_hdr.sm_key;
331 mad.sa_hdr.attr_offset = cpu_to_be16(7);
332 mad.sa_hdr.comp_mask = 0; /* ignored on responses, see IBTA spec */
333
334 *sa_data = group->rec;
335
336 /* reconstruct VF's requested join_state and port_gid */
337 sa_data->scope_join_state &= 0xf0;
338 sa_data->scope_join_state |= (group->func[slave].join_state & 0x0f);
339 memcpy(&sa_data->port_gid, &req_sa_data->port_gid, sizeof req_sa_data->port_gid);
340
341 ret = send_mad_to_slave(slave, group->demux, (struct ib_mad *)&mad);
342 return ret;
343}
344
345static int check_selector(ib_sa_comp_mask comp_mask,
346 ib_sa_comp_mask selector_mask,
347 ib_sa_comp_mask value_mask,
348 u8 src_value, u8 dst_value)
349{
350 int err;
351 u8 selector = dst_value >> 6;
352 dst_value &= 0x3f;
353 src_value &= 0x3f;
354
355 if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
356 return 0;
357
358 switch (selector) {
359 case IB_SA_GT:
360 err = (src_value <= dst_value);
361 break;
362 case IB_SA_LT:
363 err = (src_value >= dst_value);
364 break;
365 case IB_SA_EQ:
366 err = (src_value != dst_value);
367 break;
368 default:
369 err = 0;
370 break;
371 }
372
373 return err;
374}
375
376static u16 cmp_rec(struct ib_sa_mcmember_data *src,
377 struct ib_sa_mcmember_data *dst, ib_sa_comp_mask comp_mask)
378{
379 /* src is group record, dst is request record */
380 /* MGID must already match */
381 /* Port_GID we always replace to our Port_GID, so it is a match */
382
383#define MAD_STATUS_REQ_INVALID 0x0200
384 if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
385 return MAD_STATUS_REQ_INVALID;
386 if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
387 return MAD_STATUS_REQ_INVALID;
388 if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
389 IB_SA_MCMEMBER_REC_MTU,
390 src->mtusel_mtu, dst->mtusel_mtu))
391 return MAD_STATUS_REQ_INVALID;
392 if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
393 src->tclass != dst->tclass)
394 return MAD_STATUS_REQ_INVALID;
395 if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
396 return MAD_STATUS_REQ_INVALID;
397 if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
398 IB_SA_MCMEMBER_REC_RATE,
399 src->ratesel_rate, dst->ratesel_rate))
400 return MAD_STATUS_REQ_INVALID;
401 if (check_selector(comp_mask,
402 IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
403 IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
404 src->lifetmsel_lifetm, dst->lifetmsel_lifetm))
405 return MAD_STATUS_REQ_INVALID;
406 if (comp_mask & IB_SA_MCMEMBER_REC_SL &&
407 (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0xf0000000) !=
408 (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0xf0000000))
409 return MAD_STATUS_REQ_INVALID;
410 if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
411 (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x0fffff00) !=
412 (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x0fffff00))
413 return MAD_STATUS_REQ_INVALID;
414 if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
415 (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x000000ff) !=
416 (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x000000ff))
417 return MAD_STATUS_REQ_INVALID;
418 if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE &&
419 (src->scope_join_state & 0xf0) !=
420 (dst->scope_join_state & 0xf0))
421 return MAD_STATUS_REQ_INVALID;
422
423 /* join_state checked separately, proxy_join ignored */
424
425 return 0;
426}
427
428/* Drop one reference on the group.  Returns 1 if this was the last
429 * reference and the group was destroyed (freed), 0 otherwise.
 *
 * When the last reference is dropped by anyone other than the timeout
 * handler while the group is not idle, the timeout work is cancelled with
 * the non-blocking cancel_delayed_work().  If that cancel reports failure
 * the reference is taken back and 0 is returned, leaving the group alive
 * (presumably so the timeout work can do the final release itself).
 *
 * Takes ctx->mcg_table_lock and then group->lock; both are released on
 * every return path. */
430static int release_group(struct mcast_group *group, int from_timeout_handler)
431{
432 struct mlx4_ib_demux_ctx *ctx = group->demux;
433 int nzgroup;
434
435 mutex_lock(&ctx->mcg_table_lock);
436 mutex_lock(&group->lock);
437 if (atomic_dec_and_test(&group->refcount)) {
438 if (!from_timeout_handler) {
439 if (group->state != MCAST_IDLE &&
440 !cancel_delayed_work(&group->timeout_work)) {
/* cancel failed: undo the decrement and keep the group */
441 atomic_inc(&group->refcount);
442 mutex_unlock(&group->lock);
443 mutex_unlock(&ctx->mcg_table_lock);
444 return 0;
445 }
446 }
447
/* nzgroup is non-zero when the group's MGID is not the all-zero MGID;
 * only such groups have a sysfs attribute and an rb-tree entry removed */
448 nzgroup = memcmp(&group->rec.mgid, &mgid0, sizeof mgid0);
449 if (nzgroup)
450 del_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
451 if (!list_empty(&group->pending_list))
452 mcg_warn_group(group, "releasing a group with non empty pending list\n");
453 if (nzgroup)
454 rb_erase(&group->node, &ctx->mcg_table);
455 list_del_init(&group->mgid0_list);
456 mutex_unlock(&group->lock);
457 mutex_unlock(&ctx->mcg_table_lock);
458 kfree(group);
459 return 1;
460 } else {
461 mutex_unlock(&group->lock);
462 mutex_unlock(&ctx->mcg_table_lock);
463 }
464 return 0;
465}
466
467static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
468{
469 int i;
470
471 for (i = 0; i < 3; i++, join_state >>= 1)
472 if (join_state & 0x1)
473 group->members[i] += inc;
474}
475
476static u8 get_leave_state(struct mcast_group *group)
477{
478 u8 leave_state = 0;
479 int i;
480
481 for (i = 0; i < 3; i++)
482 if (!group->members[i])
483 leave_state |= (1 << i);
484
485 return leave_state & (group->rec.scope_join_state & 7);
486}
487
488static int join_group(struct mcast_group *group, int slave, u8 join_mask)
489{
490 int ret = 0;
491 u8 join_state;
492
493 /* remove bits that slave is already member of, and adjust */
494 join_state = join_mask & (~group->func[slave].join_state);
495 adjust_membership(group, join_state, 1);
496 group->func[slave].join_state |= join_state;
497 if (group->func[slave].state != MCAST_MEMBER && join_state) {
498 group->func[slave].state = MCAST_MEMBER;
499 ret = 1;
500 }
501 return ret;
502}
503
504static int leave_group(struct mcast_group *group, int slave, u8 leave_state)
505{
506 int ret = 0;
507
508 adjust_membership(group, leave_state, -1);
509 group->func[slave].join_state &= ~leave_state;
510 if (!group->func[slave].join_state) {
511 group->func[slave].state = MCAST_NOT_MEMBER;
512 ret = 1;
513 }
514 return ret;
515}
516
517static int check_leave(struct mcast_group *group, int slave, u8 leave_mask)
518{
519 if (group->func[slave].state != MCAST_MEMBER)
520 return MAD_STATUS_REQ_INVALID;
521
522 /* make sure we're not deleting unset bits */
523 if (~group->func[slave].join_state & leave_mask)
524 return MAD_STATUS_REQ_INVALID;
525
526 if (!leave_mask)
527 return MAD_STATUS_REQ_INVALID;
528
529 return 0;
530}
531
/* Delayed-work handler that runs when no response arrived for the MAD last
 * sent for a group (armed by send_join_to_wire()/send_leave_to_wire()).
 *
 * JOIN_SENT:  the first pending request is dropped without a reply and its
 *             reference released; a zero-MGID group is freed outright.
 * LEAVE_SENT: the port's join-state bits are cleared in the record and one
 *             reference is released.
 * If the group survives, it is set to MCAST_IDLE and the work handler is
 * scheduled to process whatever is still pending. */
532static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
533{
534 struct delayed_work *delay = to_delayed_work(work);
535 struct mcast_group *group;
536 struct mcast_req *req = NULL;
537
538 group = container_of(delay, typeof(*group), timeout_work);
539
540 mutex_lock(&group->lock);
541 if (group->state == MCAST_JOIN_SENT) {
542 if (!list_empty(&group->pending_list)) {
543 req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
544 list_del(&req->group_list);
545 list_del(&req->func_list);
546 --group->func[req->func].num_pend_reqs;
/* group->lock must be dropped before release_group(), which takes it */
547 mutex_unlock(&group->lock);
548 kfree(req);
549 if (memcmp(&group->rec.mgid, &mgid0, sizeof mgid0)) {
/* drop the reference held for the request just removed */
550 if (release_group(group, 1))
551 return;
552 } else {
/* NOTE(review): a zero-MGID group is freed here directly, without
 * unlinking mgid0_list in this function - confirm it is unlinked
 * elsewhere */
553 kfree(group);
554 return;
555 }
556 mutex_lock(&group->lock);
557 } else
558 mcg_warn_group(group, "DRIVER BUG\n");
559 } else if (group->state == MCAST_LEAVE_SENT) {
/* treat the leave as done: clear the port's join-state bits */
560 if (group->rec.scope_join_state & 7)
561 group->rec.scope_join_state &= 0xf8;
562 group->state = MCAST_IDLE;
563 mutex_unlock(&group->lock);
564 if (release_group(group, 1))
565 return;
566 mutex_lock(&group->lock);
567 } else
568 mcg_warn_group(group, "invalid state %s\n", get_state_string(group->state));
569 group->state = MCAST_IDLE;
/* reference for the work invocation; given back if the work was
 * already queued */
570 atomic_inc(&group->refcount);
571 if (!queue_work(group->demux->mcg_wq, &group->work))
572 safe_atomic_dec(&group->refcount);
573
574 mutex_unlock(&group->lock);
575}
576
577static int handle_leave_req(struct mcast_group *group, u8 leave_mask,
578 struct mcast_req *req)
579{
580 u16 status;
581
582 if (req->clean)
583 leave_mask = group->func[req->func].join_state;
584
585 status = check_leave(group, req->func, leave_mask);
586 if (!status)
587 leave_group(group, req->func, leave_mask);
588
589 if (!req->clean)
590 send_reply_to_slave(req->func, group, &req->sa_mad, status);
591 --group->func[req->func].num_pend_reqs;
592 list_del(&req->group_list);
593 list_del(&req->func_list);
594 kfree(req);
595 return 1;
596}
597
598static int handle_join_req(struct mcast_group *group, u8 join_mask,
599 struct mcast_req *req)
600{
601 u8 group_join_state = group->rec.scope_join_state & 7;
602 int ref = 0;
603 u16 status;
604 struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
605
606 if (join_mask == (group_join_state & join_mask)) {
607 /* port's membership need not change */
608 status = cmp_rec(&group->rec, sa_data, req->sa_mad.sa_hdr.comp_mask);
609 if (!status)
610 join_group(group, req->func, join_mask);
611
612 --group->func[req->func].num_pend_reqs;
613 send_reply_to_slave(req->func, group, &req->sa_mad, status);
614 list_del(&req->group_list);
615 list_del(&req->func_list);
616 kfree(req);
617 ++ref;
618 } else {
619 /* port's membership needs to be updated */
620 group->prev_state = group->state;
621 if (send_join_to_wire(group, &req->sa_mad)) {
622 --group->func[req->func].num_pend_reqs;
623 list_del(&req->group_list);
624 list_del(&req->func_list);
625 kfree(req);
626 ref = 1;
627 group->state = group->prev_state;
628 } else
629 group->state = MCAST_JOIN_SENT;
630 }
631
632 return ref;
633}
634
/* Work handler for a multicast group.  In order it:
 *   1. consumes a waiting SM response (state MCAST_RESP_READY),
 *   2. processes queued join/leave requests while the group is idle,
 *   3. sends a leave to the SM for join bits no function holds any more,
 *   4. releases the references accumulated in `rc`.
 * `rc` counts references to drop at the end: it starts at 1 for this work
 * invocation, grows by one for each request consumed, and is adjusted when
 * the port's own membership at the SA starts or ends. */
635static void mlx4_ib_mcg_work_handler(struct work_struct *work)
636{
637 struct mcast_group *group;
638 struct mcast_req *req = NULL;
639 struct ib_sa_mcmember_data *sa_data;
640 u8 req_join_state;
641 int rc = 1; /* release_count - this is for the scheduled work */
642 u16 status;
643 u8 method;
644
645 group = container_of(work, typeof(*group), work);
646
647 mutex_lock(&group->lock);
648
649 /* First, let's see if a response from SM is waiting regarding this group.
650 * If so, we need to update the group's REC. If this is a bad response, we
651 * may need to send a bad response to a VF waiting for it. If VF is waiting
652 * and this is a good response, the VF will be answered later in this func. */
653 if (group->state == MCAST_RESP_READY) {
654 /* cancels mlx4_ib_mcg_timeout_handler */
655 cancel_delayed_work(&group->timeout_work);
656 status = be16_to_cpu(group->response_sa_mad.mad_hdr.status);
657 method = group->response_sa_mad.mad_hdr.method;
/* a response whose TID is not the one we last sent is ignored and the
 * state from before the response arrived is restored */
658 if (group->last_req_tid != group->response_sa_mad.mad_hdr.tid) {
659 mcg_warn_group(group, "Got MAD response to existing MGID but wrong TID, dropping. Resp TID=%llx, group TID=%llx\n",
660 be64_to_cpu(group->response_sa_mad.mad_hdr.tid),
661 be64_to_cpu(group->last_req_tid));
662 group->state = group->prev_state;
663 goto process_requests;
664 }
665 if (status) {
/* the SM rejected the request */
666 if (!list_empty(&group->pending_list))
667 req = list_first_entry(&group->pending_list,
668 struct mcast_req, group_list);
669 if ((method == IB_MGMT_METHOD_GET_RESP)) {
670 if (req) {
/* failed join: pass the SM's status on to the waiting slave and
 * drop its request */
671 send_reply_to_slave(req->func, group, &req->sa_mad, status);
672 --group->func[req->func].num_pend_reqs;
673 list_del(&req->group_list);
674 list_del(&req->func_list);
675 kfree(req);
676 ++rc;
677 } else
678 mcg_warn_group(group, "no request for failed join\n");
679 } else if (method == IB_SA_METHOD_DELETE_RESP && group->demux->flushing)
680 ++rc;
681 } else {
682 u8 resp_join_state;
683 u8 cur_join_state;
684
685 resp_join_state = ((struct ib_sa_mcmember_data *)
686 group->response_sa_mad.data)->scope_join_state & 7;
687 cur_join_state = group->rec.scope_join_state & 7;
688
689 if (method == IB_MGMT_METHOD_GET_RESP) {
690 /* successful join: if the port just became a member, keep one
 * reference for that membership */
691 if (!cur_join_state && resp_join_state)
692 --rc;
693 } else if (!resp_join_state)
/* the port holds no join state any more: drop the membership
 * reference */
694 ++rc;
695 memcpy(&group->rec, group->response_sa_mad.data, sizeof group->rec);
696 }
697 group->state = MCAST_IDLE;
698 }
699
700process_requests:
701 /* We should now go over pending join/leave requests, as long as we are idle. */
702 while (!list_empty(&group->pending_list) && group->state == MCAST_IDLE) {
703 req = list_first_entry(&group->pending_list, struct mcast_req,
704 group_list);
705 sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
706 req_join_state = sa_data->scope_join_state & 0x7;
707
708 /* For a leave request, we will immediately answer the VF, and
709 * update our internal counters. The actual leave will be sent
710 * to SM later, if at all needed. We dequeue the request now. */
711 if (req->sa_mad.mad_hdr.method == IB_SA_METHOD_DELETE)
712 rc += handle_leave_req(group, req_join_state, req);
713 else
714 rc += handle_join_req(group, req_join_state, req);
715 }
716
717 /* Handle leaves */
718 if (group->state == MCAST_IDLE) {
/* join bits the port holds but no function uses any more */
719 req_join_state = get_leave_state(group);
720 if (req_join_state) {
721 group->rec.scope_join_state &= ~req_join_state;
722 group->prev_state = group->state;
723 if (send_leave_to_wire(group, req_join_state)) {
/* the leave could not be sent: restore the state and count one
 * more reference to release */
724 group->state = group->prev_state;
725 ++rc;
726 } else
727 group->state = MCAST_LEAVE_SENT;
728 }
729 }
730
731 if (!list_empty(&group->pending_list) && group->state == MCAST_IDLE)
732 goto process_requests;
733 mutex_unlock(&group->lock);
734
/* release_group() takes group->lock itself, so it runs unlocked */
735 while (rc--)
736 release_group(group, 0);
737}
738
739static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx *ctx,
740 __be64 tid,
741 union ib_gid *new_mgid)
742{
743 struct mcast_group *group = NULL, *cur_group;
744 struct mcast_req *req;
745 struct list_head *pos;
746 struct list_head *n;
747
748 mutex_lock(&ctx->mcg_table_lock);
749 list_for_each_safe(pos, n, &ctx->mcg_mgid0_list) {
750 group = list_entry(pos, struct mcast_group, mgid0_list);
751 mutex_lock(&group->lock);
752 if (group->last_req_tid == tid) {
753 if (memcmp(new_mgid, &mgid0, sizeof mgid0)) {
754 group->rec.mgid = *new_mgid;
755 sprintf(group->name, "%016llx%016llx",
756 be64_to_cpu(group->rec.mgid.global.subnet_prefix),
757 be64_to_cpu(group->rec.mgid.global.interface_id));
758 list_del_init(&group->mgid0_list);
759 cur_group = mcast_insert(ctx, group);
760 if (cur_group) {
761 /* A race between our code and SM. Silently cleaning the new one */
762 req = list_first_entry(&group->pending_list,
763 struct mcast_req, group_list);
764 --group->func[req->func].num_pend_reqs;
765 list_del(&req->group_list);
766 list_del(&req->func_list);
767 kfree(req);
768 mutex_unlock(&group->lock);
769 mutex_unlock(&ctx->mcg_table_lock);
770 release_group(group, 0);
771 return NULL;
772 }
773
774 atomic_inc(&group->refcount);
775 add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
776 mutex_unlock(&group->lock);
777 mutex_unlock(&ctx->mcg_table_lock);
778 return group;
779 } else {
780 struct mcast_req *tmp1, *tmp2;
781
782 list_del(&group->mgid0_list);
783 if (!list_empty(&group->pending_list) && group->state != MCAST_IDLE)
784 cancel_delayed_work_sync(&group->timeout_work);
785
786 list_for_each_entry_safe(tmp1, tmp2, &group->pending_list, group_list) {
787 list_del(&tmp1->group_list);
788 kfree(tmp1);
789 }
790 mutex_unlock(&group->lock);
791 mutex_unlock(&ctx->mcg_table_lock);
792 kfree(group);
793 return NULL;
794 }
795 }
796 mutex_unlock(&group->lock);
797 }
798 mutex_unlock(&ctx->mcg_table_lock);
799
800 return NULL;
801}
802
803static ssize_t sysfs_show_group(struct device *dev,
804 struct device_attribute *attr, char *buf);
805
806static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx,
807 union ib_gid *mgid, int create,
808 gfp_t gfp_mask)
809{
810 struct mcast_group *group, *cur_group;
811 int is_mgid0;
812 int i;
813
814 is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
815 if (!is_mgid0) {
816 group = mcast_find(ctx, mgid);
817 if (group)
818 goto found;
819 }
820
821 if (!create)
822 return ERR_PTR(-ENOENT);
823
824 group = kzalloc(sizeof *group, gfp_mask);
825 if (!group)
826 return ERR_PTR(-ENOMEM);
827
828 group->demux = ctx;
829 group->rec.mgid = *mgid;
830 INIT_LIST_HEAD(&group->pending_list);
831 INIT_LIST_HEAD(&group->mgid0_list);
832 for (i = 0; i < MAX_VFS; ++i)
833 INIT_LIST_HEAD(&group->func[i].pending);
834 INIT_WORK(&group->work, mlx4_ib_mcg_work_handler);
835 INIT_DELAYED_WORK(&group->timeout_work, mlx4_ib_mcg_timeout_handler);
836 mutex_init(&group->lock);
837 sprintf(group->name, "%016llx%016llx",
838 be64_to_cpu(group->rec.mgid.global.subnet_prefix),
839 be64_to_cpu(group->rec.mgid.global.interface_id));
840 sysfs_attr_init(&group->dentry.attr);
841 group->dentry.show = sysfs_show_group;
842 group->dentry.store = NULL;
843 group->dentry.attr.name = group->name;
844 group->dentry.attr.mode = 0400;
845 group->state = MCAST_IDLE;
846
847 if (is_mgid0) {
848 list_add(&group->mgid0_list, &ctx->mcg_mgid0_list);
849 goto found;
850 }
851
852 cur_group = mcast_insert(ctx, group);
853 if (cur_group) {
854 mcg_warn("group just showed up %s - confused\n", cur_group->name);
855 kfree(group);
856 return ERR_PTR(-EINVAL);
857 }
858
859 add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
860
861found:
862 atomic_inc(&group->refcount);
863 return group;
864}
865
866static void queue_req(struct mcast_req *req)
867{
868 struct mcast_group *group = req->group;
869
870 atomic_inc(&group->refcount); /* for the request */
871 atomic_inc(&group->refcount); /* for scheduling the work */
872 list_add_tail(&req->group_list, &group->pending_list);
873 list_add_tail(&req->func_list, &group->func[req->func].pending);
874 /* calls mlx4_ib_mcg_work_handler */
875 if (!queue_work(group->demux->mcg_wq, &group->work))
876 safe_atomic_dec(&group->refcount);
877}
878
879int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
880 struct ib_sa_mad *mad)
881{
882 struct mlx4_ib_dev *dev = to_mdev(ibdev);
883 struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)mad->data;
884 struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
885 struct mcast_group *group;
886
887 switch (mad->mad_hdr.method) {
888 case IB_MGMT_METHOD_GET_RESP:
889 case IB_SA_METHOD_DELETE_RESP:
890 mutex_lock(&ctx->mcg_table_lock);
891 group = acquire_group(ctx, &rec->mgid, 0, GFP_KERNEL);
892 mutex_unlock(&ctx->mcg_table_lock);
893 if (IS_ERR(group)) {
894 if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) {
895 __be64 tid = mad->mad_hdr.tid;
896 *(u8 *)(&tid) = (u8)slave; /* in group we kept the modified TID */
897 group = search_relocate_mgid0_group(ctx, tid, &rec->mgid);
898 } else
899 group = NULL;
900 }
901
902 if (!group)
903 return 1;
904
905 mutex_lock(&group->lock);
906 group->response_sa_mad = *mad;
907 group->prev_state = group->state;
908 group->state = MCAST_RESP_READY;
909 /* calls mlx4_ib_mcg_work_handler */
910 atomic_inc(&group->refcount);
911 if (!queue_work(ctx->mcg_wq, &group->work))
912 safe_atomic_dec(&group->refcount);
913 mutex_unlock(&group->lock);
914 release_group(group, 0);
915 return 1; /* consumed */
916 case IB_MGMT_METHOD_SET:
917 case IB_SA_METHOD_GET_TABLE:
918 case IB_SA_METHOD_GET_TABLE_RESP:
919 case IB_SA_METHOD_DELETE:
920 return 0; /* not consumed, pass-through to guest over tunnel */
921 default:
922 mcg_warn("In demux, port %d: unexpected MCMember method: 0x%x, dropping\n",
923 port, mad->mad_hdr.method);
924 return 1; /* consumed */
925 }
926}
927
928int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
929 int slave, struct ib_sa_mad *sa_mad)
930{
931 struct mlx4_ib_dev *dev = to_mdev(ibdev);
932 struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)sa_mad->data;
933 struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
934 struct mcast_group *group;
935 struct mcast_req *req;
936 int may_create = 0;
937
938 if (ctx->flushing)
939 return -EAGAIN;
940
941 switch (sa_mad->mad_hdr.method) {
942 case IB_MGMT_METHOD_SET:
943 may_create = 1;
944 case IB_SA_METHOD_DELETE:
945 req = kzalloc(sizeof *req, GFP_KERNEL);
946 if (!req)
947 return -ENOMEM;
948
949 req->func = slave;
950 req->sa_mad = *sa_mad;
951
952 mutex_lock(&ctx->mcg_table_lock);
953 group = acquire_group(ctx, &rec->mgid, may_create, GFP_KERNEL);
954 mutex_unlock(&ctx->mcg_table_lock);
955 if (IS_ERR(group)) {
956 kfree(req);
957 return PTR_ERR(group);
958 }
959 mutex_lock(&group->lock);
960 if (group->func[slave].num_pend_reqs > MAX_PEND_REQS_PER_FUNC) {
961 mutex_unlock(&group->lock);
962 mcg_warn_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n",
963 port, slave, MAX_PEND_REQS_PER_FUNC);
964 release_group(group, 0);
965 kfree(req);
966 return -ENOMEM;
967 }
968 ++group->func[slave].num_pend_reqs;
969 req->group = group;
970 queue_req(req);
971 mutex_unlock(&group->lock);
972 release_group(group, 0);
973 return 1; /* consumed */
974 case IB_SA_METHOD_GET_TABLE:
975 case IB_MGMT_METHOD_GET_RESP:
976 case IB_SA_METHOD_GET_TABLE_RESP:
977 case IB_SA_METHOD_DELETE_RESP:
978 return 0; /* not consumed, pass-through */
979 default:
980 mcg_warn("In multiplex, port %d, func %d: unexpected MCMember method: 0x%x, dropping\n",
981 port, slave, sa_mad->mad_hdr.method);
982 return 1; /* consumed */
983 }
984}
985
986static ssize_t sysfs_show_group(struct device *dev,
987 struct device_attribute *attr, char *buf)
988{
989 struct mcast_group *group =
990 container_of(attr, struct mcast_group, dentry);
991 struct mcast_req *req = NULL;
992 char pending_str[40];
993 char state_str[40];
994 ssize_t len = 0;
995 int f;
996
997 if (group->state == MCAST_IDLE)
998 sprintf(state_str, "%s", get_state_string(group->state));
999 else
1000 sprintf(state_str, "%s(TID=0x%llx)",
1001 get_state_string(group->state),
1002 be64_to_cpu(group->last_req_tid));
1003 if (list_empty(&group->pending_list)) {
1004 sprintf(pending_str, "No");
1005 } else {
1006 req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
1007 sprintf(pending_str, "Yes(TID=0x%llx)",
1008 be64_to_cpu(req->sa_mad.mad_hdr.tid));
1009 }
1010 len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ",
1011 group->rec.scope_join_state & 0xf,
1012 group->members[2], group->members[1], group->members[0],
1013 atomic_read(&group->refcount),
1014 pending_str,
1015 state_str);
1016 for (f = 0; f < MAX_VFS; ++f)
1017 if (group->func[f].state == MCAST_MEMBER)
1018 len += sprintf(buf + len, "%d[%1x] ",
1019 f, group->func[f].join_state);
1020
1021 len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x "
1022 "%4x %4x %2x %2x)\n",
1023 be16_to_cpu(group->rec.pkey),
1024 be32_to_cpu(group->rec.qkey),
1025 (group->rec.mtusel_mtu & 0xc0) >> 6,
1026 group->rec.mtusel_mtu & 0x3f,
1027 group->rec.tclass,
1028 (group->rec.ratesel_rate & 0xc0) >> 6,
1029 group->rec.ratesel_rate & 0x3f,
1030 (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28,
1031 (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8,
1032 be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff,
1033 group->rec.proxy_join);
1034
1035 return len;
1036}
1037
1038int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx)
1039{
1040 char name[20];
1041
1042 atomic_set(&ctx->tid, 0);
1043 sprintf(name, "mlx4_ib_mcg%d", ctx->port);
1044 ctx->mcg_wq = create_singlethread_workqueue(name);
1045 if (!ctx->mcg_wq)
1046 return -ENOMEM;
1047
1048 mutex_init(&ctx->mcg_table_lock);
1049 ctx->mcg_table = RB_ROOT;
1050 INIT_LIST_HEAD(&ctx->mcg_mgid0_list);
1051 ctx->flushing = 0;
1052
1053 return 0;
1054}
1055
1056static void force_clean_group(struct mcast_group *group)
1057{
1058 struct mcast_req *req, *tmp
1059 ;
1060 list_for_each_entry_safe(req, tmp, &group->pending_list, group_list) {
1061 list_del(&req->group_list);
1062 kfree(req);
1063 }
1064 del_sysfs_port_mcg_attr(group->demux->dev, group->demux->port, &group->dentry.attr);
1065 rb_erase(&group->node, &group->demux->mcg_table);
1066 kfree(group);
1067}
1068
1069static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
1070{
1071 int i;
1072 struct rb_node *p;
1073 struct mcast_group *group;
1074 unsigned long end;
1075 int count;
1076
1077 if (ctx->flushing)
1078 return;
1079
1080 ctx->flushing = 1;
1081 for (i = 0; i < MAX_VFS; ++i)
1082 clean_vf_mcast(ctx, i);
1083
1084 end = jiffies + msecs_to_jiffies(MAD_TIMEOUT_MS + 3000);
1085 do {
1086 count = 0;
1087 mutex_lock(&ctx->mcg_table_lock);
1088 for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p))
1089 ++count;
1090 mutex_unlock(&ctx->mcg_table_lock);
1091 if (!count)
1092 break;
1093
1094 msleep(1);
1095 } while (time_after(end, jiffies));
1096
1097 flush_workqueue(ctx->mcg_wq);
1098 if (destroy_wq)
1099 destroy_workqueue(ctx->mcg_wq);
1100
1101 mutex_lock(&ctx->mcg_table_lock);
1102 while ((p = rb_first(&ctx->mcg_table)) != NULL) {
1103 group = rb_entry(p, struct mcast_group, node);
1104 if (atomic_read(&group->refcount))
1105 mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group);
1106
1107 force_clean_group(group);
1108 }
1109 mutex_unlock(&ctx->mcg_table_lock);
1110
1111 if (!destroy_wq)
1112 ctx->flushing = 0;
1113}
1114
1115struct clean_work {
1116 struct work_struct work;
1117 struct mlx4_ib_demux_ctx *ctx;
1118 int destroy_wq;
1119};
1120
1121static void mcg_clean_task(struct work_struct *work)
1122{
1123 struct clean_work *cw = container_of(work, struct clean_work, work);
1124
1125 _mlx4_ib_mcg_port_cleanup(cw->ctx, cw->destroy_wq);
1126 kfree(cw);
1127}
1128
1129void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
1130{
1131 struct clean_work *work;
1132
1133 if (destroy_wq) {
1134 _mlx4_ib_mcg_port_cleanup(ctx, destroy_wq);
1135 return;
1136 }
1137
1138 work = kmalloc(sizeof *work, GFP_KERNEL);
1139 if (!work) {
1140 mcg_warn("failed allocating work for cleanup\n");
1141 return;
1142 }
1143
1144 work->ctx = ctx;
1145 work->destroy_wq = destroy_wq;
1146 INIT_WORK(&work->work, mcg_clean_task);
1147 queue_work(clean_wq, &work->work);
1148}
1149
1150static void build_leave_mad(struct mcast_req *req)
1151{
1152 struct ib_sa_mad *mad = &req->sa_mad;
1153
1154 mad->mad_hdr.method = IB_SA_METHOD_DELETE;
1155}
1156
1157
1158static void clear_pending_reqs(struct mcast_group *group, int vf)
1159{
1160 struct mcast_req *req, *tmp, *group_first = NULL;
1161 int clear;
1162 int pend = 0;
1163
1164 if (!list_empty(&group->pending_list))
1165 group_first = list_first_entry(&group->pending_list, struct mcast_req, group_list);
1166
1167 list_for_each_entry_safe(req, tmp, &group->func[vf].pending, func_list) {
1168 clear = 1;
1169 if (group_first == req &&
1170 (group->state == MCAST_JOIN_SENT ||
1171 group->state == MCAST_LEAVE_SENT)) {
1172 clear = cancel_delayed_work(&group->timeout_work);
1173 pend = !clear;
1174 group->state = MCAST_IDLE;
1175 }
1176 if (clear) {
1177 --group->func[vf].num_pend_reqs;
1178 list_del(&req->group_list);
1179 list_del(&req->func_list);
1180 kfree(req);
1181 atomic_dec(&group->refcount);
1182 }
1183 }
1184
1185 if (!pend && (!list_empty(&group->func[vf].pending) || group->func[vf].num_pend_reqs)) {
1186 mcg_warn_group(group, "DRIVER BUG: list_empty %d, num_pend_reqs %d\n",
1187 list_empty(&group->func[vf].pending), group->func[vf].num_pend_reqs);
1188 }
1189}
1190
1191static int push_deleteing_req(struct mcast_group *group, int slave)
1192{
1193 struct mcast_req *req;
1194 struct mcast_req *pend_req;
1195
1196 if (!group->func[slave].join_state)
1197 return 0;
1198
1199 req = kzalloc(sizeof *req, GFP_KERNEL);
1200 if (!req) {
1201 mcg_warn_group(group, "failed allocation - may leave stall groups\n");
1202 return -ENOMEM;
1203 }
1204
1205 if (!list_empty(&group->func[slave].pending)) {
1206 pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, group_list);
1207 if (pend_req->clean) {
1208 kfree(req);
1209 return 0;
1210 }
1211 }
1212
1213 req->clean = 1;
1214 req->func = slave;
1215 req->group = group;
1216 ++group->func[slave].num_pend_reqs;
1217 build_leave_mad(req);
1218 queue_req(req);
1219 return 0;
1220}
1221
1222void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave)
1223{
1224 struct mcast_group *group;
1225 struct rb_node *p;
1226
1227 mutex_lock(&ctx->mcg_table_lock);
1228 for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p)) {
1229 group = rb_entry(p, struct mcast_group, node);
1230 mutex_lock(&group->lock);
1231 if (atomic_read(&group->refcount)) {
1232 /* clear pending requests of this VF */
1233 clear_pending_reqs(group, slave);
1234 push_deleteing_req(group, slave);
1235 }
1236 mutex_unlock(&group->lock);
1237 }
1238 mutex_unlock(&ctx->mcg_table_lock);
1239}
1240
1241
1242int mlx4_ib_mcg_init(void)
1243{
1244 clean_wq = create_singlethread_workqueue("mlx4_ib_mcg");
1245 if (!clean_wq)
1246 return -ENOMEM;
1247
1248 return 0;
1249}
1250
1251void mlx4_ib_mcg_destroy(void)
1252{
1253 destroy_workqueue(clean_wq);
1254}
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index c136bb618e2..e04cbc9a54a 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -37,9 +37,12 @@
37#include <linux/compiler.h> 37#include <linux/compiler.h>
38#include <linux/list.h> 38#include <linux/list.h>
39#include <linux/mutex.h> 39#include <linux/mutex.h>
40#include <linux/idr.h>
40 41
41#include <rdma/ib_verbs.h> 42#include <rdma/ib_verbs.h>
42#include <rdma/ib_umem.h> 43#include <rdma/ib_umem.h>
44#include <rdma/ib_mad.h>
45#include <rdma/ib_sa.h>
43 46
44#include <linux/mlx4/device.h> 47#include <linux/mlx4/device.h>
45#include <linux/mlx4/doorbell.h> 48#include <linux/mlx4/doorbell.h>
@@ -62,6 +65,9 @@ enum {
62#define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1) 65#define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1)
63#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT)) 66#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT))
64 67
68/*module param to indicate if SM assigns the alias_GUID*/
69extern int mlx4_ib_sm_guid_assign;
70
65struct mlx4_ib_ucontext { 71struct mlx4_ib_ucontext {
66 struct ib_ucontext ibucontext; 72 struct ib_ucontext ibucontext;
67 struct mlx4_uar uar; 73 struct mlx4_uar uar;
@@ -133,8 +139,10 @@ struct mlx4_ib_wq {
133}; 139};
134 140
135enum mlx4_ib_qp_flags { 141enum mlx4_ib_qp_flags {
136 MLX4_IB_QP_LSO = 1 << 0, 142 MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
137 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1, 143 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
144 MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
145 MLX4_IB_SRIOV_SQP = 1 << 31,
138}; 146};
139 147
140struct mlx4_ib_gid_entry { 148struct mlx4_ib_gid_entry {
@@ -144,6 +152,80 @@ struct mlx4_ib_gid_entry {
144 u8 port; 152 u8 port;
145}; 153};
146 154
155enum mlx4_ib_qp_type {
156 /*
157 * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
158 * here (and in that order) since the MAD layer uses them as
159 * indices into a 2-entry table.
160 */
161 MLX4_IB_QPT_SMI = IB_QPT_SMI,
162 MLX4_IB_QPT_GSI = IB_QPT_GSI,
163
164 MLX4_IB_QPT_RC = IB_QPT_RC,
165 MLX4_IB_QPT_UC = IB_QPT_UC,
166 MLX4_IB_QPT_UD = IB_QPT_UD,
167 MLX4_IB_QPT_RAW_IPV6 = IB_QPT_RAW_IPV6,
168 MLX4_IB_QPT_RAW_ETHERTYPE = IB_QPT_RAW_ETHERTYPE,
169 MLX4_IB_QPT_RAW_PACKET = IB_QPT_RAW_PACKET,
170 MLX4_IB_QPT_XRC_INI = IB_QPT_XRC_INI,
171 MLX4_IB_QPT_XRC_TGT = IB_QPT_XRC_TGT,
172
173 MLX4_IB_QPT_PROXY_SMI_OWNER = 1 << 16,
174 MLX4_IB_QPT_PROXY_SMI = 1 << 17,
175 MLX4_IB_QPT_PROXY_GSI = 1 << 18,
176 MLX4_IB_QPT_TUN_SMI_OWNER = 1 << 19,
177 MLX4_IB_QPT_TUN_SMI = 1 << 20,
178 MLX4_IB_QPT_TUN_GSI = 1 << 21,
179};
180
181#define MLX4_IB_QPT_ANY_SRIOV (MLX4_IB_QPT_PROXY_SMI_OWNER | \
182 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \
183 MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI)
184
185enum mlx4_ib_mad_ifc_flags {
186 MLX4_MAD_IFC_IGNORE_MKEY = 1,
187 MLX4_MAD_IFC_IGNORE_BKEY = 2,
188 MLX4_MAD_IFC_IGNORE_KEYS = (MLX4_MAD_IFC_IGNORE_MKEY |
189 MLX4_MAD_IFC_IGNORE_BKEY),
190 MLX4_MAD_IFC_NET_VIEW = 4,
191};
192
193enum {
194 MLX4_NUM_TUNNEL_BUFS = 256,
195};
196
197struct mlx4_ib_tunnel_header {
198 struct mlx4_av av;
199 __be32 remote_qpn;
200 __be32 qkey;
201 __be16 vlan;
202 u8 mac[6];
203 __be16 pkey_index;
204 u8 reserved[6];
205};
206
207struct mlx4_ib_buf {
208 void *addr;
209 dma_addr_t map;
210};
211
212struct mlx4_rcv_tunnel_hdr {
213 __be32 flags_src_qp; /* flags[6:5] is defined for VLANs:
214 * 0x0 - no vlan was in the packet
215 * 0x01 - C-VLAN was in the packet */
216 u8 g_ml_path; /* gid bit stands for ipv6/4 header in RoCE */
217 u8 reserved;
218 __be16 pkey_index;
219 __be16 sl_vid;
220 __be16 slid_mac_47_32;
221 __be32 mac_31_0;
222};
223
224struct mlx4_ib_proxy_sqp_hdr {
225 struct ib_grh grh;
226 struct mlx4_rcv_tunnel_hdr tun;
227} __packed;
228
147struct mlx4_ib_qp { 229struct mlx4_ib_qp {
148 struct ib_qp ibqp; 230 struct ib_qp ibqp;
149 struct mlx4_qp mqp; 231 struct mlx4_qp mqp;
@@ -159,6 +241,7 @@ struct mlx4_ib_qp {
159 int sq_spare_wqes; 241 int sq_spare_wqes;
160 struct mlx4_ib_wq sq; 242 struct mlx4_ib_wq sq;
161 243
244 enum mlx4_ib_qp_type mlx4_ib_qp_type;
162 struct ib_umem *umem; 245 struct ib_umem *umem;
163 struct mlx4_mtt mtt; 246 struct mlx4_mtt mtt;
164 int buf_size; 247 int buf_size;
@@ -174,6 +257,8 @@ struct mlx4_ib_qp {
174 int mlx_type; 257 int mlx_type;
175 struct list_head gid_list; 258 struct list_head gid_list;
176 struct list_head steering_rules; 259 struct list_head steering_rules;
260 struct mlx4_ib_buf *sqp_proxy_rcv;
261
177}; 262};
178 263
179struct mlx4_ib_srq { 264struct mlx4_ib_srq {
@@ -196,6 +281,138 @@ struct mlx4_ib_ah {
196 union mlx4_ext_av av; 281 union mlx4_ext_av av;
197}; 282};
198 283
284/****************************************/
285/* alias guid support */
286/****************************************/
287#define NUM_PORT_ALIAS_GUID 2
288#define NUM_ALIAS_GUID_IN_REC 8
289#define NUM_ALIAS_GUID_REC_IN_PORT 16
290#define GUID_REC_SIZE 8
291#define NUM_ALIAS_GUID_PER_PORT 128
292#define MLX4_NOT_SET_GUID (0x00LL)
293#define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL))
294
295enum mlx4_guid_alias_rec_status {
296 MLX4_GUID_INFO_STATUS_IDLE,
297 MLX4_GUID_INFO_STATUS_SET,
298 MLX4_GUID_INFO_STATUS_PENDING,
299};
300
301enum mlx4_guid_alias_rec_ownership {
302 MLX4_GUID_DRIVER_ASSIGN,
303 MLX4_GUID_SYSADMIN_ASSIGN,
304 MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
305};
306
307enum mlx4_guid_alias_rec_method {
308 MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET,
309 MLX4_GUID_INFO_RECORD_DELETE = IB_SA_METHOD_DELETE,
310};
311
312struct mlx4_sriov_alias_guid_info_rec_det {
313 u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
314 ib_sa_comp_mask guid_indexes; /* indicates which of the 8 records are valid */
315 enum mlx4_guid_alias_rec_status status; /* indicates the administrative status of the record */
316 u8 method; /* set or delete */
317 enum mlx4_guid_alias_rec_ownership ownership; /* indicates who assigned this alias_guid record */
318};
319
320struct mlx4_sriov_alias_guid_port_rec_det {
321 struct mlx4_sriov_alias_guid_info_rec_det all_rec_per_port[NUM_ALIAS_GUID_REC_IN_PORT];
322 struct workqueue_struct *wq;
323 struct delayed_work alias_guid_work;
324 u8 port;
325 struct mlx4_sriov_alias_guid *parent;
326 struct list_head cb_list;
327};
328
329struct mlx4_sriov_alias_guid {
330 struct mlx4_sriov_alias_guid_port_rec_det ports_guid[MLX4_MAX_PORTS];
331 spinlock_t ag_work_lock;
332 struct ib_sa_client *sa_client;
333};
334
335struct mlx4_ib_demux_work {
336 struct work_struct work;
337 struct mlx4_ib_dev *dev;
338 int slave;
339 int do_init;
340 u8 port;
341
342};
343
344struct mlx4_ib_tun_tx_buf {
345 struct mlx4_ib_buf buf;
346 struct ib_ah *ah;
347};
348
349struct mlx4_ib_demux_pv_qp {
350 struct ib_qp *qp;
351 enum ib_qp_type proxy_qpt;
352 struct mlx4_ib_buf *ring;
353 struct mlx4_ib_tun_tx_buf *tx_ring;
354 spinlock_t tx_lock;
355 unsigned tx_ix_head;
356 unsigned tx_ix_tail;
357};
358
359enum mlx4_ib_demux_pv_state {
360 DEMUX_PV_STATE_DOWN,
361 DEMUX_PV_STATE_STARTING,
362 DEMUX_PV_STATE_ACTIVE,
363 DEMUX_PV_STATE_DOWNING,
364};
365
366struct mlx4_ib_demux_pv_ctx {
367 int port;
368 int slave;
369 enum mlx4_ib_demux_pv_state state;
370 int has_smi;
371 struct ib_device *ib_dev;
372 struct ib_cq *cq;
373 struct ib_pd *pd;
374 struct ib_mr *mr;
375 struct work_struct work;
376 struct workqueue_struct *wq;
377 struct mlx4_ib_demux_pv_qp qp[2];
378};
379
380struct mlx4_ib_demux_ctx {
381 struct ib_device *ib_dev;
382 int port;
383 struct workqueue_struct *wq;
384 struct workqueue_struct *ud_wq;
385 spinlock_t ud_lock;
386 __be64 subnet_prefix;
387 __be64 guid_cache[128];
388 struct mlx4_ib_dev *dev;
389 /* the following lock protects both mcg_table and mcg_mgid0_list */
390 struct mutex mcg_table_lock;
391 struct rb_root mcg_table;
392 struct list_head mcg_mgid0_list;
393 struct workqueue_struct *mcg_wq;
394 struct mlx4_ib_demux_pv_ctx **tun;
395 atomic_t tid;
396 int flushing; /* flushing the work queue */
397};
398
399struct mlx4_ib_sriov {
400 struct mlx4_ib_demux_ctx demux[MLX4_MAX_PORTS];
401 struct mlx4_ib_demux_pv_ctx *sqps[MLX4_MAX_PORTS];
402 /* when using this spinlock you should use "irq" because
403 * it may be called from interrupt context.*/
404 spinlock_t going_down_lock;
405 int is_going_down;
406
407 struct mlx4_sriov_alias_guid alias_guid;
408
409 /* CM paravirtualization fields */
410 struct list_head cm_list;
411 spinlock_t id_map_lock;
412 struct rb_root sl_id_map;
413 struct idr pv_id_table;
414};
415
199struct mlx4_ib_iboe { 416struct mlx4_ib_iboe {
200 spinlock_t lock; 417 spinlock_t lock;
201 struct net_device *netdevs[MLX4_MAX_PORTS]; 418 struct net_device *netdevs[MLX4_MAX_PORTS];
@@ -203,6 +420,42 @@ struct mlx4_ib_iboe {
203 union ib_gid gid_table[MLX4_MAX_PORTS][128]; 420 union ib_gid gid_table[MLX4_MAX_PORTS][128];
204}; 421};
205 422
423struct pkey_mgt {
424 u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
425 u16 phys_pkey_cache[MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
426 struct list_head pkey_port_list[MLX4_MFUNC_MAX];
427 struct kobject *device_parent[MLX4_MFUNC_MAX];
428};
429
430struct mlx4_ib_iov_sysfs_attr {
431 void *ctx;
432 struct kobject *kobj;
433 unsigned long data;
434 u32 entry_num;
435 char name[15];
436 struct device_attribute dentry;
437 struct device *dev;
438};
439
440struct mlx4_ib_iov_sysfs_attr_ar {
441 struct mlx4_ib_iov_sysfs_attr dentries[3 * NUM_ALIAS_GUID_PER_PORT + 1];
442};
443
444struct mlx4_ib_iov_port {
445 char name[100];
446 u8 num;
447 struct mlx4_ib_dev *dev;
448 struct list_head list;
449 struct mlx4_ib_iov_sysfs_attr_ar *dentr_ar;
450 struct ib_port_attr attr;
451 struct kobject *cur_port;
452 struct kobject *admin_alias_parent;
453 struct kobject *gids_parent;
454 struct kobject *pkeys_parent;
455 struct kobject *mcgs_parent;
456 struct mlx4_ib_iov_sysfs_attr mcg_dentry;
457};
458
206struct mlx4_ib_dev { 459struct mlx4_ib_dev {
207 struct ib_device ib_dev; 460 struct ib_device ib_dev;
208 struct mlx4_dev *dev; 461 struct mlx4_dev *dev;
@@ -216,6 +469,7 @@ struct mlx4_ib_dev {
216 struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2]; 469 struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2];
217 struct ib_ah *sm_ah[MLX4_MAX_PORTS]; 470 struct ib_ah *sm_ah[MLX4_MAX_PORTS];
218 spinlock_t sm_lock; 471 spinlock_t sm_lock;
472 struct mlx4_ib_sriov sriov;
219 473
220 struct mutex cap_mask_mutex; 474 struct mutex cap_mask_mutex;
221 bool ib_active; 475 bool ib_active;
@@ -223,6 +477,11 @@ struct mlx4_ib_dev {
223 int counters[MLX4_MAX_PORTS]; 477 int counters[MLX4_MAX_PORTS];
224 int *eq_table; 478 int *eq_table;
225 int eq_added; 479 int eq_added;
480 struct kobject *iov_parent;
481 struct kobject *ports_parent;
482 struct kobject *dev_ports_parent[MLX4_MFUNC_MAX];
483 struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS];
484 struct pkey_mgt pkeys;
226}; 485};
227 486
228struct ib_event_work { 487struct ib_event_work {
@@ -231,6 +490,13 @@ struct ib_event_work {
231 struct mlx4_eqe ib_eqe; 490 struct mlx4_eqe ib_eqe;
232}; 491};
233 492
493struct mlx4_ib_qp_tunnel_init_attr {
494 struct ib_qp_init_attr init_attr;
495 int slave;
496 enum ib_qp_type proxy_qp_type;
497 u8 port;
498};
499
234static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) 500static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
235{ 501{
236 return container_of(ibdev, struct mlx4_ib_dev, ib_dev); 502 return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
@@ -300,6 +566,9 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
300 return container_of(ibah, struct mlx4_ib_ah, ibah); 566 return container_of(ibah, struct mlx4_ib_ah, ibah);
301} 567}
302 568
569int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
570void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
571
303int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, 572int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
304 struct mlx4_db *db); 573 struct mlx4_db *db);
305void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db); 574void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db);
@@ -356,7 +625,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
356int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 625int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
357 struct ib_recv_wr **bad_wr); 626 struct ib_recv_wr **bad_wr);
358 627
359int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, 628int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
360 int port, struct ib_wc *in_wc, struct ib_grh *in_grh, 629 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
361 void *in_mad, void *response_mad); 630 void *in_mad, void *response_mad);
362int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, 631int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -371,6 +640,13 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
371 u64 iova); 640 u64 iova);
372int mlx4_ib_unmap_fmr(struct list_head *fmr_list); 641int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
373int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr); 642int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
643int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
644 struct ib_port_attr *props, int netw_view);
645int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
646 u16 *pkey, int netw_view);
647
648int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
649 union ib_gid *gid, int netw_view);
374 650
375int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, 651int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
376 u8 *mac, int *is_mcast, u8 port); 652 u8 *mac, int *is_mcast, u8 port);
@@ -385,10 +661,69 @@ static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
385 return !!(ah->av.ib.g_slid & 0x80); 661 return !!(ah->av.ib.g_slid & 0x80);
386} 662}
387 663
664int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx);
665void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq);
666void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave);
667int mlx4_ib_mcg_init(void);
668void mlx4_ib_mcg_destroy(void);
669
670int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid);
671
672int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, int slave,
673 struct ib_sa_mad *sa_mad);
674int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
675 struct ib_sa_mad *mad);
676
388int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, 677int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
389 union ib_gid *gid); 678 union ib_gid *gid);
390 679
391void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, 680void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
392 enum ib_event_type type); 681 enum ib_event_type type);
393 682
683void mlx4_ib_tunnels_update_work(struct work_struct *work);
684
685int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
686 enum ib_qp_type qpt, struct ib_wc *wc,
687 struct ib_grh *grh, struct ib_mad *mad);
688int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
689 enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
690 u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad);
691__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
692
693int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
694 struct ib_mad *mad);
695
696int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
697 struct ib_mad *mad);
698
699void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev);
700void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave_id);
701
702/* alias guid support */
703void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port);
704int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev);
705void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev);
706void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port);
707
708void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
709 int block_num,
710 u8 port_num, u8 *p_data);
711
712void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev,
713 int block_num, u8 port_num,
714 u8 *p_data);
715
716int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
717 struct attribute *attr);
718void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
719 struct attribute *attr);
720ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
721
722int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ;
723
724void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device);
725
726__be64 mlx4_ib_gen_node_guid(void);
727
728
394#endif /* MLX4_IB_H */ 729#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index f585eddef4b..19e0637220b 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -38,6 +38,7 @@
38#include <rdma/ib_cache.h> 38#include <rdma/ib_cache.h>
39#include <rdma/ib_pack.h> 39#include <rdma/ib_pack.h>
40#include <rdma/ib_addr.h> 40#include <rdma/ib_addr.h>
41#include <rdma/ib_mad.h>
41 42
42#include <linux/mlx4/qp.h> 43#include <linux/mlx4/qp.h>
43 44
@@ -110,16 +111,62 @@ static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
110 return container_of(mqp, struct mlx4_ib_sqp, qp); 111 return container_of(mqp, struct mlx4_ib_sqp, qp);
111} 112}
112 113
114static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
115{
116 if (!mlx4_is_master(dev->dev))
117 return 0;
118
119 return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn &&
120 qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn +
121 8 * MLX4_MFUNC_MAX;
122}
123
113static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) 124static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
114{ 125{
115 return qp->mqp.qpn >= dev->dev->caps.sqp_start && 126 int proxy_sqp = 0;
116 qp->mqp.qpn <= dev->dev->caps.sqp_start + 3; 127 int real_sqp = 0;
128 int i;
129 /* PPF or Native -- real SQP */
130 real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
131 qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
132 qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3);
133 if (real_sqp)
134 return 1;
135 /* VF or PF -- proxy SQP */
136 if (mlx4_is_mfunc(dev->dev)) {
137 for (i = 0; i < dev->dev->caps.num_ports; i++) {
138 if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] ||
139 qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
140 proxy_sqp = 1;
141 break;
142 }
143 }
144 }
145 return proxy_sqp;
117} 146}
118 147
148/* used for INIT/CLOSE port logic */
119static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) 149static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
120{ 150{
121 return qp->mqp.qpn >= dev->dev->caps.sqp_start && 151 int proxy_qp0 = 0;
122 qp->mqp.qpn <= dev->dev->caps.sqp_start + 1; 152 int real_qp0 = 0;
153 int i;
154 /* PPF or Native -- real QP0 */
155 real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
156 qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
157 qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1);
158 if (real_qp0)
159 return 1;
160 /* VF or PF -- proxy QP0 */
161 if (mlx4_is_mfunc(dev->dev)) {
162 for (i = 0; i < dev->dev->caps.num_ports; i++) {
163 if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
164 proxy_qp0 = 1;
165 break;
166 }
167 }
168 }
169 return proxy_qp0;
123} 170}
124 171
125static void *get_wqe(struct mlx4_ib_qp *qp, int offset) 172static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
@@ -270,7 +317,7 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
270 } 317 }
271} 318}
272 319
273static int send_wqe_overhead(enum ib_qp_type type, u32 flags) 320static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
274{ 321{
275 /* 322 /*
276 * UD WQEs must have a datagram segment. 323 * UD WQEs must have a datagram segment.
@@ -279,19 +326,29 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
279 * header and space for the ICRC). 326 * header and space for the ICRC).
280 */ 327 */
281 switch (type) { 328 switch (type) {
282 case IB_QPT_UD: 329 case MLX4_IB_QPT_UD:
283 return sizeof (struct mlx4_wqe_ctrl_seg) + 330 return sizeof (struct mlx4_wqe_ctrl_seg) +
284 sizeof (struct mlx4_wqe_datagram_seg) + 331 sizeof (struct mlx4_wqe_datagram_seg) +
285 ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0); 332 ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
286 case IB_QPT_UC: 333 case MLX4_IB_QPT_PROXY_SMI_OWNER:
334 case MLX4_IB_QPT_PROXY_SMI:
335 case MLX4_IB_QPT_PROXY_GSI:
336 return sizeof (struct mlx4_wqe_ctrl_seg) +
337 sizeof (struct mlx4_wqe_datagram_seg) + 64;
338 case MLX4_IB_QPT_TUN_SMI_OWNER:
339 case MLX4_IB_QPT_TUN_GSI:
340 return sizeof (struct mlx4_wqe_ctrl_seg) +
341 sizeof (struct mlx4_wqe_datagram_seg);
342
343 case MLX4_IB_QPT_UC:
287 return sizeof (struct mlx4_wqe_ctrl_seg) + 344 return sizeof (struct mlx4_wqe_ctrl_seg) +
288 sizeof (struct mlx4_wqe_raddr_seg); 345 sizeof (struct mlx4_wqe_raddr_seg);
289 case IB_QPT_RC: 346 case MLX4_IB_QPT_RC:
290 return sizeof (struct mlx4_wqe_ctrl_seg) + 347 return sizeof (struct mlx4_wqe_ctrl_seg) +
291 sizeof (struct mlx4_wqe_atomic_seg) + 348 sizeof (struct mlx4_wqe_atomic_seg) +
292 sizeof (struct mlx4_wqe_raddr_seg); 349 sizeof (struct mlx4_wqe_raddr_seg);
293 case IB_QPT_SMI: 350 case MLX4_IB_QPT_SMI:
294 case IB_QPT_GSI: 351 case MLX4_IB_QPT_GSI:
295 return sizeof (struct mlx4_wqe_ctrl_seg) + 352 return sizeof (struct mlx4_wqe_ctrl_seg) +
296 ALIGN(MLX4_IB_UD_HEADER_SIZE + 353 ALIGN(MLX4_IB_UD_HEADER_SIZE +
297 DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE, 354 DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
@@ -345,7 +402,7 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
345} 402}
346 403
347static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, 404static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
348 enum ib_qp_type type, struct mlx4_ib_qp *qp) 405 enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
349{ 406{
350 int s; 407 int s;
351 408
@@ -360,7 +417,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
360 * For MLX transport we need 2 extra S/G entries: 417 * For MLX transport we need 2 extra S/G entries:
361 * one for the header and one for the checksum at the end 418 * one for the header and one for the checksum at the end
362 */ 419 */
363 if ((type == IB_QPT_SMI || type == IB_QPT_GSI) && 420 if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI ||
421 type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) &&
364 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) 422 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
365 return -EINVAL; 423 return -EINVAL;
366 424
@@ -404,7 +462,9 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
404 */ 462 */
405 if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC && 463 if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
406 qp->sq_signal_bits && BITS_PER_LONG == 64 && 464 qp->sq_signal_bits && BITS_PER_LONG == 64 &&
407 type != IB_QPT_SMI && type != IB_QPT_GSI) 465 type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
466 !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
467 MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER)))
408 qp->sq.wqe_shift = ilog2(64); 468 qp->sq.wqe_shift = ilog2(64);
409 else 469 else
410 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); 470 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
@@ -476,6 +536,54 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev,
476 return 0; 536 return 0;
477} 537}
478 538
539static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
540{
541 int i;
542
543 qp->sqp_proxy_rcv =
544 kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt,
545 GFP_KERNEL);
546 if (!qp->sqp_proxy_rcv)
547 return -ENOMEM;
548 for (i = 0; i < qp->rq.wqe_cnt; i++) {
549 qp->sqp_proxy_rcv[i].addr =
550 kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr),
551 GFP_KERNEL);
552 if (!qp->sqp_proxy_rcv[i].addr)
553 goto err;
554 qp->sqp_proxy_rcv[i].map =
555 ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
556 sizeof (struct mlx4_ib_proxy_sqp_hdr),
557 DMA_FROM_DEVICE);
558 }
559 return 0;
560
561err:
562 while (i > 0) {
563 --i;
564 ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
565 sizeof (struct mlx4_ib_proxy_sqp_hdr),
566 DMA_FROM_DEVICE);
567 kfree(qp->sqp_proxy_rcv[i].addr);
568 }
569 kfree(qp->sqp_proxy_rcv);
570 qp->sqp_proxy_rcv = NULL;
571 return -ENOMEM;
572}
573
574static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
575{
576 int i;
577
578 for (i = 0; i < qp->rq.wqe_cnt; i++) {
579 ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
580 sizeof (struct mlx4_ib_proxy_sqp_hdr),
581 DMA_FROM_DEVICE);
582 kfree(qp->sqp_proxy_rcv[i].addr);
583 }
584 kfree(qp->sqp_proxy_rcv);
585}
586
479static int qp_has_rq(struct ib_qp_init_attr *attr) 587static int qp_has_rq(struct ib_qp_init_attr *attr)
480{ 588{
481 if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT) 589 if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
@@ -486,10 +594,67 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
486 594
487static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, 595static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
488 struct ib_qp_init_attr *init_attr, 596 struct ib_qp_init_attr *init_attr,
489 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp) 597 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
490{ 598{
491 int qpn; 599 int qpn;
492 int err; 600 int err;
601 struct mlx4_ib_sqp *sqp;
602 struct mlx4_ib_qp *qp;
603 enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
604
605 /* When tunneling special qps, we use a plain UD qp */
606 if (sqpn) {
607 if (mlx4_is_mfunc(dev->dev) &&
608 (!mlx4_is_master(dev->dev) ||
609 !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
610 if (init_attr->qp_type == IB_QPT_GSI)
611 qp_type = MLX4_IB_QPT_PROXY_GSI;
612 else if (mlx4_is_master(dev->dev))
613 qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
614 else
615 qp_type = MLX4_IB_QPT_PROXY_SMI;
616 }
617 qpn = sqpn;
618 /* add extra sg entry for tunneling */
619 init_attr->cap.max_recv_sge++;
620 } else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) {
621 struct mlx4_ib_qp_tunnel_init_attr *tnl_init =
622 container_of(init_attr,
623 struct mlx4_ib_qp_tunnel_init_attr, init_attr);
624 if ((tnl_init->proxy_qp_type != IB_QPT_SMI &&
625 tnl_init->proxy_qp_type != IB_QPT_GSI) ||
626 !mlx4_is_master(dev->dev))
627 return -EINVAL;
628 if (tnl_init->proxy_qp_type == IB_QPT_GSI)
629 qp_type = MLX4_IB_QPT_TUN_GSI;
630 else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
631 qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
632 else
633 qp_type = MLX4_IB_QPT_TUN_SMI;
634 /* we are definitely in the PPF here, since we are creating
635 * tunnel QPs. base_tunnel_sqpn is therefore valid. */
636 qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave
637 + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1;
638 sqpn = qpn;
639 }
640
641 if (!*caller_qp) {
642 if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
643 (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
644 MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
645 sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL);
646 if (!sqp)
647 return -ENOMEM;
648 qp = &sqp->qp;
649 } else {
650 qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL);
651 if (!qp)
652 return -ENOMEM;
653 }
654 } else
655 qp = *caller_qp;
656
657 qp->mlx4_ib_qp_type = qp_type;
493 658
494 mutex_init(&qp->mutex); 659 mutex_init(&qp->mutex);
495 spin_lock_init(&qp->sq.lock); 660 spin_lock_init(&qp->sq.lock);
@@ -550,7 +715,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
550 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) 715 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
551 qp->flags |= MLX4_IB_QP_LSO; 716 qp->flags |= MLX4_IB_QP_LSO;
552 717
553 err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp); 718 err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
554 if (err) 719 if (err)
555 goto err; 720 goto err;
556 721
@@ -586,7 +751,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
586 } 751 }
587 752
588 if (sqpn) { 753 if (sqpn) {
589 qpn = sqpn; 754 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
755 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
756 if (alloc_proxy_bufs(pd->device, qp)) {
757 err = -ENOMEM;
758 goto err_wrid;
759 }
760 }
590 } else { 761 } else {
591 /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE 762 /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE
592 * BlueFlame setup flow wrongly causes VLAN insertion. */ 763 * BlueFlame setup flow wrongly causes VLAN insertion. */
@@ -595,7 +766,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
595 else 766 else
596 err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn); 767 err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
597 if (err) 768 if (err)
598 goto err_wrid; 769 goto err_proxy;
599 } 770 }
600 771
601 err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); 772 err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
@@ -613,13 +784,16 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
613 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); 784 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
614 785
615 qp->mqp.event = mlx4_ib_qp_event; 786 qp->mqp.event = mlx4_ib_qp_event;
616 787 if (!*caller_qp)
788 *caller_qp = qp;
617 return 0; 789 return 0;
618 790
619err_qpn: 791err_qpn:
620 if (!sqpn) 792 if (!sqpn)
621 mlx4_qp_release_range(dev->dev, qpn, 1); 793 mlx4_qp_release_range(dev->dev, qpn, 1);
622 794err_proxy:
795 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
796 free_proxy_bufs(pd->device, qp);
623err_wrid: 797err_wrid:
624 if (pd->uobject) { 798 if (pd->uobject) {
625 if (qp_has_rq(init_attr)) 799 if (qp_has_rq(init_attr))
@@ -643,6 +817,8 @@ err_db:
643 mlx4_db_free(dev->dev, &qp->db); 817 mlx4_db_free(dev->dev, &qp->db);
644 818
645err: 819err:
820 if (!*caller_qp)
821 kfree(qp);
646 return err; 822 return err;
647} 823}
648 824
@@ -755,7 +931,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
755 931
756 mlx4_qp_free(dev->dev, &qp->mqp); 932 mlx4_qp_free(dev->dev, &qp->mqp);
757 933
758 if (!is_sqp(dev, qp)) 934 if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp))
759 mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); 935 mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
760 936
761 mlx4_mtt_cleanup(dev->dev, &qp->mtt); 937 mlx4_mtt_cleanup(dev->dev, &qp->mtt);
@@ -768,6 +944,9 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
768 } else { 944 } else {
769 kfree(qp->sq.wrid); 945 kfree(qp->sq.wrid);
770 kfree(qp->rq.wrid); 946 kfree(qp->rq.wrid);
947 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
948 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
949 free_proxy_bufs(&dev->ib_dev, qp);
771 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); 950 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
772 if (qp->rq.wqe_cnt) 951 if (qp->rq.wqe_cnt)
773 mlx4_db_free(dev->dev, &qp->db); 952 mlx4_db_free(dev->dev, &qp->db);
@@ -776,25 +955,46 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
776 del_gid_entries(qp); 955 del_gid_entries(qp);
777} 956}
778 957
958static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
959{
960 /* Native or PPF */
961 if (!mlx4_is_mfunc(dev->dev) ||
962 (mlx4_is_master(dev->dev) &&
963 attr->create_flags & MLX4_IB_SRIOV_SQP)) {
964 return dev->dev->phys_caps.base_sqpn +
965 (attr->qp_type == IB_QPT_SMI ? 0 : 2) +
966 attr->port_num - 1;
967 }
968 /* PF or VF -- creating proxies */
969 if (attr->qp_type == IB_QPT_SMI)
970 return dev->dev->caps.qp0_proxy[attr->port_num - 1];
971 else
972 return dev->dev->caps.qp1_proxy[attr->port_num - 1];
973}
974
779struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, 975struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
780 struct ib_qp_init_attr *init_attr, 976 struct ib_qp_init_attr *init_attr,
781 struct ib_udata *udata) 977 struct ib_udata *udata)
782{ 978{
783 struct mlx4_ib_sqp *sqp; 979 struct mlx4_ib_qp *qp = NULL;
784 struct mlx4_ib_qp *qp;
785 int err; 980 int err;
786 u16 xrcdn = 0; 981 u16 xrcdn = 0;
787 982
788 /* 983 /*
789 * We only support LSO and multicast loopback blocking, and 984 * We only support LSO, vendor flag1, and multicast loopback blocking,
790 * only for kernel UD QPs. 985 * and only for kernel UD QPs.
791 */ 986 */
792 if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO | 987 if (init_attr->create_flags & ~(MLX4_IB_QP_LSO |
793 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) 988 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
989 MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP))
794 return ERR_PTR(-EINVAL); 990 return ERR_PTR(-EINVAL);
795 991
796 if (init_attr->create_flags && 992 if (init_attr->create_flags &&
797 (udata || init_attr->qp_type != IB_QPT_UD)) 993 (udata ||
994 ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) &&
995 init_attr->qp_type != IB_QPT_UD) ||
996 ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
997 init_attr->qp_type > IB_QPT_GSI)))
798 return ERR_PTR(-EINVAL); 998 return ERR_PTR(-EINVAL);
799 999
800 switch (init_attr->qp_type) { 1000 switch (init_attr->qp_type) {
@@ -810,18 +1010,17 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
810 /* fall through */ 1010 /* fall through */
811 case IB_QPT_RC: 1011 case IB_QPT_RC:
812 case IB_QPT_UC: 1012 case IB_QPT_UC:
813 case IB_QPT_UD:
814 case IB_QPT_RAW_PACKET: 1013 case IB_QPT_RAW_PACKET:
815 {
816 qp = kzalloc(sizeof *qp, GFP_KERNEL); 1014 qp = kzalloc(sizeof *qp, GFP_KERNEL);
817 if (!qp) 1015 if (!qp)
818 return ERR_PTR(-ENOMEM); 1016 return ERR_PTR(-ENOMEM);
819 1017 /* fall through */
820 err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 0, qp); 1018 case IB_QPT_UD:
821 if (err) { 1019 {
822 kfree(qp); 1020 err = create_qp_common(to_mdev(pd->device), pd, init_attr,
1021 udata, 0, &qp);
1022 if (err)
823 return ERR_PTR(err); 1023 return ERR_PTR(err);
824 }
825 1024
826 qp->ibqp.qp_num = qp->mqp.qpn; 1025 qp->ibqp.qp_num = qp->mqp.qpn;
827 qp->xrcdn = xrcdn; 1026 qp->xrcdn = xrcdn;
@@ -835,21 +1034,11 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
835 if (udata) 1034 if (udata)
836 return ERR_PTR(-EINVAL); 1035 return ERR_PTR(-EINVAL);
837 1036
838 sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
839 if (!sqp)
840 return ERR_PTR(-ENOMEM);
841
842 qp = &sqp->qp;
843
844 err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 1037 err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
845 to_mdev(pd->device)->dev->caps.sqp_start + 1038 get_sqp_num(to_mdev(pd->device), init_attr),
846 (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) + 1039 &qp);
847 init_attr->port_num - 1, 1040 if (err)
848 qp);
849 if (err) {
850 kfree(sqp);
851 return ERR_PTR(err); 1041 return ERR_PTR(err);
852 }
853 1042
854 qp->port = init_attr->port_num; 1043 qp->port = init_attr->port_num;
855 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; 1044 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
@@ -884,18 +1073,27 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
884 return 0; 1073 return 0;
885} 1074}
886 1075
887static int to_mlx4_st(enum ib_qp_type type) 1076static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
888{ 1077{
889 switch (type) { 1078 switch (type) {
890 case IB_QPT_RC: return MLX4_QP_ST_RC; 1079 case MLX4_IB_QPT_RC: return MLX4_QP_ST_RC;
891 case IB_QPT_UC: return MLX4_QP_ST_UC; 1080 case MLX4_IB_QPT_UC: return MLX4_QP_ST_UC;
892 case IB_QPT_UD: return MLX4_QP_ST_UD; 1081 case MLX4_IB_QPT_UD: return MLX4_QP_ST_UD;
893 case IB_QPT_XRC_INI: 1082 case MLX4_IB_QPT_XRC_INI:
894 case IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC; 1083 case MLX4_IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC;
895 case IB_QPT_SMI: 1084 case MLX4_IB_QPT_SMI:
896 case IB_QPT_GSI: 1085 case MLX4_IB_QPT_GSI:
897 case IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX; 1086 case MLX4_IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX;
898 default: return -1; 1087
1088 case MLX4_IB_QPT_PROXY_SMI_OWNER:
1089 case MLX4_IB_QPT_TUN_SMI_OWNER: return (mlx4_is_mfunc(dev->dev) ?
1090 MLX4_QP_ST_MLX : -1);
1091 case MLX4_IB_QPT_PROXY_SMI:
1092 case MLX4_IB_QPT_TUN_SMI:
1093 case MLX4_IB_QPT_PROXY_GSI:
1094 case MLX4_IB_QPT_TUN_GSI: return (mlx4_is_mfunc(dev->dev) ?
1095 MLX4_QP_ST_UD : -1);
1096 default: return -1;
899 } 1097 }
900} 1098}
901 1099
@@ -1043,7 +1241,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1043 return -ENOMEM; 1241 return -ENOMEM;
1044 1242
1045 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | 1243 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
1046 (to_mlx4_st(ibqp->qp_type) << 16)); 1244 (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16));
1047 1245
1048 if (!(attr_mask & IB_QP_PATH_MIG_STATE)) 1246 if (!(attr_mask & IB_QP_PATH_MIG_STATE))
1049 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); 1247 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@@ -1121,13 +1319,16 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1121 } 1319 }
1122 1320
1123 if (attr_mask & IB_QP_PKEY_INDEX) { 1321 if (attr_mask & IB_QP_PKEY_INDEX) {
1322 if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
1323 context->pri_path.disable_pkey_check = 0x40;
1124 context->pri_path.pkey_index = attr->pkey_index; 1324 context->pri_path.pkey_index = attr->pkey_index;
1125 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; 1325 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
1126 } 1326 }
1127 1327
1128 if (attr_mask & IB_QP_AV) { 1328 if (attr_mask & IB_QP_AV) {
1129 if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path, 1329 if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
1130 attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) 1330 attr_mask & IB_QP_PORT ?
1331 attr->port_num : qp->port))
1131 goto out; 1332 goto out;
1132 1333
1133 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | 1334 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
@@ -1210,8 +1411,24 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1210 if (attr_mask & IB_QP_RQ_PSN) 1411 if (attr_mask & IB_QP_RQ_PSN)
1211 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); 1412 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
1212 1413
1414 /* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */
1213 if (attr_mask & IB_QP_QKEY) { 1415 if (attr_mask & IB_QP_QKEY) {
1214 context->qkey = cpu_to_be32(attr->qkey); 1416 if (qp->mlx4_ib_qp_type &
1417 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))
1418 context->qkey = cpu_to_be32(IB_QP_SET_QKEY);
1419 else {
1420 if (mlx4_is_mfunc(dev->dev) &&
1421 !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) &&
1422 (attr->qkey & MLX4_RESERVED_QKEY_MASK) ==
1423 MLX4_RESERVED_QKEY_BASE) {
1424 pr_err("Cannot use reserved QKEY"
1425 " 0x%x (range 0xffff0000..0xffffffff"
1426 " is reserved)\n", attr->qkey);
1427 err = -EINVAL;
1428 goto out;
1429 }
1430 context->qkey = cpu_to_be32(attr->qkey);
1431 }
1215 optpar |= MLX4_QP_OPTPAR_Q_KEY; 1432 optpar |= MLX4_QP_OPTPAR_Q_KEY;
1216 } 1433 }
1217 1434
@@ -1227,10 +1444,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1227 ibqp->qp_type == IB_QPT_UD || 1444 ibqp->qp_type == IB_QPT_UD ||
1228 ibqp->qp_type == IB_QPT_RAW_PACKET)) { 1445 ibqp->qp_type == IB_QPT_RAW_PACKET)) {
1229 context->pri_path.sched_queue = (qp->port - 1) << 6; 1446 context->pri_path.sched_queue = (qp->port - 1) << 6;
1230 if (is_qp0(dev, qp)) 1447 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
1448 qp->mlx4_ib_qp_type &
1449 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) {
1231 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; 1450 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
1232 else 1451 if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI)
1452 context->pri_path.fl = 0x80;
1453 } else {
1454 if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
1455 context->pri_path.fl = 0x80;
1233 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; 1456 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
1457 }
1234 } 1458 }
1235 1459
1236 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && 1460 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
@@ -1346,7 +1570,7 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1346 } 1570 }
1347 1571
1348 if ((attr_mask & IB_QP_PORT) && 1572 if ((attr_mask & IB_QP_PORT) &&
1349 (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) { 1573 (attr->port_num == 0 || attr->port_num > dev->num_ports)) {
1350 pr_debug("qpn 0x%x: invalid port number (%d) specified " 1574 pr_debug("qpn 0x%x: invalid port number (%d) specified "
1351 "for transition %d to %d. qp_type %d\n", 1575 "for transition %d to %d. qp_type %d\n",
1352 ibqp->qp_num, attr->port_num, cur_state, 1576 ibqp->qp_num, attr->port_num, cur_state,
@@ -1400,6 +1624,114 @@ out:
1400 return err; 1624 return err;
1401} 1625}
1402 1626
1627static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
1628 struct ib_send_wr *wr,
1629 void *wqe, unsigned *mlx_seg_len)
1630{
1631 struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
1632 struct ib_device *ib_dev = &mdev->ib_dev;
1633 struct mlx4_wqe_mlx_seg *mlx = wqe;
1634 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
1635 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
1636 u16 pkey;
1637 u32 qkey;
1638 int send_size;
1639 int header_size;
1640 int spc;
1641 int i;
1642
1643 if (wr->opcode != IB_WR_SEND)
1644 return -EINVAL;
1645
1646 send_size = 0;
1647
1648 for (i = 0; i < wr->num_sge; ++i)
1649 send_size += wr->sg_list[i].length;
1650
1651 /* for proxy-qp0 sends, need to add in size of tunnel header */
1652 /* for tunnel-qp0 sends, tunnel header is already in s/g list */
1653 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
1654 send_size += sizeof (struct mlx4_ib_tunnel_header);
1655
1656 ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
1657
1658 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
1659 sqp->ud_header.lrh.service_level =
1660 be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
1661 sqp->ud_header.lrh.destination_lid =
1662 cpu_to_be16(ah->av.ib.g_slid & 0x7f);
1663 sqp->ud_header.lrh.source_lid =
1664 cpu_to_be16(ah->av.ib.g_slid & 0x7f);
1665 }
1666
1667 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
1668
1669 /* force loopback */
1670 mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR);
1671 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1672
1673 sqp->ud_header.lrh.virtual_lane = 0;
1674 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
1675 ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
1676 sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
1677 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
1678 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
1679 else
1680 sqp->ud_header.bth.destination_qpn =
1681 cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
1682
1683 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
1684 if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
1685 return -EINVAL;
1686 sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
1687 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
1688
1689 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
1690 sqp->ud_header.immediate_present = 0;
1691
1692 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
1693
1694 /*
1695 * Inline data segments may not cross a 64 byte boundary. If
1696 * our UD header is bigger than the space available up to the
1697 * next 64 byte boundary in the WQE, use two inline data
1698 * segments to hold the UD header.
1699 */
1700 spc = MLX4_INLINE_ALIGN -
1701 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
1702 if (header_size <= spc) {
1703 inl->byte_count = cpu_to_be32(1 << 31 | header_size);
1704 memcpy(inl + 1, sqp->header_buf, header_size);
1705 i = 1;
1706 } else {
1707 inl->byte_count = cpu_to_be32(1 << 31 | spc);
1708 memcpy(inl + 1, sqp->header_buf, spc);
1709
1710 inl = (void *) (inl + 1) + spc;
1711 memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
1712 /*
1713 * Need a barrier here to make sure all the data is
1714 * visible before the byte_count field is set.
1715 * Otherwise the HCA prefetcher could grab the 64-byte
1716 * chunk with this inline segment and get a valid (!=
1717 * 0xffffffff) byte count but stale data, and end up
1718 * generating a packet with bad headers.
1719 *
1720 * The first inline segment's byte_count field doesn't
1721 * need a barrier, because it comes after a
1722 * control/MLX segment and therefore is at an offset
1723 * of 16 mod 64.
1724 */
1725 wmb();
1726 inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
1727 i = 2;
1728 }
1729
1730 *mlx_seg_len =
1731 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
1732 return 0;
1733}
1734
1403static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, 1735static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1404 void *wqe, unsigned *mlx_seg_len) 1736 void *wqe, unsigned *mlx_seg_len)
1405{ 1737{
@@ -1418,6 +1750,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1418 int is_vlan = 0; 1750 int is_vlan = 0;
1419 int is_grh; 1751 int is_grh;
1420 u16 vlan; 1752 u16 vlan;
1753 int err = 0;
1421 1754
1422 send_size = 0; 1755 send_size = 0;
1423 for (i = 0; i < wr->num_sge; ++i) 1756 for (i = 0; i < wr->num_sge; ++i)
@@ -1426,8 +1759,24 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1426 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; 1759 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
1427 is_grh = mlx4_ib_ah_grh_present(ah); 1760 is_grh = mlx4_ib_ah_grh_present(ah);
1428 if (is_eth) { 1761 if (is_eth) {
1429 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, 1762 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
1430 ah->av.ib.gid_index, &sgid); 1763 /* When multi-function is enabled, the ib_core gid
1764 * indexes don't necessarily match the hw ones, so
1765 * we must use our own cache */
1766 sgid.global.subnet_prefix =
1767 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1768 subnet_prefix;
1769 sgid.global.interface_id =
1770 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1771 guid_cache[ah->av.ib.gid_index];
1772 } else {
1773 err = ib_get_cached_gid(ib_dev,
1774 be32_to_cpu(ah->av.ib.port_pd) >> 24,
1775 ah->av.ib.gid_index, &sgid);
1776 if (err)
1777 return err;
1778 }
1779
1431 vlan = rdma_get_vlan_id(&sgid); 1780 vlan = rdma_get_vlan_id(&sgid);
1432 is_vlan = vlan < 0x1000; 1781 is_vlan = vlan < 0x1000;
1433 } 1782 }
@@ -1446,8 +1795,21 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1446 sqp->ud_header.grh.flow_label = 1795 sqp->ud_header.grh.flow_label =
1447 ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); 1796 ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
1448 sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; 1797 sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
1449 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, 1798 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
1450 ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid); 1799 /* When multi-function is enabled, the ib_core gid
1800 * indexes don't necessarily match the hw ones, so
1801 * we must use our own cache */
1802 sqp->ud_header.grh.source_gid.global.subnet_prefix =
1803 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1804 subnet_prefix;
1805 sqp->ud_header.grh.source_gid.global.interface_id =
1806 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1807 guid_cache[ah->av.ib.gid_index];
1808 } else
1809 ib_get_cached_gid(ib_dev,
1810 be32_to_cpu(ah->av.ib.port_pd) >> 24,
1811 ah->av.ib.gid_index,
1812 &sqp->ud_header.grh.source_gid);
1451 memcpy(sqp->ud_header.grh.destination_gid.raw, 1813 memcpy(sqp->ud_header.grh.destination_gid.raw,
1452 ah->av.ib.dgid, 16); 1814 ah->av.ib.dgid, 16);
1453 } 1815 }
@@ -1459,6 +1821,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1459 (sqp->ud_header.lrh.destination_lid == 1821 (sqp->ud_header.lrh.destination_lid ==
1460 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | 1822 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
1461 (sqp->ud_header.lrh.service_level << 8)); 1823 (sqp->ud_header.lrh.service_level << 8));
1824 if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
1825 mlx->flags |= cpu_to_be32(0x1); /* force loopback */
1462 mlx->rlid = sqp->ud_header.lrh.destination_lid; 1826 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1463 } 1827 }
1464 1828
@@ -1667,6 +2031,63 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
1667 memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); 2031 memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
1668} 2032}
1669 2033
2034static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
2035 struct mlx4_wqe_datagram_seg *dseg,
2036 struct ib_send_wr *wr, enum ib_qp_type qpt)
2037{
2038 union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
2039 struct mlx4_av sqp_av = {0};
2040 int port = *((u8 *) &av->ib.port_pd) & 0x3;
2041
2042 /* force loopback */
2043 sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000);
2044 sqp_av.g_slid = av->ib.g_slid & 0x7f; /* no GRH */
2045 sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel &
2046 cpu_to_be32(0xf0000000);
2047
2048 memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
2049 /* This function used only for sending on QP1 proxies */
2050 dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
2051 /* Use QKEY from the QP context, which is set by master */
2052 dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
2053}
2054
2055static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
2056{
2057 struct mlx4_wqe_inline_seg *inl = wqe;
2058 struct mlx4_ib_tunnel_header hdr;
2059 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
2060 int spc;
2061 int i;
2062
2063 memcpy(&hdr.av, &ah->av, sizeof hdr.av);
2064 hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
2065 hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
2066 hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
2067
2068 spc = MLX4_INLINE_ALIGN -
2069 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
2070 if (sizeof (hdr) <= spc) {
2071 memcpy(inl + 1, &hdr, sizeof (hdr));
2072 wmb();
2073 inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr));
2074 i = 1;
2075 } else {
2076 memcpy(inl + 1, &hdr, spc);
2077 wmb();
2078 inl->byte_count = cpu_to_be32(1 << 31 | spc);
2079
2080 inl = (void *) (inl + 1) + spc;
2081 memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc);
2082 wmb();
2083 inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc));
2084 i = 2;
2085 }
2086
2087 *mlx_seg_len =
2088 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16);
2089}
2090
1670static void set_mlx_icrc_seg(void *dseg) 2091static void set_mlx_icrc_seg(void *dseg)
1671{ 2092{
1672 u32 *t = dseg; 2093 u32 *t = dseg;
@@ -1748,6 +2169,13 @@ static __be32 send_ieth(struct ib_send_wr *wr)
1748 } 2169 }
1749} 2170}
1750 2171
2172static void add_zero_len_inline(void *wqe)
2173{
2174 struct mlx4_wqe_inline_seg *inl = wqe;
2175 memset(wqe, 0, 16);
2176 inl->byte_count = cpu_to_be32(1 << 31);
2177}
2178
1751int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 2179int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1752 struct ib_send_wr **bad_wr) 2180 struct ib_send_wr **bad_wr)
1753{ 2181{
@@ -1806,9 +2234,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1806 wqe += sizeof *ctrl; 2234 wqe += sizeof *ctrl;
1807 size = sizeof *ctrl / 16; 2235 size = sizeof *ctrl / 16;
1808 2236
1809 switch (ibqp->qp_type) { 2237 switch (qp->mlx4_ib_qp_type) {
1810 case IB_QPT_RC: 2238 case MLX4_IB_QPT_RC:
1811 case IB_QPT_UC: 2239 case MLX4_IB_QPT_UC:
1812 switch (wr->opcode) { 2240 switch (wr->opcode) {
1813 case IB_WR_ATOMIC_CMP_AND_SWP: 2241 case IB_WR_ATOMIC_CMP_AND_SWP:
1814 case IB_WR_ATOMIC_FETCH_AND_ADD: 2242 case IB_WR_ATOMIC_FETCH_AND_ADD:
@@ -1869,7 +2297,25 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1869 } 2297 }
1870 break; 2298 break;
1871 2299
1872 case IB_QPT_UD: 2300 case MLX4_IB_QPT_TUN_SMI_OWNER:
2301 err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
2302 if (unlikely(err)) {
2303 *bad_wr = wr;
2304 goto out;
2305 }
2306 wqe += seglen;
2307 size += seglen / 16;
2308 break;
2309 case MLX4_IB_QPT_TUN_SMI:
2310 case MLX4_IB_QPT_TUN_GSI:
2311 /* this is a UD qp used in MAD responses to slaves. */
2312 set_datagram_seg(wqe, wr);
2313 /* set the forced-loopback bit in the data seg av */
2314 *(__be32 *) wqe |= cpu_to_be32(0x80000000);
2315 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2316 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2317 break;
2318 case MLX4_IB_QPT_UD:
1873 set_datagram_seg(wqe, wr); 2319 set_datagram_seg(wqe, wr);
1874 wqe += sizeof (struct mlx4_wqe_datagram_seg); 2320 wqe += sizeof (struct mlx4_wqe_datagram_seg);
1875 size += sizeof (struct mlx4_wqe_datagram_seg) / 16; 2321 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
@@ -1886,8 +2332,47 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1886 } 2332 }
1887 break; 2333 break;
1888 2334
1889 case IB_QPT_SMI: 2335 case MLX4_IB_QPT_PROXY_SMI_OWNER:
1890 case IB_QPT_GSI: 2336 if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) {
2337 err = -ENOSYS;
2338 *bad_wr = wr;
2339 goto out;
2340 }
2341 err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
2342 if (unlikely(err)) {
2343 *bad_wr = wr;
2344 goto out;
2345 }
2346 wqe += seglen;
2347 size += seglen / 16;
2348 /* to start tunnel header on a cache-line boundary */
2349 add_zero_len_inline(wqe);
2350 wqe += 16;
2351 size++;
2352 build_tunnel_header(wr, wqe, &seglen);
2353 wqe += seglen;
2354 size += seglen / 16;
2355 break;
2356 case MLX4_IB_QPT_PROXY_SMI:
2357 /* don't allow QP0 sends on guests */
2358 err = -ENOSYS;
2359 *bad_wr = wr;
2360 goto out;
2361 case MLX4_IB_QPT_PROXY_GSI:
2362 /* If we are tunneling special qps, this is a UD qp.
2363 * In this case we first add a UD segment targeting
2364 * the tunnel qp, and then add a header with address
2365 * information */
2366 set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
2367 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2368 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2369 build_tunnel_header(wr, wqe, &seglen);
2370 wqe += seglen;
2371 size += seglen / 16;
2372 break;
2373
2374 case MLX4_IB_QPT_SMI:
2375 case MLX4_IB_QPT_GSI:
1891 err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen); 2376 err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
1892 if (unlikely(err)) { 2377 if (unlikely(err)) {
1893 *bad_wr = wr; 2378 *bad_wr = wr;
@@ -1913,8 +2398,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1913 size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16); 2398 size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
1914 2399
1915 /* Add one more inline data segment for ICRC for MLX sends */ 2400 /* Add one more inline data segment for ICRC for MLX sends */
1916 if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI || 2401 if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
1917 qp->ibqp.qp_type == IB_QPT_GSI)) { 2402 qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI ||
2403 qp->mlx4_ib_qp_type &
2404 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) {
1918 set_mlx_icrc_seg(dseg + 1); 2405 set_mlx_icrc_seg(dseg + 1);
1919 size += sizeof (struct mlx4_wqe_data_seg) / 16; 2406 size += sizeof (struct mlx4_wqe_data_seg) / 16;
1920 } 2407 }
@@ -2006,8 +2493,10 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2006 int err = 0; 2493 int err = 0;
2007 int nreq; 2494 int nreq;
2008 int ind; 2495 int ind;
2496 int max_gs;
2009 int i; 2497 int i;
2010 2498
2499 max_gs = qp->rq.max_gs;
2011 spin_lock_irqsave(&qp->rq.lock, flags); 2500 spin_lock_irqsave(&qp->rq.lock, flags);
2012 2501
2013 ind = qp->rq.head & (qp->rq.wqe_cnt - 1); 2502 ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
@@ -2027,10 +2516,25 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2027 2516
2028 scat = get_recv_wqe(qp, ind); 2517 scat = get_recv_wqe(qp, ind);
2029 2518
2519 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
2520 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
2521 ib_dma_sync_single_for_device(ibqp->device,
2522 qp->sqp_proxy_rcv[ind].map,
2523 sizeof (struct mlx4_ib_proxy_sqp_hdr),
2524 DMA_FROM_DEVICE);
2525 scat->byte_count =
2526 cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr));
2527 /* use dma lkey from upper layer entry */
2528 scat->lkey = cpu_to_be32(wr->sg_list->lkey);
2529 scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map);
2530 scat++;
2531 max_gs--;
2532 }
2533
2030 for (i = 0; i < wr->num_sge; ++i) 2534 for (i = 0; i < wr->num_sge; ++i)
2031 __set_data_seg(scat + i, wr->sg_list + i); 2535 __set_data_seg(scat + i, wr->sg_list + i);
2032 2536
2033 if (i < qp->rq.max_gs) { 2537 if (i < max_gs) {
2034 scat[i].byte_count = 0; 2538 scat[i].byte_count = 0;
2035 scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY); 2539 scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
2036 scat[i].addr = 0; 2540 scat[i].addr = 0;
@@ -2225,6 +2729,10 @@ done:
2225 if (qp->flags & MLX4_IB_QP_LSO) 2729 if (qp->flags & MLX4_IB_QP_LSO)
2226 qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; 2730 qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
2227 2731
2732 qp_init_attr->sq_sig_type =
2733 qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
2734 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
2735
2228out: 2736out:
2229 mutex_unlock(&qp->mutex); 2737 mutex_unlock(&qp->mutex);
2230 return err; 2738 return err;
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
new file mode 100644
index 00000000000..5b2a01dfb90
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -0,0 +1,794 @@
1/*
2 * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33/*#include "core_priv.h"*/
34#include "mlx4_ib.h"
35#include <linux/slab.h>
36#include <linux/string.h>
37#include <linux/stat.h>
38
39#include <rdma/ib_mad.h>
40/*show_admin_alias_guid returns the administratively assigned value of that GUID.
41 * Values returned in buf parameter string:
42 * 0 - requests opensm to assign a value.
43 * ffffffffffffffff - delete this entry.
44 * other - value assigned by administrator.
45 */
46static ssize_t show_admin_alias_guid(struct device *dev,
47 struct device_attribute *attr, char *buf)
48{
49 int record_num;/*0-15*/
50 int guid_index_in_rec; /*0 - 7*/
51 struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
52 container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
53 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
54 struct mlx4_ib_dev *mdev = port->dev;
55
56 record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
57 guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
58
59 return sprintf(buf, "%llx\n",
60 be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
61 ports_guid[port->num - 1].
62 all_rec_per_port[record_num].
63 all_recs[8 * guid_index_in_rec]));
64}
65
66/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
67 * Values in buf parameter string:
68 * 0 - requests opensm to assign a value.
69 * 0xffffffffffffffff - delete this entry.
70 * other - guid value assigned by the administrator.
71 */
72static ssize_t store_admin_alias_guid(struct device *dev,
73 struct device_attribute *attr,
74 const char *buf, size_t count)
75{
76 int record_num;/*0-15*/
77 int guid_index_in_rec; /*0 - 7*/
78 struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
79 container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
80 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
81 struct mlx4_ib_dev *mdev = port->dev;
82 u64 sysadmin_ag_val;
83
84 record_num = mlx4_ib_iov_dentry->entry_num / 8;
85 guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
86 if (0 == record_num && 0 == guid_index_in_rec) {
87 pr_err("GUID 0 block 0 is RO\n");
88 return count;
89 }
90 sscanf(buf, "%llx", &sysadmin_ag_val);
91 *(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
92 all_rec_per_port[record_num].
93 all_recs[GUID_REC_SIZE * guid_index_in_rec] =
94 cpu_to_be64(sysadmin_ag_val);
95
96 /* Change the state to be pending for update */
97 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
98 = MLX4_GUID_INFO_STATUS_IDLE ;
99
100 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
101 = MLX4_GUID_INFO_RECORD_SET;
102
103 switch (sysadmin_ag_val) {
104 case MLX4_GUID_FOR_DELETE_VAL:
105 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
106 = MLX4_GUID_INFO_RECORD_DELETE;
107 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
108 = MLX4_GUID_SYSADMIN_ASSIGN;
109 break;
110 /* The sysadmin requests the SM to re-assign */
111 case MLX4_NOT_SET_GUID:
112 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
113 = MLX4_GUID_DRIVER_ASSIGN;
114 break;
115 /* The sysadmin requests a specific value.*/
116 default:
117 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
118 = MLX4_GUID_SYSADMIN_ASSIGN;
119 break;
120 }
121
122 /* set the record index */
123 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
124 = mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
125
126 mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
127
128 return count;
129}
130
131static ssize_t show_port_gid(struct device *dev,
132 struct device_attribute *attr,
133 char *buf)
134{
135 struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
136 container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
137 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
138 struct mlx4_ib_dev *mdev = port->dev;
139 union ib_gid gid;
140 ssize_t ret;
141
142 ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num,
143 mlx4_ib_iov_dentry->entry_num, &gid, 1);
144 if (ret)
145 return ret;
146 ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
147 be16_to_cpu(((__be16 *) gid.raw)[0]),
148 be16_to_cpu(((__be16 *) gid.raw)[1]),
149 be16_to_cpu(((__be16 *) gid.raw)[2]),
150 be16_to_cpu(((__be16 *) gid.raw)[3]),
151 be16_to_cpu(((__be16 *) gid.raw)[4]),
152 be16_to_cpu(((__be16 *) gid.raw)[5]),
153 be16_to_cpu(((__be16 *) gid.raw)[6]),
154 be16_to_cpu(((__be16 *) gid.raw)[7]));
155 return ret;
156}
157
158static ssize_t show_phys_port_pkey(struct device *dev,
159 struct device_attribute *attr,
160 char *buf)
161{
162 struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
163 container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
164 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
165 struct mlx4_ib_dev *mdev = port->dev;
166 u16 pkey;
167 ssize_t ret;
168
169 ret = __mlx4_ib_query_pkey(&mdev->ib_dev, port->num,
170 mlx4_ib_iov_dentry->entry_num, &pkey, 1);
171 if (ret)
172 return ret;
173
174 return sprintf(buf, "0x%04x\n", pkey);
175}
176
/*
 * Remove the sysfs file described by @_dentry from its parent kobject.
 * No trailing semicolon after while (0): the caller supplies it, so the
 * macro behaves as a single statement (e.g. inside if/else).
 */
#define DENTRY_REMOVE(_dentry)						\
do {									\
	sysfs_remove_file((_dentry)->kobj, &(_dentry)->dentry.attr);	\
} while (0)
181
182static int create_sysfs_entry(void *_ctx, struct mlx4_ib_iov_sysfs_attr *_dentry,
183 char *_name, struct kobject *_kobj,
184 ssize_t (*show)(struct device *dev,
185 struct device_attribute *attr,
186 char *buf),
187 ssize_t (*store)(struct device *dev,
188 struct device_attribute *attr,
189 const char *buf, size_t count)
190 )
191{
192 int ret = 0;
193 struct mlx4_ib_iov_sysfs_attr *vdentry = _dentry;
194
195 vdentry->ctx = _ctx;
196 vdentry->dentry.show = show;
197 vdentry->dentry.store = store;
198 sysfs_attr_init(&vdentry->dentry.attr);
199 vdentry->dentry.attr.name = vdentry->name;
200 vdentry->dentry.attr.mode = 0;
201 vdentry->kobj = _kobj;
202 snprintf(vdentry->name, 15, "%s", _name);
203
204 if (vdentry->dentry.store)
205 vdentry->dentry.attr.mode |= S_IWUSR;
206
207 if (vdentry->dentry.show)
208 vdentry->dentry.attr.mode |= S_IRUGO;
209
210 ret = sysfs_create_file(vdentry->kobj, &vdentry->dentry.attr);
211 if (ret) {
212 pr_err("failed to create %s\n", vdentry->dentry.attr.name);
213 vdentry->ctx = NULL;
214 return ret;
215 }
216
217 return ret;
218}
219
220int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
221 struct attribute *attr)
222{
223 struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1];
224 int ret;
225
226 ret = sysfs_create_file(port->mcgs_parent, attr);
227 if (ret)
228 pr_err("failed to create %s\n", attr->name);
229
230 return ret;
231}
232
233void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
234 struct attribute *attr)
235{
236 struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1];
237
238 sysfs_remove_file(port->mcgs_parent, attr);
239}
240
/*
 * add_port_entries() - build the per-port sysfs subtree for @port_num.
 *
 * Creates a kobject named after the (1-based) port number under
 * device->ports_parent and, below it, the directories "admin_guids",
 * "gids", "pkeys" and "mcgs".  One file per GID table entry is created
 * in "admin_guids" (show + store) and in "gids" (show only), and one
 * file per pkey table entry in "pkeys" (show only).  Table sizes come
 * from __mlx4_ib_query_port().
 *
 * The attribute array port->dentr_ar->dentries is used in three
 * consecutive ranges: [0, gid_tbl_len) for admin GUIDs,
 * [gid_tbl_len, 2 * gid_tbl_len) for gids, and from 2 * gid_tbl_len
 * onwards for pkeys.
 *
 * Every kobject_create_and_add() below is handed kobject_get(parent),
 * i.e. an extra reference on the parent; the error labels at the bottom
 * drop those references in reverse order.
 *
 * Returns 0 on success or a negative errno.
 */
241static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
242{
243 int i;
244 char buff[10];
245 struct mlx4_ib_iov_port *port = NULL;
246 int ret = 0 ;
247 struct ib_port_attr attr;
248
249 /* get the physical gid and pkey table sizes.*/
250 ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1);
251 if (ret)
252 goto err;
253
254 port = &device->iov_ports[port_num - 1];
255 port->dev = device;
256 port->num = port_num;
257 /* Directory structure:
258 * iov -
259 * port num -
260 * admin_guids
261 * gids (operational)
262 * mcg_table
263 */
264 port->dentr_ar = kzalloc(sizeof (struct mlx4_ib_iov_sysfs_attr_ar),
265 GFP_KERNEL);
266 if (!port->dentr_ar) {
267 ret = -ENOMEM;
268 goto err;
269 }
/* directory for this port, named by its decimal port number */
270 sprintf(buff, "%d", port_num);
271 port->cur_port = kobject_create_and_add(buff,
272 kobject_get(device->ports_parent));
273 if (!port->cur_port) {
274 ret = -ENOMEM;
275 goto kobj_create_err;
276 }
277 /* admin GUIDs */
278 port->admin_alias_parent = kobject_create_and_add("admin_guids",
279 kobject_get(port->cur_port));
280 if (!port->admin_alias_parent) {
281 ret = -ENOMEM;
282 goto err_admin_guids;
283 }
/* one read/write file per GID table index */
284 for (i = 0 ; i < attr.gid_tbl_len; i++) {
285 sprintf(buff, "%d", i);
286 port->dentr_ar->dentries[i].entry_num = i;
287 ret = create_sysfs_entry(port, &port->dentr_ar->dentries[i],
288 buff, port->admin_alias_parent,
289 show_admin_alias_guid, store_admin_alias_guid);
290 if (ret)
291 goto err_admin_alias_parent;
292 }
293
294 /* gids subdirectory (operational gids) */
295 port->gids_parent = kobject_create_and_add("gids",
296 kobject_get(port->cur_port));
297 if (!port->gids_parent) {
298 ret = -ENOMEM;
299 goto err_gids;
300 }
301
/* one read-only file per GID table index; entries follow the admin GUID range */
302 for (i = 0 ; i < attr.gid_tbl_len; i++) {
303 sprintf(buff, "%d", i);
304 port->dentr_ar->dentries[attr.gid_tbl_len + i].entry_num = i;
305 ret = create_sysfs_entry(port,
306 &port->dentr_ar->dentries[attr.gid_tbl_len + i],
307 buff,
308 port->gids_parent, show_port_gid, NULL);
309 if (ret)
310 goto err_gids_parent;
311 }
312
313 /* physical port pkey table */
314 port->pkeys_parent =
315 kobject_create_and_add("pkeys", kobject_get(port->cur_port));
316 if (!port->pkeys_parent) {
317 ret = -ENOMEM;
318 goto err_pkeys;
319 }
320
/* one read-only file per pkey table index; entries follow both GID ranges */
321 for (i = 0 ; i < attr.pkey_tbl_len; i++) {
322 sprintf(buff, "%d", i);
323 port->dentr_ar->dentries[2 * attr.gid_tbl_len + i].entry_num = i;
324 ret = create_sysfs_entry(port,
325 &port->dentr_ar->dentries[2 * attr.gid_tbl_len + i],
326 buff, port->pkeys_parent,
327 show_phys_port_pkey, NULL);
328 if (ret)
329 goto err_pkeys_parent;
330 }
331
332 /* MCGs table */
333 port->mcgs_parent =
334 kobject_create_and_add("mcgs", kobject_get(port->cur_port));
335 if (!port->mcgs_parent) {
336 ret = -ENOMEM;
337 goto err_mcgs;
338 }
339 return 0;
340
/*
 * Error unwind: each label falls through to the ones below it, dropping
 * the child kobject and the parent reference taken for it.
 * NOTE(review): files already created by create_sysfs_entry() are not
 * removed explicitly here - confirm that dropping the parent kobjects
 * is sufficient.
 */
341err_mcgs:
342 kobject_put(port->cur_port);
343
344err_pkeys_parent:
345 kobject_put(port->pkeys_parent);
346
347err_pkeys:
348 kobject_put(port->cur_port);
349
350err_gids_parent:
351 kobject_put(port->gids_parent);
352
353err_gids:
354 kobject_put(port->cur_port);
355
356err_admin_alias_parent:
357 kobject_put(port->admin_alias_parent);
358
359err_admin_guids:
360 kobject_put(port->cur_port);
361 kobject_put(port->cur_port); /* once more for create_and_add buff */
362
363kobj_create_err:
364 kobject_put(device->ports_parent);
365 kfree(port->dentr_ar);
366
367err:
368 pr_err("add_port_entries FAILED: for port:%d, error: %d\n",
369 port_num, ret);
370 return ret;
371}
372
373static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
374{
375 char base_name[9];
376
377 /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
378 strlcpy(name, pci_name(dev->dev->pdev), max);
379 strncpy(base_name, name, 8); /*till xxxx:yy:*/
380 base_name[8] = '\0';
381 /* with no ARI only 3 last bits are used so when the fn is higher than 8
382 * need to add it to the dev num, so count in the last number will be
383 * modulo 8 */
384 sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8));
385}
386
/*
 * Per-slave, per-port sysfs object.  Allocated by add_port() and freed
 * by mlx4_port_release() when the embedded kobject is released.
 */
387struct mlx4_port {
388 struct kobject kobj; /* embedded kobject; container_of() recovers the mlx4_port */
389 struct mlx4_ib_dev *dev; /* owning device */
390 struct attribute_group pkey_group; /* "pkey_idx" attribute group */
391 struct attribute_group gid_group; /* "gid_idx" attribute group */
392 u8 port_num; /* 1-based port number */
393 int slave; /* slave (function) index this object belongs to */
394};
395
396
397static void mlx4_port_release(struct kobject *kobj)
398{
399 struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
400 struct attribute *a;
401 int i;
402
403 for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
404 kfree(a);
405 kfree(p->pkey_group.attrs);
406 for (i = 0; (a = p->gid_group.attrs[i]); ++i)
407 kfree(a);
408 kfree(p->gid_group.attrs);
409 kfree(p);
410}
411
/*
 * Attribute of a struct mlx4_port kobject.  port_attr_show() and
 * port_attr_store() dispatch to these callbacks and return -EIO when
 * the corresponding one is NULL.
 */
412struct port_attribute {
413 struct attribute attr;
414 ssize_t (*show)(struct mlx4_port *, struct port_attribute *, char *buf);
415 ssize_t (*store)(struct mlx4_port *, struct port_attribute *,
416 const char *buf, size_t count);
417};
418
419static ssize_t port_attr_show(struct kobject *kobj,
420 struct attribute *attr, char *buf)
421{
422 struct port_attribute *port_attr =
423 container_of(attr, struct port_attribute, attr);
424 struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
425
426 if (!port_attr->show)
427 return -EIO;
428 return port_attr->show(p, port_attr, buf);
429}
430
431static ssize_t port_attr_store(struct kobject *kobj,
432 struct attribute *attr,
433 const char *buf, size_t size)
434{
435 struct port_attribute *port_attr =
436 container_of(attr, struct port_attribute, attr);
437 struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
438
439 if (!port_attr->store)
440 return -EIO;
441 return port_attr->store(p, port_attr, buf, size);
442}
443
/* sysfs read/write dispatchers used by port_type (mlx4_port kobjects). */
444static const struct sysfs_ops port_sysfs_ops = {
445 .show = port_attr_show,
446 .store = port_attr_store,
447};
448
/*
 * kobj_type for struct mlx4_port: mlx4_port_release() frees the object
 * and port_sysfs_ops routes attribute reads/writes.
 */
449static struct kobj_type port_type = {
450 .release = mlx4_port_release,
451 .sysfs_ops = &port_sysfs_ops,
452};
453
/*
 * One entry of a per-port attribute table, as built by
 * alloc_group_attrs().
 */
454struct port_table_attribute {
455 struct port_attribute attr;
456 char name[8]; /* file name: the decimal table index */
457 int index; /* position of this entry in its table */
458};
459
460static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
461 char *buf)
462{
463 struct port_table_attribute *tab_attr =
464 container_of(attr, struct port_table_attribute, attr);
465 ssize_t ret = -ENODEV;
466
467 if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >=
468 (p->dev->dev->caps.pkey_table_len[p->port_num]))
469 ret = sprintf(buf, "none\n");
470 else
471 ret = sprintf(buf, "%d\n",
472 p->dev->pkeys.virt2phys_pkey[p->slave]
473 [p->port_num - 1][tab_attr->index]);
474 return ret;
475}
476
477static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
478 const char *buf, size_t count)
479{
480 struct port_table_attribute *tab_attr =
481 container_of(attr, struct port_table_attribute, attr);
482 int idx;
483 int err;
484
485 /* do not allow remapping Dom0 virtual pkey table */
486 if (p->slave == mlx4_master_func_num(p->dev->dev))
487 return -EINVAL;
488
489 if (!strncasecmp(buf, "no", 2))
490 idx = p->dev->dev->phys_caps.pkey_phys_table_len[p->port_num] - 1;
491 else if (sscanf(buf, "%i", &idx) != 1 ||
492 idx >= p->dev->dev->caps.pkey_table_len[p->port_num] ||
493 idx < 0)
494 return -EINVAL;
495
496 p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1]
497 [tab_attr->index] = idx;
498 mlx4_sync_pkey_table(p->dev->dev, p->slave, p->port_num,
499 tab_attr->index, idx);
500 err = mlx4_gen_pkey_eqe(p->dev->dev, p->slave, p->port_num);
501 if (err) {
502 pr_err("mlx4_gen_pkey_eqe failed for slave %d,"
503 " port %d, index %d\n", p->slave, p->port_num, idx);
504 return err;
505 }
506 return count;
507}
508
509static ssize_t show_port_gid_idx(struct mlx4_port *p,
510 struct port_attribute *attr, char *buf)
511{
512 return sprintf(buf, "%d\n", p->slave);
513}
514
515static struct attribute **
516alloc_group_attrs(ssize_t (*show)(struct mlx4_port *,
517 struct port_attribute *, char *buf),
518 ssize_t (*store)(struct mlx4_port *, struct port_attribute *,
519 const char *buf, size_t count),
520 int len)
521{
522 struct attribute **tab_attr;
523 struct port_table_attribute *element;
524 int i;
525
526 tab_attr = kcalloc(1 + len, sizeof (struct attribute *), GFP_KERNEL);
527 if (!tab_attr)
528 return NULL;
529
530 for (i = 0; i < len; i++) {
531 element = kzalloc(sizeof (struct port_table_attribute),
532 GFP_KERNEL);
533 if (!element)
534 goto err;
535 if (snprintf(element->name, sizeof (element->name),
536 "%d", i) >= sizeof (element->name)) {
537 kfree(element);
538 goto err;
539 }
540 sysfs_attr_init(&element->attr.attr);
541 element->attr.attr.name = element->name;
542 if (store) {
543 element->attr.attr.mode = S_IWUSR | S_IRUGO;
544 element->attr.store = store;
545 } else
546 element->attr.attr.mode = S_IRUGO;
547
548 element->attr.show = show;
549 element->index = i;
550 tab_attr[i] = &element->attr.attr;
551 }
552 return tab_attr;
553
554err:
555 while (--i >= 0)
556 kfree(tab_attr[i]);
557 kfree(tab_attr);
558 return NULL;
559}
560
561static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
562{
563 struct mlx4_port *p;
564 int i;
565 int ret;
566
567 p = kzalloc(sizeof *p, GFP_KERNEL);
568 if (!p)
569 return -ENOMEM;
570
571 p->dev = dev;
572 p->port_num = port_num;
573 p->slave = slave;
574
575 ret = kobject_init_and_add(&p->kobj, &port_type,
576 kobject_get(dev->dev_ports_parent[slave]),
577 "%d", port_num);
578 if (ret)
579 goto err_alloc;
580
581 p->pkey_group.name = "pkey_idx";
582 p->pkey_group.attrs =
583 alloc_group_attrs(show_port_pkey, store_port_pkey,
584 dev->dev->caps.pkey_table_len[port_num]);
585 if (!p->pkey_group.attrs)
586 goto err_alloc;
587
588 ret = sysfs_create_group(&p->kobj, &p->pkey_group);
589 if (ret)
590 goto err_free_pkey;
591
592 p->gid_group.name = "gid_idx";
593 p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1);
594 if (!p->gid_group.attrs)
595 goto err_free_pkey;
596
597 ret = sysfs_create_group(&p->kobj, &p->gid_group);
598 if (ret)
599 goto err_free_gid;
600
601 list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]);
602 return 0;
603
604err_free_gid:
605 kfree(p->gid_group.attrs[0]);
606 kfree(p->gid_group.attrs);
607
608err_free_pkey:
609 for (i = 0; i < dev->dev->caps.pkey_table_len[port_num]; ++i)
610 kfree(p->pkey_group.attrs[i]);
611 kfree(p->pkey_group.attrs);
612
613err_alloc:
614 kobject_put(dev->dev_ports_parent[slave]);
615 kfree(p);
616 return ret;
617}
618
/*
 * register_one_pkey_tree() - build the per-slave sysfs tree for @slave.
 *
 * Creates a directory named by get_name() under dev->iov_parent, a
 * "ports" directory below it, and one port object (add_port()) for each
 * port 1..caps.num_ports.  Each kobject_create_and_add() is given
 * kobject_get(parent), i.e. an extra reference on the parent, which the
 * error labels drop again.
 *
 * Returns 0 on success or a negative errno.
 */
619static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave)
620{
621 char name[32];
622 int err;
623 int port;
624 struct kobject *p, *t;
625 struct mlx4_port *mport;
626
627 get_name(dev, name, slave, sizeof name);
628
629 dev->pkeys.device_parent[slave] =
630 kobject_create_and_add(name, kobject_get(dev->iov_parent));
631
632 if (!dev->pkeys.device_parent[slave]) {
633 err = -ENOMEM;
634 goto fail_dev;
635 }
636
/* add_port() links every port object it creates into this list */
637 INIT_LIST_HEAD(&dev->pkeys.pkey_port_list[slave]);
638
639 dev->dev_ports_parent[slave] =
640 kobject_create_and_add("ports",
641 kobject_get(dev->pkeys.device_parent[slave]));
642
643 if (!dev->dev_ports_parent[slave]) {
644 err = -ENOMEM;
645 goto err_ports;
646 }
647
648 for (port = 1; port <= dev->dev->caps.num_ports; ++port) {
649 err = add_port(dev, port, slave);
650 if (err)
651 goto err_add;
652 }
653 return 0;
654
/* tear down the port objects that were added before the failure */
655err_add:
656 list_for_each_entry_safe(p, t,
657 &dev->pkeys.pkey_port_list[slave],
658 entry) {
659 list_del(&p->entry);
660 mport = container_of(p, struct mlx4_port, kobj);
661 sysfs_remove_group(p, &mport->pkey_group);
662 sysfs_remove_group(p, &mport->gid_group);
663 kobject_put(p);
664 }
665 kobject_put(dev->dev_ports_parent[slave]);
666
667err_ports:
668 kobject_put(dev->pkeys.device_parent[slave]);
669 /* extra put for the device_parent create_and_add */
670 kobject_put(dev->pkeys.device_parent[slave]);
671
672fail_dev:
673 kobject_put(dev->iov_parent);
674 return err;
675}
676
677static int register_pkey_tree(struct mlx4_ib_dev *device)
678{
679 int i;
680
681 if (!mlx4_is_master(device->dev))
682 return 0;
683
684 for (i = 0; i <= device->dev->num_vfs; ++i)
685 register_one_pkey_tree(device, i);
686
687 return 0;
688}
689
690static void unregister_pkey_tree(struct mlx4_ib_dev *device)
691{
692 int slave;
693 struct kobject *p, *t;
694 struct mlx4_port *port;
695
696 if (!mlx4_is_master(device->dev))
697 return;
698
699 for (slave = device->dev->num_vfs; slave >= 0; --slave) {
700 list_for_each_entry_safe(p, t,
701 &device->pkeys.pkey_port_list[slave],
702 entry) {
703 list_del(&p->entry);
704 port = container_of(p, struct mlx4_port, kobj);
705 sysfs_remove_group(p, &port->pkey_group);
706 sysfs_remove_group(p, &port->gid_group);
707 kobject_put(p);
708 kobject_put(device->dev_ports_parent[slave]);
709 }
710 kobject_put(device->dev_ports_parent[slave]);
711 kobject_put(device->pkeys.device_parent[slave]);
712 kobject_put(device->pkeys.device_parent[slave]);
713 kobject_put(device->iov_parent);
714 }
715}
716
717int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
718{
719 int i;
720 int ret = 0;
721
722 if (!mlx4_is_master(dev->dev))
723 return 0;
724
725 dev->iov_parent =
726 kobject_create_and_add("iov",
727 kobject_get(dev->ib_dev.ports_parent->parent));
728 if (!dev->iov_parent) {
729 ret = -ENOMEM;
730 goto err;
731 }
732 dev->ports_parent =
733 kobject_create_and_add("ports",
734 kobject_get(dev->iov_parent));
735 if (!dev->iov_parent) {
736 ret = -ENOMEM;
737 goto err_ports;
738 }
739
740 for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) {
741 ret = add_port_entries(dev, i);
742 if (ret)
743 goto err_add_entries;
744 }
745
746 ret = register_pkey_tree(dev);
747 if (ret)
748 goto err_add_entries;
749 return 0;
750
751err_add_entries:
752 kobject_put(dev->ports_parent);
753
754err_ports:
755 kobject_put(dev->iov_parent);
756err:
757 kobject_put(dev->ib_dev.ports_parent->parent);
758 pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret);
759 return ret;
760}
761
762static void unregister_alias_guid_tree(struct mlx4_ib_dev *device)
763{
764 struct mlx4_ib_iov_port *p;
765 int i;
766
767 if (!mlx4_is_master(device->dev))
768 return;
769
770 for (i = 0; i < device->dev->caps.num_ports; i++) {
771 p = &device->iov_ports[i];
772 kobject_put(p->admin_alias_parent);
773 kobject_put(p->gids_parent);
774 kobject_put(p->pkeys_parent);
775 kobject_put(p->mcgs_parent);
776 kobject_put(p->cur_port);
777 kobject_put(p->cur_port);
778 kobject_put(p->cur_port);
779 kobject_put(p->cur_port);
780 kobject_put(p->cur_port);
781 kobject_put(p->dev->ports_parent);
782 kfree(p->dentr_ar);
783 }
784}
785
/*
 * mlx4_ib_device_unregister_sysfs() - tear down the "iov" hierarchy.
 *
 * Removes the per-port alias GUID trees and the per-slave pkey trees
 * (both helpers are no-ops unless this is the master function), then
 * drops references on ports_parent, iov_parent (twice) and the parent
 * of the IB device's ports directory.
 * NOTE(review): the kobject_put() calls below are not guarded by
 * mlx4_is_master(), unlike the register path - confirm this is only
 * called on the master.
 */
786void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device)
787{
788 unregister_alias_guid_tree(device);
789 unregister_pkey_tree(device);
790 kobject_put(device->ports_parent);
791 kobject_put(device->iov_parent);
792 kobject_put(device->iov_parent);
793 kobject_put(device->ib_dev.ports_parent->parent);
794}
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
index 145da404088..d39e0183ff8 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -285,7 +285,6 @@ struct qib_base_info {
285 285
286#ifndef QIB_KERN_TYPE 286#ifndef QIB_KERN_TYPE
287#define QIB_KERN_TYPE 0 287#define QIB_KERN_TYPE 0
288#define QIB_IDSTR "QLogic kernel.org driver"
289#endif 288#endif
290 289
291/* 290/*
@@ -302,6 +301,19 @@ struct qib_base_info {
302#define QIB_KERN_SWVERSION ((QIB_KERN_TYPE << 31) | QIB_USER_SWVERSION) 301#define QIB_KERN_SWVERSION ((QIB_KERN_TYPE << 31) | QIB_USER_SWVERSION)
303 302
304/* 303/*
304 * Define the driver version number. This is something that refers only
305 * to the driver itself, not the software interfaces it supports.
306 */
307#define QIB_DRIVER_VERSION_BASE "1.11"
308
309/* create the final driver version string */
310#ifdef QIB_IDSTR
311#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE " " QIB_IDSTR
312#else
313#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE
314#endif
315
316/*
305 * If the unit is specified via open, HCA choice is fixed. If port is 317 * If the unit is specified via open, HCA choice is fixed. If port is
306 * specified, it's also fixed. Otherwise we try to spread contexts 318 * specified, it's also fixed. Otherwise we try to spread contexts
307 * across ports and HCAs, using different algorithims. WITHIN is 319 * across ports and HCAs, using different algorithims. WITHIN is
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index e41e7f7fc76..5423edcab51 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -46,7 +46,7 @@
46 * The size has to be longer than this string, so we can append 46 * The size has to be longer than this string, so we can append
47 * board/chip information to it in the init code. 47 * board/chip information to it in the init code.
48 */ 48 */
49const char ib_qib_version[] = QIB_IDSTR "\n"; 49const char ib_qib_version[] = QIB_DRIVER_VERSION "\n";
50 50
51DEFINE_SPINLOCK(qib_devs_lock); 51DEFINE_SPINLOCK(qib_devs_lock);
52LIST_HEAD(qib_dev_list); 52LIST_HEAD(qib_dev_list);
@@ -65,6 +65,7 @@ MODULE_PARM_DESC(compat_ddr_negotiate,
65MODULE_LICENSE("Dual BSD/GPL"); 65MODULE_LICENSE("Dual BSD/GPL");
66MODULE_AUTHOR("QLogic <support@qlogic.com>"); 66MODULE_AUTHOR("QLogic <support@qlogic.com>");
67MODULE_DESCRIPTION("QLogic IB driver"); 67MODULE_DESCRIPTION("QLogic IB driver");
68MODULE_VERSION(QIB_DRIVER_VERSION);
68 69
69/* 70/*
70 * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our 71 * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index e9486c74c22..81c7b73695d 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -186,8 +186,9 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
186 goto bail; 186 goto bail;
187 187
188 off = sge->addr - mr->user_base; 188 off = sge->addr - mr->user_base;
189 if (unlikely(sge->addr < mr->iova || off + sge->length > mr->length || 189 if (unlikely(sge->addr < mr->user_base ||
190 (mr->access_flags & acc) == 0)) 190 off + sge->length > mr->length ||
191 (mr->access_flags & acc) != acc))
191 goto bail; 192 goto bail;
192 if (unlikely(!atomic_inc_not_zero(&mr->refcount))) 193 if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
193 goto bail; 194 goto bail;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index fc9b205c241..ba51a4715a1 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -2224,7 +2224,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
2224 ibdev->dma_ops = &qib_dma_mapping_ops; 2224 ibdev->dma_ops = &qib_dma_mapping_ops;
2225 2225
2226 snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), 2226 snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
2227 QIB_IDSTR " %s", init_utsname()->nodename); 2227 "QLogic Infiniband HCA %s", init_utsname()->nodename);
2228 2228
2229 ret = ib_register_device(ibdev, qib_create_port_files); 2229 ret = ib_register_device(ibdev, qib_create_port_files);
2230 if (ret) 2230 if (ret)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 1e19b5ae7c4..ea0dfc77a7f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -150,7 +150,7 @@ static int ipoib_stop(struct net_device *dev)
150 150
151 netif_stop_queue(dev); 151 netif_stop_queue(dev);
152 152
153 ipoib_ib_dev_down(dev, 0); 153 ipoib_ib_dev_down(dev, 1);
154 ipoib_ib_dev_stop(dev, 0); 154 ipoib_ib_dev_stop(dev, 0);
155 155
156 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 156 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 75367249f44..cecb98a4c66 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -175,7 +175,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
175 175
176 mcast->mcmember = *mcmember; 176 mcast->mcmember = *mcmember;
177 177
178 /* Set the cached Q_Key before we attach if it's the broadcast group */ 178 /* Set the multicast MTU and cached Q_Key before we attach if it's
179 * the broadcast group.
180 */
179 if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4, 181 if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
180 sizeof (union ib_gid))) { 182 sizeof (union ib_gid))) {
181 spin_lock_irq(&priv->lock); 183 spin_lock_irq(&priv->lock);
@@ -183,10 +185,17 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
183 spin_unlock_irq(&priv->lock); 185 spin_unlock_irq(&priv->lock);
184 return -EAGAIN; 186 return -EAGAIN;
185 } 187 }
188 priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
186 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); 189 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
187 spin_unlock_irq(&priv->lock); 190 spin_unlock_irq(&priv->lock);
188 priv->tx_wr.wr.ud.remote_qkey = priv->qkey; 191 priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
189 set_qkey = 1; 192 set_qkey = 1;
193
194 if (!ipoib_cm_admin_enabled(dev)) {
195 rtnl_lock();
196 dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
197 rtnl_unlock();
198 }
190 } 199 }
191 200
192 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 201 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -574,14 +583,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
574 return; 583 return;
575 } 584 }
576 585
577 priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
578
579 if (!ipoib_cm_admin_enabled(dev)) {
580 rtnl_lock();
581 dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
582 rtnl_unlock();
583 }
584
585 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); 586 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
586 587
587 clear_bit(IPOIB_MCAST_RUN, &priv->flags); 588 clear_bit(IPOIB_MCAST_RUN, &priv->flags);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 1b5b0c73005..922d845f76b 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -638,9 +638,9 @@ static void srp_reset_req(struct srp_target_port *target, struct srp_request *re
638 struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL); 638 struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL);
639 639
640 if (scmnd) { 640 if (scmnd) {
641 srp_free_req(target, req, scmnd, 0);
641 scmnd->result = DID_RESET << 16; 642 scmnd->result = DID_RESET << 16;
642 scmnd->scsi_done(scmnd); 643 scmnd->scsi_done(scmnd);
643 srp_free_req(target, req, scmnd, 0);
644 } 644 }
645} 645}
646 646
@@ -1687,6 +1687,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
1687 SRP_TSK_ABORT_TASK); 1687 SRP_TSK_ABORT_TASK);
1688 srp_free_req(target, req, scmnd, 0); 1688 srp_free_req(target, req, scmnd, 0);
1689 scmnd->result = DID_ABORT << 16; 1689 scmnd->result = DID_ABORT << 16;
1690 scmnd->scsi_done(scmnd);
1690 1691
1691 return SUCCESS; 1692 return SUCCESS;
1692} 1693}
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index c8fef435302..3d1899ff107 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -40,6 +40,7 @@
40 40
41#include <linux/mlx4/cmd.h> 41#include <linux/mlx4/cmd.h>
42#include <linux/semaphore.h> 42#include <linux/semaphore.h>
43#include <rdma/ib_smi.h>
43 44
44#include <asm/io.h> 45#include <asm/io.h>
45 46
@@ -394,7 +395,8 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
394 struct mlx4_vhcr_cmd *vhcr = priv->mfunc.vhcr; 395 struct mlx4_vhcr_cmd *vhcr = priv->mfunc.vhcr;
395 int ret; 396 int ret;
396 397
397 down(&priv->cmd.slave_sem); 398 mutex_lock(&priv->cmd.slave_cmd_mutex);
399
398 vhcr->in_param = cpu_to_be64(in_param); 400 vhcr->in_param = cpu_to_be64(in_param);
399 vhcr->out_param = out_param ? cpu_to_be64(*out_param) : 0; 401 vhcr->out_param = out_param ? cpu_to_be64(*out_param) : 0;
400 vhcr->in_modifier = cpu_to_be32(in_modifier); 402 vhcr->in_modifier = cpu_to_be32(in_modifier);
@@ -402,6 +404,7 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
402 vhcr->token = cpu_to_be16(CMD_POLL_TOKEN); 404 vhcr->token = cpu_to_be16(CMD_POLL_TOKEN);
403 vhcr->status = 0; 405 vhcr->status = 0;
404 vhcr->flags = !!(priv->cmd.use_events) << 6; 406 vhcr->flags = !!(priv->cmd.use_events) << 6;
407
405 if (mlx4_is_master(dev)) { 408 if (mlx4_is_master(dev)) {
406 ret = mlx4_master_process_vhcr(dev, dev->caps.function, vhcr); 409 ret = mlx4_master_process_vhcr(dev, dev->caps.function, vhcr);
407 if (!ret) { 410 if (!ret) {
@@ -438,7 +441,8 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
438 mlx4_err(dev, "failed execution of VHCR_POST command" 441 mlx4_err(dev, "failed execution of VHCR_POST command"
439 "opcode 0x%x\n", op); 442 "opcode 0x%x\n", op);
440 } 443 }
441 up(&priv->cmd.slave_sem); 444
445 mutex_unlock(&priv->cmd.slave_cmd_mutex);
442 return ret; 446 return ret;
443} 447}
444 448
@@ -627,6 +631,162 @@ static int mlx4_ACCESS_MEM(struct mlx4_dev *dev, u64 master_addr,
627 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); 631 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
628} 632}
629 633
634static int query_pkey_block(struct mlx4_dev *dev, u8 port, u16 index, u16 *pkey,
635 struct mlx4_cmd_mailbox *inbox,
636 struct mlx4_cmd_mailbox *outbox)
637{
638 struct ib_smp *in_mad = (struct ib_smp *)(inbox->buf);
639 struct ib_smp *out_mad = (struct ib_smp *)(outbox->buf);
640 int err;
641 int i;
642
643 if (index & 0x1f)
644 return -EINVAL;
645
646 in_mad->attr_mod = cpu_to_be32(index / 32);
647
648 err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3,
649 MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
650 MLX4_CMD_NATIVE);
651 if (err)
652 return err;
653
654 for (i = 0; i < 32; ++i)
655 pkey[i] = be16_to_cpu(((__be16 *) out_mad->data)[i]);
656
657 return err;
658}
659
660static int get_full_pkey_table(struct mlx4_dev *dev, u8 port, u16 *table,
661 struct mlx4_cmd_mailbox *inbox,
662 struct mlx4_cmd_mailbox *outbox)
663{
664 int i;
665 int err;
666
667 for (i = 0; i < dev->caps.pkey_table_len[port]; i += 32) {
668 err = query_pkey_block(dev, port, i, table + i, inbox, outbox);
669 if (err)
670 return err;
671 }
672
673 return 0;
674}
675#define PORT_CAPABILITY_LOCATION_IN_SMP 20
676#define PORT_STATE_OFFSET 32
677
678static enum ib_port_state vf_port_state(struct mlx4_dev *dev, int port, int vf)
679{
680 if (mlx4_get_slave_port_state(dev, vf, port) == SLAVE_PORT_UP)
681 return IB_PORT_ACTIVE;
682 else
683 return IB_PORT_DOWN;
684}
685
686static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
687 struct mlx4_vhcr *vhcr,
688 struct mlx4_cmd_mailbox *inbox,
689 struct mlx4_cmd_mailbox *outbox,
690 struct mlx4_cmd_info *cmd)
691{
692 struct ib_smp *smp = inbox->buf;
693 u32 index;
694 u8 port;
695 u16 *table;
696 int err;
697 int vidx, pidx;
698 struct mlx4_priv *priv = mlx4_priv(dev);
699 struct ib_smp *outsmp = outbox->buf;
700 __be16 *outtab = (__be16 *)(outsmp->data);
701 __be32 slave_cap_mask;
702 __be64 slave_node_guid;
703 port = vhcr->in_modifier;
704
705 if (smp->base_version == 1 &&
706 smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
707 smp->class_version == 1) {
708 if (smp->method == IB_MGMT_METHOD_GET) {
709 if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) {
710 index = be32_to_cpu(smp->attr_mod);
711 if (port < 1 || port > dev->caps.num_ports)
712 return -EINVAL;
713 table = kcalloc(dev->caps.pkey_table_len[port], sizeof *table, GFP_KERNEL);
714 if (!table)
715 return -ENOMEM;
716 /* need to get the full pkey table because the paravirtualized
717 * pkeys may be scattered among several pkey blocks.
718 */
719 err = get_full_pkey_table(dev, port, table, inbox, outbox);
720 if (!err) {
721 for (vidx = index * 32; vidx < (index + 1) * 32; ++vidx) {
722 pidx = priv->virt2phys_pkey[slave][port - 1][vidx];
723 outtab[vidx % 32] = cpu_to_be16(table[pidx]);
724 }
725 }
726 kfree(table);
727 return err;
728 }
729 if (smp->attr_id == IB_SMP_ATTR_PORT_INFO) {
730 /*get the slave specific caps:*/
731 /*do the command */
732 err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
733 vhcr->in_modifier, vhcr->op_modifier,
734 vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
735 /* modify the response for slaves */
736 if (!err && slave != mlx4_master_func_num(dev)) {
737 u8 *state = outsmp->data + PORT_STATE_OFFSET;
738
739 *state = (*state & 0xf0) | vf_port_state(dev, port, slave);
740 slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port];
741 memcpy(outsmp->data + PORT_CAPABILITY_LOCATION_IN_SMP, &slave_cap_mask, 4);
742 }
743 return err;
744 }
745 if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) {
746 /* compute slave's gid block */
747 smp->attr_mod = cpu_to_be32(slave / 8);
748 /* execute cmd */
749 err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
750 vhcr->in_modifier, vhcr->op_modifier,
751 vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
752 if (!err) {
753 /* if needed, move slave gid to index 0 */
754 if (slave % 8)
755 memcpy(outsmp->data,
756 outsmp->data + (slave % 8) * 8, 8);
757 /* delete all other gids */
758 memset(outsmp->data + 8, 0, 56);
759 }
760 return err;
761 }
762 if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
763 err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
764 vhcr->in_modifier, vhcr->op_modifier,
765 vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
766 if (!err) {
767 slave_node_guid = mlx4_get_slave_node_guid(dev, slave);
768 memcpy(outsmp->data + 12, &slave_node_guid, 8);
769 }
770 return err;
771 }
772 }
773 }
774 if (slave != mlx4_master_func_num(dev) &&
775 ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) ||
776 (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
777 smp->method == IB_MGMT_METHOD_SET))) {
778 mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, "
779 "class 0x%x, method 0x%x for attr 0x%x. Rejecting\n",
780 slave, smp->method, smp->mgmt_class,
781 be16_to_cpu(smp->attr_id));
782 return -EPERM;
783 }
784 /*default:*/
785 return mlx4_cmd_box(dev, inbox->dma, outbox->dma,
786 vhcr->in_modifier, vhcr->op_modifier,
787 vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
788}
789
630int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave, 790int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
631 struct mlx4_vhcr *vhcr, 791 struct mlx4_vhcr *vhcr,
632 struct mlx4_cmd_mailbox *inbox, 792 struct mlx4_cmd_mailbox *inbox,
@@ -950,7 +1110,7 @@ static struct mlx4_cmd_info cmd_info[] = {
950 .out_is_imm = false, 1110 .out_is_imm = false,
951 .encode_slave_id = false, 1111 .encode_slave_id = false,
952 .verify = NULL, 1112 .verify = NULL,
953 .wrapper = mlx4_GEN_QP_wrapper 1113 .wrapper = mlx4_INIT2INIT_QP_wrapper
954 }, 1114 },
955 { 1115 {
956 .opcode = MLX4_CMD_INIT2RTR_QP, 1116 .opcode = MLX4_CMD_INIT2RTR_QP,
@@ -968,7 +1128,7 @@ static struct mlx4_cmd_info cmd_info[] = {
968 .out_is_imm = false, 1128 .out_is_imm = false,
969 .encode_slave_id = false, 1129 .encode_slave_id = false,
970 .verify = NULL, 1130 .verify = NULL,
971 .wrapper = mlx4_GEN_QP_wrapper 1131 .wrapper = mlx4_RTR2RTS_QP_wrapper
972 }, 1132 },
973 { 1133 {
974 .opcode = MLX4_CMD_RTS2RTS_QP, 1134 .opcode = MLX4_CMD_RTS2RTS_QP,
@@ -977,7 +1137,7 @@ static struct mlx4_cmd_info cmd_info[] = {
977 .out_is_imm = false, 1137 .out_is_imm = false,
978 .encode_slave_id = false, 1138 .encode_slave_id = false,
979 .verify = NULL, 1139 .verify = NULL,
980 .wrapper = mlx4_GEN_QP_wrapper 1140 .wrapper = mlx4_RTS2RTS_QP_wrapper
981 }, 1141 },
982 { 1142 {
983 .opcode = MLX4_CMD_SQERR2RTS_QP, 1143 .opcode = MLX4_CMD_SQERR2RTS_QP,
@@ -986,7 +1146,7 @@ static struct mlx4_cmd_info cmd_info[] = {
986 .out_is_imm = false, 1146 .out_is_imm = false,
987 .encode_slave_id = false, 1147 .encode_slave_id = false,
988 .verify = NULL, 1148 .verify = NULL,
989 .wrapper = mlx4_GEN_QP_wrapper 1149 .wrapper = mlx4_SQERR2RTS_QP_wrapper
990 }, 1150 },
991 { 1151 {
992 .opcode = MLX4_CMD_2ERR_QP, 1152 .opcode = MLX4_CMD_2ERR_QP,
@@ -1013,7 +1173,7 @@ static struct mlx4_cmd_info cmd_info[] = {
1013 .out_is_imm = false, 1173 .out_is_imm = false,
1014 .encode_slave_id = false, 1174 .encode_slave_id = false,
1015 .verify = NULL, 1175 .verify = NULL,
1016 .wrapper = mlx4_GEN_QP_wrapper 1176 .wrapper = mlx4_SQD2SQD_QP_wrapper
1017 }, 1177 },
1018 { 1178 {
1019 .opcode = MLX4_CMD_SQD2RTS_QP, 1179 .opcode = MLX4_CMD_SQD2RTS_QP,
@@ -1022,7 +1182,7 @@ static struct mlx4_cmd_info cmd_info[] = {
1022 .out_is_imm = false, 1182 .out_is_imm = false,
1023 .encode_slave_id = false, 1183 .encode_slave_id = false,
1024 .verify = NULL, 1184 .verify = NULL,
1025 .wrapper = mlx4_GEN_QP_wrapper 1185 .wrapper = mlx4_SQD2RTS_QP_wrapper
1026 }, 1186 },
1027 { 1187 {
1028 .opcode = MLX4_CMD_2RST_QP, 1188 .opcode = MLX4_CMD_2RST_QP,
@@ -1061,6 +1221,24 @@ static struct mlx4_cmd_info cmd_info[] = {
1061 .wrapper = mlx4_GEN_QP_wrapper 1221 .wrapper = mlx4_GEN_QP_wrapper
1062 }, 1222 },
1063 { 1223 {
1224 .opcode = MLX4_CMD_CONF_SPECIAL_QP,
1225 .has_inbox = false,
1226 .has_outbox = false,
1227 .out_is_imm = false,
1228 .encode_slave_id = false,
1229 .verify = NULL, /* XXX verify: only demux can do this */
1230 .wrapper = NULL
1231 },
1232 {
1233 .opcode = MLX4_CMD_MAD_IFC,
1234 .has_inbox = true,
1235 .has_outbox = true,
1236 .out_is_imm = false,
1237 .encode_slave_id = false,
1238 .verify = NULL,
1239 .wrapper = mlx4_MAD_IFC_wrapper
1240 },
1241 {
1064 .opcode = MLX4_CMD_QUERY_IF_STAT, 1242 .opcode = MLX4_CMD_QUERY_IF_STAT,
1065 .has_inbox = false, 1243 .has_inbox = false,
1066 .has_outbox = true, 1244 .has_outbox = true,
@@ -1340,6 +1518,8 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
1340 if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) 1518 if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd)
1341 goto inform_slave_state; 1519 goto inform_slave_state;
1342 1520
1521 mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, slave);
1522
1343 /* write the version in the event field */ 1523 /* write the version in the event field */
1344 reply |= mlx4_comm_get_version(); 1524 reply |= mlx4_comm_get_version();
1345 1525
@@ -1376,19 +1556,21 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
1376 goto reset_slave; 1556 goto reset_slave;
1377 slave_state[slave].vhcr_dma |= param; 1557 slave_state[slave].vhcr_dma |= param;
1378 slave_state[slave].active = true; 1558 slave_state[slave].active = true;
1559 mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave);
1379 break; 1560 break;
1380 case MLX4_COMM_CMD_VHCR_POST: 1561 case MLX4_COMM_CMD_VHCR_POST:
1381 if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) && 1562 if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) &&
1382 (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) 1563 (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST))
1383 goto reset_slave; 1564 goto reset_slave;
1384 down(&priv->cmd.slave_sem); 1565
1566 mutex_lock(&priv->cmd.slave_cmd_mutex);
1385 if (mlx4_master_process_vhcr(dev, slave, NULL)) { 1567 if (mlx4_master_process_vhcr(dev, slave, NULL)) {
1386 mlx4_err(dev, "Failed processing vhcr for slave:%d," 1568 mlx4_err(dev, "Failed processing vhcr for slave:%d,"
1387 " resetting slave.\n", slave); 1569 " resetting slave.\n", slave);
1388 up(&priv->cmd.slave_sem); 1570 mutex_unlock(&priv->cmd.slave_cmd_mutex);
1389 goto reset_slave; 1571 goto reset_slave;
1390 } 1572 }
1391 up(&priv->cmd.slave_sem); 1573 mutex_unlock(&priv->cmd.slave_cmd_mutex);
1392 break; 1574 break;
1393 default: 1575 default:
1394 mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave); 1576 mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave);
@@ -1529,14 +1711,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
1529 struct mlx4_slave_state *s_state; 1711 struct mlx4_slave_state *s_state;
1530 int i, j, err, port; 1712 int i, j, err, port;
1531 1713
1532 priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE,
1533 &priv->mfunc.vhcr_dma,
1534 GFP_KERNEL);
1535 if (!priv->mfunc.vhcr) {
1536 mlx4_err(dev, "Couldn't allocate vhcr.\n");
1537 return -ENOMEM;
1538 }
1539
1540 if (mlx4_is_master(dev)) 1714 if (mlx4_is_master(dev))
1541 priv->mfunc.comm = 1715 priv->mfunc.comm =
1542 ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) + 1716 ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) +
@@ -1590,6 +1764,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
1590 INIT_WORK(&priv->mfunc.master.slave_flr_event_work, 1764 INIT_WORK(&priv->mfunc.master.slave_flr_event_work,
1591 mlx4_master_handle_slave_flr); 1765 mlx4_master_handle_slave_flr);
1592 spin_lock_init(&priv->mfunc.master.slave_state_lock); 1766 spin_lock_init(&priv->mfunc.master.slave_state_lock);
1767 spin_lock_init(&priv->mfunc.master.slave_eq.event_lock);
1593 priv->mfunc.master.comm_wq = 1768 priv->mfunc.master.comm_wq =
1594 create_singlethread_workqueue("mlx4_comm"); 1769 create_singlethread_workqueue("mlx4_comm");
1595 if (!priv->mfunc.master.comm_wq) 1770 if (!priv->mfunc.master.comm_wq)
@@ -1598,7 +1773,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
1598 if (mlx4_init_resource_tracker(dev)) 1773 if (mlx4_init_resource_tracker(dev))
1599 goto err_thread; 1774 goto err_thread;
1600 1775
1601 sema_init(&priv->cmd.slave_sem, 1);
1602 err = mlx4_ARM_COMM_CHANNEL(dev); 1776 err = mlx4_ARM_COMM_CHANNEL(dev);
1603 if (err) { 1777 if (err) {
1604 mlx4_err(dev, " Failed to arm comm channel eq: %x\n", 1778 mlx4_err(dev, " Failed to arm comm channel eq: %x\n",
@@ -1612,8 +1786,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
1612 mlx4_err(dev, "Couldn't sync toggles\n"); 1786 mlx4_err(dev, "Couldn't sync toggles\n");
1613 goto err_comm; 1787 goto err_comm;
1614 } 1788 }
1615
1616 sema_init(&priv->cmd.slave_sem, 1);
1617 } 1789 }
1618 return 0; 1790 return 0;
1619 1791
@@ -1643,6 +1815,7 @@ int mlx4_cmd_init(struct mlx4_dev *dev)
1643 struct mlx4_priv *priv = mlx4_priv(dev); 1815 struct mlx4_priv *priv = mlx4_priv(dev);
1644 1816
1645 mutex_init(&priv->cmd.hcr_mutex); 1817 mutex_init(&priv->cmd.hcr_mutex);
1818 mutex_init(&priv->cmd.slave_cmd_mutex);
1646 sema_init(&priv->cmd.poll_sem, 1); 1819 sema_init(&priv->cmd.poll_sem, 1);
1647 priv->cmd.use_events = 0; 1820 priv->cmd.use_events = 0;
1648 priv->cmd.toggle = 1; 1821 priv->cmd.toggle = 1;
@@ -1659,14 +1832,30 @@ int mlx4_cmd_init(struct mlx4_dev *dev)
1659 } 1832 }
1660 } 1833 }
1661 1834
1835 if (mlx4_is_mfunc(dev)) {
1836 priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE,
1837 &priv->mfunc.vhcr_dma,
1838 GFP_KERNEL);
1839 if (!priv->mfunc.vhcr) {
1840 mlx4_err(dev, "Couldn't allocate VHCR.\n");
1841 goto err_hcr;
1842 }
1843 }
1844
1662 priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, 1845 priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev,
1663 MLX4_MAILBOX_SIZE, 1846 MLX4_MAILBOX_SIZE,
1664 MLX4_MAILBOX_SIZE, 0); 1847 MLX4_MAILBOX_SIZE, 0);
1665 if (!priv->cmd.pool) 1848 if (!priv->cmd.pool)
1666 goto err_hcr; 1849 goto err_vhcr;
1667 1850
1668 return 0; 1851 return 0;
1669 1852
1853err_vhcr:
1854 if (mlx4_is_mfunc(dev))
1855 dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
1856 priv->mfunc.vhcr, priv->mfunc.vhcr_dma);
1857 priv->mfunc.vhcr = NULL;
1858
1670err_hcr: 1859err_hcr:
1671 if (!mlx4_is_slave(dev)) 1860 if (!mlx4_is_slave(dev))
1672 iounmap(priv->cmd.hcr); 1861 iounmap(priv->cmd.hcr);
@@ -1689,9 +1878,6 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
1689 } 1878 }
1690 1879
1691 iounmap(priv->mfunc.comm); 1880 iounmap(priv->mfunc.comm);
1692 dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
1693 priv->mfunc.vhcr, priv->mfunc.vhcr_dma);
1694 priv->mfunc.vhcr = NULL;
1695} 1881}
1696 1882
1697void mlx4_cmd_cleanup(struct mlx4_dev *dev) 1883void mlx4_cmd_cleanup(struct mlx4_dev *dev)
@@ -1702,6 +1888,10 @@ void mlx4_cmd_cleanup(struct mlx4_dev *dev)
1702 1888
1703 if (!mlx4_is_slave(dev)) 1889 if (!mlx4_is_slave(dev))
1704 iounmap(priv->cmd.hcr); 1890 iounmap(priv->cmd.hcr);
1891 if (mlx4_is_mfunc(dev))
1892 dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
1893 priv->mfunc.vhcr, priv->mfunc.vhcr_dma);
1894 priv->mfunc.vhcr = NULL;
1705} 1895}
1706 1896
1707/* 1897/*
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 99a04648fab..51c764901ad 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -164,13 +164,16 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
164{ 164{
165 struct mlx4_priv *priv = mlx4_priv(dev); 165 struct mlx4_priv *priv = mlx4_priv(dev);
166 struct mlx4_slave_event_eq *slave_eq = &priv->mfunc.master.slave_eq; 166 struct mlx4_slave_event_eq *slave_eq = &priv->mfunc.master.slave_eq;
167 struct mlx4_eqe *s_eqe = 167 struct mlx4_eqe *s_eqe;
168 &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)]; 168 unsigned long flags;
169 169
170 spin_lock_irqsave(&slave_eq->event_lock, flags);
171 s_eqe = &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)];
170 if ((!!(s_eqe->owner & 0x80)) ^ 172 if ((!!(s_eqe->owner & 0x80)) ^
171 (!!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE))) { 173 (!!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE))) {
172 mlx4_warn(dev, "Master failed to generate an EQE for slave: %d. " 174 mlx4_warn(dev, "Master failed to generate an EQE for slave: %d. "
173 "No free EQE on slave events queue\n", slave); 175 "No free EQE on slave events queue\n", slave);
176 spin_unlock_irqrestore(&slave_eq->event_lock, flags);
174 return; 177 return;
175 } 178 }
176 179
@@ -183,6 +186,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
183 186
184 queue_work(priv->mfunc.master.comm_wq, 187 queue_work(priv->mfunc.master.comm_wq,
185 &priv->mfunc.master.slave_event_work); 188 &priv->mfunc.master.slave_event_work);
189 spin_unlock_irqrestore(&slave_eq->event_lock, flags);
186} 190}
187 191
188static void mlx4_slave_event(struct mlx4_dev *dev, int slave, 192static void mlx4_slave_event(struct mlx4_dev *dev, int slave,
@@ -200,6 +204,196 @@ static void mlx4_slave_event(struct mlx4_dev *dev, int slave,
200 slave_event(dev, slave, eqe); 204 slave_event(dev, slave, eqe);
201} 205}
202 206
207int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port)
208{
209 struct mlx4_eqe eqe;
210
211 struct mlx4_priv *priv = mlx4_priv(dev);
212 struct mlx4_slave_state *s_slave = &priv->mfunc.master.slave_state[slave];
213
214 if (!s_slave->active)
215 return 0;
216
217 memset(&eqe, 0, sizeof eqe);
218
219 eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
220 eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE;
221 eqe.event.port_mgmt_change.port = port;
222
223 return mlx4_GEN_EQE(dev, slave, &eqe);
224}
225EXPORT_SYMBOL(mlx4_gen_pkey_eqe);
226
227int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port)
228{
229 struct mlx4_eqe eqe;
230
231 /*don't send if we don't have the that slave */
232 if (dev->num_vfs < slave)
233 return 0;
234 memset(&eqe, 0, sizeof eqe);
235
236 eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
237 eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO;
238 eqe.event.port_mgmt_change.port = port;
239
240 return mlx4_GEN_EQE(dev, slave, &eqe);
241}
242EXPORT_SYMBOL(mlx4_gen_guid_change_eqe);
243
244int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port,
245 u8 port_subtype_change)
246{
247 struct mlx4_eqe eqe;
248
249 /*don't send if we don't have the that slave */
250 if (dev->num_vfs < slave)
251 return 0;
252 memset(&eqe, 0, sizeof eqe);
253
254 eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE;
255 eqe.subtype = port_subtype_change;
256 eqe.event.port_change.port = cpu_to_be32(port << 28);
257
258 mlx4_dbg(dev, "%s: sending: %d to slave: %d on port: %d\n", __func__,
259 port_subtype_change, slave, port);
260 return mlx4_GEN_EQE(dev, slave, &eqe);
261}
262EXPORT_SYMBOL(mlx4_gen_port_state_change_eqe);
263
264enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port)
265{
266 struct mlx4_priv *priv = mlx4_priv(dev);
267 struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state;
268 if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS) {
269 pr_err("%s: Error: asking for slave:%d, port:%d\n",
270 __func__, slave, port);
271 return SLAVE_PORT_DOWN;
272 }
273 return s_state[slave].port_state[port];
274}
275EXPORT_SYMBOL(mlx4_get_slave_port_state);
276
277static int mlx4_set_slave_port_state(struct mlx4_dev *dev, int slave, u8 port,
278 enum slave_port_state state)
279{
280 struct mlx4_priv *priv = mlx4_priv(dev);
281 struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state;
282
283 if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) {
284 pr_err("%s: Error: asking for slave:%d, port:%d\n",
285 __func__, slave, port);
286 return -1;
287 }
288 s_state[slave].port_state[port] = state;
289
290 return 0;
291}
292
293static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event)
294{
295 int i;
296 enum slave_port_gen_event gen_event;
297
298 for (i = 0; i < dev->num_slaves; i++)
299 set_and_calc_slave_port_state(dev, i, port, event, &gen_event);
300}
301/**************************************************************************
302 The function get as input the new event to that port,
303 and according to the prev state change the slave's port state.
304 The events are:
305 MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
306 MLX4_PORT_STATE_DEV_EVENT_PORT_UP
307 MLX4_PORT_STATE_IB_EVENT_GID_VALID
308 MLX4_PORT_STATE_IB_EVENT_GID_INVALID
309***************************************************************************/
310int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave,
311 u8 port, int event,
312 enum slave_port_gen_event *gen_event)
313{
314 struct mlx4_priv *priv = mlx4_priv(dev);
315 struct mlx4_slave_state *ctx = NULL;
316 unsigned long flags;
317 int ret = -1;
318 enum slave_port_state cur_state =
319 mlx4_get_slave_port_state(dev, slave, port);
320
321 *gen_event = SLAVE_PORT_GEN_EVENT_NONE;
322
323 if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) {
324 pr_err("%s: Error: asking for slave:%d, port:%d\n",
325 __func__, slave, port);
326 return ret;
327 }
328
329 ctx = &priv->mfunc.master.slave_state[slave];
330 spin_lock_irqsave(&ctx->lock, flags);
331
332 mlx4_dbg(dev, "%s: slave: %d, current state: %d new event :%d\n",
333 __func__, slave, cur_state, event);
334
335 switch (cur_state) {
336 case SLAVE_PORT_DOWN:
337 if (MLX4_PORT_STATE_DEV_EVENT_PORT_UP == event)
338 mlx4_set_slave_port_state(dev, slave, port,
339 SLAVE_PENDING_UP);
340 break;
341 case SLAVE_PENDING_UP:
342 if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event)
343 mlx4_set_slave_port_state(dev, slave, port,
344 SLAVE_PORT_DOWN);
345 else if (MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID == event) {
346 mlx4_set_slave_port_state(dev, slave, port,
347 SLAVE_PORT_UP);
348 *gen_event = SLAVE_PORT_GEN_EVENT_UP;
349 }
350 break;
351 case SLAVE_PORT_UP:
352 if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) {
353 mlx4_set_slave_port_state(dev, slave, port,
354 SLAVE_PORT_DOWN);
355 *gen_event = SLAVE_PORT_GEN_EVENT_DOWN;
356 } else if (MLX4_PORT_STATE_IB_EVENT_GID_INVALID ==
357 event) {
358 mlx4_set_slave_port_state(dev, slave, port,
359 SLAVE_PENDING_UP);
360 *gen_event = SLAVE_PORT_GEN_EVENT_DOWN;
361 }
362 break;
363 default:
364 pr_err("%s: BUG!!! UNKNOWN state: "
365 "slave:%d, port:%d\n", __func__, slave, port);
366 goto out;
367 }
368 ret = mlx4_get_slave_port_state(dev, slave, port);
369 mlx4_dbg(dev, "%s: slave: %d, current state: %d new event"
370 " :%d gen_event: %d\n",
371 __func__, slave, cur_state, event, *gen_event);
372
373out:
374 spin_unlock_irqrestore(&ctx->lock, flags);
375 return ret;
376}
377
378EXPORT_SYMBOL(set_and_calc_slave_port_state);
379
380int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr)
381{
382 struct mlx4_eqe eqe;
383
384 memset(&eqe, 0, sizeof eqe);
385
386 eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
387 eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO;
388 eqe.event.port_mgmt_change.port = port;
389 eqe.event.port_mgmt_change.params.port_info.changed_attr =
390 cpu_to_be32((u32) attr);
391
392 slave_event(dev, ALL_SLAVES, &eqe);
393 return 0;
394}
395EXPORT_SYMBOL(mlx4_gen_slaves_port_mgt_ev);
396
203void mlx4_master_handle_slave_flr(struct work_struct *work) 397void mlx4_master_handle_slave_flr(struct work_struct *work)
204{ 398{
205 struct mlx4_mfunc_master_ctx *master = 399 struct mlx4_mfunc_master_ctx *master =
@@ -251,6 +445,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
251 u32 flr_slave; 445 u32 flr_slave;
252 u8 update_slave_state; 446 u8 update_slave_state;
253 int i; 447 int i;
448 enum slave_port_gen_event gen_event;
254 449
255 while ((eqe = next_eqe_sw(eq))) { 450 while ((eqe = next_eqe_sw(eq))) {
256 /* 451 /*
@@ -347,35 +542,49 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
347 case MLX4_EVENT_TYPE_PORT_CHANGE: 542 case MLX4_EVENT_TYPE_PORT_CHANGE:
348 port = be32_to_cpu(eqe->event.port_change.port) >> 28; 543 port = be32_to_cpu(eqe->event.port_change.port) >> 28;
349 if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) { 544 if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) {
350 mlx4_dispatch_event(dev, 545 mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN,
351 MLX4_DEV_EVENT_PORT_DOWN,
352 port); 546 port);
353 mlx4_priv(dev)->sense.do_sense_port[port] = 1; 547 mlx4_priv(dev)->sense.do_sense_port[port] = 1;
354 if (mlx4_is_master(dev)) 548 if (!mlx4_is_master(dev))
355 /*change the state of all slave's port 549 break;
356 * to down:*/ 550 for (i = 0; i < dev->num_slaves; i++) {
357 for (i = 0; i < dev->num_slaves; i++) { 551 if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) {
358 mlx4_dbg(dev, "%s: Sending " 552 if (i == mlx4_master_func_num(dev))
359 "MLX4_PORT_CHANGE_SUBTYPE_DOWN" 553 continue;
554 mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN"
360 " to slave: %d, port:%d\n", 555 " to slave: %d, port:%d\n",
361 __func__, i, port); 556 __func__, i, port);
362 if (i == dev->caps.function)
363 continue;
364 mlx4_slave_event(dev, i, eqe); 557 mlx4_slave_event(dev, i, eqe);
558 } else { /* IB port */
559 set_and_calc_slave_port_state(dev, i, port,
560 MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
561 &gen_event);
562 /*we can be in pending state, then do not send port_down event*/
563 if (SLAVE_PORT_GEN_EVENT_DOWN == gen_event) {
564 if (i == mlx4_master_func_num(dev))
565 continue;
566 mlx4_slave_event(dev, i, eqe);
567 }
365 } 568 }
569 }
366 } else { 570 } else {
367 mlx4_dispatch_event(dev, 571 mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP, port);
368 MLX4_DEV_EVENT_PORT_UP, 572
369 port);
370 mlx4_priv(dev)->sense.do_sense_port[port] = 0; 573 mlx4_priv(dev)->sense.do_sense_port[port] = 0;
371 574
372 if (mlx4_is_master(dev)) { 575 if (!mlx4_is_master(dev))
576 break;
577 if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
373 for (i = 0; i < dev->num_slaves; i++) { 578 for (i = 0; i < dev->num_slaves; i++) {
374 if (i == dev->caps.function) 579 if (i == mlx4_master_func_num(dev))
375 continue; 580 continue;
376 mlx4_slave_event(dev, i, eqe); 581 mlx4_slave_event(dev, i, eqe);
377 } 582 }
378 } 583 else /* IB port */
584 /* port-up event will be sent to a slave when the
585 * slave's alias-guid is set. This is done in alias_GUID.c
586 */
587 set_all_slave_state(dev, port, MLX4_DEV_EVENT_PORT_UP);
379 } 588 }
380 break; 589 break;
381 590
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index c6964848732..4f30b99324c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -183,7 +183,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
183#define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x24 183#define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x24
184#define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x28 184#define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x28
185#define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c 185#define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c
186#define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0X30 186#define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0x30
187 187
188#define QUERY_FUNC_CAP_FMR_FLAG 0x80 188#define QUERY_FUNC_CAP_FMR_FLAG 0x80
189#define QUERY_FUNC_CAP_FLAG_RDMA 0x40 189#define QUERY_FUNC_CAP_FLAG_RDMA 0x40
@@ -194,21 +194,39 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
194#define QUERY_FUNC_CAP_RDMA_PROPS_OFFSET 0x8 194#define QUERY_FUNC_CAP_RDMA_PROPS_OFFSET 0x8
195#define QUERY_FUNC_CAP_ETH_PROPS_OFFSET 0xc 195#define QUERY_FUNC_CAP_ETH_PROPS_OFFSET 0xc
196 196
197#define QUERY_FUNC_CAP_QP0_TUNNEL 0x10
198#define QUERY_FUNC_CAP_QP0_PROXY 0x14
199#define QUERY_FUNC_CAP_QP1_TUNNEL 0x18
200#define QUERY_FUNC_CAP_QP1_PROXY 0x1c
201
197#define QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC 0x40 202#define QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC 0x40
198#define QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN 0x80 203#define QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN 0x80
199 204
200#define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80 205#define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80
201 206
202 if (vhcr->op_modifier == 1) { 207 if (vhcr->op_modifier == 1) {
203 field = vhcr->in_modifier;
204 MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
205
206 field = 0; 208 field = 0;
207 /* ensure force vlan and force mac bits are not set */ 209 /* ensure force vlan and force mac bits are not set */
208 MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); 210 MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_ETH_PROPS_OFFSET);
209 /* ensure that phy_wqe_gid bit is not set */ 211 /* ensure that phy_wqe_gid bit is not set */
210 MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); 212 MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET);
211 213
214 field = vhcr->in_modifier; /* phys-port = logical-port */
215 MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
216
217 /* size is now the QP number */
218 size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1;
219 MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL);
220
221 size += 2;
222 MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL);
223
224 size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1;
225 MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY);
226
227 size += 2;
228 MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY);
229
212 } else if (vhcr->op_modifier == 0) { 230 } else if (vhcr->op_modifier == 0) {
213 /* enable rdma and ethernet interfaces */ 231 /* enable rdma and ethernet interfaces */
214 field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA); 232 field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA);
@@ -253,99 +271,118 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
253 return err; 271 return err;
254} 272}
255 273
256int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap) 274int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
275 struct mlx4_func_cap *func_cap)
257{ 276{
258 struct mlx4_cmd_mailbox *mailbox; 277 struct mlx4_cmd_mailbox *mailbox;
259 u32 *outbox; 278 u32 *outbox;
260 u8 field; 279 u8 field, op_modifier;
261 u32 size; 280 u32 size;
262 int i;
263 int err = 0; 281 int err = 0;
264 282
283 op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */
265 284
266 mailbox = mlx4_alloc_cmd_mailbox(dev); 285 mailbox = mlx4_alloc_cmd_mailbox(dev);
267 if (IS_ERR(mailbox)) 286 if (IS_ERR(mailbox))
268 return PTR_ERR(mailbox); 287 return PTR_ERR(mailbox);
269 288
270 err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FUNC_CAP, 289 err = mlx4_cmd_box(dev, 0, mailbox->dma, gen_or_port, op_modifier,
290 MLX4_CMD_QUERY_FUNC_CAP,
271 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); 291 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
272 if (err) 292 if (err)
273 goto out; 293 goto out;
274 294
275 outbox = mailbox->buf; 295 outbox = mailbox->buf;
276 296
277 MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET); 297 if (!op_modifier) {
278 if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) { 298 MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET);
279 mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n"); 299 if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) {
280 err = -EPROTONOSUPPORT; 300 mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n");
281 goto out; 301 err = -EPROTONOSUPPORT;
282 } 302 goto out;
283 func_cap->flags = field; 303 }
304 func_cap->flags = field;
305
306 MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
307 func_cap->num_ports = field;
284 308
285 MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); 309 MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
286 func_cap->num_ports = field; 310 func_cap->pf_context_behaviour = size;
287 311
288 MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET); 312 MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET);
289 func_cap->pf_context_behaviour = size; 313 func_cap->qp_quota = size & 0xFFFFFF;
290 314
291 MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); 315 MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET);
292 func_cap->qp_quota = size & 0xFFFFFF; 316 func_cap->srq_quota = size & 0xFFFFFF;
293 317
294 MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); 318 MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET);
295 func_cap->srq_quota = size & 0xFFFFFF; 319 func_cap->cq_quota = size & 0xFFFFFF;
296 320
297 MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); 321 MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
298 func_cap->cq_quota = size & 0xFFFFFF; 322 func_cap->max_eq = size & 0xFFFFFF;
299 323
300 MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET); 324 MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
301 func_cap->max_eq = size & 0xFFFFFF; 325 func_cap->reserved_eq = size & 0xFFFFFF;
302 326
303 MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); 327 MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
304 func_cap->reserved_eq = size & 0xFFFFFF; 328 func_cap->mpt_quota = size & 0xFFFFFF;
305 329
306 MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); 330 MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET);
307 func_cap->mpt_quota = size & 0xFFFFFF; 331 func_cap->mtt_quota = size & 0xFFFFFF;
308 332
309 MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); 333 MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET);
310 func_cap->mtt_quota = size & 0xFFFFFF; 334 func_cap->mcg_quota = size & 0xFFFFFF;
335 goto out;
336 }
311 337
312 MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); 338 /* logical port query */
313 func_cap->mcg_quota = size & 0xFFFFFF; 339 if (gen_or_port > dev->caps.num_ports) {
340 err = -EINVAL;
341 goto out;
342 }
314 343
315 for (i = 1; i <= func_cap->num_ports; ++i) { 344 if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) {
316 err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 1, 345 MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET);
317 MLX4_CMD_QUERY_FUNC_CAP, 346 if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) {
318 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); 347 mlx4_err(dev, "VLAN is enforced on this port\n");
319 if (err) 348 err = -EPROTONOSUPPORT;
320 goto out; 349 goto out;
350 }
321 351
322 if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) { 352 if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) {
323 MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); 353 mlx4_err(dev, "Force mac is enabled on this port\n");
324 if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) { 354 err = -EPROTONOSUPPORT;
325 mlx4_err(dev, "VLAN is enforced on this port\n"); 355 goto out;
326 err = -EPROTONOSUPPORT;
327 goto out;
328 }
329
330 if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) {
331 mlx4_err(dev, "Force mac is enabled on this port\n");
332 err = -EPROTONOSUPPORT;
333 goto out;
334 }
335 } else if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB) {
336 MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET);
337 if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) {
338 mlx4_err(dev, "phy_wqe_gid is "
339 "enforced on this ib port\n");
340 err = -EPROTONOSUPPORT;
341 goto out;
342 }
343 } 356 }
357 } else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) {
358 MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET);
359 if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) {
360 mlx4_err(dev, "phy_wqe_gid is "
361 "enforced on this ib port\n");
362 err = -EPROTONOSUPPORT;
363 goto out;
364 }
365 }
344 366
345 MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); 367 MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
346 func_cap->physical_port[i] = field; 368 func_cap->physical_port = field;
369 if (func_cap->physical_port != gen_or_port) {
370 err = -ENOSYS;
371 goto out;
347 } 372 }
348 373
374 MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL);
375 func_cap->qp0_tunnel_qpn = size & 0xFFFFFF;
376
377 MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY);
378 func_cap->qp0_proxy_qpn = size & 0xFFFFFF;
379
380 MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL);
381 func_cap->qp1_tunnel_qpn = size & 0xFFFFFF;
382
383 MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY);
384 func_cap->qp1_proxy_qpn = size & 0xFFFFFF;
385
349 /* All other resources are allocated by the master, but we still report 386 /* All other resources are allocated by the master, but we still report
350 * 'num' and 'reserved' capabilities as follows: 387 * 'num' and 'reserved' capabilities as follows:
351 * - num remains the maximum resource index 388 * - num remains the maximum resource index
@@ -559,7 +596,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
559 dev_cap->max_pds = 1 << (field & 0x3f); 596 dev_cap->max_pds = 1 << (field & 0x3f);
560 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_XRC_OFFSET); 597 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_XRC_OFFSET);
561 dev_cap->reserved_xrcds = field >> 4; 598 dev_cap->reserved_xrcds = field >> 4;
562 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET); 599 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_XRC_OFFSET);
563 dev_cap->max_xrcds = 1 << (field & 0x1f); 600 dev_cap->max_xrcds = 1 << (field & 0x1f);
564 601
565 MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET); 602 MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET);
@@ -715,6 +752,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
715 struct mlx4_cmd_mailbox *outbox, 752 struct mlx4_cmd_mailbox *outbox,
716 struct mlx4_cmd_info *cmd) 753 struct mlx4_cmd_info *cmd)
717{ 754{
755 u64 flags;
718 int err = 0; 756 int err = 0;
719 u8 field; 757 u8 field;
720 758
@@ -723,6 +761,11 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
723 if (err) 761 if (err)
724 return err; 762 return err;
725 763
764 /* add port mng change event capability unconditionally to slaves */
765 MLX4_GET(flags, outbox->buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
766 flags |= MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV;
767 MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
768
726 /* For guests, report Blueflame disabled */ 769 /* For guests, report Blueflame disabled */
727 MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET); 770 MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET);
728 field &= 0x7f; 771 field &= 0x7f;
@@ -1345,6 +1388,19 @@ out:
1345 return err; 1388 return err;
1346} 1389}
1347 1390
1391/* for IB-type ports only in SRIOV mode. Checks that both proxy QP0
1392 * and real QP0 are active, so that the paravirtualized QP0 is ready
1393 * to operate */
1394static int check_qp0_state(struct mlx4_dev *dev, int function, int port)
1395{
1396 struct mlx4_priv *priv = mlx4_priv(dev);
1397 /* irrelevant if not infiniband */
1398 if (priv->mfunc.master.qp0_state[port].proxy_qp0_active &&
1399 priv->mfunc.master.qp0_state[port].qp0_active)
1400 return 1;
1401 return 0;
1402}
1403
1348int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, 1404int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave,
1349 struct mlx4_vhcr *vhcr, 1405 struct mlx4_vhcr *vhcr,
1350 struct mlx4_cmd_mailbox *inbox, 1406 struct mlx4_cmd_mailbox *inbox,
@@ -1358,17 +1414,29 @@ int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave,
1358 if (priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port)) 1414 if (priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port))
1359 return 0; 1415 return 0;
1360 1416
1361 if (dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB) 1417 if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) {
1362 return -ENODEV; 1418 /* Enable port only if it was previously disabled */
1363 1419 if (!priv->mfunc.master.init_port_ref[port]) {
1364 /* Enable port only if it was previously disabled */ 1420 err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
1365 if (!priv->mfunc.master.init_port_ref[port]) { 1421 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
1366 err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, 1422 if (err)
1367 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); 1423 return err;
1368 if (err) 1424 }
1369 return err; 1425 priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
1426 } else {
1427 if (slave == mlx4_master_func_num(dev)) {
1428 if (check_qp0_state(dev, slave, port) &&
1429 !priv->mfunc.master.qp0_state[port].port_active) {
1430 err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
1431 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
1432 if (err)
1433 return err;
1434 priv->mfunc.master.qp0_state[port].port_active = 1;
1435 priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
1436 }
1437 } else
1438 priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
1370 } 1439 }
1371 priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
1372 ++priv->mfunc.master.init_port_ref[port]; 1440 ++priv->mfunc.master.init_port_ref[port];
1373 return 0; 1441 return 0;
1374} 1442}
@@ -1441,15 +1509,29 @@ int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave,
1441 (1 << port))) 1509 (1 << port)))
1442 return 0; 1510 return 0;
1443 1511
1444 if (dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB) 1512 if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) {
1445 return -ENODEV; 1513 if (priv->mfunc.master.init_port_ref[port] == 1) {
1446 if (priv->mfunc.master.init_port_ref[port] == 1) { 1514 err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT,
1447 err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000, 1515 1000, MLX4_CMD_NATIVE);
1448 MLX4_CMD_NATIVE); 1516 if (err)
1449 if (err) 1517 return err;
1450 return err; 1518 }
1519 priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
1520 } else {
1521 /* infiniband port */
1522 if (slave == mlx4_master_func_num(dev)) {
1523 if (!priv->mfunc.master.qp0_state[port].qp0_active &&
1524 priv->mfunc.master.qp0_state[port].port_active) {
1525 err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT,
1526 1000, MLX4_CMD_NATIVE);
1527 if (err)
1528 return err;
1529 priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
1530 priv->mfunc.master.qp0_state[port].port_active = 0;
1531 }
1532 } else
1533 priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
1451 } 1534 }
1452 priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
1453 --priv->mfunc.master.init_port_ref[port]; 1535 --priv->mfunc.master.init_port_ref[port];
1454 return 0; 1536 return 0;
1455} 1537}
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 83fcbbf1b16..85abe9c11a2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -134,8 +134,12 @@ struct mlx4_func_cap {
134 int max_eq; 134 int max_eq;
135 int reserved_eq; 135 int reserved_eq;
136 int mcg_quota; 136 int mcg_quota;
137 u8 physical_port[MLX4_MAX_PORTS + 1]; 137 u32 qp0_tunnel_qpn;
138 u8 port_flags[MLX4_MAX_PORTS + 1]; 138 u32 qp0_proxy_qpn;
139 u32 qp1_tunnel_qpn;
140 u32 qp1_proxy_qpn;
141 u8 physical_port;
142 u8 port_flags;
139}; 143};
140 144
141struct mlx4_adapter { 145struct mlx4_adapter {
@@ -192,7 +196,8 @@ struct mlx4_set_ib_param {
192}; 196};
193 197
194int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap); 198int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
195int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap); 199int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
200 struct mlx4_func_cap *func_cap);
196int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, 201int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
197 struct mlx4_vhcr *vhcr, 202 struct mlx4_vhcr *vhcr,
198 struct mlx4_cmd_mailbox *inbox, 203 struct mlx4_cmd_mailbox *inbox,
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 2f816c6aed7..bc1e5d41c29 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -95,8 +95,6 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
95 " Not in use with device managed" 95 " Not in use with device managed"
96 " flow steering"); 96 " flow steering");
97 97
98#define MLX4_VF (1 << 0)
99
100#define HCA_GLOBAL_CAP_MASK 0 98#define HCA_GLOBAL_CAP_MASK 0
101#define PF_CONTEXT_BEHAVIOUR_MASK 0 99#define PF_CONTEXT_BEHAVIOUR_MASK 0
102 100
@@ -299,9 +297,12 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
299 mlx4_dbg(dev, "Steering mode is: %s\n", 297 mlx4_dbg(dev, "Steering mode is: %s\n",
300 mlx4_steering_mode_str(dev->caps.steering_mode)); 298 mlx4_steering_mode_str(dev->caps.steering_mode));
301 299
302 /* Sense port always allowed on supported devices for ConnectX1 and 2 */ 300 /* Sense port always allowed on supported devices for ConnectX-1 and -2 */
303 if (dev->pdev->device != 0x1003) 301 if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
304 dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; 302 dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
303 /* Don't do sense port on multifunction devices (for now at least) */
304 if (mlx4_is_mfunc(dev))
305 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
305 306
306 dev->caps.log_num_macs = log_num_mac; 307 dev->caps.log_num_macs = log_num_mac;
307 dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; 308 dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
@@ -384,6 +385,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
384 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + 385 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
385 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; 386 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
386 387
388 dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
387 return 0; 389 return 0;
388} 390}
389/*The function checks if there are live vf, return the num of them*/ 391/*The function checks if there are live vf, return the num of them*/
@@ -409,20 +411,54 @@ static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
409int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) 411int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
410{ 412{
411 u32 qk = MLX4_RESERVED_QKEY_BASE; 413 u32 qk = MLX4_RESERVED_QKEY_BASE;
412 if (qpn >= dev->caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || 414
413 qpn < dev->caps.sqp_start) 415 if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
416 qpn < dev->phys_caps.base_proxy_sqpn)
414 return -EINVAL; 417 return -EINVAL;
415 418
416 if (qpn >= dev->caps.base_tunnel_sqpn) 419 if (qpn >= dev->phys_caps.base_tunnel_sqpn)
417 /* tunnel qp */ 420 /* tunnel qp */
418 qk += qpn - dev->caps.base_tunnel_sqpn; 421 qk += qpn - dev->phys_caps.base_tunnel_sqpn;
419 else 422 else
420 qk += qpn - dev->caps.sqp_start; 423 qk += qpn - dev->phys_caps.base_proxy_sqpn;
421 *qkey = qk; 424 *qkey = qk;
422 return 0; 425 return 0;
423} 426}
424EXPORT_SYMBOL(mlx4_get_parav_qkey); 427EXPORT_SYMBOL(mlx4_get_parav_qkey);
425 428
429void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
430{
431 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
432
433 if (!mlx4_is_master(dev))
434 return;
435
436 priv->virt2phys_pkey[slave][port - 1][i] = val;
437}
438EXPORT_SYMBOL(mlx4_sync_pkey_table);
439
440void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
441{
442 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
443
444 if (!mlx4_is_master(dev))
445 return;
446
447 priv->slave_node_guids[slave] = guid;
448}
449EXPORT_SYMBOL(mlx4_put_slave_node_guid);
450
451__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
452{
453 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
454
455 if (!mlx4_is_master(dev))
456 return 0;
457
458 return priv->slave_node_guids[slave];
459}
460EXPORT_SYMBOL(mlx4_get_slave_node_guid);
461
426int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) 462int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
427{ 463{
428 struct mlx4_priv *priv = mlx4_priv(dev); 464 struct mlx4_priv *priv = mlx4_priv(dev);
@@ -493,9 +529,10 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
493 } 529 }
494 530
495 memset(&func_cap, 0, sizeof(func_cap)); 531 memset(&func_cap, 0, sizeof(func_cap));
496 err = mlx4_QUERY_FUNC_CAP(dev, &func_cap); 532 err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
497 if (err) { 533 if (err) {
498 mlx4_err(dev, "QUERY_FUNC_CAP command failed, aborting.\n"); 534 mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n",
535 err);
499 return err; 536 return err;
500 } 537 }
501 538
@@ -523,12 +560,33 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
523 return -ENODEV; 560 return -ENODEV;
524 } 561 }
525 562
563 dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
564 dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
565 dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
566 dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
567
568 if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
569 !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
570 err = -ENOMEM;
571 goto err_mem;
572 }
573
526 for (i = 1; i <= dev->caps.num_ports; ++i) { 574 for (i = 1; i <= dev->caps.num_ports; ++i) {
575 err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap);
576 if (err) {
577 mlx4_err(dev, "QUERY_FUNC_CAP port command failed for"
578 " port %d, aborting (%d).\n", i, err);
579 goto err_mem;
580 }
581 dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
582 dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
583 dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
584 dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
527 dev->caps.port_mask[i] = dev->caps.port_type[i]; 585 dev->caps.port_mask[i] = dev->caps.port_type[i];
528 if (mlx4_get_slave_pkey_gid_tbl_len(dev, i, 586 if (mlx4_get_slave_pkey_gid_tbl_len(dev, i,
529 &dev->caps.gid_table_len[i], 587 &dev->caps.gid_table_len[i],
530 &dev->caps.pkey_table_len[i])) 588 &dev->caps.pkey_table_len[i]))
531 return -ENODEV; 589 goto err_mem;
532 } 590 }
533 591
534 if (dev->caps.uar_page_size * (dev->caps.num_uars - 592 if (dev->caps.uar_page_size * (dev->caps.num_uars -
@@ -538,10 +596,20 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
538 "PCI resource 2 size of 0x%llx, aborting.\n", 596 "PCI resource 2 size of 0x%llx, aborting.\n",
539 dev->caps.uar_page_size * dev->caps.num_uars, 597 dev->caps.uar_page_size * dev->caps.num_uars,
540 (unsigned long long) pci_resource_len(dev->pdev, 2)); 598 (unsigned long long) pci_resource_len(dev->pdev, 2));
541 return -ENODEV; 599 goto err_mem;
542 } 600 }
543 601
544 return 0; 602 return 0;
603
604err_mem:
605 kfree(dev->caps.qp0_tunnel);
606 kfree(dev->caps.qp0_proxy);
607 kfree(dev->caps.qp1_tunnel);
608 kfree(dev->caps.qp1_proxy);
609 dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
610 dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
611
612 return err;
545} 613}
546 614
547/* 615/*
@@ -1092,10 +1160,10 @@ static void mlx4_slave_exit(struct mlx4_dev *dev)
1092{ 1160{
1093 struct mlx4_priv *priv = mlx4_priv(dev); 1161 struct mlx4_priv *priv = mlx4_priv(dev);
1094 1162
1095 down(&priv->cmd.slave_sem); 1163 mutex_lock(&priv->cmd.slave_cmd_mutex);
1096 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) 1164 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
1097 mlx4_warn(dev, "Failed to close slave function.\n"); 1165 mlx4_warn(dev, "Failed to close slave function.\n");
1098 up(&priv->cmd.slave_sem); 1166 mutex_unlock(&priv->cmd.slave_cmd_mutex);
1099} 1167}
1100 1168
1101static int map_bf_area(struct mlx4_dev *dev) 1169static int map_bf_area(struct mlx4_dev *dev)
@@ -1147,7 +1215,7 @@ static int mlx4_init_slave(struct mlx4_dev *dev)
1147 u32 slave_read; 1215 u32 slave_read;
1148 u32 cmd_channel_ver; 1216 u32 cmd_channel_ver;
1149 1217
1150 down(&priv->cmd.slave_sem); 1218 mutex_lock(&priv->cmd.slave_cmd_mutex);
1151 priv->cmd.max_cmds = 1; 1219 priv->cmd.max_cmds = 1;
1152 mlx4_warn(dev, "Sending reset\n"); 1220 mlx4_warn(dev, "Sending reset\n");
1153 ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 1221 ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
@@ -1196,12 +1264,13 @@ static int mlx4_init_slave(struct mlx4_dev *dev)
1196 goto err; 1264 goto err;
1197 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) 1265 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
1198 goto err; 1266 goto err;
1199 up(&priv->cmd.slave_sem); 1267
1268 mutex_unlock(&priv->cmd.slave_cmd_mutex);
1200 return 0; 1269 return 0;
1201 1270
1202err: 1271err:
1203 mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); 1272 mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
1204 up(&priv->cmd.slave_sem); 1273 mutex_unlock(&priv->cmd.slave_cmd_mutex);
1205 return -EIO; 1274 return -EIO;
1206} 1275}
1207 1276
@@ -1848,7 +1917,7 @@ static void mlx4_free_ownership(struct mlx4_dev *dev)
1848 iounmap(owner); 1917 iounmap(owner);
1849} 1918}
1850 1919
1851static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) 1920static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
1852{ 1921{
1853 struct mlx4_priv *priv; 1922 struct mlx4_priv *priv;
1854 struct mlx4_dev *dev; 1923 struct mlx4_dev *dev;
@@ -1871,12 +1940,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
1871 /* 1940 /*
1872 * Check for BARs. 1941 * Check for BARs.
1873 */ 1942 */
1874 if (((id == NULL) || !(id->driver_data & MLX4_VF)) && 1943 if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
1875 !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { 1944 !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
1876 dev_err(&pdev->dev, "Missing DCS, aborting." 1945 dev_err(&pdev->dev, "Missing DCS, aborting."
1877 "(id == 0X%p, id->driver_data: 0x%lx," 1946 "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
1878 " pci_resource_flags(pdev, 0):0x%lx)\n", id, 1947 pci_dev_data, pci_resource_flags(pdev, 0));
1879 id ? id->driver_data : 0, pci_resource_flags(pdev, 0));
1880 err = -ENODEV; 1948 err = -ENODEV;
1881 goto err_disable_pdev; 1949 goto err_disable_pdev;
1882 } 1950 }
@@ -1941,7 +2009,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
1941 2009
1942 dev->rev_id = pdev->revision; 2010 dev->rev_id = pdev->revision;
1943 /* Detect if this device is a virtual function */ 2011 /* Detect if this device is a virtual function */
1944 if (id && id->driver_data & MLX4_VF) { 2012 if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
1945 /* When acting as pf, we normally skip vfs unless explicitly 2013 /* When acting as pf, we normally skip vfs unless explicitly
1946 * requested to probe them. */ 2014 * requested to probe them. */
1947 if (num_vfs && extended_func_num(pdev) > probe_vf) { 2015 if (num_vfs && extended_func_num(pdev) > probe_vf) {
@@ -1969,12 +2037,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
1969 } 2037 }
1970 2038
1971 if (num_vfs) { 2039 if (num_vfs) {
1972 mlx4_warn(dev, "Enabling sriov with:%d vfs\n", num_vfs); 2040 mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", num_vfs);
1973 err = pci_enable_sriov(pdev, num_vfs); 2041 err = pci_enable_sriov(pdev, num_vfs);
1974 if (err) { 2042 if (err) {
1975 mlx4_err(dev, "Failed to enable sriov," 2043 mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n",
1976 "continuing without sriov enabled" 2044 err);
1977 " (err = %d).\n", err);
1978 err = 0; 2045 err = 0;
1979 } else { 2046 } else {
1980 mlx4_warn(dev, "Running in master mode\n"); 2047 mlx4_warn(dev, "Running in master mode\n");
@@ -2089,6 +2156,7 @@ slave_start:
2089 mlx4_sense_init(dev); 2156 mlx4_sense_init(dev);
2090 mlx4_start_sense(dev); 2157 mlx4_start_sense(dev);
2091 2158
2159 priv->pci_dev_data = pci_dev_data;
2092 pci_set_drvdata(pdev, dev); 2160 pci_set_drvdata(pdev, dev);
2093 2161
2094 return 0; 2162 return 0;
@@ -2158,7 +2226,7 @@ static int __devinit mlx4_init_one(struct pci_dev *pdev,
2158{ 2226{
2159 printk_once(KERN_INFO "%s", mlx4_version); 2227 printk_once(KERN_INFO "%s", mlx4_version);
2160 2228
2161 return __mlx4_init_one(pdev, id); 2229 return __mlx4_init_one(pdev, id->driver_data);
2162} 2230}
2163 2231
2164static void mlx4_remove_one(struct pci_dev *pdev) 2232static void mlx4_remove_one(struct pci_dev *pdev)
@@ -2217,12 +2285,18 @@ static void mlx4_remove_one(struct pci_dev *pdev)
2217 if (dev->flags & MLX4_FLAG_MSI_X) 2285 if (dev->flags & MLX4_FLAG_MSI_X)
2218 pci_disable_msix(pdev); 2286 pci_disable_msix(pdev);
2219 if (dev->flags & MLX4_FLAG_SRIOV) { 2287 if (dev->flags & MLX4_FLAG_SRIOV) {
2220 mlx4_warn(dev, "Disabling sriov\n"); 2288 mlx4_warn(dev, "Disabling SR-IOV\n");
2221 pci_disable_sriov(pdev); 2289 pci_disable_sriov(pdev);
2222 } 2290 }
2223 2291
2224 if (!mlx4_is_slave(dev)) 2292 if (!mlx4_is_slave(dev))
2225 mlx4_free_ownership(dev); 2293 mlx4_free_ownership(dev);
2294
2295 kfree(dev->caps.qp0_tunnel);
2296 kfree(dev->caps.qp0_proxy);
2297 kfree(dev->caps.qp1_tunnel);
2298 kfree(dev->caps.qp1_proxy);
2299
2226 kfree(priv); 2300 kfree(priv);
2227 pci_release_regions(pdev); 2301 pci_release_regions(pdev);
2228 pci_disable_device(pdev); 2302 pci_disable_device(pdev);
@@ -2232,41 +2306,46 @@ static void mlx4_remove_one(struct pci_dev *pdev)
2232 2306
2233int mlx4_restart_one(struct pci_dev *pdev) 2307int mlx4_restart_one(struct pci_dev *pdev)
2234{ 2308{
2309 struct mlx4_dev *dev = pci_get_drvdata(pdev);
2310 struct mlx4_priv *priv = mlx4_priv(dev);
2311 int pci_dev_data;
2312
2313 pci_dev_data = priv->pci_dev_data;
2235 mlx4_remove_one(pdev); 2314 mlx4_remove_one(pdev);
2236 return __mlx4_init_one(pdev, NULL); 2315 return __mlx4_init_one(pdev, pci_dev_data);
2237} 2316}
2238 2317
2239static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = { 2318static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
2240 /* MT25408 "Hermon" SDR */ 2319 /* MT25408 "Hermon" SDR */
2241 { PCI_VDEVICE(MELLANOX, 0x6340), 0 }, 2320 { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2242 /* MT25408 "Hermon" DDR */ 2321 /* MT25408 "Hermon" DDR */
2243 { PCI_VDEVICE(MELLANOX, 0x634a), 0 }, 2322 { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2244 /* MT25408 "Hermon" QDR */ 2323 /* MT25408 "Hermon" QDR */
2245 { PCI_VDEVICE(MELLANOX, 0x6354), 0 }, 2324 { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2246 /* MT25408 "Hermon" DDR PCIe gen2 */ 2325 /* MT25408 "Hermon" DDR PCIe gen2 */
2247 { PCI_VDEVICE(MELLANOX, 0x6732), 0 }, 2326 { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2248 /* MT25408 "Hermon" QDR PCIe gen2 */ 2327 /* MT25408 "Hermon" QDR PCIe gen2 */
2249 { PCI_VDEVICE(MELLANOX, 0x673c), 0 }, 2328 { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2250 /* MT25408 "Hermon" EN 10GigE */ 2329 /* MT25408 "Hermon" EN 10GigE */
2251 { PCI_VDEVICE(MELLANOX, 0x6368), 0 }, 2330 { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2252 /* MT25408 "Hermon" EN 10GigE PCIe gen2 */ 2331 /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
2253 { PCI_VDEVICE(MELLANOX, 0x6750), 0 }, 2332 { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2254 /* MT25458 ConnectX EN 10GBASE-T 10GigE */ 2333 /* MT25458 ConnectX EN 10GBASE-T 10GigE */
2255 { PCI_VDEVICE(MELLANOX, 0x6372), 0 }, 2334 { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2256 /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */ 2335 /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
2257 { PCI_VDEVICE(MELLANOX, 0x675a), 0 }, 2336 { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2258 /* MT26468 ConnectX EN 10GigE PCIe gen2*/ 2337 /* MT26468 ConnectX EN 10GigE PCIe gen2*/
2259 { PCI_VDEVICE(MELLANOX, 0x6764), 0 }, 2338 { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2260 /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */ 2339 /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
2261 { PCI_VDEVICE(MELLANOX, 0x6746), 0 }, 2340 { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2262 /* MT26478 ConnectX2 40GigE PCIe gen2 */ 2341 /* MT26478 ConnectX2 40GigE PCIe gen2 */
2263 { PCI_VDEVICE(MELLANOX, 0x676e), 0 }, 2342 { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2264 /* MT25400 Family [ConnectX-2 Virtual Function] */ 2343 /* MT25400 Family [ConnectX-2 Virtual Function] */
2265 { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_VF }, 2344 { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF },
2266 /* MT27500 Family [ConnectX-3] */ 2345 /* MT27500 Family [ConnectX-3] */
2267 { PCI_VDEVICE(MELLANOX, 0x1003), 0 }, 2346 { PCI_VDEVICE(MELLANOX, 0x1003), 0 },
2268 /* MT27500 Family [ConnectX-3 Virtual Function] */ 2347 /* MT27500 Family [ConnectX-3 Virtual Function] */
2269 { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_VF }, 2348 { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF },
2270 { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */ 2349 { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
2271 { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */ 2350 { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
2272 { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */ 2351 { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
@@ -2295,7 +2374,7 @@ static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
2295 2374
2296static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) 2375static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
2297{ 2376{
2298 int ret = __mlx4_init_one(pdev, NULL); 2377 int ret = __mlx4_init_one(pdev, 0);
2299 2378
2300 return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; 2379 return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
2301} 2380}
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index dba69d98734..1cf42036d7b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -452,6 +452,7 @@ struct mlx4_slave_state {
452 /*initialized via the kzalloc*/ 452 /*initialized via the kzalloc*/
453 u8 is_slave_going_down; 453 u8 is_slave_going_down;
454 u32 cookie; 454 u32 cookie;
455 enum slave_port_state port_state[MLX4_MAX_PORTS + 1];
455}; 456};
456 457
457struct slave_list { 458struct slave_list {
@@ -472,6 +473,7 @@ struct mlx4_slave_event_eq {
472 u32 eqn; 473 u32 eqn;
473 u32 cons; 474 u32 cons;
474 u32 prod; 475 u32 prod;
476 spinlock_t event_lock;
475 struct mlx4_eqe event_eqe[SLAVE_EVENT_EQ_SIZE]; 477 struct mlx4_eqe event_eqe[SLAVE_EVENT_EQ_SIZE];
476}; 478};
477 479
@@ -511,9 +513,9 @@ struct mlx4_cmd {
511 struct pci_pool *pool; 513 struct pci_pool *pool;
512 void __iomem *hcr; 514 void __iomem *hcr;
513 struct mutex hcr_mutex; 515 struct mutex hcr_mutex;
516 struct mutex slave_cmd_mutex;
514 struct semaphore poll_sem; 517 struct semaphore poll_sem;
515 struct semaphore event_sem; 518 struct semaphore event_sem;
516 struct semaphore slave_sem;
517 int max_cmds; 519 int max_cmds;
518 spinlock_t context_lock; 520 spinlock_t context_lock;
519 int free_head; 521 int free_head;
@@ -766,6 +768,11 @@ struct _rule_hw {
766 }; 768 };
767}; 769};
768 770
771enum {
772 MLX4_PCI_DEV_IS_VF = 1 << 0,
773 MLX4_PCI_DEV_FORCE_SENSE_PORT = 1 << 1,
774};
775
769struct mlx4_priv { 776struct mlx4_priv {
770 struct mlx4_dev dev; 777 struct mlx4_dev dev;
771 778
@@ -773,6 +780,8 @@ struct mlx4_priv {
773 struct list_head ctx_list; 780 struct list_head ctx_list;
774 spinlock_t ctx_lock; 781 spinlock_t ctx_lock;
775 782
783 int pci_dev_data;
784
776 struct list_head pgdir_list; 785 struct list_head pgdir_list;
777 struct mutex pgdir_mutex; 786 struct mutex pgdir_mutex;
778 787
@@ -807,6 +816,9 @@ struct mlx4_priv {
807 struct io_mapping *bf_mapping; 816 struct io_mapping *bf_mapping;
808 int reserved_mtts; 817 int reserved_mtts;
809 int fs_hash_mode; 818 int fs_hash_mode;
819 u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
820 __be64 slave_node_guids[MLX4_MFUNC_MAX];
821
810}; 822};
811 823
812static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) 824static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
@@ -1011,16 +1023,61 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
1011 struct mlx4_cmd_mailbox *inbox, 1023 struct mlx4_cmd_mailbox *inbox,
1012 struct mlx4_cmd_mailbox *outbox, 1024 struct mlx4_cmd_mailbox *outbox,
1013 struct mlx4_cmd_info *cmd); 1025 struct mlx4_cmd_info *cmd);
1026int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
1027 struct mlx4_vhcr *vhcr,
1028 struct mlx4_cmd_mailbox *inbox,
1029 struct mlx4_cmd_mailbox *outbox,
1030 struct mlx4_cmd_info *cmd);
1014int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, 1031int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
1015 struct mlx4_vhcr *vhcr, 1032 struct mlx4_vhcr *vhcr,
1016 struct mlx4_cmd_mailbox *inbox, 1033 struct mlx4_cmd_mailbox *inbox,
1017 struct mlx4_cmd_mailbox *outbox, 1034 struct mlx4_cmd_mailbox *outbox,
1018 struct mlx4_cmd_info *cmd); 1035 struct mlx4_cmd_info *cmd);
1036int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
1037 struct mlx4_vhcr *vhcr,
1038 struct mlx4_cmd_mailbox *inbox,
1039 struct mlx4_cmd_mailbox *outbox,
1040 struct mlx4_cmd_info *cmd);
1041int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
1042 struct mlx4_vhcr *vhcr,
1043 struct mlx4_cmd_mailbox *inbox,
1044 struct mlx4_cmd_mailbox *outbox,
1045 struct mlx4_cmd_info *cmd);
1046int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
1047 struct mlx4_vhcr *vhcr,
1048 struct mlx4_cmd_mailbox *inbox,
1049 struct mlx4_cmd_mailbox *outbox,
1050 struct mlx4_cmd_info *cmd);
1051int mlx4_2ERR_QP_wrapper(struct mlx4_dev *dev, int slave,
1052 struct mlx4_vhcr *vhcr,
1053 struct mlx4_cmd_mailbox *inbox,
1054 struct mlx4_cmd_mailbox *outbox,
1055 struct mlx4_cmd_info *cmd);
1056int mlx4_RTS2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
1057 struct mlx4_vhcr *vhcr,
1058 struct mlx4_cmd_mailbox *inbox,
1059 struct mlx4_cmd_mailbox *outbox,
1060 struct mlx4_cmd_info *cmd);
1061int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
1062 struct mlx4_vhcr *vhcr,
1063 struct mlx4_cmd_mailbox *inbox,
1064 struct mlx4_cmd_mailbox *outbox,
1065 struct mlx4_cmd_info *cmd);
1066int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
1067 struct mlx4_vhcr *vhcr,
1068 struct mlx4_cmd_mailbox *inbox,
1069 struct mlx4_cmd_mailbox *outbox,
1070 struct mlx4_cmd_info *cmd);
1019int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave, 1071int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave,
1020 struct mlx4_vhcr *vhcr, 1072 struct mlx4_vhcr *vhcr,
1021 struct mlx4_cmd_mailbox *inbox, 1073 struct mlx4_cmd_mailbox *inbox,
1022 struct mlx4_cmd_mailbox *outbox, 1074 struct mlx4_cmd_mailbox *outbox,
1023 struct mlx4_cmd_info *cmd); 1075 struct mlx4_cmd_info *cmd);
1076int mlx4_QUERY_QP_wrapper(struct mlx4_dev *dev, int slave,
1077 struct mlx4_vhcr *vhcr,
1078 struct mlx4_cmd_mailbox *inbox,
1079 struct mlx4_cmd_mailbox *outbox,
1080 struct mlx4_cmd_info *cmd);
1024 1081
1025int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe); 1082int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe);
1026 1083
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index e36dd0f2fa7..4c51b05efa2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -732,6 +732,16 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
732 new_cap_mask = ((__be32 *) inbox->buf)[1]; 732 new_cap_mask = ((__be32 *) inbox->buf)[1];
733 } 733 }
734 734
735 /* slave may not set the IS_SM capability for the port */
736 if (slave != mlx4_master_func_num(dev) &&
737 (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_IS_SM))
738 return -EINVAL;
739
740 /* No DEV_MGMT in multifunc mode */
741 if (mlx4_is_mfunc(dev) &&
742 (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_DEV_MGMT_SUP))
743 return -EINVAL;
744
735 agg_cap_mask = 0; 745 agg_cap_mask = 0;
736 slave_cap_mask = 746 slave_cap_mask =
737 priv->mfunc.master.slave_state[slave].ib_cap_mask[port]; 747 priv->mfunc.master.slave_state[slave].ib_cap_mask[port];
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index fb2b36759cb..81e2abe07bb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -67,10 +67,18 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
67 complete(&qp->free); 67 complete(&qp->free);
68} 68}
69 69
70static int is_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp) 70/* used for INIT/CLOSE port logic */
71static int is_master_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0)
71{ 72{
72 return qp->qpn >= dev->caps.sqp_start && 73 /* this procedure is called after we already know we are on the master */
73 qp->qpn <= dev->caps.sqp_start + 1; 74 /* qp0 is either the proxy qp0, or the real qp0 */
75 u32 pf_proxy_offset = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev);
76 *proxy_qp0 = qp->qpn >= pf_proxy_offset && qp->qpn <= pf_proxy_offset + 1;
77
78 *real_qp0 = qp->qpn >= dev->phys_caps.base_sqpn &&
79 qp->qpn <= dev->phys_caps.base_sqpn + 1;
80
81 return *real_qp0 || *proxy_qp0;
74} 82}
75 83
76static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, 84static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
@@ -122,6 +130,8 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
122 struct mlx4_priv *priv = mlx4_priv(dev); 130 struct mlx4_priv *priv = mlx4_priv(dev);
123 struct mlx4_cmd_mailbox *mailbox; 131 struct mlx4_cmd_mailbox *mailbox;
124 int ret = 0; 132 int ret = 0;
133 int real_qp0 = 0;
134 int proxy_qp0 = 0;
125 u8 port; 135 u8 port;
126 136
127 if (cur_state >= MLX4_QP_NUM_STATE || new_state >= MLX4_QP_NUM_STATE || 137 if (cur_state >= MLX4_QP_NUM_STATE || new_state >= MLX4_QP_NUM_STATE ||
@@ -133,9 +143,12 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
133 MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native); 143 MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native);
134 if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR && 144 if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR &&
135 cur_state != MLX4_QP_STATE_RST && 145 cur_state != MLX4_QP_STATE_RST &&
136 is_qp0(dev, qp)) { 146 is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
137 port = (qp->qpn & 1) + 1; 147 port = (qp->qpn & 1) + 1;
138 priv->mfunc.master.qp0_state[port].qp0_active = 0; 148 if (proxy_qp0)
149 priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
150 else
151 priv->mfunc.master.qp0_state[port].qp0_active = 0;
139 } 152 }
140 return ret; 153 return ret;
141 } 154 }
@@ -162,6 +175,23 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
162 new_state == MLX4_QP_STATE_RST ? 2 : 0, 175 new_state == MLX4_QP_STATE_RST ? 2 : 0,
163 op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native); 176 op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native);
164 177
178 if (mlx4_is_master(dev) && is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
179 port = (qp->qpn & 1) + 1;
180 if (cur_state != MLX4_QP_STATE_ERR &&
181 cur_state != MLX4_QP_STATE_RST &&
182 new_state == MLX4_QP_STATE_ERR) {
183 if (proxy_qp0)
184 priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
185 else
186 priv->mfunc.master.qp0_state[port].qp0_active = 0;
187 } else if (new_state == MLX4_QP_STATE_RTR) {
188 if (proxy_qp0)
189 priv->mfunc.master.qp0_state[port].proxy_qp0_active = 1;
190 else
191 priv->mfunc.master.qp0_state[port].qp0_active = 1;
192 }
193 }
194
165 mlx4_free_cmd_mailbox(dev, mailbox); 195 mlx4_free_cmd_mailbox(dev, mailbox);
166 return ret; 196 return ret;
167} 197}
@@ -392,6 +422,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
392 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; 422 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
393 int err; 423 int err;
394 int reserved_from_top = 0; 424 int reserved_from_top = 0;
425 int k;
395 426
396 spin_lock_init(&qp_table->lock); 427 spin_lock_init(&qp_table->lock);
397 INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC); 428 INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC);
@@ -406,7 +437,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
406 * We also reserve the MSB of the 24-bit QP number to indicate 437 * We also reserve the MSB of the 24-bit QP number to indicate
407 * that a QP is an XRC QP. 438 * that a QP is an XRC QP.
408 */ 439 */
409 dev->caps.sqp_start = 440 dev->phys_caps.base_sqpn =
410 ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8); 441 ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8);
411 442
412 { 443 {
@@ -437,13 +468,66 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
437 468
438 } 469 }
439 470
471 /* Reserve 8 real SQPs in both native and SRIOV modes.
472 * In addition, in SRIOV mode, reserve 8 proxy SQPs per function
473 * (for all PFs and VFs), and 8 corresponding tunnel QPs.
474 * Each proxy SQP works opposite its own tunnel QP.
475 *
476 * The QPs are arranged as follows:
477 * a. 8 real SQPs
478 * b. All the proxy SQPs (8 per function)
479 * c. All the tunnel QPs (8 per function)
480 */
481
440 err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps, 482 err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps,
441 (1 << 23) - 1, dev->caps.sqp_start + 8, 483 (1 << 23) - 1, dev->phys_caps.base_sqpn + 8 +
484 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev),
442 reserved_from_top); 485 reserved_from_top);
443 if (err) 486 if (err)
444 return err; 487 return err;
445 488
446 return mlx4_CONF_SPECIAL_QP(dev, dev->caps.sqp_start); 489 if (mlx4_is_mfunc(dev)) {
490 /* for PPF use */
491 dev->phys_caps.base_proxy_sqpn = dev->phys_caps.base_sqpn + 8;
492 dev->phys_caps.base_tunnel_sqpn = dev->phys_caps.base_sqpn + 8 + 8 * MLX4_MFUNC_MAX;
493
494 /* In mfunc, calculate proxy and tunnel qp offsets for the PF here,
495 * since the PF does not call mlx4_slave_caps */
496 dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
497 dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
498 dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
499 dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
500
501 if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
502 !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
503 err = -ENOMEM;
504 goto err_mem;
505 }
506
507 for (k = 0; k < dev->caps.num_ports; k++) {
508 dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn +
509 8 * mlx4_master_func_num(dev) + k;
510 dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX;
511 dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn +
512 8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k;
513 dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX;
514 }
515 }
516
517
518 err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn);
519 if (err)
520 goto err_mem;
521 return 0;
522
523err_mem:
524 kfree(dev->caps.qp0_tunnel);
525 kfree(dev->caps.qp0_proxy);
526 kfree(dev->caps.qp1_tunnel);
527 kfree(dev->caps.qp1_proxy);
528 dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
529 dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
530 return err;
447} 531}
448 532
449void mlx4_cleanup_qp_table(struct mlx4_dev *dev) 533void mlx4_cleanup_qp_table(struct mlx4_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 293c9e820c4..ba6506ff4ab 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -242,6 +242,15 @@ static int res_tracker_insert(struct rb_root *root, struct res_common *res)
242 return 0; 242 return 0;
243} 243}
244 244
245enum qp_transition {
246 QP_TRANS_INIT2RTR,
247 QP_TRANS_RTR2RTS,
248 QP_TRANS_RTS2RTS,
249 QP_TRANS_SQERR2RTS,
250 QP_TRANS_SQD2SQD,
251 QP_TRANS_SQD2RTS
252};
253
245/* For Debug uses */ 254/* For Debug uses */
246static const char *ResourceType(enum mlx4_resource rt) 255static const char *ResourceType(enum mlx4_resource rt)
247{ 256{
@@ -308,14 +317,41 @@ void mlx4_free_resource_tracker(struct mlx4_dev *dev,
308 } 317 }
309} 318}
310 319
311static void update_ud_gid(struct mlx4_dev *dev, 320static void update_pkey_index(struct mlx4_dev *dev, int slave,
312 struct mlx4_qp_context *qp_ctx, u8 slave) 321 struct mlx4_cmd_mailbox *inbox)
313{ 322{
314 u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; 323 u8 sched = *(u8 *)(inbox->buf + 64);
324 u8 orig_index = *(u8 *)(inbox->buf + 35);
325 u8 new_index;
326 struct mlx4_priv *priv = mlx4_priv(dev);
327 int port;
328
329 port = (sched >> 6 & 1) + 1;
330
331 new_index = priv->virt2phys_pkey[slave][port - 1][orig_index];
332 *(u8 *)(inbox->buf + 35) = new_index;
333
334 mlx4_dbg(dev, "port = %d, orig pkey index = %d, "
335 "new pkey index = %d\n", port, orig_index, new_index);
336}
337
338static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox,
339 u8 slave)
340{
341 struct mlx4_qp_context *qp_ctx = inbox->buf + 8;
342 enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *) inbox->buf);
343 u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
315 344
316 if (MLX4_QP_ST_UD == ts) 345 if (MLX4_QP_ST_UD == ts)
317 qp_ctx->pri_path.mgid_index = 0x80 | slave; 346 qp_ctx->pri_path.mgid_index = 0x80 | slave;
318 347
348 if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_UC == ts) {
349 if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH)
350 qp_ctx->pri_path.mgid_index = slave & 0x7F;
351 if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH)
352 qp_ctx->alt_path.mgid_index = slave & 0x7F;
353 }
354
319 mlx4_dbg(dev, "slave %d, new gid index: 0x%x ", 355 mlx4_dbg(dev, "slave %d, new gid index: 0x%x ",
320 slave, qp_ctx->pri_path.mgid_index); 356 slave, qp_ctx->pri_path.mgid_index);
321} 357}
@@ -360,8 +396,6 @@ static int get_res(struct mlx4_dev *dev, int slave, u64 res_id,
360 396
361 r->from_state = r->state; 397 r->from_state = r->state;
362 r->state = RES_ANY_BUSY; 398 r->state = RES_ANY_BUSY;
363 mlx4_dbg(dev, "res %s id 0x%llx to busy\n",
364 ResourceType(type), r->res_id);
365 399
366 if (res) 400 if (res)
367 *((struct res_common **)res) = r; 401 *((struct res_common **)res) = r;
@@ -1105,7 +1139,13 @@ static void res_end_move(struct mlx4_dev *dev, int slave,
1105 1139
1106static int valid_reserved(struct mlx4_dev *dev, int slave, int qpn) 1140static int valid_reserved(struct mlx4_dev *dev, int slave, int qpn)
1107{ 1141{
1108 return mlx4_is_qp_reserved(dev, qpn); 1142 return mlx4_is_qp_reserved(dev, qpn) &&
1143 (mlx4_is_master(dev) || mlx4_is_guest_proxy(dev, slave, qpn));
1144}
1145
1146static int fw_reserved(struct mlx4_dev *dev, int qpn)
1147{
1148 return qpn < dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
1109} 1149}
1110 1150
1111static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, 1151static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
@@ -1145,7 +1185,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
1145 if (err) 1185 if (err)
1146 return err; 1186 return err;
1147 1187
1148 if (!valid_reserved(dev, slave, qpn)) { 1188 if (!fw_reserved(dev, qpn)) {
1149 err = __mlx4_qp_alloc_icm(dev, qpn); 1189 err = __mlx4_qp_alloc_icm(dev, qpn);
1150 if (err) { 1190 if (err) {
1151 res_abort_move(dev, slave, RES_QP, qpn); 1191 res_abort_move(dev, slave, RES_QP, qpn);
@@ -1498,7 +1538,7 @@ static int qp_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
1498 if (err) 1538 if (err)
1499 return err; 1539 return err;
1500 1540
1501 if (!valid_reserved(dev, slave, qpn)) 1541 if (!fw_reserved(dev, qpn))
1502 __mlx4_qp_free_icm(dev, qpn); 1542 __mlx4_qp_free_icm(dev, qpn);
1503 1543
1504 res_end_move(dev, slave, RES_QP, qpn); 1544 res_end_move(dev, slave, RES_QP, qpn);
@@ -1938,6 +1978,19 @@ static u32 qp_get_srqn(struct mlx4_qp_context *qpc)
1938 return be32_to_cpu(qpc->srqn) & 0x1ffffff; 1978 return be32_to_cpu(qpc->srqn) & 0x1ffffff;
1939} 1979}
1940 1980
1981static void adjust_proxy_tun_qkey(struct mlx4_dev *dev, struct mlx4_vhcr *vhcr,
1982 struct mlx4_qp_context *context)
1983{
1984 u32 qpn = vhcr->in_modifier & 0xffffff;
1985 u32 qkey = 0;
1986
1987 if (mlx4_get_parav_qkey(dev, qpn, &qkey))
1988 return;
1989
1990 /* adjust qkey in qp context */
1991 context->qkey = cpu_to_be32(qkey);
1992}
1993
1941int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, 1994int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
1942 struct mlx4_vhcr *vhcr, 1995 struct mlx4_vhcr *vhcr,
1943 struct mlx4_cmd_mailbox *inbox, 1996 struct mlx4_cmd_mailbox *inbox,
@@ -1990,6 +2043,8 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
1990 goto ex_put_scq; 2043 goto ex_put_scq;
1991 } 2044 }
1992 2045
2046 adjust_proxy_tun_qkey(dev, vhcr, qpc);
2047 update_pkey_index(dev, slave, inbox);
1993 err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); 2048 err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
1994 if (err) 2049 if (err)
1995 goto ex_put_srq; 2050 goto ex_put_srq;
@@ -2135,6 +2190,48 @@ static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start,
2135 return err; 2190 return err;
2136} 2191}
2137 2192
2193static int verify_qp_parameters(struct mlx4_dev *dev,
2194 struct mlx4_cmd_mailbox *inbox,
2195 enum qp_transition transition, u8 slave)
2196{
2197 u32 qp_type;
2198 struct mlx4_qp_context *qp_ctx;
2199 enum mlx4_qp_optpar optpar;
2200
2201 qp_ctx = inbox->buf + 8;
2202 qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
2203 optpar = be32_to_cpu(*(__be32 *) inbox->buf);
2204
2205 switch (qp_type) {
2206 case MLX4_QP_ST_RC:
2207 case MLX4_QP_ST_UC:
2208 switch (transition) {
2209 case QP_TRANS_INIT2RTR:
2210 case QP_TRANS_RTR2RTS:
2211 case QP_TRANS_RTS2RTS:
2212 case QP_TRANS_SQD2SQD:
2213 case QP_TRANS_SQD2RTS:
2214 if (slave != mlx4_master_func_num(dev))
2215 /* slaves have only gid index 0 */
2216 if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH)
2217 if (qp_ctx->pri_path.mgid_index)
2218 return -EINVAL;
2219 if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH)
2220 if (qp_ctx->alt_path.mgid_index)
2221 return -EINVAL;
2222 break;
2223 default:
2224 break;
2225 }
2226
2227 break;
2228 default:
2229 break;
2230 }
2231
2232 return 0;
2233}
2234
2138int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave, 2235int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave,
2139 struct mlx4_vhcr *vhcr, 2236 struct mlx4_vhcr *vhcr,
2140 struct mlx4_cmd_mailbox *inbox, 2237 struct mlx4_cmd_mailbox *inbox,
@@ -2622,16 +2719,123 @@ out:
2622 return err; 2719 return err;
2623} 2720}
2624 2721
2722int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
2723 struct mlx4_vhcr *vhcr,
2724 struct mlx4_cmd_mailbox *inbox,
2725 struct mlx4_cmd_mailbox *outbox,
2726 struct mlx4_cmd_info *cmd)
2727{
2728 struct mlx4_qp_context *context = inbox->buf + 8;
2729 adjust_proxy_tun_qkey(dev, vhcr, context);
2730 update_pkey_index(dev, slave, inbox);
2731 return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
2732}
2733
2625int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, 2734int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
2626 struct mlx4_vhcr *vhcr, 2735 struct mlx4_vhcr *vhcr,
2627 struct mlx4_cmd_mailbox *inbox, 2736 struct mlx4_cmd_mailbox *inbox,
2628 struct mlx4_cmd_mailbox *outbox, 2737 struct mlx4_cmd_mailbox *outbox,
2629 struct mlx4_cmd_info *cmd) 2738 struct mlx4_cmd_info *cmd)
2630{ 2739{
2740 int err;
2631 struct mlx4_qp_context *qpc = inbox->buf + 8; 2741 struct mlx4_qp_context *qpc = inbox->buf + 8;
2632 2742
2633 update_ud_gid(dev, qpc, (u8)slave); 2743 err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave);
2744 if (err)
2745 return err;
2746
2747 update_pkey_index(dev, slave, inbox);
2748 update_gid(dev, inbox, (u8)slave);
2749 adjust_proxy_tun_qkey(dev, vhcr, qpc);
2750
2751 return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
2752}
2753
2754int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
2755 struct mlx4_vhcr *vhcr,
2756 struct mlx4_cmd_mailbox *inbox,
2757 struct mlx4_cmd_mailbox *outbox,
2758 struct mlx4_cmd_info *cmd)
2759{
2760 int err;
2761 struct mlx4_qp_context *context = inbox->buf + 8;
2762
2763 err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave);
2764 if (err)
2765 return err;
2766
2767 update_pkey_index(dev, slave, inbox);
2768 update_gid(dev, inbox, (u8)slave);
2769 adjust_proxy_tun_qkey(dev, vhcr, context);
2770 return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
2771}
2772
2773int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
2774 struct mlx4_vhcr *vhcr,
2775 struct mlx4_cmd_mailbox *inbox,
2776 struct mlx4_cmd_mailbox *outbox,
2777 struct mlx4_cmd_info *cmd)
2778{
2779 int err;
2780 struct mlx4_qp_context *context = inbox->buf + 8;
2781
2782 err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave);
2783 if (err)
2784 return err;
2785
2786 update_pkey_index(dev, slave, inbox);
2787 update_gid(dev, inbox, (u8)slave);
2788 adjust_proxy_tun_qkey(dev, vhcr, context);
2789 return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
2790}
2791
2792
2793int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
2794 struct mlx4_vhcr *vhcr,
2795 struct mlx4_cmd_mailbox *inbox,
2796 struct mlx4_cmd_mailbox *outbox,
2797 struct mlx4_cmd_info *cmd)
2798{
2799 struct mlx4_qp_context *context = inbox->buf + 8;
2800 adjust_proxy_tun_qkey(dev, vhcr, context);
2801 return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
2802}
2803
2804int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
2805 struct mlx4_vhcr *vhcr,
2806 struct mlx4_cmd_mailbox *inbox,
2807 struct mlx4_cmd_mailbox *outbox,
2808 struct mlx4_cmd_info *cmd)
2809{
2810 int err;
2811 struct mlx4_qp_context *context = inbox->buf + 8;
2812
2813 err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave);
2814 if (err)
2815 return err;
2816
2817 adjust_proxy_tun_qkey(dev, vhcr, context);
2818 update_gid(dev, inbox, (u8)slave);
2819 update_pkey_index(dev, slave, inbox);
2820 return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
2821}
2822
2823int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
2824 struct mlx4_vhcr *vhcr,
2825 struct mlx4_cmd_mailbox *inbox,
2826 struct mlx4_cmd_mailbox *outbox,
2827 struct mlx4_cmd_info *cmd)
2828{
2829 int err;
2830 struct mlx4_qp_context *context = inbox->buf + 8;
2831
2832 err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave);
2833 if (err)
2834 return err;
2634 2835
2836 adjust_proxy_tun_qkey(dev, vhcr, context);
2837 update_gid(dev, inbox, (u8)slave);
2838 update_pkey_index(dev, slave, inbox);
2635 return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); 2839 return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
2636} 2840}
2637 2841
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 6e1b0f973a0..6d1acb04cd1 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -54,7 +54,13 @@ enum {
54}; 54};
55 55
56enum { 56enum {
57 MLX4_MAX_PORTS = 2 57 MLX4_PORT_CAP_IS_SM = 1 << 1,
58 MLX4_PORT_CAP_DEV_MGMT_SUP = 1 << 19,
59};
60
61enum {
62 MLX4_MAX_PORTS = 2,
63 MLX4_MAX_PORT_PKEYS = 128
58}; 64};
59 65
60/* base qkey for use in sriov tunnel-qp/proxy-qp communication. 66/* base qkey for use in sriov tunnel-qp/proxy-qp communication.
@@ -191,6 +197,25 @@ enum {
191 MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0, 197 MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0,
192}; 198};
193 199
200enum slave_port_state {
201 SLAVE_PORT_DOWN = 0,
202 SLAVE_PENDING_UP,
203 SLAVE_PORT_UP,
204};
205
206enum slave_port_gen_event {
207 SLAVE_PORT_GEN_EVENT_DOWN = 0,
208 SLAVE_PORT_GEN_EVENT_UP,
209 SLAVE_PORT_GEN_EVENT_NONE,
210};
211
212enum slave_port_state_event {
213 MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
214 MLX4_PORT_STATE_DEV_EVENT_PORT_UP,
215 MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
216 MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
217};
218
194enum { 219enum {
195 MLX4_PERM_LOCAL_READ = 1 << 10, 220 MLX4_PERM_LOCAL_READ = 1 << 10,
196 MLX4_PERM_LOCAL_WRITE = 1 << 11, 221 MLX4_PERM_LOCAL_WRITE = 1 << 11,
@@ -303,6 +328,9 @@ struct mlx4_phys_caps {
303 u32 gid_phys_table_len[MLX4_MAX_PORTS + 1]; 328 u32 gid_phys_table_len[MLX4_MAX_PORTS + 1];
304 u32 pkey_phys_table_len[MLX4_MAX_PORTS + 1]; 329 u32 pkey_phys_table_len[MLX4_MAX_PORTS + 1];
305 u32 num_phys_eqs; 330 u32 num_phys_eqs;
331 u32 base_sqpn;
332 u32 base_proxy_sqpn;
333 u32 base_tunnel_sqpn;
306}; 334};
307 335
308struct mlx4_caps { 336struct mlx4_caps {
@@ -333,9 +361,10 @@ struct mlx4_caps {
333 int max_rq_desc_sz; 361 int max_rq_desc_sz;
334 int max_qp_init_rdma; 362 int max_qp_init_rdma;
335 int max_qp_dest_rdma; 363 int max_qp_dest_rdma;
336 int sqp_start; 364 u32 *qp0_proxy;
337 u32 base_sqpn; 365 u32 *qp1_proxy;
338 u32 base_tunnel_sqpn; 366 u32 *qp0_tunnel;
367 u32 *qp1_tunnel;
339 int num_srqs; 368 int num_srqs;
340 int max_srq_wqes; 369 int max_srq_wqes;
341 int max_srq_sge; 370 int max_srq_sge;
@@ -389,6 +418,7 @@ struct mlx4_caps {
389 enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1]; 418 enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1];
390 u32 max_counters; 419 u32 max_counters;
391 u8 port_ib_mtu[MLX4_MAX_PORTS + 1]; 420 u8 port_ib_mtu[MLX4_MAX_PORTS + 1];
421 u16 sqp_demux;
392}; 422};
393 423
394struct mlx4_buf_list { 424struct mlx4_buf_list {
@@ -671,6 +701,10 @@ struct mlx4_init_port_param {
671 for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ 701 for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
672 if ((type) == (dev)->caps.port_mask[(port)]) 702 if ((type) == (dev)->caps.port_mask[(port)])
673 703
704#define mlx4_foreach_non_ib_transport_port(port, dev) \
705 for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
706 if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB))
707
674#define mlx4_foreach_ib_transport_port(port, dev) \ 708#define mlx4_foreach_ib_transport_port(port, dev) \
675 for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ 709 for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
676 if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ 710 if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \
@@ -692,7 +726,18 @@ static inline int mlx4_is_master(struct mlx4_dev *dev)
692 726
693static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn) 727static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn)
694{ 728{
695 return (qpn < dev->caps.sqp_start + 8); 729 return (qpn < dev->phys_caps.base_sqpn + 8 +
730 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev));
731}
732
733static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn)
734{
735 int guest_proxy_base = dev->phys_caps.base_proxy_sqpn + slave * 8;
736
737 if (qpn >= guest_proxy_base && qpn < guest_proxy_base + 8)
738 return 1;
739
740 return 0;
696} 741}
697 742
698static inline int mlx4_is_mfunc(struct mlx4_dev *dev) 743static inline int mlx4_is_mfunc(struct mlx4_dev *dev)
@@ -927,6 +972,20 @@ int mlx4_flow_attach(struct mlx4_dev *dev,
927 struct mlx4_net_trans_rule *rule, u64 *reg_id); 972 struct mlx4_net_trans_rule *rule, u64 *reg_id);
928int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id); 973int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id);
929 974
975void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port,
976 int i, int val);
977
930int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey); 978int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey);
931 979
980int mlx4_is_slave_active(struct mlx4_dev *dev, int slave);
981int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port);
982int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port);
983int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr);
984int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 port_subtype_change);
985enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port);
986int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int event, enum slave_port_gen_event *gen_event);
987
988void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid);
989__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave);
990
932#endif /* MLX4_DEVICE_H */ 991#endif /* MLX4_DEVICE_H */
diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
index d813704b963..c257e1b211b 100644
--- a/include/linux/mlx4/driver.h
+++ b/include/linux/mlx4/driver.h
@@ -45,6 +45,8 @@ enum mlx4_dev_event {
45 MLX4_DEV_EVENT_PORT_DOWN, 45 MLX4_DEV_EVENT_PORT_DOWN,
46 MLX4_DEV_EVENT_PORT_REINIT, 46 MLX4_DEV_EVENT_PORT_REINIT,
47 MLX4_DEV_EVENT_PORT_MGMT_CHANGE, 47 MLX4_DEV_EVENT_PORT_MGMT_CHANGE,
48 MLX4_DEV_EVENT_SLAVE_INIT,
49 MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
48}; 50};
49 51
50struct mlx4_interface { 52struct mlx4_interface {
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 338388ba260..4b4ad6ffef9 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -126,7 +126,8 @@ struct mlx4_rss_context {
126 126
127struct mlx4_qp_path { 127struct mlx4_qp_path {
128 u8 fl; 128 u8 fl;
129 u8 reserved1[2]; 129 u8 reserved1[1];
130 u8 disable_pkey_check;
130 u8 pkey_index; 131 u8 pkey_index;
131 u8 counter_index; 132 u8 counter_index;
132 u8 grh_mylmc; 133 u8 grh_mylmc;
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 00a2b8ec327..ad9a3c28094 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -101,6 +101,22 @@ int ib_find_cached_pkey(struct ib_device *device,
101 u16 *index); 101 u16 *index);
102 102
103/** 103/**
104 * ib_find_exact_cached_pkey - Returns the PKey table index where a specified
105 * PKey value occurs. Comparison uses the FULL 16 bits (incl membership bit)
106 * @device: The device to query.
107 * @port_num: The port number of the device to search for the PKey.
108 * @pkey: The PKey value to search for.
109 * @index: The index into the cached PKey table where the PKey was found.
110 *
111 * ib_find_exact_cached_pkey() searches the specified PKey table in
112 * the local software cache.
113 */
114int ib_find_exact_cached_pkey(struct ib_device *device,
115 u8 port_num,
116 u16 pkey,
117 u16 *index);
118
119/**
104 * ib_get_cached_lmc - Returns a cached lmc table entry 120 * ib_get_cached_lmc - Returns a cached lmc table entry
105 * @device: The device to query. 121 * @device: The device to query.
106 * @port_num: The port number of the device to query. 122 * @port_num: The port number of the device to query.
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 07996af8265..46bc045bbe1 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -614,6 +614,9 @@ enum ib_qp_type {
614enum ib_qp_create_flags { 614enum ib_qp_create_flags {
615 IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, 615 IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
616 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, 616 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
617 /* reserve bits 26-31 for low level drivers' internal use */
618 IB_QP_CREATE_RESERVED_START = 1 << 26,
619 IB_QP_CREATE_RESERVED_END = 1 << 31,
617}; 620};
618 621
619struct ib_qp_init_attr { 622struct ib_qp_init_attr {