author     Eli Cohen <eli@mellanox.com>            2013-07-07 10:25:49 -0400
committer  Roland Dreier <roland@purestorage.com>  2013-07-08 13:32:24 -0400
commit     e126ba97dba9edeb6fafa3665b5f8497fc9cdf8c (patch)
tree       c886014a89a8a96b8fb171ad6683dc80ce2ff018 /drivers/infiniband
parent     0134f16bc91cc15a38c867b81568b791c9b626aa (diff)
mlx5: Add driver for Mellanox Connect-IB adapters
The driver is comprised of two kernel modules: mlx5_ib and mlx5_core.
This partitioning resembles what we have for mlx4, except that mlx5_ib
is the pci device driver and not mlx5_core.

mlx5_core is essentially a library that provides general functionality
that is intended to be used by other Mellanox devices that will be
introduced in the future.  mlx5_ib has a similar role as any hardware
device under drivers/infiniband/hw.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>

[ Merge in coccinelle fixes from Fengguang Wu <fengguang.wu@intel.com>. - Roland ]

Signed-off-by: Roland Dreier <roland@purestorage.com>
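[Editor's note] The layering described above -- a core module acting as a shared library, with the IB driver built on top of it -- is what the Kconfig below expresses with "select MLX5_CORE". As a rough, self-contained sketch of that pattern only (every demo_* name is hypothetical and appears nowhere in this patch; the real split places the two roles in separate modules), it can be pictured like this:

/*
 * Hypothetical sketch: a "core" service in the role of mlx5_core and a
 * consumer in the role of mlx5_ib, collapsed into one module for brevity.
 */
#include <linux/module.h>
#include <linux/errno.h>

/* "core" side: library-style functionality other drivers could reuse */
struct demo_core_dev {
	int id;
};

static int demo_core_alloc_resource(struct demo_core_dev *cdev)
{
	return cdev->id >= 0 ? 0 : -EINVAL;
}

/* "ib" side: layers protocol support on top of the core service */
static int demo_ib_probe(struct demo_core_dev *cdev)
{
	return demo_core_alloc_resource(cdev);
}

static int __init demo_init(void)
{
	struct demo_core_dev cdev = { .id = 0 };

	return demo_ib_probe(&cdev);
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("Dual BSD/GPL");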
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/Kconfig             |    1
-rw-r--r--  drivers/infiniband/Makefile            |    1
-rw-r--r--  drivers/infiniband/hw/mlx5/Kconfig     |   10
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile    |    3
-rw-r--r--  drivers/infiniband/hw/mlx5/ah.c        |   92
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c        |  843
-rw-r--r--  drivers/infiniband/hw/mlx5/doorbell.c  |  100
-rw-r--r--  drivers/infiniband/hw/mlx5/mad.c       |  139
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c      | 1504
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c       |  162
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h   |  545
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c        | 1007
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c        | 2524
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c       |  473
-rw-r--r--  drivers/infiniband/hw/mlx5/user.h      |  121
15 files changed, 7525 insertions, 0 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index c85b56c28099..5ceda710f516 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -50,6 +50,7 @@ source "drivers/infiniband/hw/amso1100/Kconfig"
50source "drivers/infiniband/hw/cxgb3/Kconfig" 50source "drivers/infiniband/hw/cxgb3/Kconfig"
51source "drivers/infiniband/hw/cxgb4/Kconfig" 51source "drivers/infiniband/hw/cxgb4/Kconfig"
52source "drivers/infiniband/hw/mlx4/Kconfig" 52source "drivers/infiniband/hw/mlx4/Kconfig"
53source "drivers/infiniband/hw/mlx5/Kconfig"
53source "drivers/infiniband/hw/nes/Kconfig" 54source "drivers/infiniband/hw/nes/Kconfig"
54source "drivers/infiniband/hw/ocrdma/Kconfig" 55source "drivers/infiniband/hw/ocrdma/Kconfig"
55 56
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index b126fefe0b1c..1fe69888515f 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/
 obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/
 obj-$(CONFIG_INFINIBAND_CXGB4) += hw/cxgb4/
 obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/
+obj-$(CONFIG_MLX5_INFINIBAND) += hw/mlx5/
 obj-$(CONFIG_INFINIBAND_NES) += hw/nes/
 obj-$(CONFIG_INFINIBAND_OCRDMA) += hw/ocrdma/
 obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
new file mode 100644
index 000000000000..8e6aebfaf8a4
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/Kconfig
@@ -0,0 +1,10 @@
1config MLX5_INFINIBAND
2 tristate "Mellanox Connect-IB HCA support"
3 depends on NETDEVICES && ETHERNET && PCI && X86
4 select NET_VENDOR_MELLANOX
5 select MLX5_CORE
6 ---help---
7 This driver provides low-level InfiniBand support for
8 Mellanox Connect-IB PCI Express host channel adapters (HCAs).
9 This is required to use InfiniBand protocols such as
10 IP-over-IB or SRP with these devices.
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
new file mode 100644
index 000000000000..4ea0135af484
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
2
3mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
new file mode 100644
index 000000000000..39ab0caefdf9
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -0,0 +1,92 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "mlx5_ib.h"
34
35struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
36 struct mlx5_ib_ah *ah)
37{
38 if (ah_attr->ah_flags & IB_AH_GRH) {
39 memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
40 ah->av.grh_gid_fl = cpu_to_be32(ah_attr->grh.flow_label |
41 (1 << 30) |
42 ah_attr->grh.sgid_index << 20);
43 ah->av.hop_limit = ah_attr->grh.hop_limit;
44 ah->av.tclass = ah_attr->grh.traffic_class;
45 }
46
47 ah->av.rlid = cpu_to_be16(ah_attr->dlid);
48 ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
49 ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf);
50
51 return &ah->ibah;
52}
53
54struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
55{
56 struct mlx5_ib_ah *ah;
57
58 ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
59 if (!ah)
60 return ERR_PTR(-ENOMEM);
61
62 return create_ib_ah(ah_attr, ah); /* never fails */
63}
64
65int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
66{
67 struct mlx5_ib_ah *ah = to_mah(ibah);
68 u32 tmp;
69
70 memset(ah_attr, 0, sizeof(*ah_attr));
71
72 tmp = be32_to_cpu(ah->av.grh_gid_fl);
73 if (tmp & (1 << 30)) {
74 ah_attr->ah_flags = IB_AH_GRH;
75 ah_attr->grh.sgid_index = (tmp >> 20) & 0xff;
76 ah_attr->grh.flow_label = tmp & 0xfffff;
77 memcpy(&ah_attr->grh.dgid, ah->av.rgid, 16);
78 ah_attr->grh.hop_limit = ah->av.hop_limit;
79 ah_attr->grh.traffic_class = ah->av.tclass;
80 }
81 ah_attr->dlid = be16_to_cpu(ah->av.rlid);
82 ah_attr->static_rate = ah->av.stat_rate_sl >> 4;
83 ah_attr->sl = ah->av.stat_rate_sl & 0xf;
84
85 return 0;
86}
87
88int mlx5_ib_destroy_ah(struct ib_ah *ah)
89{
90 kfree(to_mah(ah));
91 return 0;
92}
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
new file mode 100644
index 000000000000..344ab03948a3
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -0,0 +1,843 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/kref.h>
34#include <rdma/ib_umem.h>
35#include "mlx5_ib.h"
36#include "user.h"
37
38static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
39{
40 struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
41
42 ibcq->comp_handler(ibcq, ibcq->cq_context);
43}
44
45static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
46{
47 struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
48 struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
49 struct ib_cq *ibcq = &cq->ibcq;
50 struct ib_event event;
51
52 if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
53 mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
54 type, mcq->cqn);
55 return;
56 }
57
58 if (ibcq->event_handler) {
59 event.device = &dev->ib_dev;
60 event.event = IB_EVENT_CQ_ERR;
61 event.element.cq = ibcq;
62 ibcq->event_handler(&event, ibcq->cq_context);
63 }
64}
65
66static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
67{
68 return mlx5_buf_offset(&buf->buf, n * size);
69}
70
71static void *get_cqe(struct mlx5_ib_cq *cq, int n)
72{
73 return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
74}
75
76static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
77{
78 void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
79 struct mlx5_cqe64 *cqe64;
80
81 cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
82 return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^
83 !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
84}
85
86static void *next_cqe_sw(struct mlx5_ib_cq *cq)
87{
88 return get_sw_cqe(cq, cq->mcq.cons_index);
89}
90
91static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
92{
93 switch (wq->wr_data[idx]) {
94 case MLX5_IB_WR_UMR:
95 return 0;
96
97 case IB_WR_LOCAL_INV:
98 return IB_WC_LOCAL_INV;
99
100 case IB_WR_FAST_REG_MR:
101 return IB_WC_FAST_REG_MR;
102
103 default:
104 pr_warn("unknown completion status\n");
105 return 0;
106 }
107}
108
109static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
110 struct mlx5_ib_wq *wq, int idx)
111{
112 wc->wc_flags = 0;
113 switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
114 case MLX5_OPCODE_RDMA_WRITE_IMM:
115 wc->wc_flags |= IB_WC_WITH_IMM;
116 case MLX5_OPCODE_RDMA_WRITE:
117 wc->opcode = IB_WC_RDMA_WRITE;
118 break;
119 case MLX5_OPCODE_SEND_IMM:
120 wc->wc_flags |= IB_WC_WITH_IMM;
121 case MLX5_OPCODE_SEND:
122 case MLX5_OPCODE_SEND_INVAL:
123 wc->opcode = IB_WC_SEND;
124 break;
125 case MLX5_OPCODE_RDMA_READ:
126 wc->opcode = IB_WC_RDMA_READ;
127 wc->byte_len = be32_to_cpu(cqe->byte_cnt);
128 break;
129 case MLX5_OPCODE_ATOMIC_CS:
130 wc->opcode = IB_WC_COMP_SWAP;
131 wc->byte_len = 8;
132 break;
133 case MLX5_OPCODE_ATOMIC_FA:
134 wc->opcode = IB_WC_FETCH_ADD;
135 wc->byte_len = 8;
136 break;
137 case MLX5_OPCODE_ATOMIC_MASKED_CS:
138 wc->opcode = IB_WC_MASKED_COMP_SWAP;
139 wc->byte_len = 8;
140 break;
141 case MLX5_OPCODE_ATOMIC_MASKED_FA:
142 wc->opcode = IB_WC_MASKED_FETCH_ADD;
143 wc->byte_len = 8;
144 break;
145 case MLX5_OPCODE_BIND_MW:
146 wc->opcode = IB_WC_BIND_MW;
147 break;
148 case MLX5_OPCODE_UMR:
149 wc->opcode = get_umr_comp(wq, idx);
150 break;
151 }
152}
153
154enum {
155 MLX5_GRH_IN_BUFFER = 1,
156 MLX5_GRH_IN_CQE = 2,
157};
158
159static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
160 struct mlx5_ib_qp *qp)
161{
162 struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
163 struct mlx5_ib_srq *srq;
164 struct mlx5_ib_wq *wq;
165 u16 wqe_ctr;
166 u8 g;
167
168 if (qp->ibqp.srq || qp->ibqp.xrcd) {
169 struct mlx5_core_srq *msrq = NULL;
170
171 if (qp->ibqp.xrcd) {
172 msrq = mlx5_core_get_srq(&dev->mdev,
173 be32_to_cpu(cqe->srqn));
174 srq = to_mibsrq(msrq);
175 } else {
176 srq = to_msrq(qp->ibqp.srq);
177 }
178 if (srq) {
179 wqe_ctr = be16_to_cpu(cqe->wqe_counter);
180 wc->wr_id = srq->wrid[wqe_ctr];
181 mlx5_ib_free_srq_wqe(srq, wqe_ctr);
182 if (msrq && atomic_dec_and_test(&msrq->refcount))
183 complete(&msrq->free);
184 }
185 } else {
186 wq = &qp->rq;
187 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
188 ++wq->tail;
189 }
190 wc->byte_len = be32_to_cpu(cqe->byte_cnt);
191
192 switch (cqe->op_own >> 4) {
193 case MLX5_CQE_RESP_WR_IMM:
194 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
195 wc->wc_flags = IB_WC_WITH_IMM;
196 wc->ex.imm_data = cqe->imm_inval_pkey;
197 break;
198 case MLX5_CQE_RESP_SEND:
199 wc->opcode = IB_WC_RECV;
200 wc->wc_flags = 0;
201 break;
202 case MLX5_CQE_RESP_SEND_IMM:
203 wc->opcode = IB_WC_RECV;
204 wc->wc_flags = IB_WC_WITH_IMM;
205 wc->ex.imm_data = cqe->imm_inval_pkey;
206 break;
207 case MLX5_CQE_RESP_SEND_INV:
208 wc->opcode = IB_WC_RECV;
209 wc->wc_flags = IB_WC_WITH_INVALIDATE;
210 wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
211 break;
212 }
213 wc->slid = be16_to_cpu(cqe->slid);
214 wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
215 wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
216 wc->dlid_path_bits = cqe->ml_path;
217 g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
218 wc->wc_flags |= g ? IB_WC_GRH : 0;
219 wc->pkey_index = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;
220}
221
222static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
223{
224 __be32 *p = (__be32 *)cqe;
225 int i;
226
227 mlx5_ib_warn(dev, "dump error cqe\n");
228 for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
229 pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
230 be32_to_cpu(p[1]), be32_to_cpu(p[2]),
231 be32_to_cpu(p[3]));
232}
233
234static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
235 struct mlx5_err_cqe *cqe,
236 struct ib_wc *wc)
237{
238 int dump = 1;
239
240 switch (cqe->syndrome) {
241 case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
242 wc->status = IB_WC_LOC_LEN_ERR;
243 break;
244 case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
245 wc->status = IB_WC_LOC_QP_OP_ERR;
246 break;
247 case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
248 wc->status = IB_WC_LOC_PROT_ERR;
249 break;
250 case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
251 dump = 0;
252 wc->status = IB_WC_WR_FLUSH_ERR;
253 break;
254 case MLX5_CQE_SYNDROME_MW_BIND_ERR:
255 wc->status = IB_WC_MW_BIND_ERR;
256 break;
257 case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
258 wc->status = IB_WC_BAD_RESP_ERR;
259 break;
260 case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
261 wc->status = IB_WC_LOC_ACCESS_ERR;
262 break;
263 case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
264 wc->status = IB_WC_REM_INV_REQ_ERR;
265 break;
266 case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
267 wc->status = IB_WC_REM_ACCESS_ERR;
268 break;
269 case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
270 wc->status = IB_WC_REM_OP_ERR;
271 break;
272 case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
273 wc->status = IB_WC_RETRY_EXC_ERR;
274 dump = 0;
275 break;
276 case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
277 wc->status = IB_WC_RNR_RETRY_EXC_ERR;
278 dump = 0;
279 break;
280 case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
281 wc->status = IB_WC_REM_ABORT_ERR;
282 break;
283 default:
284 wc->status = IB_WC_GENERAL_ERR;
285 break;
286 }
287
288 wc->vendor_err = cqe->vendor_err_synd;
289 if (dump)
290 dump_cqe(dev, cqe);
291}
292
293static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
294{
295 /* TBD: waiting decision
296 */
297 return 0;
298}
299
300static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
301{
302 struct mlx5_wqe_data_seg *dpseg;
303 void *addr;
304
305 dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
306 sizeof(struct mlx5_wqe_raddr_seg) +
307 sizeof(struct mlx5_wqe_atomic_seg);
308 addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);
309 return addr;
310}
311
312static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
313 uint16_t idx)
314{
315 void *addr;
316 int byte_count;
317 int i;
318
319 if (!is_atomic_response(qp, idx))
320 return;
321
322 byte_count = be32_to_cpu(cqe64->byte_cnt);
323 addr = mlx5_get_atomic_laddr(qp, idx);
324
325 if (byte_count == 4) {
326 *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
327 } else {
328 for (i = 0; i < byte_count; i += 8) {
329 *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
330 addr += 8;
331 }
332 }
333
334 return;
335}
336
337static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
338 u16 tail, u16 head)
339{
340 int idx;
341
342 do {
343 idx = tail & (qp->sq.wqe_cnt - 1);
344 handle_atomic(qp, cqe64, idx);
345 if (idx == head)
346 break;
347
348 tail = qp->sq.w_list[idx].next;
349 } while (1);
350 tail = qp->sq.w_list[idx].next;
351 qp->sq.last_poll = tail;
352}
353
354static int mlx5_poll_one(struct mlx5_ib_cq *cq,
355 struct mlx5_ib_qp **cur_qp,
356 struct ib_wc *wc)
357{
358 struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
359 struct mlx5_err_cqe *err_cqe;
360 struct mlx5_cqe64 *cqe64;
361 struct mlx5_core_qp *mqp;
362 struct mlx5_ib_wq *wq;
363 uint8_t opcode;
364 uint32_t qpn;
365 u16 wqe_ctr;
366 void *cqe;
367 int idx;
368
369 cqe = next_cqe_sw(cq);
370 if (!cqe)
371 return -EAGAIN;
372
373 cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
374
375 ++cq->mcq.cons_index;
376
377 /* Make sure we read CQ entry contents after we've checked the
378 * ownership bit.
379 */
380 rmb();
381
382 /* TBD: resize CQ */
383
384 qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
385 if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
386 /* We do not have to take the QP table lock here,
387 * because CQs will be locked while QPs are removed
388 * from the table.
389 */
390 mqp = __mlx5_qp_lookup(&dev->mdev, qpn);
391 if (unlikely(!mqp)) {
392 mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
393 cq->mcq.cqn, qpn);
394 return -EINVAL;
395 }
396
397 *cur_qp = to_mibqp(mqp);
398 }
399
400 wc->qp = &(*cur_qp)->ibqp;
401 opcode = cqe64->op_own >> 4;
402 switch (opcode) {
403 case MLX5_CQE_REQ:
404 wq = &(*cur_qp)->sq;
405 wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
406 idx = wqe_ctr & (wq->wqe_cnt - 1);
407 handle_good_req(wc, cqe64, wq, idx);
408 handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
409 wc->wr_id = wq->wrid[idx];
410 wq->tail = wq->wqe_head[idx] + 1;
411 wc->status = IB_WC_SUCCESS;
412 break;
413 case MLX5_CQE_RESP_WR_IMM:
414 case MLX5_CQE_RESP_SEND:
415 case MLX5_CQE_RESP_SEND_IMM:
416 case MLX5_CQE_RESP_SEND_INV:
417 handle_responder(wc, cqe64, *cur_qp);
418 wc->status = IB_WC_SUCCESS;
419 break;
420 case MLX5_CQE_RESIZE_CQ:
421 break;
422 case MLX5_CQE_REQ_ERR:
423 case MLX5_CQE_RESP_ERR:
424 err_cqe = (struct mlx5_err_cqe *)cqe64;
425 mlx5_handle_error_cqe(dev, err_cqe, wc);
426 mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
427 opcode == MLX5_CQE_REQ_ERR ?
428 "Requestor" : "Responder", cq->mcq.cqn);
429 mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
430 err_cqe->syndrome, err_cqe->vendor_err_synd);
431 if (opcode == MLX5_CQE_REQ_ERR) {
432 wq = &(*cur_qp)->sq;
433 wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
434 idx = wqe_ctr & (wq->wqe_cnt - 1);
435 wc->wr_id = wq->wrid[idx];
436 wq->tail = wq->wqe_head[idx] + 1;
437 } else {
438 struct mlx5_ib_srq *srq;
439
440 if ((*cur_qp)->ibqp.srq) {
441 srq = to_msrq((*cur_qp)->ibqp.srq);
442 wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
443 wc->wr_id = srq->wrid[wqe_ctr];
444 mlx5_ib_free_srq_wqe(srq, wqe_ctr);
445 } else {
446 wq = &(*cur_qp)->rq;
447 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
448 ++wq->tail;
449 }
450 }
451 break;
452 }
453
454 return 0;
455}
456
457int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
458{
459 struct mlx5_ib_cq *cq = to_mcq(ibcq);
460 struct mlx5_ib_qp *cur_qp = NULL;
461 unsigned long flags;
462 int npolled;
463 int err = 0;
464
465 spin_lock_irqsave(&cq->lock, flags);
466
467 for (npolled = 0; npolled < num_entries; npolled++) {
468 err = mlx5_poll_one(cq, &cur_qp, wc + npolled);
469 if (err)
470 break;
471 }
472
473 if (npolled)
474 mlx5_cq_set_ci(&cq->mcq);
475
476 spin_unlock_irqrestore(&cq->lock, flags);
477
478 if (err == 0 || err == -EAGAIN)
479 return npolled;
480 else
481 return err;
482}
483
484int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
485{
486 mlx5_cq_arm(&to_mcq(ibcq)->mcq,
487 (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
488 MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
489 to_mdev(ibcq->device)->mdev.priv.uuari.uars[0].map,
490 MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev.priv.cq_uar_lock));
491
492 return 0;
493}
494
495static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
496 int nent, int cqe_size)
497{
498 int err;
499
500 err = mlx5_buf_alloc(&dev->mdev, nent * cqe_size,
501 PAGE_SIZE * 2, &buf->buf);
502 if (err)
503 return err;
504
505 buf->cqe_size = cqe_size;
506
507 return 0;
508}
509
510static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
511{
512 mlx5_buf_free(&dev->mdev, &buf->buf);
513}
514
515static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
516 struct ib_ucontext *context, struct mlx5_ib_cq *cq,
517 int entries, struct mlx5_create_cq_mbox_in **cqb,
518 int *cqe_size, int *index, int *inlen)
519{
520 struct mlx5_ib_create_cq ucmd;
521 int page_shift;
522 int npages;
523 int ncont;
524 int err;
525
526 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
527 return -EFAULT;
528
529 if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
530 return -EINVAL;
531
532 *cqe_size = ucmd.cqe_size;
533
534 cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
535 entries * ucmd.cqe_size,
536 IB_ACCESS_LOCAL_WRITE, 1);
537 if (IS_ERR(cq->buf.umem)) {
538 err = PTR_ERR(cq->buf.umem);
539 return err;
540 }
541
542 err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
543 &cq->db);
544 if (err)
545 goto err_umem;
546
547 mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift,
548 &ncont, NULL);
549 mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
550 ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
551
552 *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont;
553 *cqb = mlx5_vzalloc(*inlen);
554 if (!*cqb) {
555 err = -ENOMEM;
556 goto err_db;
557 }
558 mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
559 (*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
560
561 *index = to_mucontext(context)->uuari.uars[0].index;
562
563 return 0;
564
565err_db:
566 mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
567
568err_umem:
569 ib_umem_release(cq->buf.umem);
570 return err;
571}
572
573static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
574{
575 mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
576 ib_umem_release(cq->buf.umem);
577}
578
579static void init_cq_buf(struct mlx5_ib_cq *cq, int nent)
580{
581 int i;
582 void *cqe;
583 struct mlx5_cqe64 *cqe64;
584
585 for (i = 0; i < nent; i++) {
586 cqe = get_cqe(cq, i);
587 cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64;
588 cqe64->op_own = 0xf1;
589 }
590}
591
592static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
593 int entries, int cqe_size,
594 struct mlx5_create_cq_mbox_in **cqb,
595 int *index, int *inlen)
596{
597 int err;
598
599 err = mlx5_db_alloc(&dev->mdev, &cq->db);
600 if (err)
601 return err;
602
603 cq->mcq.set_ci_db = cq->db.db;
604 cq->mcq.arm_db = cq->db.db + 1;
605 *cq->mcq.set_ci_db = 0;
606 *cq->mcq.arm_db = 0;
607 cq->mcq.cqe_sz = cqe_size;
608
609 err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
610 if (err)
611 goto err_db;
612
613 init_cq_buf(cq, entries);
614
615 *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
616 *cqb = mlx5_vzalloc(*inlen);
617 if (!*cqb) {
618 err = -ENOMEM;
619 goto err_buf;
620 }
621 mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
622
623 (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT;
624 *index = dev->mdev.priv.uuari.uars[0].index;
625
626 return 0;
627
628err_buf:
629 free_cq_buf(dev, &cq->buf);
630
631err_db:
632 mlx5_db_free(&dev->mdev, &cq->db);
633 return err;
634}
635
636static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
637{
638 free_cq_buf(dev, &cq->buf);
639 mlx5_db_free(&dev->mdev, &cq->db);
640}
641
642struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
643 int vector, struct ib_ucontext *context,
644 struct ib_udata *udata)
645{
646 struct mlx5_create_cq_mbox_in *cqb = NULL;
647 struct mlx5_ib_dev *dev = to_mdev(ibdev);
648 struct mlx5_ib_cq *cq;
649 int uninitialized_var(index);
650 int uninitialized_var(inlen);
651 int cqe_size;
652 int irqn;
653 int eqn;
654 int err;
655
656 entries = roundup_pow_of_two(entries + 1);
657 if (entries < 1 || entries > dev->mdev.caps.max_cqes)
658 return ERR_PTR(-EINVAL);
659
660 cq = kzalloc(sizeof(*cq), GFP_KERNEL);
661 if (!cq)
662 return ERR_PTR(-ENOMEM);
663
664 cq->ibcq.cqe = entries - 1;
665 mutex_init(&cq->resize_mutex);
666 spin_lock_init(&cq->lock);
667 cq->resize_buf = NULL;
668 cq->resize_umem = NULL;
669
670 if (context) {
671 err = create_cq_user(dev, udata, context, cq, entries,
672 &cqb, &cqe_size, &index, &inlen);
673 if (err)
674 goto err_create;
675 } else {
676 /* for now choose 64 bytes till we have a proper interface */
677 cqe_size = 64;
678 err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
679 &index, &inlen);
680 if (err)
681 goto err_create;
682 }
683
684 cq->cqe_size = cqe_size;
685 cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
686 cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
687 err = mlx5_vector2eqn(dev, vector, &eqn, &irqn);
688 if (err)
689 goto err_cqb;
690
691 cqb->ctx.c_eqn = cpu_to_be16(eqn);
692 cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma);
693
694 err = mlx5_core_create_cq(&dev->mdev, &cq->mcq, cqb, inlen);
695 if (err)
696 goto err_cqb;
697
698 mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
699 cq->mcq.irqn = irqn;
700 cq->mcq.comp = mlx5_ib_cq_comp;
701 cq->mcq.event = mlx5_ib_cq_event;
702
703 if (context)
704 if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
705 err = -EFAULT;
706 goto err_cmd;
707 }
708
709
710 mlx5_vfree(cqb);
711 return &cq->ibcq;
712
713err_cmd:
714 mlx5_core_destroy_cq(&dev->mdev, &cq->mcq);
715
716err_cqb:
717 mlx5_vfree(cqb);
718 if (context)
719 destroy_cq_user(cq, context);
720 else
721 destroy_cq_kernel(dev, cq);
722
723err_create:
724 kfree(cq);
725
726 return ERR_PTR(err);
727}
728
729
730int mlx5_ib_destroy_cq(struct ib_cq *cq)
731{
732 struct mlx5_ib_dev *dev = to_mdev(cq->device);
733 struct mlx5_ib_cq *mcq = to_mcq(cq);
734 struct ib_ucontext *context = NULL;
735
736 if (cq->uobject)
737 context = cq->uobject->context;
738
739 mlx5_core_destroy_cq(&dev->mdev, &mcq->mcq);
740 if (context)
741 destroy_cq_user(mcq, context);
742 else
743 destroy_cq_kernel(dev, mcq);
744
745 kfree(mcq);
746
747 return 0;
748}
749
750static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq,
751 u32 rsn)
752{
753 u32 lrsn;
754
755 if (srq)
756 lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff;
757 else
758 lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff;
759
760 return rsn == lrsn;
761}
762
763void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
764{
765 struct mlx5_cqe64 *cqe64, *dest64;
766 void *cqe, *dest;
767 u32 prod_index;
768 int nfreed = 0;
769 u8 owner_bit;
770
771 if (!cq)
772 return;
773
774 /* First we need to find the current producer index, so we
775 * know where to start cleaning from. It doesn't matter if HW
776 * adds new entries after this loop -- the QP we're worried
777 * about is already in RESET, so the new entries won't come
778 * from our QP and therefore don't need to be checked.
779 */
780 for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
781 if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
782 break;
783
784 /* Now sweep backwards through the CQ, removing CQ entries
785 * that match our QP by copying older entries on top of them.
786 */
787 while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
788 cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
789 cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
790 if (is_equal_rsn(cqe64, srq, rsn)) {
791 if (srq)
792 mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
793 ++nfreed;
794 } else if (nfreed) {
795 dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
796 dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
797 owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
798 memcpy(dest, cqe, cq->mcq.cqe_sz);
799 dest64->op_own = owner_bit |
800 (dest64->op_own & ~MLX5_CQE_OWNER_MASK);
801 }
802 }
803
804 if (nfreed) {
805 cq->mcq.cons_index += nfreed;
806 /* Make sure update of buffer contents is done before
807 * updating consumer index.
808 */
809 wmb();
810 mlx5_cq_set_ci(&cq->mcq);
811 }
812}
813
814void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
815{
816 if (!cq)
817 return;
818
819 spin_lock_irq(&cq->lock);
820 __mlx5_ib_cq_clean(cq, qpn, srq);
821 spin_unlock_irq(&cq->lock);
822}
823
824int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
825{
826 return -ENOSYS;
827}
828
829int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
830{
831 return -ENOSYS;
832}
833
834int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
835{
836 struct mlx5_ib_cq *cq;
837
838 if (!ibcq)
839 return 128;
840
841 cq = to_mcq(ibcq);
842 return cq->cqe_size;
843}
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
new file mode 100644
index 000000000000..256a23344f28
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -0,0 +1,100 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/kref.h>
34#include <linux/slab.h>
35#include <rdma/ib_umem.h>
36
37#include "mlx5_ib.h"
38
39struct mlx5_ib_user_db_page {
40 struct list_head list;
41 struct ib_umem *umem;
42 unsigned long user_virt;
43 int refcnt;
44};
45
46int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
47 struct mlx5_db *db)
48{
49 struct mlx5_ib_user_db_page *page;
50 struct ib_umem_chunk *chunk;
51 int err = 0;
52
53 mutex_lock(&context->db_page_mutex);
54
55 list_for_each_entry(page, &context->db_page_list, list)
56 if (page->user_virt == (virt & PAGE_MASK))
57 goto found;
58
59 page = kmalloc(sizeof(*page), GFP_KERNEL);
60 if (!page) {
61 err = -ENOMEM;
62 goto out;
63 }
64
65 page->user_virt = (virt & PAGE_MASK);
66 page->refcnt = 0;
67 page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
68 PAGE_SIZE, 0, 0);
69 if (IS_ERR(page->umem)) {
70 err = PTR_ERR(page->umem);
71 kfree(page);
72 goto out;
73 }
74
75 list_add(&page->list, &context->db_page_list);
76
77found:
78 chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
79 db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
80 db->u.user_page = page;
81 ++page->refcnt;
82
83out:
84 mutex_unlock(&context->db_page_mutex);
85
86 return err;
87}
88
89void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db)
90{
91 mutex_lock(&context->db_page_mutex);
92
93 if (!--db->u.user_page->refcnt) {
94 list_del(&db->u.user_page->list);
95 ib_umem_release(db->u.user_page->umem);
96 kfree(db->u.user_page);
97 }
98
99 mutex_unlock(&context->db_page_mutex);
100}
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
new file mode 100644
index 000000000000..5c8938be0e08
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -0,0 +1,139 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/mlx5/cmd.h>
34#include <rdma/ib_mad.h>
35#include <rdma/ib_smi.h>
36#include "mlx5_ib.h"
37
38enum {
39 MLX5_IB_VENDOR_CLASS1 = 0x9,
40 MLX5_IB_VENDOR_CLASS2 = 0xa
41};
42
43int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
44 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
45 void *in_mad, void *response_mad)
46{
47 u8 op_modifier = 0;
48
49 /* Key check traps can't be generated unless we have in_wc to
50 * tell us where to send the trap.
51 */
52 if (ignore_mkey || !in_wc)
53 op_modifier |= 0x1;
54 if (ignore_bkey || !in_wc)
55 op_modifier |= 0x2;
56
57 return mlx5_core_mad_ifc(&dev->mdev, in_mad, response_mad, op_modifier, port);
58}
59
60int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
61 struct ib_wc *in_wc, struct ib_grh *in_grh,
62 struct ib_mad *in_mad, struct ib_mad *out_mad)
63{
64 u16 slid;
65 int err;
66
67 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
68
69 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0)
70 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
71
72 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
73 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
74 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
75 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
76 in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
77 return IB_MAD_RESULT_SUCCESS;
78
79 /* Don't process SMInfo queries -- the SMA can't handle them.
80 */
81 if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
82 return IB_MAD_RESULT_SUCCESS;
83 } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
84 in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1 ||
85 in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2 ||
86 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
87 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
88 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
89 return IB_MAD_RESULT_SUCCESS;
90 } else {
91 return IB_MAD_RESULT_SUCCESS;
92 }
93
94 err = mlx5_MAD_IFC(to_mdev(ibdev),
95 mad_flags & IB_MAD_IGNORE_MKEY,
96 mad_flags & IB_MAD_IGNORE_BKEY,
97 port_num, in_wc, in_grh, in_mad, out_mad);
98 if (err)
99 return IB_MAD_RESULT_FAILURE;
100
101 /* set return bit in status of directed route responses */
102 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
103 out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
104
105 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
106 /* no response for trap repress */
107 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
108
109 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
110}
111
112int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
113{
114 struct ib_smp *in_mad = NULL;
115 struct ib_smp *out_mad = NULL;
116 int err = -ENOMEM;
117 u16 packet_error;
118
119 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
120 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
121 if (!in_mad || !out_mad)
122 goto out;
123
124 init_query_mad(in_mad);
125 in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
126 in_mad->attr_mod = cpu_to_be32(port);
127
128 err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
129
130 packet_error = be16_to_cpu(out_mad->status);
131
132 dev->mdev.caps.ext_port_cap[port - 1] = (!err && !packet_error) ?
133 MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0;
134
135out:
136 kfree(in_mad);
137 kfree(out_mad);
138 return err;
139}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
new file mode 100644
index 000000000000..6b1007f9bc29
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -0,0 +1,1504 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <asm-generic/kmap_types.h>
34#include <linux/module.h>
35#include <linux/init.h>
36#include <linux/errno.h>
37#include <linux/pci.h>
38#include <linux/dma-mapping.h>
39#include <linux/slab.h>
40#include <linux/io-mapping.h>
41#include <linux/sched.h>
42#include <rdma/ib_user_verbs.h>
43#include <rdma/ib_smi.h>
44#include <rdma/ib_umem.h>
45#include "user.h"
46#include "mlx5_ib.h"
47
48#define DRIVER_NAME "mlx5_ib"
49#define DRIVER_VERSION "1.0"
50#define DRIVER_RELDATE "June 2013"
51
52MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
53MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
54MODULE_LICENSE("Dual BSD/GPL");
55MODULE_VERSION(DRIVER_VERSION);
56
57static int prof_sel = 2;
58module_param_named(prof_sel, prof_sel, int, 0444);
59MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
60
61static char mlx5_version[] =
62 DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
63 DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
64
65struct mlx5_profile profile[] = {
66 [0] = {
67 .mask = 0,
68 },
69 [1] = {
70 .mask = MLX5_PROF_MASK_QP_SIZE,
71 .log_max_qp = 12,
72 },
73 [2] = {
74 .mask = MLX5_PROF_MASK_QP_SIZE |
75 MLX5_PROF_MASK_MR_CACHE,
76 .log_max_qp = 17,
77 .mr_cache[0] = {
78 .size = 500,
79 .limit = 250
80 },
81 .mr_cache[1] = {
82 .size = 500,
83 .limit = 250
84 },
85 .mr_cache[2] = {
86 .size = 500,
87 .limit = 250
88 },
89 .mr_cache[3] = {
90 .size = 500,
91 .limit = 250
92 },
93 .mr_cache[4] = {
94 .size = 500,
95 .limit = 250
96 },
97 .mr_cache[5] = {
98 .size = 500,
99 .limit = 250
100 },
101 .mr_cache[6] = {
102 .size = 500,
103 .limit = 250
104 },
105 .mr_cache[7] = {
106 .size = 500,
107 .limit = 250
108 },
109 .mr_cache[8] = {
110 .size = 500,
111 .limit = 250
112 },
113 .mr_cache[9] = {
114 .size = 500,
115 .limit = 250
116 },
117 .mr_cache[10] = {
118 .size = 500,
119 .limit = 250
120 },
121 .mr_cache[11] = {
122 .size = 500,
123 .limit = 250
124 },
125 .mr_cache[12] = {
126 .size = 64,
127 .limit = 32
128 },
129 .mr_cache[13] = {
130 .size = 32,
131 .limit = 16
132 },
133 .mr_cache[14] = {
134 .size = 16,
135 .limit = 8
136 },
137 .mr_cache[15] = {
138 .size = 8,
139 .limit = 4
140 },
141 },
142};
143
144int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn)
145{
146 struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
147 struct mlx5_eq *eq, *n;
148 int err = -ENOENT;
149
150 spin_lock(&table->lock);
151 list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
152 if (eq->index == vector) {
153 *eqn = eq->eqn;
154 *irqn = eq->irqn;
155 err = 0;
156 break;
157 }
158 }
159 spin_unlock(&table->lock);
160
161 return err;
162}
163
164static int alloc_comp_eqs(struct mlx5_ib_dev *dev)
165{
166 struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
167 struct mlx5_eq *eq, *n;
168 int ncomp_vec;
169 int nent;
170 int err;
171 int i;
172
173 INIT_LIST_HEAD(&dev->eqs_list);
174 ncomp_vec = table->num_comp_vectors;
175 nent = MLX5_COMP_EQ_SIZE;
176 for (i = 0; i < ncomp_vec; i++) {
177 eq = kzalloc(sizeof(*eq), GFP_KERNEL);
178 if (!eq) {
179 err = -ENOMEM;
180 goto clean;
181 }
182
183 snprintf(eq->name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
184 err = mlx5_create_map_eq(&dev->mdev, eq,
185 i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
186 eq->name,
187 &dev->mdev.priv.uuari.uars[0]);
188 if (err) {
189 kfree(eq);
190 goto clean;
191 }
192 mlx5_ib_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
193 eq->index = i;
194 spin_lock(&table->lock);
195 list_add_tail(&eq->list, &dev->eqs_list);
196 spin_unlock(&table->lock);
197 }
198
199 dev->num_comp_vectors = ncomp_vec;
200 return 0;
201
202clean:
203 spin_lock(&table->lock);
204 list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
205 list_del(&eq->list);
206 spin_unlock(&table->lock);
207 if (mlx5_destroy_unmap_eq(&dev->mdev, eq))
208 mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn);
209 kfree(eq);
210 spin_lock(&table->lock);
211 }
212 spin_unlock(&table->lock);
213 return err;
214}
215
216static void free_comp_eqs(struct mlx5_ib_dev *dev)
217{
218 struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
219 struct mlx5_eq *eq, *n;
220
221 spin_lock(&table->lock);
222 list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
223 list_del(&eq->list);
224 spin_unlock(&table->lock);
225 if (mlx5_destroy_unmap_eq(&dev->mdev, eq))
226 mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn);
227 kfree(eq);
228 spin_lock(&table->lock);
229 }
230 spin_unlock(&table->lock);
231}
232
233static int mlx5_ib_query_device(struct ib_device *ibdev,
234 struct ib_device_attr *props)
235{
236 struct mlx5_ib_dev *dev = to_mdev(ibdev);
237 struct ib_smp *in_mad = NULL;
238 struct ib_smp *out_mad = NULL;
239 int err = -ENOMEM;
240 int max_rq_sg;
241 int max_sq_sg;
242 u64 flags;
243
244 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
245 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
246 if (!in_mad || !out_mad)
247 goto out;
248
249 init_query_mad(in_mad);
250 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
251
252 err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
253 if (err)
254 goto out;
255
256 memset(props, 0, sizeof(*props));
257
258 props->fw_ver = ((u64)fw_rev_maj(&dev->mdev) << 32) |
259 (fw_rev_min(&dev->mdev) << 16) |
260 fw_rev_sub(&dev->mdev);
261 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
262 IB_DEVICE_PORT_ACTIVE_EVENT |
263 IB_DEVICE_SYS_IMAGE_GUID |
264 IB_DEVICE_RC_RNR_NAK_GEN |
265 IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
266 flags = dev->mdev.caps.flags;
267 if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR)
268 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
269 if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR)
270 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
271 if (flags & MLX5_DEV_CAP_FLAG_APM)
272 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
273 props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
274 if (flags & MLX5_DEV_CAP_FLAG_XRC)
275 props->device_cap_flags |= IB_DEVICE_XRC;
276 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
277
278 props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) &
279 0xffffff;
280 props->vendor_part_id = be16_to_cpup((__be16 *)(out_mad->data + 30));
281 props->hw_ver = be32_to_cpup((__be32 *)(out_mad->data + 32));
282 memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
283
284 props->max_mr_size = ~0ull;
285 props->page_size_cap = dev->mdev.caps.min_page_sz;
286 props->max_qp = 1 << dev->mdev.caps.log_max_qp;
287 props->max_qp_wr = dev->mdev.caps.max_wqes;
288 max_rq_sg = dev->mdev.caps.max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
289 max_sq_sg = (dev->mdev.caps.max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) /
290 sizeof(struct mlx5_wqe_data_seg);
291 props->max_sge = min(max_rq_sg, max_sq_sg);
292 props->max_cq = 1 << dev->mdev.caps.log_max_cq;
293 props->max_cqe = dev->mdev.caps.max_cqes - 1;
294 props->max_mr = 1 << dev->mdev.caps.log_max_mkey;
295 props->max_pd = 1 << dev->mdev.caps.log_max_pd;
296 props->max_qp_rd_atom = dev->mdev.caps.max_ra_req_qp;
297 props->max_qp_init_rd_atom = dev->mdev.caps.max_ra_res_qp;
298 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
299 props->max_srq = 1 << dev->mdev.caps.log_max_srq;
300 props->max_srq_wr = dev->mdev.caps.max_srq_wqes - 1;
301 props->max_srq_sge = max_rq_sg - 1;
302 props->max_fast_reg_page_list_len = (unsigned int)-1;
303 props->local_ca_ack_delay = dev->mdev.caps.local_ca_ack_delay;
304 props->atomic_cap = dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_ATOMIC ?
305 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
306 props->masked_atomic_cap = IB_ATOMIC_HCA;
307 props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28));
308 props->max_mcast_grp = 1 << dev->mdev.caps.log_max_mcg;
309 props->max_mcast_qp_attach = dev->mdev.caps.max_qp_mcg;
310 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
311 props->max_mcast_grp;
312 props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
313
314out:
315 kfree(in_mad);
316 kfree(out_mad);
317
318 return err;
319}
320
321int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
322 struct ib_port_attr *props)
323{
324 struct mlx5_ib_dev *dev = to_mdev(ibdev);
325 struct ib_smp *in_mad = NULL;
326 struct ib_smp *out_mad = NULL;
327 int ext_active_speed;
328 int err = -ENOMEM;
329
330 if (port < 1 || port > dev->mdev.caps.num_ports) {
331 mlx5_ib_warn(dev, "invalid port number %d\n", port);
332 return -EINVAL;
333 }
334
335 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
336 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
337 if (!in_mad || !out_mad)
338 goto out;
339
340 memset(props, 0, sizeof(*props));
341
342 init_query_mad(in_mad);
343 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
344 in_mad->attr_mod = cpu_to_be32(port);
345
346 err = mlx5_MAD_IFC(dev, 1, 1, port, NULL, NULL, in_mad, out_mad);
347 if (err) {
348 mlx5_ib_warn(dev, "err %d\n", err);
349 goto out;
350 }
351
352
353 props->lid = be16_to_cpup((__be16 *)(out_mad->data + 16));
354 props->lmc = out_mad->data[34] & 0x7;
355 props->sm_lid = be16_to_cpup((__be16 *)(out_mad->data + 18));
356 props->sm_sl = out_mad->data[36] & 0xf;
357 props->state = out_mad->data[32] & 0xf;
358 props->phys_state = out_mad->data[33] >> 4;
359 props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20));
360 props->gid_tbl_len = out_mad->data[50];
361 props->max_msg_sz = 1 << to_mdev(ibdev)->mdev.caps.log_max_msg;
362 props->pkey_tbl_len = to_mdev(ibdev)->mdev.caps.port[port - 1].pkey_table_len;
363 props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46));
364 props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48));
365 props->active_width = out_mad->data[31] & 0xf;
366 props->active_speed = out_mad->data[35] >> 4;
367 props->max_mtu = out_mad->data[41] & 0xf;
368 props->active_mtu = out_mad->data[36] >> 4;
369 props->subnet_timeout = out_mad->data[51] & 0x1f;
370 props->max_vl_num = out_mad->data[37] >> 4;
371 props->init_type_reply = out_mad->data[41] >> 4;
372
373 /* Check if extended speeds (EDR/FDR/...) are supported */
374 if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
375 ext_active_speed = out_mad->data[62] >> 4;
376
377 switch (ext_active_speed) {
378 case 1:
379 props->active_speed = 16; /* FDR */
380 break;
381 case 2:
382 props->active_speed = 32; /* EDR */
383 break;
384 }
385 }
386
387 /* If reported active speed is QDR, check if is FDR-10 */
388 if (props->active_speed == 4) {
389 if (dev->mdev.caps.ext_port_cap[port - 1] &
390 MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) {
391 init_query_mad(in_mad);
392 in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
393 in_mad->attr_mod = cpu_to_be32(port);
394
395 err = mlx5_MAD_IFC(dev, 1, 1, port,
396 NULL, NULL, in_mad, out_mad);
397 if (err)
398 goto out;
399
400 /* Checking LinkSpeedActive for FDR-10 */
401 if (out_mad->data[15] & 0x1)
402 props->active_speed = 8;
403 }
404 }
405
406out:
407 kfree(in_mad);
408 kfree(out_mad);
409
410 return err;
411}
412
413static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
414 union ib_gid *gid)
415{
416 struct ib_smp *in_mad = NULL;
417 struct ib_smp *out_mad = NULL;
418 int err = -ENOMEM;
419
420 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
421 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
422 if (!in_mad || !out_mad)
423 goto out;
424
425 init_query_mad(in_mad);
426 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
427 in_mad->attr_mod = cpu_to_be32(port);
428
429 err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
430 if (err)
431 goto out;
432
433 memcpy(gid->raw, out_mad->data + 8, 8);
434
435 init_query_mad(in_mad);
436 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
437 in_mad->attr_mod = cpu_to_be32(index / 8);
438
439 err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
440 if (err)
441 goto out;
442
443 memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
444
445out:
446 kfree(in_mad);
447 kfree(out_mad);
448 return err;
449}
450
451static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
452 u16 *pkey)
453{
454 struct ib_smp *in_mad = NULL;
455 struct ib_smp *out_mad = NULL;
456 int err = -ENOMEM;
457
458 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
459 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
460 if (!in_mad || !out_mad)
461 goto out;
462
463 init_query_mad(in_mad);
464 in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
465 in_mad->attr_mod = cpu_to_be32(index / 32);
466
467 err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
468 if (err)
469 goto out;
470
471 *pkey = be16_to_cpu(((__be16 *)out_mad->data)[index % 32]);
472
473out:
474 kfree(in_mad);
475 kfree(out_mad);
476 return err;
477}
478
479struct mlx5_reg_node_desc {
480 u8 desc[64];
481};
482
483static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
484 struct ib_device_modify *props)
485{
486 struct mlx5_ib_dev *dev = to_mdev(ibdev);
487 struct mlx5_reg_node_desc in;
488 struct mlx5_reg_node_desc out;
489 int err;
490
491 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
492 return -EOPNOTSUPP;
493
494 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
495 return 0;
496
497 /*
498 * If possible, pass node desc to FW, so it can generate
499 * a 144 trap. If cmd fails, just ignore.
500 */
501 memcpy(&in, props->node_desc, 64);
502 err = mlx5_core_access_reg(&dev->mdev, &in, sizeof(in), &out,
503 sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
504 if (err)
505 return err;
506
507 memcpy(ibdev->node_desc, props->node_desc, 64);
508
509 return err;
510}
511
512static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
513 struct ib_port_modify *props)
514{
515 struct mlx5_ib_dev *dev = to_mdev(ibdev);
516 struct ib_port_attr attr;
517 u32 tmp;
518 int err;
519
520 mutex_lock(&dev->cap_mask_mutex);
521
522 err = mlx5_ib_query_port(ibdev, port, &attr);
523 if (err)
524 goto out;
525
526 tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
527 ~props->clr_port_cap_mask;
528
529 err = mlx5_set_port_caps(&dev->mdev, port, tmp);
530
531out:
532 mutex_unlock(&dev->cap_mask_mutex);
533 return err;
534}
535
536static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
537 struct ib_udata *udata)
538{
539 struct mlx5_ib_dev *dev = to_mdev(ibdev);
540 struct mlx5_ib_alloc_ucontext_req req;
541 struct mlx5_ib_alloc_ucontext_resp resp;
542 struct mlx5_ib_ucontext *context;
543 struct mlx5_uuar_info *uuari;
544 struct mlx5_uar *uars;
545 int num_uars;
546 int uuarn;
547 int err;
548 int i;
549
550 if (!dev->ib_active)
551 return ERR_PTR(-EAGAIN);
552
553 err = ib_copy_from_udata(&req, udata, sizeof(req));
554 if (err)
555 return ERR_PTR(err);
556
557 if (req.total_num_uuars > MLX5_MAX_UUARS)
558 return ERR_PTR(-ENOMEM);
559
560 if (req.total_num_uuars == 0)
561 return ERR_PTR(-EINVAL);
562
563 req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE);
564 if (req.num_low_latency_uuars > req.total_num_uuars - 1)
565 return ERR_PTR(-EINVAL);
566
567 num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE;
568 resp.qp_tab_size = 1 << dev->mdev.caps.log_max_qp;
569 resp.bf_reg_size = dev->mdev.caps.bf_reg_size;
570 resp.cache_line_size = L1_CACHE_BYTES;
571 resp.max_sq_desc_sz = dev->mdev.caps.max_sq_desc_sz;
572 resp.max_rq_desc_sz = dev->mdev.caps.max_rq_desc_sz;
573 resp.max_send_wqebb = dev->mdev.caps.max_wqes;
574 resp.max_recv_wr = dev->mdev.caps.max_wqes;
575 resp.max_srq_recv_wr = dev->mdev.caps.max_srq_wqes;
576
577 context = kzalloc(sizeof(*context), GFP_KERNEL);
578 if (!context)
579 return ERR_PTR(-ENOMEM);
580
581 uuari = &context->uuari;
582 mutex_init(&uuari->lock);
583 uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
584 if (!uars) {
585 err = -ENOMEM;
586 goto out_ctx;
587 }
588
589 uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars),
590 sizeof(*uuari->bitmap),
591 GFP_KERNEL);
592 if (!uuari->bitmap) {
593 err = -ENOMEM;
594 goto out_uar_ctx;
595 }
596 /*
597 * clear all fast path uuars
598 */
599 for (i = 0; i < req.total_num_uuars; i++) {
600 uuarn = i & 3;
601 if (uuarn == 2 || uuarn == 3)
602 set_bit(i, uuari->bitmap);
603 }
604
605 uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL);
606 if (!uuari->count) {
607 err = -ENOMEM;
608 goto out_bitmap;
609 }
610
611 for (i = 0; i < num_uars; i++) {
612 err = mlx5_cmd_alloc_uar(&dev->mdev, &uars[i].index);
613 if (err)
614 goto out_count;
615 }
616
617 INIT_LIST_HEAD(&context->db_page_list);
618 mutex_init(&context->db_page_mutex);
619
620 resp.tot_uuars = req.total_num_uuars;
621 resp.num_ports = dev->mdev.caps.num_ports;
622 err = ib_copy_to_udata(udata, &resp, sizeof(resp));
623 if (err)
624 goto out_uars;
625
626 uuari->num_low_latency_uuars = req.num_low_latency_uuars;
627 uuari->uars = uars;
628 uuari->num_uars = num_uars;
629 return &context->ibucontext;
630
631out_uars:
632 for (i--; i >= 0; i--)
633 mlx5_cmd_free_uar(&dev->mdev, uars[i].index);
634out_count:
635 kfree(uuari->count);
636
637out_bitmap:
638 kfree(uuari->bitmap);
639
640out_uar_ctx:
641 kfree(uars);
642
643out_ctx:
644 kfree(context);
645 return ERR_PTR(err);
646}
647
648static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
649{
650 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
651 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
652 struct mlx5_uuar_info *uuari = &context->uuari;
653 int i;
654
655 for (i = 0; i < uuari->num_uars; i++) {
656 if (mlx5_cmd_free_uar(&dev->mdev, uuari->uars[i].index))
657 mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
658 }
659
660 kfree(uuari->count);
661 kfree(uuari->bitmap);
662 kfree(uuari->uars);
663 kfree(context);
664
665 return 0;
666}
667
668static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
669{
670 return (pci_resource_start(dev->mdev.pdev, 0) >> PAGE_SHIFT) + index;
671}
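/*
 * Illustrative example only, with made-up addresses: assuming PAGE_SHIFT == 12
 * and BAR 0 starting at 0xf0000000, uar_index2pfn(dev, 5) would return
 * 0xf0000 + 5 = 0xf0005; each UAR occupies one page of BAR 0, selected by its
 * index and later remapped to user space by mlx5_ib_mmap().
 */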
672
673static int get_command(unsigned long offset)
674{
675 return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
676}
677
678static int get_arg(unsigned long offset)
679{
680 return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
681}
682
683static int get_index(unsigned long offset)
684{
685 return get_arg(offset);
686}
687
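/*
 * Illustrative, hypothetical user-side helper (not part of the uAPI) showing
 * how an offset that get_command()/get_index() can decode would be built:
 * the command sits above MLX5_IB_MMAP_CMD_SHIFT, the argument (a UAR index
 * here) sits in the low bits, and mmap() takes the offset in bytes.
 */
static unsigned long example_uar_mmap_offset(int cmd, int uar_index)
{
	return (((unsigned long)cmd << MLX5_IB_MMAP_CMD_SHIFT) | uar_index) *
	       PAGE_SIZE;
}
/*
 * e.g. cmd = MLX5_IB_MMAP_REGULAR_PAGE, uar_index = 3 gives 3 * PAGE_SIZE,
 * so the kernel sees vm_pgoff = 3, get_command() = 0 and get_index() = 3.
 */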
688static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
689{
690 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
691 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
692 struct mlx5_uuar_info *uuari = &context->uuari;
693 unsigned long command;
694 unsigned long idx;
695 phys_addr_t pfn;
696
697 command = get_command(vma->vm_pgoff);
698 switch (command) {
699 case MLX5_IB_MMAP_REGULAR_PAGE:
700 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
701 return -EINVAL;
702
703		idx = get_index(vma->vm_pgoff);
704		if (idx >= uuari->num_uars)
705			return -EINVAL;
706
707		pfn = uar_index2pfn(dev, uuari->uars[idx].index);
708		mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
709			    (unsigned long long)pfn);
710
711 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
712 if (io_remap_pfn_range(vma, vma->vm_start, pfn,
713 PAGE_SIZE, vma->vm_page_prot))
714 return -EAGAIN;
715
716 mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n",
717 vma->vm_start,
718 (unsigned long long)pfn << PAGE_SHIFT);
719 break;
720
721 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
722 return -ENOSYS;
723
724 default:
725 return -EINVAL;
726 }
727
728 return 0;
729}
730
731static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
732{
733 struct mlx5_create_mkey_mbox_in *in;
734 struct mlx5_mkey_seg *seg;
735 struct mlx5_core_mr mr;
736 int err;
737
738 in = kzalloc(sizeof(*in), GFP_KERNEL);
739 if (!in)
740 return -ENOMEM;
741
742 seg = &in->seg;
743 seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
744 seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
745 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
746 seg->start_addr = 0;
747
748 err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in));
749 if (err) {
750 mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
751 goto err_in;
752 }
753
754 kfree(in);
755 *key = mr.key;
756
757 return 0;
758
759err_in:
760 kfree(in);
761
762 return err;
763}
764
765static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
766{
767 struct mlx5_core_mr mr;
768 int err;
769
770 memset(&mr, 0, sizeof(mr));
771 mr.key = key;
772 err = mlx5_core_destroy_mkey(&dev->mdev, &mr);
773 if (err)
774 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
775}
776
777static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
778 struct ib_ucontext *context,
779 struct ib_udata *udata)
780{
781 struct mlx5_ib_alloc_pd_resp resp;
782 struct mlx5_ib_pd *pd;
783 int err;
784
785 pd = kmalloc(sizeof(*pd), GFP_KERNEL);
786 if (!pd)
787 return ERR_PTR(-ENOMEM);
788
789 err = mlx5_core_alloc_pd(&to_mdev(ibdev)->mdev, &pd->pdn);
790 if (err) {
791 kfree(pd);
792 return ERR_PTR(err);
793 }
794
795 if (context) {
796 resp.pdn = pd->pdn;
797 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
798 mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn);
799 kfree(pd);
800 return ERR_PTR(-EFAULT);
801 }
802 } else {
803 err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
804 if (err) {
805 mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn);
806 kfree(pd);
807 return ERR_PTR(err);
808 }
809 }
810
811 return &pd->ibpd;
812}
813
814static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
815{
816 struct mlx5_ib_dev *mdev = to_mdev(pd->device);
817 struct mlx5_ib_pd *mpd = to_mpd(pd);
818
819 if (!pd->uobject)
820 free_pa_mkey(mdev, mpd->pa_lkey);
821
822 mlx5_core_dealloc_pd(&mdev->mdev, mpd->pdn);
823 kfree(mpd);
824
825 return 0;
826}
827
828static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
829{
830 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
831 int err;
832
833 err = mlx5_core_attach_mcg(&dev->mdev, gid, ibqp->qp_num);
834 if (err)
835 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
836 ibqp->qp_num, gid->raw);
837
838 return err;
839}
840
841static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
842{
843 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
844 int err;
845
846 err = mlx5_core_detach_mcg(&dev->mdev, gid, ibqp->qp_num);
847 if (err)
848 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
849 ibqp->qp_num, gid->raw);
850
851 return err;
852}
853
854static int init_node_data(struct mlx5_ib_dev *dev)
855{
856 struct ib_smp *in_mad = NULL;
857 struct ib_smp *out_mad = NULL;
858 int err = -ENOMEM;
859
860 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
861 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
862 if (!in_mad || !out_mad)
863 goto out;
864
865 init_query_mad(in_mad);
866 in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
867
868 err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
869 if (err)
870 goto out;
871
872 memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
873
874 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
875
876 err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
877 if (err)
878 goto out;
879
880 dev->mdev.rev_id = be32_to_cpup((__be32 *)(out_mad->data + 32));
881 memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
882
883out:
884 kfree(in_mad);
885 kfree(out_mad);
886 return err;
887}
888
889static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
890 char *buf)
891{
892 struct mlx5_ib_dev *dev =
893 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
894
895 return sprintf(buf, "%d\n", dev->mdev.priv.fw_pages);
896}
897
898static ssize_t show_reg_pages(struct device *device,
899 struct device_attribute *attr, char *buf)
900{
901 struct mlx5_ib_dev *dev =
902 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
903
904 return sprintf(buf, "%d\n", dev->mdev.priv.reg_pages);
905}
906
907static ssize_t show_hca(struct device *device, struct device_attribute *attr,
908 char *buf)
909{
910 struct mlx5_ib_dev *dev =
911 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
912 return sprintf(buf, "MT%d\n", dev->mdev.pdev->device);
913}
914
915static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
916 char *buf)
917{
918 struct mlx5_ib_dev *dev =
919 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
920 return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(&dev->mdev),
921 fw_rev_min(&dev->mdev), fw_rev_sub(&dev->mdev));
922}
923
924static ssize_t show_rev(struct device *device, struct device_attribute *attr,
925 char *buf)
926{
927 struct mlx5_ib_dev *dev =
928 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
929 return sprintf(buf, "%x\n", dev->mdev.rev_id);
930}
931
932static ssize_t show_board(struct device *device, struct device_attribute *attr,
933 char *buf)
934{
935 struct mlx5_ib_dev *dev =
936 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
937 return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
938 dev->mdev.board_id);
939}
940
941static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
942static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
943static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
944static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
945static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
946static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
947
948static struct device_attribute *mlx5_class_attributes[] = {
949 &dev_attr_hw_rev,
950 &dev_attr_fw_ver,
951 &dev_attr_hca_type,
952 &dev_attr_board_id,
953 &dev_attr_fw_pages,
954 &dev_attr_reg_pages,
955};
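/*
 * Once ib_register_device() has run, these attributes appear under the IB
 * device's sysfs directory; on a typical system (illustrative path/output):
 *   $ cat /sys/class/infiniband/mlx5_0/hca_type
 *   MT4113
 *   $ cat /sys/class/infiniband/mlx5_0/fw_ver      # prints <maj>.<min>.<sub>
 */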
956
957static void mlx5_ib_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
958 void *data)
959{
960 struct mlx5_ib_dev *ibdev = container_of(dev, struct mlx5_ib_dev, mdev);
961 struct ib_event ibev;
962 u8 port = 0;
963
964 switch (event) {
965 case MLX5_DEV_EVENT_SYS_ERROR:
966 ibdev->ib_active = false;
967 ibev.event = IB_EVENT_DEVICE_FATAL;
968 break;
969
970 case MLX5_DEV_EVENT_PORT_UP:
971 ibev.event = IB_EVENT_PORT_ACTIVE;
972 port = *(u8 *)data;
973 break;
974
975 case MLX5_DEV_EVENT_PORT_DOWN:
976 ibev.event = IB_EVENT_PORT_ERR;
977 port = *(u8 *)data;
978 break;
979
980 case MLX5_DEV_EVENT_PORT_INITIALIZED:
981 /* not used by ULPs */
982 return;
983
984 case MLX5_DEV_EVENT_LID_CHANGE:
985 ibev.event = IB_EVENT_LID_CHANGE;
986 port = *(u8 *)data;
987 break;
988
989 case MLX5_DEV_EVENT_PKEY_CHANGE:
990 ibev.event = IB_EVENT_PKEY_CHANGE;
991 port = *(u8 *)data;
992 break;
993
994 case MLX5_DEV_EVENT_GUID_CHANGE:
995 ibev.event = IB_EVENT_GID_CHANGE;
996 port = *(u8 *)data;
997 break;
998
999 case MLX5_DEV_EVENT_CLIENT_REREG:
1000 ibev.event = IB_EVENT_CLIENT_REREGISTER;
1001 port = *(u8 *)data;
1002 break;
1003 }
1004
1005 ibev.device = &ibdev->ib_dev;
1006 ibev.element.port_num = port;
1007
1008 if (ibdev->ib_active)
1009 ib_dispatch_event(&ibev);
1010}
1011
1012static void get_ext_port_caps(struct mlx5_ib_dev *dev)
1013{
1014 int port;
1015
1016 for (port = 1; port <= dev->mdev.caps.num_ports; port++)
1017 mlx5_query_ext_port_caps(dev, port);
1018}
1019
1020static int get_port_caps(struct mlx5_ib_dev *dev)
1021{
1022 struct ib_device_attr *dprops = NULL;
1023 struct ib_port_attr *pprops = NULL;
1024 int err = 0;
1025 int port;
1026
1027 pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
1028 if (!pprops)
1029 goto out;
1030
1031 dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
1032 if (!dprops)
1033 goto out;
1034
1035 err = mlx5_ib_query_device(&dev->ib_dev, dprops);
1036 if (err) {
1037 mlx5_ib_warn(dev, "query_device failed %d\n", err);
1038 goto out;
1039 }
1040
1041 for (port = 1; port <= dev->mdev.caps.num_ports; port++) {
1042 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
1043 if (err) {
1044 mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err);
1045 break;
1046 }
1047 dev->mdev.caps.port[port - 1].pkey_table_len = dprops->max_pkeys;
1048 dev->mdev.caps.port[port - 1].gid_table_len = pprops->gid_tbl_len;
1049 mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
1050 dprops->max_pkeys, pprops->gid_tbl_len);
1051 }
1052
1053out:
1054 kfree(pprops);
1055 kfree(dprops);
1056
1057 return err;
1058}
1059
1060static void destroy_umrc_res(struct mlx5_ib_dev *dev)
1061{
1062 int err;
1063
1064 err = mlx5_mr_cache_cleanup(dev);
1065 if (err)
1066 mlx5_ib_warn(dev, "mr cache cleanup failed\n");
1067
1068 mlx5_ib_destroy_qp(dev->umrc.qp);
1069 ib_destroy_cq(dev->umrc.cq);
1070 ib_dereg_mr(dev->umrc.mr);
1071 ib_dealloc_pd(dev->umrc.pd);
1072}
1073
1074enum {
1075 MAX_UMR_WR = 128,
1076};
1077
1078static int create_umr_res(struct mlx5_ib_dev *dev)
1079{
1080 struct ib_qp_init_attr *init_attr = NULL;
1081 struct ib_qp_attr *attr = NULL;
1082 struct ib_pd *pd;
1083 struct ib_cq *cq;
1084 struct ib_qp *qp;
1085 struct ib_mr *mr;
1086 int ret;
1087
1088 attr = kzalloc(sizeof(*attr), GFP_KERNEL);
1089 init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
1090 if (!attr || !init_attr) {
1091 ret = -ENOMEM;
1092 goto error_0;
1093 }
1094
1095 pd = ib_alloc_pd(&dev->ib_dev);
1096 if (IS_ERR(pd)) {
1097 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
1098 ret = PTR_ERR(pd);
1099 goto error_0;
1100 }
1101
1102 mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
1103 if (IS_ERR(mr)) {
1104 mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
1105 ret = PTR_ERR(mr);
1106 goto error_1;
1107 }
1108
1109 cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, 128,
1110 0);
1111 if (IS_ERR(cq)) {
1112 mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
1113 ret = PTR_ERR(cq);
1114 goto error_2;
1115 }
1116 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1117
1118 init_attr->send_cq = cq;
1119 init_attr->recv_cq = cq;
1120 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
1121 init_attr->cap.max_send_wr = MAX_UMR_WR;
1122 init_attr->cap.max_send_sge = 1;
1123 init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
1124 init_attr->port_num = 1;
1125 qp = mlx5_ib_create_qp(pd, init_attr, NULL);
1126 if (IS_ERR(qp)) {
1127 mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
1128 ret = PTR_ERR(qp);
1129 goto error_3;
1130 }
1131 qp->device = &dev->ib_dev;
1132 qp->real_qp = qp;
1133 qp->uobject = NULL;
1134 qp->qp_type = MLX5_IB_QPT_REG_UMR;
1135
1136 attr->qp_state = IB_QPS_INIT;
1137 attr->port_num = 1;
1138 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
1139 IB_QP_PORT, NULL);
1140 if (ret) {
1141 mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
1142 goto error_4;
1143 }
1144
1145 memset(attr, 0, sizeof(*attr));
1146 attr->qp_state = IB_QPS_RTR;
1147 attr->path_mtu = IB_MTU_256;
1148
1149 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
1150 if (ret) {
1151 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
1152 goto error_4;
1153 }
1154
1155 memset(attr, 0, sizeof(*attr));
1156 attr->qp_state = IB_QPS_RTS;
1157 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
1158 if (ret) {
1159 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
1160 goto error_4;
1161 }
1162
1163 dev->umrc.qp = qp;
1164 dev->umrc.cq = cq;
1165 dev->umrc.mr = mr;
1166 dev->umrc.pd = pd;
1167
1168 sema_init(&dev->umrc.sem, MAX_UMR_WR);
1169 ret = mlx5_mr_cache_init(dev);
1170 if (ret) {
1171 mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
1172 goto error_4;
1173 }
1174
1175 kfree(attr);
1176 kfree(init_attr);
1177
1178 return 0;
1179
1180error_4:
1181 mlx5_ib_destroy_qp(qp);
1182
1183error_3:
1184 ib_destroy_cq(cq);
1185
1186error_2:
1187 ib_dereg_mr(mr);
1188
1189error_1:
1190 ib_dealloc_pd(pd);
1191
1192error_0:
1193 kfree(attr);
1194 kfree(init_attr);
1195 return ret;
1196}
1197
1198static int create_dev_resources(struct mlx5_ib_resources *devr)
1199{
1200 struct ib_srq_init_attr attr;
1201 struct mlx5_ib_dev *dev;
1202 int ret = 0;
1203
1204 dev = container_of(devr, struct mlx5_ib_dev, devr);
1205
1206 devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
1207 if (IS_ERR(devr->p0)) {
1208 ret = PTR_ERR(devr->p0);
1209 goto error0;
1210 }
1211 devr->p0->device = &dev->ib_dev;
1212 devr->p0->uobject = NULL;
1213 atomic_set(&devr->p0->usecnt, 0);
1214
1215 devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL);
1216 if (IS_ERR(devr->c0)) {
1217 ret = PTR_ERR(devr->c0);
1218 goto error1;
1219 }
1220 devr->c0->device = &dev->ib_dev;
1221 devr->c0->uobject = NULL;
1222 devr->c0->comp_handler = NULL;
1223 devr->c0->event_handler = NULL;
1224 devr->c0->cq_context = NULL;
1225 atomic_set(&devr->c0->usecnt, 0);
1226
1227 devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1228 if (IS_ERR(devr->x0)) {
1229 ret = PTR_ERR(devr->x0);
1230 goto error2;
1231 }
1232 devr->x0->device = &dev->ib_dev;
1233 devr->x0->inode = NULL;
1234 atomic_set(&devr->x0->usecnt, 0);
1235 mutex_init(&devr->x0->tgt_qp_mutex);
1236 INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
1237
1238 devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1239 if (IS_ERR(devr->x1)) {
1240 ret = PTR_ERR(devr->x1);
1241 goto error3;
1242 }
1243 devr->x1->device = &dev->ib_dev;
1244 devr->x1->inode = NULL;
1245 atomic_set(&devr->x1->usecnt, 0);
1246 mutex_init(&devr->x1->tgt_qp_mutex);
1247 INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
1248
1249 memset(&attr, 0, sizeof(attr));
1250 attr.attr.max_sge = 1;
1251 attr.attr.max_wr = 1;
1252 attr.srq_type = IB_SRQT_XRC;
1253 attr.ext.xrc.cq = devr->c0;
1254 attr.ext.xrc.xrcd = devr->x0;
1255
1256 devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1257 if (IS_ERR(devr->s0)) {
1258 ret = PTR_ERR(devr->s0);
1259 goto error4;
1260 }
1261 devr->s0->device = &dev->ib_dev;
1262 devr->s0->pd = devr->p0;
1263 devr->s0->uobject = NULL;
1264 devr->s0->event_handler = NULL;
1265 devr->s0->srq_context = NULL;
1266 devr->s0->srq_type = IB_SRQT_XRC;
1267 devr->s0->ext.xrc.xrcd = devr->x0;
1268 devr->s0->ext.xrc.cq = devr->c0;
1269 atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
1270 atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
1271 atomic_inc(&devr->p0->usecnt);
1272 atomic_set(&devr->s0->usecnt, 0);
1273
1274 return 0;
1275
1276error4:
1277 mlx5_ib_dealloc_xrcd(devr->x1);
1278error3:
1279 mlx5_ib_dealloc_xrcd(devr->x0);
1280error2:
1281 mlx5_ib_destroy_cq(devr->c0);
1282error1:
1283 mlx5_ib_dealloc_pd(devr->p0);
1284error0:
1285 return ret;
1286}
1287
1288static void destroy_dev_resources(struct mlx5_ib_resources *devr)
1289{
1290 mlx5_ib_destroy_srq(devr->s0);
1291 mlx5_ib_dealloc_xrcd(devr->x0);
1292 mlx5_ib_dealloc_xrcd(devr->x1);
1293 mlx5_ib_destroy_cq(devr->c0);
1294 mlx5_ib_dealloc_pd(devr->p0);
1295}
1296
1297static int init_one(struct pci_dev *pdev,
1298 const struct pci_device_id *id)
1299{
1300 struct mlx5_core_dev *mdev;
1301 struct mlx5_ib_dev *dev;
1302 int err;
1303 int i;
1304
1305 printk_once(KERN_INFO "%s", mlx5_version);
1306
1307 dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
1308 if (!dev)
1309 return -ENOMEM;
1310
1311 mdev = &dev->mdev;
1312 mdev->event = mlx5_ib_event;
1313 if (prof_sel >= ARRAY_SIZE(profile)) {
1314		pr_warn("selected profile out of range, selecting default\n");
1315 prof_sel = 0;
1316 }
1317 mdev->profile = &profile[prof_sel];
1318 err = mlx5_dev_init(mdev, pdev);
1319 if (err)
1320 goto err_free;
1321
1322 err = get_port_caps(dev);
1323 if (err)
1324 goto err_cleanup;
1325
1326 get_ext_port_caps(dev);
1327
1328 err = alloc_comp_eqs(dev);
1329 if (err)
1330 goto err_cleanup;
1331
1332 MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
1333
1334 strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
1335 dev->ib_dev.owner = THIS_MODULE;
1336 dev->ib_dev.node_type = RDMA_NODE_IB_CA;
1337 dev->ib_dev.local_dma_lkey = mdev->caps.reserved_lkey;
1338 dev->num_ports = mdev->caps.num_ports;
1339 dev->ib_dev.phys_port_cnt = dev->num_ports;
1340 dev->ib_dev.num_comp_vectors = dev->num_comp_vectors;
1341 dev->ib_dev.dma_device = &mdev->pdev->dev;
1342
1343 dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
1344 dev->ib_dev.uverbs_cmd_mask =
1345 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
1346 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
1347 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
1348 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
1349 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
1350 (1ull << IB_USER_VERBS_CMD_REG_MR) |
1351 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
1352 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
1353 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
1354 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
1355 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
1356 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
1357 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
1358 (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
1359 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
1360 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
1361 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
1362 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
1363 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
1364 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
1365 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
1366 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
1367 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
1368
1369 dev->ib_dev.query_device = mlx5_ib_query_device;
1370 dev->ib_dev.query_port = mlx5_ib_query_port;
1371 dev->ib_dev.query_gid = mlx5_ib_query_gid;
1372 dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
1373 dev->ib_dev.modify_device = mlx5_ib_modify_device;
1374 dev->ib_dev.modify_port = mlx5_ib_modify_port;
1375 dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
1376 dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
1377 dev->ib_dev.mmap = mlx5_ib_mmap;
1378 dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
1379 dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
1380 dev->ib_dev.create_ah = mlx5_ib_create_ah;
1381 dev->ib_dev.query_ah = mlx5_ib_query_ah;
1382 dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
1383 dev->ib_dev.create_srq = mlx5_ib_create_srq;
1384 dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
1385 dev->ib_dev.query_srq = mlx5_ib_query_srq;
1386 dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
1387 dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
1388 dev->ib_dev.create_qp = mlx5_ib_create_qp;
1389 dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
1390 dev->ib_dev.query_qp = mlx5_ib_query_qp;
1391 dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
1392 dev->ib_dev.post_send = mlx5_ib_post_send;
1393 dev->ib_dev.post_recv = mlx5_ib_post_recv;
1394 dev->ib_dev.create_cq = mlx5_ib_create_cq;
1395 dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
1396 dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
1397 dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
1398 dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
1399 dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
1400 dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
1401 dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
1402 dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
1403 dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
1404 dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
1405 dev->ib_dev.process_mad = mlx5_ib_process_mad;
1406 dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr;
1407 dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
1408 dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
1409
1410 if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {
1411 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
1412 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
1413 dev->ib_dev.uverbs_cmd_mask |=
1414 (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
1415 (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
1416 }
1417
1418 err = init_node_data(dev);
1419 if (err)
1420 goto err_eqs;
1421
1422 mutex_init(&dev->cap_mask_mutex);
1423 spin_lock_init(&dev->mr_lock);
1424
1425 err = create_dev_resources(&dev->devr);
1426 if (err)
1427 goto err_eqs;
1428
1429 if (ib_register_device(&dev->ib_dev, NULL))
1430 goto err_rsrc;
1431
1432 err = create_umr_res(dev);
1433 if (err)
1434 goto err_dev;
1435
1436 for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
1437 if (device_create_file(&dev->ib_dev.dev,
1438 mlx5_class_attributes[i]))
1439 goto err_umrc;
1440 }
1441
1442 dev->ib_active = true;
1443
1444 return 0;
1445
1446err_umrc:
1447 destroy_umrc_res(dev);
1448
1449err_dev:
1450 ib_unregister_device(&dev->ib_dev);
1451
1452err_rsrc:
1453 destroy_dev_resources(&dev->devr);
1454
1455err_eqs:
1456 free_comp_eqs(dev);
1457
1458err_cleanup:
1459 mlx5_dev_cleanup(mdev);
1460
1461err_free:
1462 ib_dealloc_device((struct ib_device *)dev);
1463
1464 return err;
1465}
1466
1467static void remove_one(struct pci_dev *pdev)
1468{
1469 struct mlx5_ib_dev *dev = mlx5_pci2ibdev(pdev);
1470
1471 destroy_umrc_res(dev);
1472 ib_unregister_device(&dev->ib_dev);
1473 destroy_dev_resources(&dev->devr);
1474 free_comp_eqs(dev);
1475 mlx5_dev_cleanup(&dev->mdev);
1476 ib_dealloc_device(&dev->ib_dev);
1477}
1478
1479static DEFINE_PCI_DEVICE_TABLE(mlx5_ib_pci_table) = {
1480 { PCI_VDEVICE(MELLANOX, 4113) }, /* MT4113 Connect-IB */
1481 { 0, }
1482};
1483
1484MODULE_DEVICE_TABLE(pci, mlx5_ib_pci_table);
1485
1486static struct pci_driver mlx5_ib_driver = {
1487 .name = DRIVER_NAME,
1488 .id_table = mlx5_ib_pci_table,
1489 .probe = init_one,
1490 .remove = remove_one
1491};
1492
1493static int __init mlx5_ib_init(void)
1494{
1495 return pci_register_driver(&mlx5_ib_driver);
1496}
1497
1498static void __exit mlx5_ib_cleanup(void)
1499{
1500 pci_unregister_driver(&mlx5_ib_driver);
1501}
1502
1503module_init(mlx5_ib_init);
1504module_exit(mlx5_ib_cleanup);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
new file mode 100644
index 000000000000..3a5322870b96
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -0,0 +1,162 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/module.h>
34#include <rdma/ib_umem.h>
35#include "mlx5_ib.h"
36
37/* @umem: umem object to scan
38 * @addr: ib virtual address requested by the user
39 * @count: number of PAGE_SIZE pages covered by umem
40 * @shift: page shift for the compound pages found in the region
41 * @ncont: number of compound pages
42 * @order: log2 of the number of compound pages
43 */
44void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
45 int *ncont, int *order)
46{
47 struct ib_umem_chunk *chunk;
48 unsigned long tmp;
49 unsigned long m;
50 int i, j, k;
51 u64 base = 0;
52 int p = 0;
53 int skip;
54 int mask;
55 u64 len;
56 u64 pfn;
57
58 addr = addr >> PAGE_SHIFT;
59 tmp = (unsigned long)addr;
60	m = find_first_bit(&tmp, BITS_PER_LONG);
61 skip = 1 << m;
62 mask = skip - 1;
63 i = 0;
64 list_for_each_entry(chunk, &umem->chunk_list, list)
65 for (j = 0; j < chunk->nmap; j++) {
66 len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
67 pfn = sg_dma_address(&chunk->page_list[j]) >> PAGE_SHIFT;
68 for (k = 0; k < len; k++) {
69 if (!(i & mask)) {
70 tmp = (unsigned long)pfn;
71				m = min(m, find_first_bit(&tmp, BITS_PER_LONG));
72 skip = 1 << m;
73 mask = skip - 1;
74 base = pfn;
75 p = 0;
76 } else {
77 if (base + p != pfn) {
78 tmp = (unsigned long)p;
79					m = find_first_bit(&tmp, BITS_PER_LONG);
80 skip = 1 << m;
81 mask = skip - 1;
82 base = pfn;
83 p = 0;
84 }
85 }
86 p++;
87 i++;
88 }
89 }
90
91 if (i) {
92 m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
93
94 if (order)
95 *order = ilog2(roundup_pow_of_two(i) >> m);
96
97 *ncont = DIV_ROUND_UP(i, (1 << m));
98 } else {
99 m = 0;
100
101 if (order)
102 *order = 0;
103
104 *ncont = 0;
105 }
106 *shift = PAGE_SHIFT + m;
107 *count = i;
108}
109
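/*
 * Worked examples for mlx5_ib_cont_pages(), assuming PAGE_SHIFT == 12
 * (numbers are illustrative only):
 *
 * 1) 16 pages whose user VA and DMA addresses are both 64 KiB aligned and
 *    physically contiguous:
 *      *count = 16, m = 4  ->  *shift = 16, *ncont = 1, *order = 0
 *    (the whole region collapses into a single 64 KiB compound page)
 *
 * 2) the same 16 pages but only 8 KiB aligned:
 *      *count = 16, m = 1  ->  *shift = 13, *ncont = 8, *order = 3
 *    (eight 8 KiB blocks are needed to cover the region)
 */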
110void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
111 int page_shift, __be64 *pas, int umr)
112{
113 int shift = page_shift - PAGE_SHIFT;
114 int mask = (1 << shift) - 1;
115 struct ib_umem_chunk *chunk;
116 int i, j, k;
117 u64 cur = 0;
118 u64 base;
119 int len;
120
121 i = 0;
122 list_for_each_entry(chunk, &umem->chunk_list, list)
123 for (j = 0; j < chunk->nmap; j++) {
124 len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
125 base = sg_dma_address(&chunk->page_list[j]);
126 for (k = 0; k < len; k++) {
127 if (!(i & mask)) {
128 cur = base + (k << PAGE_SHIFT);
129 if (umr)
130 cur |= 3;
131
132 pas[i >> shift] = cpu_to_be64(cur);
133 mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
134 i >> shift, be64_to_cpu(pas[i >> shift]));
135 } else
136 mlx5_ib_dbg(dev, "=====> 0x%llx\n",
137 base + (k << PAGE_SHIFT));
138 i++;
139 }
140 }
141}
142
143int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
144{
145 u64 page_size;
146 u64 page_mask;
147 u64 off_size;
148 u64 off_mask;
149 u64 buf_off;
150
151 page_size = 1 << page_shift;
152 page_mask = page_size - 1;
153 buf_off = addr & page_mask;
154 off_size = page_size >> 6;
155 off_mask = off_size - 1;
156
157 if (buf_off & off_mask)
158 return -EINVAL;
159
160 *offset = buf_off >> ilog2(off_size);
161 return 0;
162}
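/*
 * Numeric sketch of mlx5_ib_get_buf_offset() with page_shift = 16
 * (illustrative values): page_size = 64 KiB, so off_size = page_size >> 6
 * = 1 KiB. A buffer offset of 0x2400 within its page is a multiple of 1 KiB,
 * so *offset = 0x2400 >> 10 = 9 and 0 is returned; an offset of 0x2420 is
 * not, so -EINVAL is returned.
 */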
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
new file mode 100644
index 000000000000..836be9157242
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -0,0 +1,545 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX5_IB_H
34#define MLX5_IB_H
35
36#include <linux/kernel.h>
37#include <linux/sched.h>
38#include <rdma/ib_verbs.h>
39#include <rdma/ib_smi.h>
40#include <linux/mlx5/driver.h>
41#include <linux/mlx5/cq.h>
42#include <linux/mlx5/qp.h>
43#include <linux/mlx5/srq.h>
44#include <linux/types.h>
45
46#define mlx5_ib_dbg(dev, format, arg...) \
47pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
48 __LINE__, current->pid, ##arg)
49
50#define mlx5_ib_err(dev, format, arg...) \
51pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
52 __LINE__, current->pid, ##arg)
53
54#define mlx5_ib_warn(dev, format, arg...) \
55pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
56 __LINE__, current->pid, ##arg)
57
58enum {
59 MLX5_IB_MMAP_CMD_SHIFT = 8,
60 MLX5_IB_MMAP_CMD_MASK = 0xff,
61};
62
63enum mlx5_ib_mmap_cmd {
64 MLX5_IB_MMAP_REGULAR_PAGE = 0,
65 MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, /* always last */
66};
67
68enum {
69 MLX5_RES_SCAT_DATA32_CQE = 0x1,
70 MLX5_RES_SCAT_DATA64_CQE = 0x2,
71 MLX5_REQ_SCAT_DATA32_CQE = 0x11,
72 MLX5_REQ_SCAT_DATA64_CQE = 0x22,
73};
74
75enum mlx5_ib_latency_class {
76 MLX5_IB_LATENCY_CLASS_LOW,
77 MLX5_IB_LATENCY_CLASS_MEDIUM,
78 MLX5_IB_LATENCY_CLASS_HIGH,
79 MLX5_IB_LATENCY_CLASS_FAST_PATH
80};
81
82enum mlx5_ib_mad_ifc_flags {
83 MLX5_MAD_IFC_IGNORE_MKEY = 1,
84 MLX5_MAD_IFC_IGNORE_BKEY = 2,
85 MLX5_MAD_IFC_NET_VIEW = 4,
86};
87
88struct mlx5_ib_ucontext {
89 struct ib_ucontext ibucontext;
90 struct list_head db_page_list;
91
92 /* protect doorbell record alloc/free
93 */
94 struct mutex db_page_mutex;
95 struct mlx5_uuar_info uuari;
96};
97
98static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
99{
100 return container_of(ibucontext, struct mlx5_ib_ucontext, ibucontext);
101}
102
103struct mlx5_ib_pd {
104 struct ib_pd ibpd;
105 u32 pdn;
106 u32 pa_lkey;
107};
108
109/* Use macros here so that we don't have to duplicate
110 * enum ib_send_flags and enum ib_qp_type for low-level driver
111 */
112
113#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START
114#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1
115#define MLX5_IB_WR_UMR IB_WR_RESERVED1
116
117struct wr_list {
118 u16 opcode;
119 u16 next;
120};
121
122struct mlx5_ib_wq {
123 u64 *wrid;
124 u32 *wr_data;
125 struct wr_list *w_list;
126 unsigned *wqe_head;
127 u16 unsig_count;
128
129 /* serialize post to the work queue
130 */
131 spinlock_t lock;
132 int wqe_cnt;
133 int max_post;
134 int max_gs;
135 int offset;
136 int wqe_shift;
137 unsigned head;
138 unsigned tail;
139 u16 cur_post;
140 u16 last_poll;
141 void *qend;
142};
143
144enum {
145 MLX5_QP_USER,
146 MLX5_QP_KERNEL,
147 MLX5_QP_EMPTY
148};
149
150struct mlx5_ib_qp {
151 struct ib_qp ibqp;
152 struct mlx5_core_qp mqp;
153 struct mlx5_buf buf;
154
155 struct mlx5_db db;
156 struct mlx5_ib_wq rq;
157
158 u32 doorbell_qpn;
159 u8 sq_signal_bits;
160 u8 fm_cache;
161 int sq_max_wqes_per_wr;
162 int sq_spare_wqes;
163 struct mlx5_ib_wq sq;
164
165 struct ib_umem *umem;
166 int buf_size;
167
168 /* serialize qp state modifications
169 */
170 struct mutex mutex;
171 u16 xrcdn;
172 u32 flags;
173 u8 port;
174 u8 alt_port;
175 u8 atomic_rd_en;
176 u8 resp_depth;
177 u8 state;
178 int mlx_type;
179 int wq_sig;
180 int scat_cqe;
181 int max_inline_data;
182 struct mlx5_bf *bf;
183 int has_rq;
184
185 /* only for user space QPs. For kernel
186 * we have it from the bf object
187 */
188 int uuarn;
189
190 int create_type;
191 u32 pa_lkey;
192};
193
194struct mlx5_ib_cq_buf {
195 struct mlx5_buf buf;
196 struct ib_umem *umem;
197 int cqe_size;
198};
199
200enum mlx5_ib_qp_flags {
201 MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0,
202 MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1,
203};
204
205struct mlx5_shared_mr_info {
206 int mr_id;
207 struct ib_umem *umem;
208};
209
210struct mlx5_ib_cq {
211 struct ib_cq ibcq;
212 struct mlx5_core_cq mcq;
213 struct mlx5_ib_cq_buf buf;
214 struct mlx5_db db;
215
216 /* serialize access to the CQ
217 */
218 spinlock_t lock;
219
220 /* protect resize cq
221 */
222 struct mutex resize_mutex;
223 struct mlx5_ib_cq_resize *resize_buf;
224 struct ib_umem *resize_umem;
225 int cqe_size;
226};
227
228struct mlx5_ib_srq {
229 struct ib_srq ibsrq;
230 struct mlx5_core_srq msrq;
231 struct mlx5_buf buf;
232 struct mlx5_db db;
233 u64 *wrid;
234	/* protect SRQ handling
235 */
236 spinlock_t lock;
237 int head;
238 int tail;
239 u16 wqe_ctr;
240 struct ib_umem *umem;
241 /* serialize arming a SRQ
242 */
243 struct mutex mutex;
244 int wq_sig;
245};
246
247struct mlx5_ib_xrcd {
248 struct ib_xrcd ibxrcd;
249 u32 xrcdn;
250};
251
252struct mlx5_ib_mr {
253 struct ib_mr ibmr;
254 struct mlx5_core_mr mmr;
255 struct ib_umem *umem;
256 struct mlx5_shared_mr_info *smr_info;
257 struct list_head list;
258 int order;
259 int umred;
260 __be64 *pas;
261 dma_addr_t dma;
262 int npages;
263 struct completion done;
264 enum ib_wc_status status;
265};
266
267struct mlx5_ib_fast_reg_page_list {
268 struct ib_fast_reg_page_list ibfrpl;
269 __be64 *mapped_page_list;
270 dma_addr_t map;
271};
272
273struct umr_common {
274 struct ib_pd *pd;
275 struct ib_cq *cq;
276 struct ib_qp *qp;
277 struct ib_mr *mr;
278 /* control access to UMR QP
279 */
280 struct semaphore sem;
281};
282
283enum {
284 MLX5_FMR_INVALID,
285 MLX5_FMR_VALID,
286 MLX5_FMR_BUSY,
287};
288
289struct mlx5_ib_fmr {
290 struct ib_fmr ibfmr;
291 struct mlx5_core_mr mr;
292 int access_flags;
293 int state;
294 /* protect fmr state
295 */
296 spinlock_t lock;
297 u64 wrid;
298 struct ib_send_wr wr[2];
299 u8 page_shift;
300 struct ib_fast_reg_page_list page_list;
301};
302
303struct mlx5_cache_ent {
304 struct list_head head;
305	/* sync access to the cache entry
306 */
307 spinlock_t lock;
308
309
310 struct dentry *dir;
311 char name[4];
312 u32 order;
313 u32 size;
314 u32 cur;
315 u32 miss;
316 u32 limit;
317
318 struct dentry *fsize;
319 struct dentry *fcur;
320 struct dentry *fmiss;
321 struct dentry *flimit;
322
323 struct mlx5_ib_dev *dev;
324 struct work_struct work;
325 struct delayed_work dwork;
326};
327
328struct mlx5_mr_cache {
329 struct workqueue_struct *wq;
330 struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES];
331 int stopped;
332 struct dentry *root;
333 unsigned long last_add;
334};
335
336struct mlx5_ib_resources {
337 struct ib_cq *c0;
338 struct ib_xrcd *x0;
339 struct ib_xrcd *x1;
340 struct ib_pd *p0;
341 struct ib_srq *s0;
342};
343
344struct mlx5_ib_dev {
345 struct ib_device ib_dev;
346 struct mlx5_core_dev mdev;
347 MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
348 struct list_head eqs_list;
349 int num_ports;
350 int num_comp_vectors;
351 /* serialize update of capability mask
352 */
353 struct mutex cap_mask_mutex;
354 bool ib_active;
355 struct umr_common umrc;
356 /* sync used page count stats
357 */
358 spinlock_t mr_lock;
359 struct mlx5_ib_resources devr;
360 struct mlx5_mr_cache cache;
361};
362
363static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
364{
365 return container_of(mcq, struct mlx5_ib_cq, mcq);
366}
367
368static inline struct mlx5_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd)
369{
370 return container_of(ibxrcd, struct mlx5_ib_xrcd, ibxrcd);
371}
372
373static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev)
374{
375 return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
376}
377
378static inline struct mlx5_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
379{
380 return container_of(ibfmr, struct mlx5_ib_fmr, ibfmr);
381}
382
383static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
384{
385 return container_of(ibcq, struct mlx5_ib_cq, ibcq);
386}
387
388static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
389{
390 return container_of(mqp, struct mlx5_ib_qp, mqp);
391}
392
393static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
394{
395 return container_of(ibpd, struct mlx5_ib_pd, ibpd);
396}
397
398static inline struct mlx5_ib_srq *to_msrq(struct ib_srq *ibsrq)
399{
400 return container_of(ibsrq, struct mlx5_ib_srq, ibsrq);
401}
402
403static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp)
404{
405 return container_of(ibqp, struct mlx5_ib_qp, ibqp);
406}
407
408static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq)
409{
410 return container_of(msrq, struct mlx5_ib_srq, msrq);
411}
412
413static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
414{
415 return container_of(ibmr, struct mlx5_ib_mr, ibmr);
416}
417
418static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
419{
420 return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl);
421}
422
423struct mlx5_ib_ah {
424 struct ib_ah ibah;
425 struct mlx5_av av;
426};
427
428static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah)
429{
430 return container_of(ibah, struct mlx5_ib_ah, ibah);
431}
432
433static inline struct mlx5_ib_dev *mlx5_core2ibdev(struct mlx5_core_dev *dev)
434{
435 return container_of(dev, struct mlx5_ib_dev, mdev);
436}
437
438static inline struct mlx5_ib_dev *mlx5_pci2ibdev(struct pci_dev *pdev)
439{
440 return mlx5_core2ibdev(pci2mlx5_core_dev(pdev));
441}
442
443int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
444 struct mlx5_db *db);
445void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
446void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
447void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
448void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
449int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
450 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
451 void *in_mad, void *response_mad);
452struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
453 struct mlx5_ib_ah *ah);
454struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
455int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
456int mlx5_ib_destroy_ah(struct ib_ah *ah);
457struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
458 struct ib_srq_init_attr *init_attr,
459 struct ib_udata *udata);
460int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
461 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
462int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
463int mlx5_ib_destroy_srq(struct ib_srq *srq);
464int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
465 struct ib_recv_wr **bad_wr);
466struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
467 struct ib_qp_init_attr *init_attr,
468 struct ib_udata *udata);
469int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
470 int attr_mask, struct ib_udata *udata);
471int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
472 struct ib_qp_init_attr *qp_init_attr);
473int mlx5_ib_destroy_qp(struct ib_qp *qp);
474int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
475 struct ib_send_wr **bad_wr);
476int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
477 struct ib_recv_wr **bad_wr);
478void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
479struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
480 int vector, struct ib_ucontext *context,
481 struct ib_udata *udata);
482int mlx5_ib_destroy_cq(struct ib_cq *cq);
483int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
484int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
485int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
486int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
487struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
488struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
489 u64 virt_addr, int access_flags,
490 struct ib_udata *udata);
491int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
492struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
493 int max_page_list_len);
494struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
495 int page_list_len);
496void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
497struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc,
498 struct ib_fmr_attr *fmr_attr);
499int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
500 int npages, u64 iova);
501int mlx5_ib_unmap_fmr(struct list_head *fmr_list);
502int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr);
503int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
504 struct ib_wc *in_wc, struct ib_grh *in_grh,
505 struct ib_mad *in_mad, struct ib_mad *out_mad);
506struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
507 struct ib_ucontext *context,
508 struct ib_udata *udata);
509int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd);
510int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn);
511int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
512int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
513int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
514 struct ib_port_attr *props);
515int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
516void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
517void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
518 int *ncont, int *order);
519void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
520 int page_shift, __be64 *pas, int umr);
521void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
522int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
523int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
524int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
525int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
526void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
527
528static inline void init_query_mad(struct ib_smp *mad)
529{
530 mad->base_version = 1;
531 mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
532 mad->class_version = 1;
533 mad->method = IB_MGMT_METHOD_GET;
534}
535
536static inline u8 convert_access(int acc)
537{
538 return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
539 (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
540 (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
541 (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
542 MLX5_PERM_LOCAL_READ;
543}
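/*
 * e.g. convert_access(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ) yields
 * MLX5_PERM_LOCAL_WRITE | MLX5_PERM_REMOTE_READ | MLX5_PERM_LOCAL_READ;
 * local read permission is always granted.
 */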
544
545#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
new file mode 100644
index 000000000000..e2daa8f02476
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -0,0 +1,1007 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33
34#include <linux/kref.h>
35#include <linux/random.h>
36#include <linux/debugfs.h>
37#include <linux/export.h>
38#include <rdma/ib_umem.h>
39#include "mlx5_ib.h"
40
41enum {
42 DEF_CACHE_SIZE = 10,
43};
44
45static __be64 *mr_align(__be64 *ptr, int align)
46{
47 unsigned long mask = align - 1;
48
49 return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
50}
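/*
 * e.g. mr_align(ptr, 0x40) rounds ptr up to a 64-byte boundary
 * (0x1008 -> 0x1040), which is why add_keys() below allocates the pas
 * array with 0x3f bytes of slack.
 */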
51
52static int order2idx(struct mlx5_ib_dev *dev, int order)
53{
54 struct mlx5_mr_cache *cache = &dev->cache;
55
56 if (order < cache->ent[0].order)
57 return 0;
58 else
59 return order - cache->ent[0].order;
60}
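/*
 * mlx5_mr_cache_init() below sets ent->order = i + 2, so ent[0].order == 2:
 * order2idx(dev, 2) == 0, order2idx(dev, 5) == 3, and any order below 2
 * also maps to slot 0.
 */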
61
62static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
63{
64 struct device *ddev = dev->ib_dev.dma_device;
65 struct mlx5_mr_cache *cache = &dev->cache;
66 struct mlx5_cache_ent *ent = &cache->ent[c];
67 struct mlx5_create_mkey_mbox_in *in;
68 struct mlx5_ib_mr *mr;
69 int npages = 1 << ent->order;
70 int size = sizeof(u64) * npages;
71 int err = 0;
72 int i;
73
74 in = kzalloc(sizeof(*in), GFP_KERNEL);
75 if (!in)
76 return -ENOMEM;
77
78 for (i = 0; i < num; i++) {
79 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
80 if (!mr) {
81 err = -ENOMEM;
82 goto out;
83 }
84 mr->order = ent->order;
85 mr->umred = 1;
86 mr->pas = kmalloc(size + 0x3f, GFP_KERNEL);
87 if (!mr->pas) {
88 kfree(mr);
89 err = -ENOMEM;
90 goto out;
91 }
92 mr->dma = dma_map_single(ddev, mr_align(mr->pas, 0x40), size,
93 DMA_TO_DEVICE);
94 if (dma_mapping_error(ddev, mr->dma)) {
95 kfree(mr->pas);
96 kfree(mr);
97 err = -ENOMEM;
98 goto out;
99 }
100
101 in->seg.status = 1 << 6;
102 in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
103 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
104 in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
105 in->seg.log2_page_size = 12;
106
107 err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
108 sizeof(*in));
109 if (err) {
110 mlx5_ib_warn(dev, "create mkey failed %d\n", err);
111 dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
112 kfree(mr->pas);
113 kfree(mr);
114 goto out;
115 }
116 cache->last_add = jiffies;
117
118 spin_lock(&ent->lock);
119 list_add_tail(&mr->list, &ent->head);
120 ent->cur++;
121 ent->size++;
122 spin_unlock(&ent->lock);
123 }
124
125out:
126 kfree(in);
127 return err;
128}
129
130static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
131{
132 struct device *ddev = dev->ib_dev.dma_device;
133 struct mlx5_mr_cache *cache = &dev->cache;
134 struct mlx5_cache_ent *ent = &cache->ent[c];
135 struct mlx5_ib_mr *mr;
136 int size;
137 int err;
138 int i;
139
140 for (i = 0; i < num; i++) {
141 spin_lock(&ent->lock);
142 if (list_empty(&ent->head)) {
143 spin_unlock(&ent->lock);
144 return;
145 }
146 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
147 list_del(&mr->list);
148 ent->cur--;
149 ent->size--;
150 spin_unlock(&ent->lock);
151 err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
152 if (err) {
153 mlx5_ib_warn(dev, "failed destroy mkey\n");
154 } else {
155 size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
156 dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
157 kfree(mr->pas);
158 kfree(mr);
159 }
160 }
161}
162
163static ssize_t size_write(struct file *filp, const char __user *buf,
164 size_t count, loff_t *pos)
165{
166 struct mlx5_cache_ent *ent = filp->private_data;
167 struct mlx5_ib_dev *dev = ent->dev;
168 char lbuf[20];
169 u32 var;
170 int err;
171 int c;
172
173 if (copy_from_user(lbuf, buf, sizeof(lbuf)))
174 return -EPERM;
175
176 c = order2idx(dev, ent->order);
177 lbuf[sizeof(lbuf) - 1] = 0;
178
179 if (sscanf(lbuf, "%u", &var) != 1)
180 return -EINVAL;
181
182 if (var < ent->limit)
183 return -EINVAL;
184
185 if (var > ent->size) {
186 err = add_keys(dev, c, var - ent->size);
187 if (err)
188 return err;
189 } else if (var < ent->size) {
190 remove_keys(dev, c, ent->size - var);
191 }
192
193 return count;
194}
195
196static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
197 loff_t *pos)
198{
199 struct mlx5_cache_ent *ent = filp->private_data;
200 char lbuf[20];
201 int err;
202
203 if (*pos)
204 return 0;
205
206 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
207 if (err < 0)
208 return err;
209
210 if (copy_to_user(buf, lbuf, err))
211 return -EPERM;
212
213 *pos += err;
214
215 return err;
216}
217
218static const struct file_operations size_fops = {
219 .owner = THIS_MODULE,
220 .open = simple_open,
221 .write = size_write,
222 .read = size_read,
223};
224
225static ssize_t limit_write(struct file *filp, const char __user *buf,
226 size_t count, loff_t *pos)
227{
228 struct mlx5_cache_ent *ent = filp->private_data;
229 struct mlx5_ib_dev *dev = ent->dev;
230 char lbuf[20];
231 u32 var;
232 int err;
233 int c;
234
235 if (copy_from_user(lbuf, buf, sizeof(lbuf)))
236 return -EPERM;
237
238 c = order2idx(dev, ent->order);
239 lbuf[sizeof(lbuf) - 1] = 0;
240
241 if (sscanf(lbuf, "%u", &var) != 1)
242 return -EINVAL;
243
244 if (var > ent->size)
245 return -EINVAL;
246
247 ent->limit = var;
248
249 if (ent->cur < ent->limit) {
250 err = add_keys(dev, c, 2 * ent->limit - ent->cur);
251 if (err)
252 return err;
253 }
254
255 return count;
256}
257
258static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
259 loff_t *pos)
260{
261 struct mlx5_cache_ent *ent = filp->private_data;
262 char lbuf[20];
263 int err;
264
265 if (*pos)
266 return 0;
267
268 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
269 if (err < 0)
270 return err;
271
272 if (copy_to_user(buf, lbuf, err))
273 return -EPERM;
274
275 *pos += err;
276
277 return err;
278}
279
280static const struct file_operations limit_fops = {
281 .owner = THIS_MODULE,
282 .open = simple_open,
283 .write = limit_write,
284 .read = limit_read,
285};
286
287static int someone_adding(struct mlx5_mr_cache *cache)
288{
289 int i;
290
291 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
292 if (cache->ent[i].cur < cache->ent[i].limit)
293 return 1;
294 }
295
296 return 0;
297}
298
299static void __cache_work_func(struct mlx5_cache_ent *ent)
300{
301 struct mlx5_ib_dev *dev = ent->dev;
302 struct mlx5_mr_cache *cache = &dev->cache;
303 int i = order2idx(dev, ent->order);
304
305 if (cache->stopped)
306 return;
307
308 ent = &dev->cache.ent[i];
309 if (ent->cur < 2 * ent->limit) {
310 add_keys(dev, i, 1);
311 if (ent->cur < 2 * ent->limit)
312 queue_work(cache->wq, &ent->work);
313 } else if (ent->cur > 2 * ent->limit) {
314 if (!someone_adding(cache) &&
315 time_after(jiffies, cache->last_add + 60 * HZ)) {
316 remove_keys(dev, i, 1);
317 if (ent->cur > ent->limit)
318 queue_work(cache->wq, &ent->work);
319 } else {
320 queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
321 }
322 }
323}
324
325static void delayed_cache_work_func(struct work_struct *work)
326{
327 struct mlx5_cache_ent *ent;
328
329 ent = container_of(work, struct mlx5_cache_ent, dwork.work);
330 __cache_work_func(ent);
331}
332
333static void cache_work_func(struct work_struct *work)
334{
335 struct mlx5_cache_ent *ent;
336
337 ent = container_of(work, struct mlx5_cache_ent, work);
338 __cache_work_func(ent);
339}
340
341static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
342{
343 struct mlx5_mr_cache *cache = &dev->cache;
344 struct mlx5_ib_mr *mr = NULL;
345 struct mlx5_cache_ent *ent;
346 int c;
347 int i;
348
349 c = order2idx(dev, order);
350 if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
351 mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
352 return NULL;
353 }
354
355 for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
356 ent = &cache->ent[i];
357
358 mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
359
360 spin_lock(&ent->lock);
361 if (!list_empty(&ent->head)) {
362 mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
363 list);
364 list_del(&mr->list);
365 ent->cur--;
366 spin_unlock(&ent->lock);
367 if (ent->cur < ent->limit)
368 queue_work(cache->wq, &ent->work);
369 break;
370 }
371 spin_unlock(&ent->lock);
372
373 queue_work(cache->wq, &ent->work);
374
375 if (mr)
376 break;
377 }
378
379 if (!mr)
380 cache->ent[c].miss++;
381
382 return mr;
383}
384
385static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
386{
387 struct mlx5_mr_cache *cache = &dev->cache;
388 struct mlx5_cache_ent *ent;
389 int shrink = 0;
390 int c;
391
392 c = order2idx(dev, mr->order);
393 if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
394 mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
395 return;
396 }
397 ent = &cache->ent[c];
398 spin_lock(&ent->lock);
399 list_add_tail(&mr->list, &ent->head);
400 ent->cur++;
401 if (ent->cur > 2 * ent->limit)
402 shrink = 1;
403 spin_unlock(&ent->lock);
404
405 if (shrink)
406 queue_work(cache->wq, &ent->work);
407}
408
409static void clean_keys(struct mlx5_ib_dev *dev, int c)
410{
411 struct device *ddev = dev->ib_dev.dma_device;
412 struct mlx5_mr_cache *cache = &dev->cache;
413 struct mlx5_cache_ent *ent = &cache->ent[c];
414 struct mlx5_ib_mr *mr;
415 int size;
416 int err;
417
418 while (1) {
419 spin_lock(&ent->lock);
420 if (list_empty(&ent->head)) {
421 spin_unlock(&ent->lock);
422 return;
423 }
424 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
425 list_del(&mr->list);
426 ent->cur--;
427 ent->size--;
428 spin_unlock(&ent->lock);
429 err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
430 if (err) {
431			mlx5_ib_warn(dev, "failed to destroy mkey\n");
432 } else {
433 size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
434 dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
435 kfree(mr->pas);
436 kfree(mr);
437 }
438 }
439}
440
441static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
442{
443 struct mlx5_mr_cache *cache = &dev->cache;
444 struct mlx5_cache_ent *ent;
445 int i;
446
447 if (!mlx5_debugfs_root)
448 return 0;
449
450 cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root);
451 if (!cache->root)
452 return -ENOMEM;
453
454 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
455 ent = &cache->ent[i];
456 sprintf(ent->name, "%d", ent->order);
457 ent->dir = debugfs_create_dir(ent->name, cache->root);
458 if (!ent->dir)
459 return -ENOMEM;
460
461 ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
462 &size_fops);
463 if (!ent->fsize)
464 return -ENOMEM;
465
466 ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
467 &limit_fops);
468 if (!ent->flimit)
469 return -ENOMEM;
470
471 ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
472 &ent->cur);
473 if (!ent->fcur)
474 return -ENOMEM;
475
476 ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
477 &ent->miss);
478 if (!ent->fmiss)
479 return -ENOMEM;
480 }
481
482 return 0;
483}
484
485static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
486{
487 if (!mlx5_debugfs_root)
488 return;
489
490 debugfs_remove_recursive(dev->cache.root);
491}
492
493int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
494{
495 struct mlx5_mr_cache *cache = &dev->cache;
496 struct mlx5_cache_ent *ent;
497 int limit;
498 int size;
499 int err;
500 int i;
501
502 cache->wq = create_singlethread_workqueue("mkey_cache");
503 if (!cache->wq) {
504 mlx5_ib_warn(dev, "failed to create work queue\n");
505 return -ENOMEM;
506 }
507
508 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
509 INIT_LIST_HEAD(&cache->ent[i].head);
510 spin_lock_init(&cache->ent[i].lock);
511
512 ent = &cache->ent[i];
513 INIT_LIST_HEAD(&ent->head);
514 spin_lock_init(&ent->lock);
515 ent->order = i + 2;
516 ent->dev = dev;
517
518 if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) {
519 size = dev->mdev.profile->mr_cache[i].size;
520 limit = dev->mdev.profile->mr_cache[i].limit;
521 } else {
522 size = DEF_CACHE_SIZE;
523 limit = 0;
524 }
525 INIT_WORK(&ent->work, cache_work_func);
526 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
527 ent->limit = limit;
528 queue_work(cache->wq, &ent->work);
529 }
530
531 err = mlx5_mr_cache_debugfs_init(dev);
532 if (err)
533 mlx5_ib_warn(dev, "cache debugfs failure\n");
534
535 return 0;
536}
537
538int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
539{
540 int i;
541
542 dev->cache.stopped = 1;
543 destroy_workqueue(dev->cache.wq);
544
545 mlx5_mr_cache_debugfs_cleanup(dev);
546
547 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
548 clean_keys(dev, i);
549
550 return 0;
551}
552
553struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
554{
555 struct mlx5_ib_dev *dev = to_mdev(pd->device);
556 struct mlx5_core_dev *mdev = &dev->mdev;
557 struct mlx5_create_mkey_mbox_in *in;
558 struct mlx5_mkey_seg *seg;
559 struct mlx5_ib_mr *mr;
560 int err;
561
562 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
563 if (!mr)
564 return ERR_PTR(-ENOMEM);
565
566 in = kzalloc(sizeof(*in), GFP_KERNEL);
567 if (!in) {
568 err = -ENOMEM;
569 goto err_free;
570 }
571
572 seg = &in->seg;
573 seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
574 seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
575 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
576 seg->start_addr = 0;
577
578 err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
579 if (err)
580 goto err_in;
581
582 kfree(in);
583 mr->ibmr.lkey = mr->mmr.key;
584 mr->ibmr.rkey = mr->mmr.key;
585 mr->umem = NULL;
586
587 return &mr->ibmr;
588
589err_in:
590 kfree(in);
591
592err_free:
593 kfree(mr);
594
595 return ERR_PTR(err);
596}
597
598static int get_octo_len(u64 addr, u64 len, int page_size)
599{
600 u64 offset;
601 int npages;
602
603 offset = addr & (page_size - 1);
604 npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
605 return (npages + 1) / 2;
606}
607
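/*
 * Only regions up to order 17 go through the UMR QP; this presumably
 * matches the largest bucket the MR cache maintains.  Larger regions
 * fall back to a firmware CREATE_MKEY command (see reg_create() below).
 */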
608static int use_umr(int order)
609{
610 return order <= 17;
611}
612
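/*
 * Build the send WR for a UMR registration.  The generic wr.fast_reg
 * fields are reused to carry the translation parameters, and the
 * page_list pointer is overloaded, apparently so the UMR WQE builder
 * in qp.c can recover the PD.
 */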
613static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
614 struct ib_sge *sg, u64 dma, int n, u32 key,
615 int page_shift, u64 virt_addr, u64 len,
616 int access_flags)
617{
618 struct mlx5_ib_dev *dev = to_mdev(pd->device);
619 struct ib_mr *mr = dev->umrc.mr;
620
621 sg->addr = dma;
622 sg->length = ALIGN(sizeof(u64) * n, 64);
623 sg->lkey = mr->lkey;
624
625 wr->next = NULL;
626 wr->send_flags = 0;
627 wr->sg_list = sg;
628 if (n)
629 wr->num_sge = 1;
630 else
631 wr->num_sge = 0;
632
633 wr->opcode = MLX5_IB_WR_UMR;
634 wr->wr.fast_reg.page_list_len = n;
635 wr->wr.fast_reg.page_shift = page_shift;
636 wr->wr.fast_reg.rkey = key;
637 wr->wr.fast_reg.iova_start = virt_addr;
638 wr->wr.fast_reg.length = len;
639 wr->wr.fast_reg.access_flags = access_flags;
640 wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
641}
642
643static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
644 struct ib_send_wr *wr, u32 key)
645{
646 wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
647 wr->opcode = MLX5_IB_WR_UMR;
648 wr->wr.fast_reg.rkey = key;
649}
650
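/*
 * Completion handler for the UMR CQ: drain all available completions,
 * record the status in the owning MR, wake the waiter through
 * mr->done and re-arm the CQ.
 */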
651void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
652{
653 struct mlx5_ib_mr *mr;
654 struct ib_wc wc;
655 int err;
656
657 while (1) {
658 err = ib_poll_cq(cq, 1, &wc);
659 if (err < 0) {
660 pr_warn("poll cq error %d\n", err);
661 return;
662 }
663 if (err == 0)
664 break;
665
666 mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
667 mr->status = wc.status;
668 complete(&mr->done);
669 }
670 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
671}
672
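/*
 * Register a region through the UMR QP: take an MR from the cache
 * (retrying a few times while asking the cache to grow), write the
 * page list into its pre-allocated buffer and post a UMR work request,
 * serialized by umrc->sem.
 */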
673static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
674 u64 virt_addr, u64 len, int npages,
675 int page_shift, int order, int access_flags)
676{
677 struct mlx5_ib_dev *dev = to_mdev(pd->device);
678 struct umr_common *umrc = &dev->umrc;
679 struct ib_send_wr wr, *bad;
680 struct mlx5_ib_mr *mr;
681 struct ib_sge sg;
682 int err;
683 int i;
684
685 for (i = 0; i < 10; i++) {
686 mr = alloc_cached_mr(dev, order);
687 if (mr)
688 break;
689
690 err = add_keys(dev, order2idx(dev, order), 1);
691 if (err) {
692 mlx5_ib_warn(dev, "add_keys failed\n");
693 break;
694 }
695 }
696
697 if (!mr)
698 return ERR_PTR(-EAGAIN);
699
700 mlx5_ib_populate_pas(dev, umem, page_shift, mr_align(mr->pas, 0x40), 1);
701
702 memset(&wr, 0, sizeof(wr));
703 wr.wr_id = (u64)(unsigned long)mr;
704 prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
705
706	/* We serialize polls so one process does not kidnap another's
707	 * completion. This is not a problem since a WR is completed in
708	 * around 1 usec.
709	 */
710 down(&umrc->sem);
711 init_completion(&mr->done);
712 err = ib_post_send(umrc->qp, &wr, &bad);
713 if (err) {
714 mlx5_ib_warn(dev, "post send failed, err %d\n", err);
715 up(&umrc->sem);
716 goto error;
717 }
718 wait_for_completion(&mr->done);
719 up(&umrc->sem);
720
721 if (mr->status != IB_WC_SUCCESS) {
722 mlx5_ib_warn(dev, "reg umr failed\n");
723 err = -EFAULT;
724 goto error;
725 }
726
727 return mr;
728
729error:
730 free_cached_mr(dev, mr);
731 return ERR_PTR(err);
732}
733
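/*
 * Slow-path registration, used when the UMR path is not available
 * (large region or empty cache): build a CREATE_MKEY mailbox carrying
 * the full page list and let firmware create the mkey directly.
 */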
734static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
735 u64 length, struct ib_umem *umem,
736 int npages, int page_shift,
737 int access_flags)
738{
739 struct mlx5_ib_dev *dev = to_mdev(pd->device);
740 struct mlx5_create_mkey_mbox_in *in;
741 struct mlx5_ib_mr *mr;
742 int inlen;
743 int err;
744
745 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
746 if (!mr)
747 return ERR_PTR(-ENOMEM);
748
749 inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
750 in = mlx5_vzalloc(inlen);
751 if (!in) {
752 err = -ENOMEM;
753 goto err_1;
754 }
755 mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);
756
757 in->seg.flags = convert_access(access_flags) |
758 MLX5_ACCESS_MODE_MTT;
759 in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
760 in->seg.start_addr = cpu_to_be64(virt_addr);
761 in->seg.len = cpu_to_be64(length);
762 in->seg.bsfs_octo_size = 0;
763 in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
764 in->seg.log2_page_size = page_shift;
765 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
766 in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
767 err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
768 if (err) {
769 mlx5_ib_warn(dev, "create mkey failed\n");
770 goto err_2;
771 }
772 mr->umem = umem;
773 mlx5_vfree(in);
774
775 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
776
777 return mr;
778
779err_2:
780 mlx5_vfree(in);
781
782err_1:
783 kfree(mr);
784
785 return ERR_PTR(err);
786}
787
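/*
 * reg_user_mr entry point: pin the user pages, pick the best page size
 * for the translation, then register through the UMR QP if possible
 * and fall back to reg_create() otherwise.
 */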
788struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
789 u64 virt_addr, int access_flags,
790 struct ib_udata *udata)
791{
792 struct mlx5_ib_dev *dev = to_mdev(pd->device);
793 struct mlx5_ib_mr *mr = NULL;
794 struct ib_umem *umem;
795 int page_shift;
796 int npages;
797 int ncont;
798 int order;
799 int err;
800
801 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
802 start, virt_addr, length);
803 umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
804 0);
805 if (IS_ERR(umem)) {
806 mlx5_ib_dbg(dev, "umem get failed\n");
807 return (void *)umem;
808 }
809
810 mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
811 if (!npages) {
812 mlx5_ib_warn(dev, "avoid zero region\n");
813 err = -EINVAL;
814 goto error;
815 }
816
817 mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
818 npages, ncont, order, page_shift);
819
820 if (use_umr(order)) {
821 mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
822 order, access_flags);
823 if (PTR_ERR(mr) == -EAGAIN) {
824			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
825 mr = NULL;
826 }
827 }
828
829 if (!mr)
830 mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
831 access_flags);
832
833 if (IS_ERR(mr)) {
834 err = PTR_ERR(mr);
835 goto error;
836 }
837
838 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);
839
840 mr->umem = umem;
841 mr->npages = npages;
842 spin_lock(&dev->mr_lock);
843 dev->mdev.priv.reg_pages += npages;
844 spin_unlock(&dev->mr_lock);
845 mr->ibmr.lkey = mr->mmr.key;
846 mr->ibmr.rkey = mr->mmr.key;
847
848 return &mr->ibmr;
849
850error:
851 ib_umem_release(umem);
852 return ERR_PTR(err);
853}
854
855static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
856{
857 struct umr_common *umrc = &dev->umrc;
858 struct ib_send_wr wr, *bad;
859 int err;
860
861 memset(&wr, 0, sizeof(wr));
862 wr.wr_id = (u64)(unsigned long)mr;
863 prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
864
865 down(&umrc->sem);
866 init_completion(&mr->done);
867 err = ib_post_send(umrc->qp, &wr, &bad);
868 if (err) {
869 up(&umrc->sem);
870 mlx5_ib_dbg(dev, "err %d\n", err);
871 goto error;
872 }
873 wait_for_completion(&mr->done);
874 up(&umrc->sem);
875 if (mr->status != IB_WC_SUCCESS) {
876 mlx5_ib_warn(dev, "unreg umr failed\n");
877 err = -EFAULT;
878 goto error;
879 }
880 return 0;
881
882error:
883 return err;
884}
885
886int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
887{
888 struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
889 struct mlx5_ib_mr *mr = to_mmr(ibmr);
890 struct ib_umem *umem = mr->umem;
891 int npages = mr->npages;
892 int umred = mr->umred;
893 int err;
894
895 if (!umred) {
896 err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
897 if (err) {
898 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
899 mr->mmr.key, err);
900 return err;
901 }
902 } else {
903 err = unreg_umr(dev, mr);
904 if (err) {
905 mlx5_ib_warn(dev, "failed unregister\n");
906 return err;
907 }
908 free_cached_mr(dev, mr);
909 }
910
911 if (umem) {
912 ib_umem_release(umem);
913 spin_lock(&dev->mr_lock);
914 dev->mdev.priv.reg_pages -= npages;
915 spin_unlock(&dev->mr_lock);
916 }
917
918 if (!umred)
919 kfree(mr);
920
921 return 0;
922}
923
924struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
925 int max_page_list_len)
926{
927 struct mlx5_ib_dev *dev = to_mdev(pd->device);
928 struct mlx5_create_mkey_mbox_in *in;
929 struct mlx5_ib_mr *mr;
930 int err;
931
932 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
933 if (!mr)
934 return ERR_PTR(-ENOMEM);
935
936 in = kzalloc(sizeof(*in), GFP_KERNEL);
937 if (!in) {
938 err = -ENOMEM;
939 goto err_free;
940 }
941
942 in->seg.status = 1 << 6; /* free */
943 in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
944 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
945 in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
946 in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
947	/* TBD not needed - issue 197292 */
949 in->seg.log2_page_size = PAGE_SHIFT;
950
951 err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
952 kfree(in);
953 if (err)
954 goto err_free;
955
956 mr->ibmr.lkey = mr->mmr.key;
957 mr->ibmr.rkey = mr->mmr.key;
958 mr->umem = NULL;
959
960 return &mr->ibmr;
961
962err_free:
963 kfree(mr);
964 return ERR_PTR(err);
965}
966
967struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
968 int page_list_len)
969{
970 struct mlx5_ib_fast_reg_page_list *mfrpl;
971 int size = page_list_len * sizeof(u64);
972
973 mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
974 if (!mfrpl)
975 return ERR_PTR(-ENOMEM);
976
977 mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
978 if (!mfrpl->ibfrpl.page_list)
979 goto err_free;
980
981 mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
982 size, &mfrpl->map,
983 GFP_KERNEL);
984 if (!mfrpl->mapped_page_list)
985 goto err_free;
986
987 WARN_ON(mfrpl->map & 0x3f);
988
989 return &mfrpl->ibfrpl;
990
991err_free:
992 kfree(mfrpl->ibfrpl.page_list);
993 kfree(mfrpl);
994 return ERR_PTR(-ENOMEM);
995}
996
997void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
998{
999 struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
1000 struct mlx5_ib_dev *dev = to_mdev(page_list->device);
1001 int size = page_list->max_page_list_len * sizeof(u64);
1002
1003 dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list,
1004 mfrpl->map);
1005 kfree(mfrpl->ibfrpl.page_list);
1006 kfree(mfrpl);
1007}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
new file mode 100644
index 000000000000..16ac54c9819f
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -0,0 +1,2524 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/module.h>
34#include <rdma/ib_umem.h>
35#include "mlx5_ib.h"
36#include "user.h"
37
38/* not supported currently */
39static int wq_signature;
40
41enum {
42 MLX5_IB_ACK_REQ_FREQ = 8,
43};
44
45enum {
46 MLX5_IB_DEFAULT_SCHED_QUEUE = 0x83,
47 MLX5_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
48 MLX5_IB_LINK_TYPE_IB = 0,
49 MLX5_IB_LINK_TYPE_ETH = 1
50};
51
52enum {
53 MLX5_IB_SQ_STRIDE = 6,
54 MLX5_IB_CACHE_LINE_SIZE = 64,
55};
56
57static const u32 mlx5_ib_opcode[] = {
58 [IB_WR_SEND] = MLX5_OPCODE_SEND,
59 [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM,
60 [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE,
61 [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM,
62 [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ,
63 [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS,
64 [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA,
65 [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL,
66 [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR,
67 [IB_WR_FAST_REG_MR] = MLX5_OPCODE_UMR,
68 [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS,
69 [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA,
70 [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
71};
72
73struct umr_wr {
74 u64 virt_addr;
75 struct ib_pd *pd;
76 unsigned int page_shift;
77 unsigned int npages;
78 u32 length;
79 int access_flags;
80 u32 mkey;
81};
82
83static int is_qp0(enum ib_qp_type qp_type)
84{
85 return qp_type == IB_QPT_SMI;
86}
87
88static int is_qp1(enum ib_qp_type qp_type)
89{
90 return qp_type == IB_QPT_GSI;
91}
92
93static int is_sqp(enum ib_qp_type qp_type)
94{
95 return is_qp0(qp_type) || is_qp1(qp_type);
96}
97
98static void *get_wqe(struct mlx5_ib_qp *qp, int offset)
99{
100 return mlx5_buf_offset(&qp->buf, offset);
101}
102
103static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n)
104{
105 return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
106}
107
108void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
109{
110 return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
111}
112
113static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
114{
115 struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
116 struct ib_event event;
117
118 if (type == MLX5_EVENT_TYPE_PATH_MIG)
119 to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
120
121 if (ibqp->event_handler) {
122 event.device = ibqp->device;
123 event.element.qp = ibqp;
124 switch (type) {
125 case MLX5_EVENT_TYPE_PATH_MIG:
126 event.event = IB_EVENT_PATH_MIG;
127 break;
128 case MLX5_EVENT_TYPE_COMM_EST:
129 event.event = IB_EVENT_COMM_EST;
130 break;
131 case MLX5_EVENT_TYPE_SQ_DRAINED:
132 event.event = IB_EVENT_SQ_DRAINED;
133 break;
134 case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
135 event.event = IB_EVENT_QP_LAST_WQE_REACHED;
136 break;
137 case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
138 event.event = IB_EVENT_QP_FATAL;
139 break;
140 case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
141 event.event = IB_EVENT_PATH_MIG_ERR;
142 break;
143 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
144 event.event = IB_EVENT_QP_REQ_ERR;
145 break;
146 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
147 event.event = IB_EVENT_QP_ACCESS_ERR;
148 break;
149 default:
150 pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn);
151 return;
152 }
153
154 ibqp->event_handler(&event, ibqp->qp_context);
155 }
156}
157
158static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
159 int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd)
160{
161 int wqe_size;
162 int wq_size;
163
164 /* Sanity check RQ size before proceeding */
165 if (cap->max_recv_wr > dev->mdev.caps.max_wqes)
166 return -EINVAL;
167
168 if (!has_rq) {
169 qp->rq.max_gs = 0;
170 qp->rq.wqe_cnt = 0;
171 qp->rq.wqe_shift = 0;
172 } else {
173 if (ucmd) {
174 qp->rq.wqe_cnt = ucmd->rq_wqe_count;
175 qp->rq.wqe_shift = ucmd->rq_wqe_shift;
176 qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
177 qp->rq.max_post = qp->rq.wqe_cnt;
178 } else {
179 wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0;
180 wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg);
181 wqe_size = roundup_pow_of_two(wqe_size);
182 wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
183 wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
184 qp->rq.wqe_cnt = wq_size / wqe_size;
185 if (wqe_size > dev->mdev.caps.max_rq_desc_sz) {
186 mlx5_ib_dbg(dev, "wqe_size %d, max %d\n",
187 wqe_size,
188 dev->mdev.caps.max_rq_desc_sz);
189 return -EINVAL;
190 }
191 qp->rq.wqe_shift = ilog2(wqe_size);
192 qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
193 qp->rq.max_post = qp->rq.wqe_cnt;
194 }
195 }
196
197 return 0;
198}
199
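/*
 * Fixed per-WQE overhead (control, address and datagram segments) that
 * each QP type adds in front of the data segments; calc_send_wqe()
 * uses it to size the send queue stride.
 */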
200static int sq_overhead(enum ib_qp_type qp_type)
201{
202	int size = 0;
203
204 switch (qp_type) {
205 case IB_QPT_XRC_INI:
206 size = sizeof(struct mlx5_wqe_xrc_seg);
207 /* fall through */
208 case IB_QPT_RC:
209 size += sizeof(struct mlx5_wqe_ctrl_seg) +
210 sizeof(struct mlx5_wqe_atomic_seg) +
211 sizeof(struct mlx5_wqe_raddr_seg);
212 break;
213
214 case IB_QPT_UC:
215 size = sizeof(struct mlx5_wqe_ctrl_seg) +
216 sizeof(struct mlx5_wqe_raddr_seg);
217 break;
218
219 case IB_QPT_UD:
220 case IB_QPT_SMI:
221 case IB_QPT_GSI:
222 size = sizeof(struct mlx5_wqe_ctrl_seg) +
223 sizeof(struct mlx5_wqe_datagram_seg);
224 break;
225
226 case MLX5_IB_QPT_REG_UMR:
227 size = sizeof(struct mlx5_wqe_ctrl_seg) +
228 sizeof(struct mlx5_wqe_umr_ctrl_seg) +
229 sizeof(struct mlx5_mkey_seg);
230 break;
231
232 default:
233 return -EINVAL;
234 }
235
236 return size;
237}
238
239static int calc_send_wqe(struct ib_qp_init_attr *attr)
240{
241 int inl_size = 0;
242 int size;
243
244 size = sq_overhead(attr->qp_type);
245 if (size < 0)
246 return size;
247
248 if (attr->cap.max_inline_data) {
249 inl_size = size + sizeof(struct mlx5_wqe_inline_seg) +
250 attr->cap.max_inline_data;
251 }
252
253 size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
254
255 return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
256}
257
258static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
259 struct mlx5_ib_qp *qp)
260{
261 int wqe_size;
262 int wq_size;
263
264 if (!attr->cap.max_send_wr)
265 return 0;
266
267 wqe_size = calc_send_wqe(attr);
268 mlx5_ib_dbg(dev, "wqe_size %d\n", wqe_size);
269 if (wqe_size < 0)
270 return wqe_size;
271
272 if (wqe_size > dev->mdev.caps.max_sq_desc_sz) {
273		mlx5_ib_dbg(dev, "wqe_size %d > max_sq_desc_sz %d\n", wqe_size, dev->mdev.caps.max_sq_desc_sz);
274 return -EINVAL;
275 }
276
277 qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) -
278 sizeof(struct mlx5_wqe_inline_seg);
279 attr->cap.max_inline_data = qp->max_inline_data;
280
281 wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
282 qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
283 qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
284 qp->sq.max_gs = attr->cap.max_send_sge;
285 qp->sq.max_post = 1 << ilog2(wq_size / wqe_size);
286
287 return wq_size;
288}
289
290static int set_user_buf_size(struct mlx5_ib_dev *dev,
291 struct mlx5_ib_qp *qp,
292 struct mlx5_ib_create_qp *ucmd)
293{
294 int desc_sz = 1 << qp->sq.wqe_shift;
295
296 if (desc_sz > dev->mdev.caps.max_sq_desc_sz) {
297 mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n",
298 desc_sz, dev->mdev.caps.max_sq_desc_sz);
299 return -EINVAL;
300 }
301
302 if (ucmd->sq_wqe_count && ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) {
303		mlx5_ib_warn(dev, "sq_wqe_count %d is not a power of two\n",
304			     ucmd->sq_wqe_count);
305 return -EINVAL;
306 }
307
308 qp->sq.wqe_cnt = ucmd->sq_wqe_count;
309
310 if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) {
311 mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n",
312 qp->sq.wqe_cnt, dev->mdev.caps.max_wqes);
313 return -EINVAL;
314 }
315
316 qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
317 (qp->sq.wqe_cnt << 6);
318
319 return 0;
320}
321
322static int qp_has_rq(struct ib_qp_init_attr *attr)
323{
324 if (attr->qp_type == IB_QPT_XRC_INI ||
325 attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
326 attr->qp_type == MLX5_IB_QPT_REG_UMR ||
327 !attr->cap.max_recv_wr)
328 return 0;
329
330 return 1;
331}
332
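/*
 * UUAR (micro UAR) allocation.  The latency class selects the doorbell
 * register a QP gets: LOW shares uuarn 0, MEDIUM takes the least used
 * of the shared medium-class slots, HIGH grabs an exclusive uuar from
 * the range reserved for low-latency use, and FAST_PATH (used by the
 * internal UMR QP) is pinned to uuarn 2.
 */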
333static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
334{
335 int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
336 int start_uuar;
337 int i;
338
339 start_uuar = nuuars - uuari->num_low_latency_uuars;
340 for (i = start_uuar; i < nuuars; i++) {
341 if (!test_bit(i, uuari->bitmap)) {
342 set_bit(i, uuari->bitmap);
343 uuari->count[i]++;
344 return i;
345 }
346 }
347
348 return -ENOMEM;
349}
350
351static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
352{
353 int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
354 int minidx = 1;
355 int uuarn;
356 int end;
357 int i;
358
359 end = nuuars - uuari->num_low_latency_uuars;
360
361 for (i = 1; i < end; i++) {
362 uuarn = i & 3;
363 if (uuarn == 2 || uuarn == 3)
364 continue;
365
366 if (uuari->count[i] < uuari->count[minidx])
367 minidx = i;
368 }
369
370 uuari->count[minidx]++;
371 return minidx;
372}
373
374static int alloc_uuar(struct mlx5_uuar_info *uuari,
375 enum mlx5_ib_latency_class lat)
376{
377 int uuarn = -EINVAL;
378
379 mutex_lock(&uuari->lock);
380 switch (lat) {
381 case MLX5_IB_LATENCY_CLASS_LOW:
382 uuarn = 0;
383 uuari->count[uuarn]++;
384 break;
385
386 case MLX5_IB_LATENCY_CLASS_MEDIUM:
387 uuarn = alloc_med_class_uuar(uuari);
388 break;
389
390 case MLX5_IB_LATENCY_CLASS_HIGH:
391 uuarn = alloc_high_class_uuar(uuari);
392 break;
393
394 case MLX5_IB_LATENCY_CLASS_FAST_PATH:
395 uuarn = 2;
396 break;
397 }
398 mutex_unlock(&uuari->lock);
399
400 return uuarn;
401}
402
403static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
404{
405 clear_bit(uuarn, uuari->bitmap);
406 --uuari->count[uuarn];
407}
408
409static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
410{
411 clear_bit(uuarn, uuari->bitmap);
412 --uuari->count[uuarn];
413}
414
415static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn)
416{
417 int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
418 int high_uuar = nuuars - uuari->num_low_latency_uuars;
419
420 mutex_lock(&uuari->lock);
421 if (uuarn == 0) {
422 --uuari->count[uuarn];
423 goto out;
424 }
425
426 if (uuarn < high_uuar) {
427 free_med_class_uuar(uuari, uuarn);
428 goto out;
429 }
430
431 free_high_class_uuar(uuari, uuarn);
432
433out:
434 mutex_unlock(&uuari->lock);
435}
436
437static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state)
438{
439 switch (state) {
440 case IB_QPS_RESET: return MLX5_QP_STATE_RST;
441 case IB_QPS_INIT: return MLX5_QP_STATE_INIT;
442 case IB_QPS_RTR: return MLX5_QP_STATE_RTR;
443 case IB_QPS_RTS: return MLX5_QP_STATE_RTS;
444 case IB_QPS_SQD: return MLX5_QP_STATE_SQD;
445 case IB_QPS_SQE: return MLX5_QP_STATE_SQER;
446 case IB_QPS_ERR: return MLX5_QP_STATE_ERR;
447 default: return -1;
448 }
449}
450
451static int to_mlx5_st(enum ib_qp_type type)
452{
453 switch (type) {
454 case IB_QPT_RC: return MLX5_QP_ST_RC;
455 case IB_QPT_UC: return MLX5_QP_ST_UC;
456 case IB_QPT_UD: return MLX5_QP_ST_UD;
457 case MLX5_IB_QPT_REG_UMR: return MLX5_QP_ST_REG_UMR;
458 case IB_QPT_XRC_INI:
459 case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC;
460 case IB_QPT_SMI: return MLX5_QP_ST_QP0;
461 case IB_QPT_GSI: return MLX5_QP_ST_QP1;
462 case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
463 case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
464 case IB_QPT_RAW_PACKET:
465 case IB_QPT_MAX:
466 default: return -EINVAL;
467 }
468}
469
470static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
471{
472 return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
473}
474
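/*
 * Create the resources for a userspace QP: allocate a uuar for its
 * doorbell, size and pin the user-provided WQ buffer, translate it
 * into the create mailbox and map the doorbell record supplied by
 * userspace.
 */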
475static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
476 struct mlx5_ib_qp *qp, struct ib_udata *udata,
477 struct mlx5_create_qp_mbox_in **in,
478 struct mlx5_ib_create_qp_resp *resp, int *inlen)
479{
480 struct mlx5_ib_ucontext *context;
481 struct mlx5_ib_create_qp ucmd;
482 int page_shift;
483 int uar_index;
484 int npages;
485 u32 offset;
486 int uuarn;
487 int ncont;
488 int err;
489
490 err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
491 if (err) {
492 mlx5_ib_dbg(dev, "copy failed\n");
493 return err;
494 }
495
496 context = to_mucontext(pd->uobject->context);
497 /*
498 * TBD: should come from the verbs when we have the API
499 */
500 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
501 if (uuarn < 0) {
502 mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
503 mlx5_ib_dbg(dev, "reverting to high latency\n");
504 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
505 if (uuarn < 0) {
506 mlx5_ib_dbg(dev, "uuar allocation failed\n");
507 return uuarn;
508 }
509 }
510
511 uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
512 mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
513
514 err = set_user_buf_size(dev, qp, &ucmd);
515 if (err)
516 goto err_uuar;
517
518 qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
519 qp->buf_size, 0, 0);
520 if (IS_ERR(qp->umem)) {
521 mlx5_ib_dbg(dev, "umem_get failed\n");
522 err = PTR_ERR(qp->umem);
523 goto err_uuar;
524 }
525
526 mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
527 &ncont, NULL);
528 err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
529 if (err) {
530 mlx5_ib_warn(dev, "bad offset\n");
531 goto err_umem;
532 }
533 mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
534 ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
535
536 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
537 *in = mlx5_vzalloc(*inlen);
538 if (!*in) {
539 err = -ENOMEM;
540 goto err_umem;
541 }
542 mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
543 (*in)->ctx.log_pg_sz_remote_qpn =
544 cpu_to_be32((page_shift - PAGE_SHIFT) << 24);
545 (*in)->ctx.params2 = cpu_to_be32(offset << 6);
546
547 (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
548 resp->uuar_index = uuarn;
549 qp->uuarn = uuarn;
550
551 err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db);
552 if (err) {
553 mlx5_ib_dbg(dev, "map failed\n");
554 goto err_free;
555 }
556
557 err = ib_copy_to_udata(udata, resp, sizeof(*resp));
558 if (err) {
559 mlx5_ib_dbg(dev, "copy failed\n");
560 goto err_unmap;
561 }
562 qp->create_type = MLX5_QP_USER;
563
564 return 0;
565
566err_unmap:
567 mlx5_ib_db_unmap_user(context, &qp->db);
568
569err_free:
570 mlx5_vfree(*in);
571
572err_umem:
573 ib_umem_release(qp->umem);
574
575err_uuar:
576 free_uuar(&context->uuari, uuarn);
577 return err;
578}
579
580static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
581{
582 struct mlx5_ib_ucontext *context;
583
584 context = to_mucontext(pd->uobject->context);
585 mlx5_ib_db_unmap_user(context, &qp->db);
586 ib_umem_release(qp->umem);
587 free_uuar(&context->uuari, qp->uuarn);
588}
589
590static int create_kernel_qp(struct mlx5_ib_dev *dev,
591 struct ib_qp_init_attr *init_attr,
592 struct mlx5_ib_qp *qp,
593 struct mlx5_create_qp_mbox_in **in, int *inlen)
594{
595 enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
596 struct mlx5_uuar_info *uuari;
597 int uar_index;
598 int uuarn;
599 int err;
600
601 uuari = &dev->mdev.priv.uuari;
602 if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
603 qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
604
605 if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
606 lc = MLX5_IB_LATENCY_CLASS_FAST_PATH;
607
608 uuarn = alloc_uuar(uuari, lc);
609 if (uuarn < 0) {
610		mlx5_ib_dbg(dev, "failed to allocate uuar\n");
611 return -ENOMEM;
612 }
613
614 qp->bf = &uuari->bfs[uuarn];
615 uar_index = qp->bf->uar->index;
616
617 err = calc_sq_size(dev, init_attr, qp);
618 if (err < 0) {
619 mlx5_ib_dbg(dev, "err %d\n", err);
620 goto err_uuar;
621 }
622
623 qp->rq.offset = 0;
624 qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
625 qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
626
627 err = mlx5_buf_alloc(&dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf);
628 if (err) {
629 mlx5_ib_dbg(dev, "err %d\n", err);
630 goto err_uuar;
631 }
632
633 qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
634 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages;
635 *in = mlx5_vzalloc(*inlen);
636 if (!*in) {
637 err = -ENOMEM;
638 goto err_buf;
639 }
640 (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
641 (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24);
642 /* Set "fast registration enabled" for all kernel QPs */
643 (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
644 (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
645
646 mlx5_fill_page_array(&qp->buf, (*in)->pas);
647
648 err = mlx5_db_alloc(&dev->mdev, &qp->db);
649 if (err) {
650 mlx5_ib_dbg(dev, "err %d\n", err);
651 goto err_free;
652 }
653
654 qp->db.db[0] = 0;
655 qp->db.db[1] = 0;
656
657 qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL);
658 qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), GFP_KERNEL);
659 qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL);
660 qp->sq.w_list = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list), GFP_KERNEL);
661 qp->sq.wqe_head = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head), GFP_KERNEL);
662
663 if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid ||
664 !qp->sq.w_list || !qp->sq.wqe_head) {
665 err = -ENOMEM;
666 goto err_wrid;
667 }
668 qp->create_type = MLX5_QP_KERNEL;
669
670 return 0;
671
672err_wrid:
673 mlx5_db_free(&dev->mdev, &qp->db);
674 kfree(qp->sq.wqe_head);
675 kfree(qp->sq.w_list);
676 kfree(qp->sq.wrid);
677 kfree(qp->sq.wr_data);
678 kfree(qp->rq.wrid);
679
680err_free:
681 mlx5_vfree(*in);
682
683err_buf:
684 mlx5_buf_free(&dev->mdev, &qp->buf);
685
686err_uuar:
687 free_uuar(&dev->mdev.priv.uuari, uuarn);
688 return err;
689}
690
691static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
692{
693 mlx5_db_free(&dev->mdev, &qp->db);
694 kfree(qp->sq.wqe_head);
695 kfree(qp->sq.w_list);
696 kfree(qp->sq.wrid);
697 kfree(qp->sq.wr_data);
698 kfree(qp->rq.wrid);
699 mlx5_buf_free(&dev->mdev, &qp->buf);
700 free_uuar(&dev->mdev.priv.uuari, qp->bf->uuarn);
701}
702
703static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
704{
705 if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) ||
706 (attr->qp_type == IB_QPT_XRC_INI))
707 return cpu_to_be32(MLX5_SRQ_RQ);
708 else if (!qp->has_rq)
709 return cpu_to_be32(MLX5_ZERO_LEN_RQ);
710 else
711 return cpu_to_be32(MLX5_NON_ZERO_RQ);
712}
713
714static int is_connected(enum ib_qp_type qp_type)
715{
716 if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC)
717 return 1;
718
719 return 0;
720}
721
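/*
 * Common QP creation path: size the work queues, build the create
 * mailbox from either a user buffer (create_user_qp) or a kernel
 * allocation (create_kernel_qp), fill in the QP context defaults and
 * issue CREATE_QP to firmware.
 */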
722static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
723 struct ib_qp_init_attr *init_attr,
724 struct ib_udata *udata, struct mlx5_ib_qp *qp)
725{
726 struct mlx5_ib_resources *devr = &dev->devr;
727 struct mlx5_ib_create_qp_resp resp;
728 struct mlx5_create_qp_mbox_in *in;
729 struct mlx5_ib_create_qp ucmd;
730 int inlen = sizeof(*in);
731 int err;
732
733 mutex_init(&qp->mutex);
734 spin_lock_init(&qp->sq.lock);
735 spin_lock_init(&qp->rq.lock);
736
737 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
738 qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
739
740 if (pd && pd->uobject) {
741 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
742 mlx5_ib_dbg(dev, "copy failed\n");
743 return -EFAULT;
744 }
745
746 qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
747 qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
748 } else {
749 qp->wq_sig = !!wq_signature;
750 }
751
752 qp->has_rq = qp_has_rq(init_attr);
753 err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
754 qp, (pd && pd->uobject) ? &ucmd : NULL);
755 if (err) {
756 mlx5_ib_dbg(dev, "err %d\n", err);
757 return err;
758 }
759
760 if (pd) {
761 if (pd->uobject) {
762 mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count);
763 if (ucmd.rq_wqe_shift != qp->rq.wqe_shift ||
764 ucmd.rq_wqe_count != qp->rq.wqe_cnt) {
765 mlx5_ib_dbg(dev, "invalid rq params\n");
766 return -EINVAL;
767 }
768 if (ucmd.sq_wqe_count > dev->mdev.caps.max_wqes) {
769 mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n",
770 ucmd.sq_wqe_count, dev->mdev.caps.max_wqes);
771 return -EINVAL;
772 }
773 err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
774 if (err)
775 mlx5_ib_dbg(dev, "err %d\n", err);
776 } else {
777 err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
778 if (err)
779 mlx5_ib_dbg(dev, "err %d\n", err);
780 else
781 qp->pa_lkey = to_mpd(pd)->pa_lkey;
782 }
783
784 if (err)
785 return err;
786 } else {
787 in = mlx5_vzalloc(sizeof(*in));
788 if (!in)
789 return -ENOMEM;
790
791 qp->create_type = MLX5_QP_EMPTY;
792 }
793
794 if (is_sqp(init_attr->qp_type))
795 qp->port = init_attr->port_num;
796
797 in->ctx.flags = cpu_to_be32(to_mlx5_st(init_attr->qp_type) << 16 |
798 MLX5_QP_PM_MIGRATED << 11);
799
800 if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
801 in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn);
802 else
803 in->ctx.flags_pd = cpu_to_be32(MLX5_QP_LAT_SENSITIVE);
804
805 if (qp->wq_sig)
806 in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG);
807
808 if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
809 int rcqe_sz;
810 int scqe_sz;
811
812 rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq);
813 scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
814
815 if (rcqe_sz == 128)
816 in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE;
817 else
818 in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE;
819
820 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) {
821 if (scqe_sz == 128)
822 in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE;
823 else
824 in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE;
825 }
826 }
827
828 if (qp->rq.wqe_cnt) {
829 in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4);
830 in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3;
831 }
832
833 in->ctx.rq_type_srqn = get_rx_type(qp, init_attr);
834
835 if (qp->sq.wqe_cnt)
836 in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11);
837 else
838 in->ctx.sq_crq_size |= cpu_to_be16(0x8000);
839
840 /* Set default resources */
841 switch (init_attr->qp_type) {
842 case IB_QPT_XRC_TGT:
843 in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
844 in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
845 in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
846 in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn);
847 break;
848 case IB_QPT_XRC_INI:
849 in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
850 in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
851 in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
852 break;
853 default:
854 if (init_attr->srq) {
855 in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn);
856 in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn);
857 } else {
858 in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
859 in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
860 }
861 }
862
863 if (init_attr->send_cq)
864 in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn);
865
866 if (init_attr->recv_cq)
867 in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn);
868
869 in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
870
871 err = mlx5_core_create_qp(&dev->mdev, &qp->mqp, in, inlen);
872 if (err) {
873 mlx5_ib_dbg(dev, "create qp failed\n");
874 goto err_create;
875 }
876
877 mlx5_vfree(in);
878 /* Hardware wants QPN written in big-endian order (after
879 * shifting) for send doorbell. Precompute this value to save
880 * a little bit when posting sends.
881 */
882 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
883
884 qp->mqp.event = mlx5_ib_qp_event;
885
886 return 0;
887
888err_create:
889 if (qp->create_type == MLX5_QP_USER)
890 destroy_qp_user(pd, qp);
891 else if (qp->create_type == MLX5_QP_KERNEL)
892 destroy_qp_kernel(dev, qp);
893
894 mlx5_vfree(in);
895 return err;
896}
897
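/*
 * Lock the send and receive CQs in a fixed order (ascending CQN) so
 * concurrent destroy paths cannot deadlock; a shared CQ and a missing
 * CQ are handled as special cases.
 */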
898static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
899 __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
900{
901 if (send_cq) {
902 if (recv_cq) {
903 if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
904 spin_lock_irq(&send_cq->lock);
905 spin_lock_nested(&recv_cq->lock,
906 SINGLE_DEPTH_NESTING);
907 } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
908 spin_lock_irq(&send_cq->lock);
909 __acquire(&recv_cq->lock);
910 } else {
911 spin_lock_irq(&recv_cq->lock);
912 spin_lock_nested(&send_cq->lock,
913 SINGLE_DEPTH_NESTING);
914 }
915 } else {
916 spin_lock_irq(&send_cq->lock);
917 }
918 } else if (recv_cq) {
919 spin_lock_irq(&recv_cq->lock);
920 }
921}
922
923static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
924 __releases(&send_cq->lock) __releases(&recv_cq->lock)
925{
926 if (send_cq) {
927 if (recv_cq) {
928 if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
929 spin_unlock(&recv_cq->lock);
930 spin_unlock_irq(&send_cq->lock);
931 } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
932 __release(&recv_cq->lock);
933 spin_unlock_irq(&send_cq->lock);
934 } else {
935 spin_unlock(&send_cq->lock);
936 spin_unlock_irq(&recv_cq->lock);
937 }
938 } else {
939 spin_unlock_irq(&send_cq->lock);
940 }
941 } else if (recv_cq) {
942 spin_unlock_irq(&recv_cq->lock);
943 }
944}
945
946static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp)
947{
948 return to_mpd(qp->ibqp.pd);
949}
950
951static void get_cqs(struct mlx5_ib_qp *qp,
952 struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
953{
954 switch (qp->ibqp.qp_type) {
955 case IB_QPT_XRC_TGT:
956 *send_cq = NULL;
957 *recv_cq = NULL;
958 break;
959 case MLX5_IB_QPT_REG_UMR:
960 case IB_QPT_XRC_INI:
961 *send_cq = to_mcq(qp->ibqp.send_cq);
962 *recv_cq = NULL;
963 break;
964
965 case IB_QPT_SMI:
966 case IB_QPT_GSI:
967 case IB_QPT_RC:
968 case IB_QPT_UC:
969 case IB_QPT_UD:
970 case IB_QPT_RAW_IPV6:
971 case IB_QPT_RAW_ETHERTYPE:
972 *send_cq = to_mcq(qp->ibqp.send_cq);
973 *recv_cq = to_mcq(qp->ibqp.recv_cq);
974 break;
975
976 case IB_QPT_RAW_PACKET:
977 case IB_QPT_MAX:
978 default:
979 *send_cq = NULL;
980 *recv_cq = NULL;
981 break;
982 }
983}
984
985static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
986{
987 struct mlx5_ib_cq *send_cq, *recv_cq;
988 struct mlx5_modify_qp_mbox_in *in;
989 int err;
990
991 in = kzalloc(sizeof(*in), GFP_KERNEL);
992 if (!in)
993 return;
994 if (qp->state != IB_QPS_RESET)
995 if (mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(qp->state),
996 MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp))
997 mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
998 qp->mqp.qpn);
999
1000 get_cqs(qp, &send_cq, &recv_cq);
1001
1002 if (qp->create_type == MLX5_QP_KERNEL) {
1003 mlx5_ib_lock_cqs(send_cq, recv_cq);
1004 __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
1005 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1006 if (send_cq != recv_cq)
1007 __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
1008 mlx5_ib_unlock_cqs(send_cq, recv_cq);
1009 }
1010
1011 err = mlx5_core_destroy_qp(&dev->mdev, &qp->mqp);
1012 if (err)
1013 mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn);
1014 kfree(in);
1015
1016
1017 if (qp->create_type == MLX5_QP_KERNEL)
1018 destroy_qp_kernel(dev, qp);
1019 else if (qp->create_type == MLX5_QP_USER)
1020 destroy_qp_user(&get_pd(qp)->ibpd, qp);
1021}
1022
1023static const char *ib_qp_type_str(enum ib_qp_type type)
1024{
1025 switch (type) {
1026 case IB_QPT_SMI:
1027 return "IB_QPT_SMI";
1028 case IB_QPT_GSI:
1029 return "IB_QPT_GSI";
1030 case IB_QPT_RC:
1031 return "IB_QPT_RC";
1032 case IB_QPT_UC:
1033 return "IB_QPT_UC";
1034 case IB_QPT_UD:
1035 return "IB_QPT_UD";
1036 case IB_QPT_RAW_IPV6:
1037 return "IB_QPT_RAW_IPV6";
1038 case IB_QPT_RAW_ETHERTYPE:
1039 return "IB_QPT_RAW_ETHERTYPE";
1040 case IB_QPT_XRC_INI:
1041 return "IB_QPT_XRC_INI";
1042 case IB_QPT_XRC_TGT:
1043 return "IB_QPT_XRC_TGT";
1044 case IB_QPT_RAW_PACKET:
1045 return "IB_QPT_RAW_PACKET";
1046 case MLX5_IB_QPT_REG_UMR:
1047 return "MLX5_IB_QPT_REG_UMR";
1048 case IB_QPT_MAX:
1049 default:
1050 return "Invalid QP type";
1051 }
1052}
1053
1054struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
1055 struct ib_qp_init_attr *init_attr,
1056 struct ib_udata *udata)
1057{
1058 struct mlx5_ib_dev *dev;
1059 struct mlx5_ib_qp *qp;
1060 u16 xrcdn = 0;
1061 int err;
1062
1063 if (pd) {
1064 dev = to_mdev(pd->device);
1065 } else {
1066 /* being cautious here */
1067 if (init_attr->qp_type != IB_QPT_XRC_TGT &&
1068 init_attr->qp_type != MLX5_IB_QPT_REG_UMR) {
1069 pr_warn("%s: no PD for transport %s\n", __func__,
1070 ib_qp_type_str(init_attr->qp_type));
1071 return ERR_PTR(-EINVAL);
1072 }
1073 dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
1074 }
1075
1076 switch (init_attr->qp_type) {
1077 case IB_QPT_XRC_TGT:
1078 case IB_QPT_XRC_INI:
1079 if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC)) {
1080 mlx5_ib_dbg(dev, "XRC not supported\n");
1081 return ERR_PTR(-ENOSYS);
1082 }
1083 init_attr->recv_cq = NULL;
1084 if (init_attr->qp_type == IB_QPT_XRC_TGT) {
1085 xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
1086 init_attr->send_cq = NULL;
1087 }
1088
1089 /* fall through */
1090 case IB_QPT_RC:
1091 case IB_QPT_UC:
1092 case IB_QPT_UD:
1093 case IB_QPT_SMI:
1094 case IB_QPT_GSI:
1095 case MLX5_IB_QPT_REG_UMR:
1096 qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1097 if (!qp)
1098 return ERR_PTR(-ENOMEM);
1099
1100 err = create_qp_common(dev, pd, init_attr, udata, qp);
1101 if (err) {
1102 mlx5_ib_dbg(dev, "create_qp_common failed\n");
1103 kfree(qp);
1104 return ERR_PTR(err);
1105 }
1106
1107 if (is_qp0(init_attr->qp_type))
1108 qp->ibqp.qp_num = 0;
1109 else if (is_qp1(init_attr->qp_type))
1110 qp->ibqp.qp_num = 1;
1111 else
1112 qp->ibqp.qp_num = qp->mqp.qpn;
1113
1114 mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
1115 qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn,
1116 to_mcq(init_attr->send_cq)->mcq.cqn);
1117
1118 qp->xrcdn = xrcdn;
1119
1120 break;
1121
1122 case IB_QPT_RAW_IPV6:
1123 case IB_QPT_RAW_ETHERTYPE:
1124 case IB_QPT_RAW_PACKET:
1125 case IB_QPT_MAX:
1126 default:
1127 mlx5_ib_dbg(dev, "unsupported qp type %d\n",
1128 init_attr->qp_type);
1129 /* Don't support raw QPs */
1130 return ERR_PTR(-EINVAL);
1131 }
1132
1133 return &qp->ibqp;
1134}
1135
1136int mlx5_ib_destroy_qp(struct ib_qp *qp)
1137{
1138 struct mlx5_ib_dev *dev = to_mdev(qp->device);
1139 struct mlx5_ib_qp *mqp = to_mqp(qp);
1140
1141 destroy_qp_common(dev, mqp);
1142
1143 kfree(mqp);
1144
1145 return 0;
1146}
1147
1148static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr,
1149 int attr_mask)
1150{
1151 u32 hw_access_flags = 0;
1152 u8 dest_rd_atomic;
1153 u32 access_flags;
1154
1155 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1156 dest_rd_atomic = attr->max_dest_rd_atomic;
1157 else
1158 dest_rd_atomic = qp->resp_depth;
1159
1160 if (attr_mask & IB_QP_ACCESS_FLAGS)
1161 access_flags = attr->qp_access_flags;
1162 else
1163 access_flags = qp->atomic_rd_en;
1164
1165 if (!dest_rd_atomic)
1166 access_flags &= IB_ACCESS_REMOTE_WRITE;
1167
1168 if (access_flags & IB_ACCESS_REMOTE_READ)
1169 hw_access_flags |= MLX5_QP_BIT_RRE;
1170 if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
1171 hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX);
1172 if (access_flags & IB_ACCESS_REMOTE_WRITE)
1173 hw_access_flags |= MLX5_QP_BIT_RWE;
1174
1175 return cpu_to_be32(hw_access_flags);
1176}
1177
1178enum {
1179 MLX5_PATH_FLAG_FL = 1 << 0,
1180 MLX5_PATH_FLAG_FREE_AR = 1 << 1,
1181 MLX5_PATH_FLAG_COUNTER = 1 << 2,
1182};
1183
1184static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
1185{
1186 if (rate == IB_RATE_PORT_CURRENT) {
1187 return 0;
1188 } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) {
1189 return -EINVAL;
1190 } else {
1191 while (rate != IB_RATE_2_5_GBPS &&
1192 !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
1193 dev->mdev.caps.stat_rate_support))
1194 --rate;
1195 }
1196
1197 return rate + MLX5_STAT_RATE_OFFSET;
1198}
1199
1200static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
1201 struct mlx5_qp_path *path, u8 port, int attr_mask,
1202 u32 path_flags, const struct ib_qp_attr *attr)
1203{
1204 int err;
1205
1206 path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
1207 path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
1208
1209 if (attr_mask & IB_QP_PKEY_INDEX)
1210 path->pkey_index = attr->pkey_index;
1211
1212 path->grh_mlid = ah->src_path_bits & 0x7f;
1213 path->rlid = cpu_to_be16(ah->dlid);
1214
1215 if (ah->ah_flags & IB_AH_GRH) {
1216 path->grh_mlid |= 1 << 7;
1217 path->mgid_index = ah->grh.sgid_index;
1218 path->hop_limit = ah->grh.hop_limit;
1219 path->tclass_flowlabel =
1220 cpu_to_be32((ah->grh.traffic_class << 20) |
1221 (ah->grh.flow_label));
1222 memcpy(path->rgid, ah->grh.dgid.raw, 16);
1223 }
1224
1225 err = ib_rate_to_mlx5(dev, ah->static_rate);
1226 if (err < 0)
1227 return err;
1228 path->static_rate = err;
1229 path->port = port;
1230
1231 if (ah->ah_flags & IB_AH_GRH) {
1232 if (ah->grh.sgid_index >= dev->mdev.caps.port[port - 1].gid_table_len) {
1233			pr_err("sgid_index (%u) too large. max is %d\n",
1234			       ah->grh.sgid_index, dev->mdev.caps.port[port - 1].gid_table_len);
1235 return -EINVAL;
1236 }
1237
1238 path->grh_mlid |= 1 << 7;
1239 path->mgid_index = ah->grh.sgid_index;
1240 path->hop_limit = ah->grh.hop_limit;
1241 path->tclass_flowlabel =
1242 cpu_to_be32((ah->grh.traffic_class << 20) |
1243 (ah->grh.flow_label));
1244 memcpy(path->rgid, ah->grh.dgid.raw, 16);
1245 }
1246
1247 if (attr_mask & IB_QP_TIMEOUT)
1248 path->ackto_lt = attr->timeout << 3;
1249
1250 path->sl = ah->sl & 0xf;
1251
1252 return 0;
1253}
1254
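/*
 * For each (current state, next state, service type) transition, the
 * set of optional parameters the hardware accepts in MODIFY_QP.
 * __mlx5_ib_modify_qp() masks the translated attr_mask with the entry
 * for the requested transition.
 */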
1255static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = {
1256 [MLX5_QP_STATE_INIT] = {
1257 [MLX5_QP_STATE_INIT] = {
1258 [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE |
1259 MLX5_QP_OPTPAR_RAE |
1260 MLX5_QP_OPTPAR_RWE |
1261 MLX5_QP_OPTPAR_PKEY_INDEX |
1262 MLX5_QP_OPTPAR_PRI_PORT,
1263 [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
1264 MLX5_QP_OPTPAR_PKEY_INDEX |
1265 MLX5_QP_OPTPAR_PRI_PORT,
1266 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
1267 MLX5_QP_OPTPAR_Q_KEY |
1268 MLX5_QP_OPTPAR_PRI_PORT,
1269 },
1270 [MLX5_QP_STATE_RTR] = {
1271 [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
1272 MLX5_QP_OPTPAR_RRE |
1273 MLX5_QP_OPTPAR_RAE |
1274 MLX5_QP_OPTPAR_RWE |
1275 MLX5_QP_OPTPAR_PKEY_INDEX,
1276 [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
1277 MLX5_QP_OPTPAR_RWE |
1278 MLX5_QP_OPTPAR_PKEY_INDEX,
1279 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
1280 MLX5_QP_OPTPAR_Q_KEY,
1281 [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX |
1282 MLX5_QP_OPTPAR_Q_KEY,
1283 },
1284 },
1285 [MLX5_QP_STATE_RTR] = {
1286 [MLX5_QP_STATE_RTS] = {
1287 [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
1288 MLX5_QP_OPTPAR_RRE |
1289 MLX5_QP_OPTPAR_RAE |
1290 MLX5_QP_OPTPAR_RWE |
1291 MLX5_QP_OPTPAR_PM_STATE |
1292 MLX5_QP_OPTPAR_RNR_TIMEOUT,
1293 [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
1294 MLX5_QP_OPTPAR_RWE |
1295 MLX5_QP_OPTPAR_PM_STATE,
1296 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
1297 },
1298 },
1299 [MLX5_QP_STATE_RTS] = {
1300 [MLX5_QP_STATE_RTS] = {
1301 [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE |
1302 MLX5_QP_OPTPAR_RAE |
1303 MLX5_QP_OPTPAR_RWE |
1304 MLX5_QP_OPTPAR_RNR_TIMEOUT |
1305 MLX5_QP_OPTPAR_PM_STATE,
1306 [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
1307 MLX5_QP_OPTPAR_PM_STATE,
1308 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY |
1309 MLX5_QP_OPTPAR_SRQN |
1310 MLX5_QP_OPTPAR_CQN_RCV,
1311 },
1312 },
1313 [MLX5_QP_STATE_SQER] = {
1314 [MLX5_QP_STATE_RTS] = {
1315 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
1316 [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
1317 },
1318 },
1319};
1320
1321static int ib_nr_to_mlx5_nr(int ib_mask)
1322{
1323 switch (ib_mask) {
1324 case IB_QP_STATE:
1325 return 0;
1326 case IB_QP_CUR_STATE:
1327 return 0;
1328 case IB_QP_EN_SQD_ASYNC_NOTIFY:
1329 return 0;
1330 case IB_QP_ACCESS_FLAGS:
1331 return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE |
1332 MLX5_QP_OPTPAR_RAE;
1333 case IB_QP_PKEY_INDEX:
1334 return MLX5_QP_OPTPAR_PKEY_INDEX;
1335 case IB_QP_PORT:
1336 return MLX5_QP_OPTPAR_PRI_PORT;
1337 case IB_QP_QKEY:
1338 return MLX5_QP_OPTPAR_Q_KEY;
1339 case IB_QP_AV:
1340 return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH |
1341 MLX5_QP_OPTPAR_PRI_PORT;
1342 case IB_QP_PATH_MTU:
1343 return 0;
1344 case IB_QP_TIMEOUT:
1345 return MLX5_QP_OPTPAR_ACK_TIMEOUT;
1346 case IB_QP_RETRY_CNT:
1347 return MLX5_QP_OPTPAR_RETRY_COUNT;
1348 case IB_QP_RNR_RETRY:
1349 return MLX5_QP_OPTPAR_RNR_RETRY;
1350 case IB_QP_RQ_PSN:
1351 return 0;
1352 case IB_QP_MAX_QP_RD_ATOMIC:
1353 return MLX5_QP_OPTPAR_SRA_MAX;
1354 case IB_QP_ALT_PATH:
1355 return MLX5_QP_OPTPAR_ALT_ADDR_PATH;
1356 case IB_QP_MIN_RNR_TIMER:
1357 return MLX5_QP_OPTPAR_RNR_TIMEOUT;
1358 case IB_QP_SQ_PSN:
1359 return 0;
1360 case IB_QP_MAX_DEST_RD_ATOMIC:
1361 return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE |
1362 MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE;
1363 case IB_QP_PATH_MIG_STATE:
1364 return MLX5_QP_OPTPAR_PM_STATE;
1365 case IB_QP_CAP:
1366 return 0;
1367 case IB_QP_DEST_QPN:
1368 return 0;
1369 }
1370 return 0;
1371}
1372
1373static int ib_mask_to_mlx5_opt(int ib_mask)
1374{
1375 int result = 0;
1376 int i;
1377
1378 for (i = 0; i < 8 * sizeof(int); i++) {
1379 if ((1 << i) & ib_mask)
1380 result |= ib_nr_to_mlx5_nr(1 << i);
1381 }
1382
1383 return result;
1384}
1385
1386static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
1387 const struct ib_qp_attr *attr, int attr_mask,
1388 enum ib_qp_state cur_state, enum ib_qp_state new_state)
1389{
1390 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1391 struct mlx5_ib_qp *qp = to_mqp(ibqp);
1392 struct mlx5_ib_cq *send_cq, *recv_cq;
1393 struct mlx5_qp_context *context;
1394 struct mlx5_modify_qp_mbox_in *in;
1395 struct mlx5_ib_pd *pd;
1396 enum mlx5_qp_state mlx5_cur, mlx5_new;
1397 enum mlx5_qp_optpar optpar;
1398 int sqd_event;
1399 int mlx5_st;
1400 int err;
1401
1402 in = kzalloc(sizeof(*in), GFP_KERNEL);
1403 if (!in)
1404 return -ENOMEM;
1405
1406 context = &in->ctx;
1407 err = to_mlx5_st(ibqp->qp_type);
1408 if (err < 0)
1409 goto out;
1410
1411 context->flags = cpu_to_be32(err << 16);
1412
1413 if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
1414 context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
1415 } else {
1416 switch (attr->path_mig_state) {
1417 case IB_MIG_MIGRATED:
1418 context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
1419 break;
1420 case IB_MIG_REARM:
1421 context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11);
1422 break;
1423 case IB_MIG_ARMED:
1424 context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11);
1425 break;
1426 }
1427 }
1428
1429 if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
1430 context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
1431 } else if (ibqp->qp_type == IB_QPT_UD ||
1432 ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
1433 context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
1434 } else if (attr_mask & IB_QP_PATH_MTU) {
1435 if (attr->path_mtu < IB_MTU_256 ||
1436 attr->path_mtu > IB_MTU_4096) {
1437 mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu);
1438 err = -EINVAL;
1439 goto out;
1440 }
1441 context->mtu_msgmax = (attr->path_mtu << 5) | dev->mdev.caps.log_max_msg;
1442 }
1443
1444 if (attr_mask & IB_QP_DEST_QPN)
1445 context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num);
1446
1447 if (attr_mask & IB_QP_PKEY_INDEX)
1448 context->pri_path.pkey_index = attr->pkey_index;
1449
1450 /* todo implement counter_index functionality */
1451
1452 if (is_sqp(ibqp->qp_type))
1453 context->pri_path.port = qp->port;
1454
1455 if (attr_mask & IB_QP_PORT)
1456 context->pri_path.port = attr->port_num;
1457
1458 if (attr_mask & IB_QP_AV) {
1459 err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
1460 attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
1461 attr_mask, 0, attr);
1462 if (err)
1463 goto out;
1464 }
1465
1466 if (attr_mask & IB_QP_TIMEOUT)
1467 context->pri_path.ackto_lt |= attr->timeout << 3;
1468
1469 if (attr_mask & IB_QP_ALT_PATH) {
1470 err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
1471 attr->alt_port_num, attr_mask, 0, attr);
1472 if (err)
1473 goto out;
1474 }
1475
1476 pd = get_pd(qp);
1477 get_cqs(qp, &send_cq, &recv_cq);
1478
1479 context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
1480 context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0;
1481 context->cqn_recv = recv_cq ? cpu_to_be32(recv_cq->mcq.cqn) : 0;
1482 context->params1 = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28);
1483
1484 if (attr_mask & IB_QP_RNR_RETRY)
1485 context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
1486
1487 if (attr_mask & IB_QP_RETRY_CNT)
1488 context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
1489
1490 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
1491 if (attr->max_rd_atomic)
1492 context->params1 |=
1493 cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
1494 }
1495
1496 if (attr_mask & IB_QP_SQ_PSN)
1497 context->next_send_psn = cpu_to_be32(attr->sq_psn);
1498
1499 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1500 if (attr->max_dest_rd_atomic)
1501 context->params2 |=
1502 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
1503 }
1504
1505 if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
1506 context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask);
1507
1508 if (attr_mask & IB_QP_MIN_RNR_TIMER)
1509 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
1510
1511 if (attr_mask & IB_QP_RQ_PSN)
1512 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
1513
1514 if (attr_mask & IB_QP_QKEY)
1515 context->qkey = cpu_to_be32(attr->qkey);
1516
1517 if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1518 context->db_rec_addr = cpu_to_be64(qp->db.dma);
1519
1520 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
1521 attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
1522 sqd_event = 1;
1523 else
1524 sqd_event = 0;
1525
1526 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1527 context->sq_crq_size |= cpu_to_be16(1 << 4);
1528
1529
1530 mlx5_cur = to_mlx5_state(cur_state);
1531 mlx5_new = to_mlx5_state(new_state);
1532 mlx5_st = to_mlx5_st(ibqp->qp_type);
1533 if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0) {
 err = -EINVAL;
1534 goto out;
 }
1535
1536 optpar = ib_mask_to_mlx5_opt(attr_mask);
1537 optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
1538 in->optparam = cpu_to_be32(optpar);
1539 err = mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(cur_state),
1540 to_mlx5_state(new_state), in, sqd_event,
1541 &qp->mqp);
1542 if (err)
1543 goto out;
1544
1545 qp->state = new_state;
1546
1547 if (attr_mask & IB_QP_ACCESS_FLAGS)
1548 qp->atomic_rd_en = attr->qp_access_flags;
1549 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1550 qp->resp_depth = attr->max_dest_rd_atomic;
1551 if (attr_mask & IB_QP_PORT)
1552 qp->port = attr->port_num;
1553 if (attr_mask & IB_QP_ALT_PATH)
1554 qp->alt_port = attr->alt_port_num;
1555
1556 /*
1557 * If we moved a kernel QP to RESET, clean up all old CQ
1558 * entries and reinitialize the QP.
1559 */
1560 if (new_state == IB_QPS_RESET && !ibqp->uobject) {
1561 mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
1562 ibqp->srq ? to_msrq(ibqp->srq) : NULL);
1563 if (send_cq != recv_cq)
1564 mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
1565
1566 qp->rq.head = 0;
1567 qp->rq.tail = 0;
1568 qp->sq.head = 0;
1569 qp->sq.tail = 0;
1570 qp->sq.cur_post = 0;
1571 qp->sq.last_poll = 0;
1572 qp->db.db[MLX5_RCV_DBR] = 0;
1573 qp->db.db[MLX5_SND_DBR] = 0;
1574 }
1575
1576out:
1577 kfree(in);
1578 return err;
1579}
1580
1581int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1582 int attr_mask, struct ib_udata *udata)
1583{
1584 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1585 struct mlx5_ib_qp *qp = to_mqp(ibqp);
1586 enum ib_qp_state cur_state, new_state;
1587 int err = -EINVAL;
1588 int port;
1589
1590 mutex_lock(&qp->mutex);
1591
1592 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
1593 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1594
1595 if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
1596 !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
1597 goto out;
1598
1599 if ((attr_mask & IB_QP_PORT) &&
1600 (attr->port_num == 0 || attr->port_num > dev->mdev.caps.num_ports))
1601 goto out;
1602
1603 if (attr_mask & IB_QP_PKEY_INDEX) {
1604 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
1605 if (attr->pkey_index >= dev->mdev.caps.port[port - 1].pkey_table_len)
1606 goto out;
1607 }
1608
1609 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
1610 attr->max_rd_atomic > dev->mdev.caps.max_ra_res_qp)
1611 goto out;
1612
1613 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
1614 attr->max_dest_rd_atomic > dev->mdev.caps.max_ra_req_qp)
1615 goto out;
1616
1617 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
1618 err = 0;
1619 goto out;
1620 }
1621
1622 err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
1623
1624out:
1625 mutex_unlock(&qp->mutex);
1626 return err;
1627}
1628
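/*
 * Check whether posting nreq more WRs would overflow the work queue.
 * The count is re-read under the CQ lock so that completions reaped
 * concurrently by the poll path are taken into account.
 */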
1629static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
1630{
1631 struct mlx5_ib_cq *cq;
1632 unsigned cur;
1633
1634 cur = wq->head - wq->tail;
1635 if (likely(cur + nreq < wq->max_post))
1636 return 0;
1637
1638 cq = to_mcq(ib_cq);
1639 spin_lock(&cq->lock);
1640 cur = wq->head - wq->tail;
1641 spin_unlock(&cq->lock);
1642
1643 return cur + nreq >= wq->max_post;
1644}
1645
1646static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
1647 u64 remote_addr, u32 rkey)
1648{
1649 rseg->raddr = cpu_to_be64(remote_addr);
1650 rseg->rkey = cpu_to_be32(rkey);
1651 rseg->reserved = 0;
1652}
1653
1654static void set_atomic_seg(struct mlx5_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
1655{
1656 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
1657 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
1658 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
1659 } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
1660 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
1661 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask);
1662 } else {
1663 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
1664 aseg->compare = 0;
1665 }
1666}
1667
1668static void set_masked_atomic_seg(struct mlx5_wqe_masked_atomic_seg *aseg,
1669 struct ib_send_wr *wr)
1670{
1671 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
1672 aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask);
1673 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
1674 aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask);
1675}
1676
1677static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
1678 struct ib_send_wr *wr)
1679{
1680 memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av));
1681 dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV);
1682 dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
1683}
1684
1685static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
1686{
1687 dseg->byte_count = cpu_to_be32(sg->length);
1688 dseg->lkey = cpu_to_be32(sg->lkey);
1689 dseg->addr = cpu_to_be64(sg->addr);
1690}
1691
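/*
 * Translation entries are packed two per 16-byte octoword; round the
 * entry count up to a multiple of 8 before converting to octowords.
 */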
1692static __be16 get_klm_octo(int npages)
1693{
1694 return cpu_to_be16(ALIGN(npages, 8) / 2);
1695}
1696
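/* Mask of mkey context fields that a fast-register WR may modify. */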
1697static __be64 frwr_mkey_mask(void)
1698{
1699 u64 result;
1700
1701 result = MLX5_MKEY_MASK_LEN |
1702 MLX5_MKEY_MASK_PAGE_SIZE |
1703 MLX5_MKEY_MASK_START_ADDR |
1704 MLX5_MKEY_MASK_EN_RINVAL |
1705 MLX5_MKEY_MASK_KEY |
1706 MLX5_MKEY_MASK_LR |
1707 MLX5_MKEY_MASK_LW |
1708 MLX5_MKEY_MASK_RR |
1709 MLX5_MKEY_MASK_RW |
1710 MLX5_MKEY_MASK_A |
1711 MLX5_MKEY_MASK_SMALL_FENCE |
1712 MLX5_MKEY_MASK_FREE;
1713
1714 return cpu_to_be64(result);
1715}
1716
1717static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
1718 struct ib_send_wr *wr, int li)
1719{
1720 memset(umr, 0, sizeof(*umr));
1721
1722 if (li) {
1723 umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
1724 umr->flags = 1 << 7;
1725 return;
1726 }
1727
1728 umr->flags = (1 << 5); /* fail if not free */
1729 umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len);
1730 umr->mkey_mask = frwr_mkey_mask();
1731}
1732
1733static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
1734 struct ib_send_wr *wr)
1735{
1736 struct umr_wr *umrwr = (struct umr_wr *)&wr->wr.fast_reg;
1737 u64 mask;
1738
1739 memset(umr, 0, sizeof(*umr));
1740
1741 if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) {
1742 umr->flags = 1 << 5; /* fail if not free */
1743 umr->klm_octowords = get_klm_octo(umrwr->npages);
1744 mask = MLX5_MKEY_MASK_LEN |
1745 MLX5_MKEY_MASK_PAGE_SIZE |
1746 MLX5_MKEY_MASK_START_ADDR |
1747 MLX5_MKEY_MASK_PD |
1748 MLX5_MKEY_MASK_LR |
1749 MLX5_MKEY_MASK_LW |
1750 MLX5_MKEY_MASK_RR |
1751 MLX5_MKEY_MASK_RW |
1752 MLX5_MKEY_MASK_A |
1753 MLX5_MKEY_MASK_FREE;
1754 umr->mkey_mask = cpu_to_be64(mask);
1755 } else {
1756 umr->flags = 2 << 5; /* fail if free */
1757 mask = MLX5_MKEY_MASK_FREE;
1758 umr->mkey_mask = cpu_to_be64(mask);
1759 }
1760
1761 if (!wr->num_sge)
1762 umr->flags |= (1 << 7); /* inline */
1763}
1764
1765static u8 get_umr_flags(int acc)
1766{
1767 return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
1768 (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
1769 (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
1770 (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
1771 MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
1772}
1773
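/*
 * Build the mkey context segment for a fast-register WR, or mark the
 * mkey as free when handling a local invalidate (li).
 */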
1774static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
1775 int li, int *writ)
1776{
1777 memset(seg, 0, sizeof(*seg));
1778 if (li) {
1779 seg->status = 1 << 6;
1780 return;
1781 }
1782
1783 seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags);
1784 *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | MLX5_PERM_REMOTE_WRITE);
1785 seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
1786 seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
1787 seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
1788 seg->len = cpu_to_be64(wr->wr.fast_reg.length);
1789 seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2);
1790 seg->log2_page_size = wr->wr.fast_reg.page_shift;
1791}
1792
1793static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
1794{
1795 memset(seg, 0, sizeof(*seg));
1796 if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
1797 seg->status = 1 << 6;
1798 return;
1799 }
1800
1801 seg->flags = convert_access(wr->wr.fast_reg.access_flags);
1802 seg->flags_pd = cpu_to_be32(to_mpd((struct ib_pd *)wr->wr.fast_reg.page_list)->pdn);
1803 seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
1804 seg->len = cpu_to_be64(wr->wr.fast_reg.length);
1805 seg->log2_page_size = wr->wr.fast_reg.page_shift;
1806 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1807}
1808
1809static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
1810 struct ib_send_wr *wr,
1811 struct mlx5_core_dev *mdev,
1812 struct mlx5_ib_pd *pd,
1813 int writ)
1814{
1815 struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
1816 u64 *page_list = wr->wr.fast_reg.page_list->page_list;
1817 u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0);
1818 int i;
1819
1820 for (i = 0; i < wr->wr.fast_reg.page_list_len; i++)
1821 mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm);
1822 dseg->addr = cpu_to_be64(mfrpl->map);
1823 dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64));
1824 dseg->lkey = cpu_to_be32(pd->pa_lkey);
1825}
1826
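/* Return the immediate data or invalidate rkey carried by the WR, if any. */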
1827static __be32 send_ieth(struct ib_send_wr *wr)
1828{
1829 switch (wr->opcode) {
1830 case IB_WR_SEND_WITH_IMM:
1831 case IB_WR_RDMA_WRITE_WITH_IMM:
1832 return wr->ex.imm_data;
1833
1834 case IB_WR_SEND_WITH_INV:
1835 return cpu_to_be32(wr->ex.invalidate_rkey);
1836
1837 default:
1838 return 0;
1839 }
1840}
1841
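/* XOR all bytes of the given buffer and return the complement as its signature. */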
1842static u8 calc_sig(void *wqe, int size)
1843{
1844 u8 *p = wqe;
1845 u8 res = 0;
1846 int i;
1847
1848 for (i = 0; i < size; i++)
1849 res ^= p[i];
1850
1851 return ~res;
1852}
1853
1854static u8 wq_sig(void *wqe)
1855{
1856 return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
1857}
1858
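/*
 * Copy the WR's scatter list inline into the send WQE, wrapping at the
 * end of the SQ buffer; fails if it exceeds the QP's max inline data.
 */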
1859static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
1860 void *wqe, int *sz)
1861{
1862 struct mlx5_wqe_inline_seg *seg;
1863 void *qend = qp->sq.qend;
1864 void *addr;
1865 int inl = 0;
1866 int copy;
1867 int len;
1868 int i;
1869
1870 seg = wqe;
1871 wqe += sizeof(*seg);
1872 for (i = 0; i < wr->num_sge; i++) {
1873 addr = (void *)(unsigned long)(wr->sg_list[i].addr);
1874 len = wr->sg_list[i].length;
1875 inl += len;
1876
1877 if (unlikely(inl > qp->max_inline_data))
1878 return -ENOMEM;
1879
1880 if (unlikely(wqe + len > qend)) {
1881 copy = qend - wqe;
1882 memcpy(wqe, addr, copy);
1883 addr += copy;
1884 len -= copy;
1885 wqe = mlx5_get_send_wqe(qp, 0);
1886 }
1887 memcpy(wqe, addr, len);
1888 wqe += len;
1889 }
1890
1891 seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
1892
1893 *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
1894
1895 return 0;
1896}
1897
1898static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
1899 struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)
1900{
1901 int writ = 0;
1902 int li;
1903
1904 li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0;
1905 if (unlikely(wr->send_flags & IB_SEND_INLINE))
1906 return -EINVAL;
1907
1908 set_frwr_umr_segment(*seg, wr, li);
1909 *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
1910 *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
1911 if (unlikely((*seg == qp->sq.qend)))
1912 *seg = mlx5_get_send_wqe(qp, 0);
1913 set_mkey_segment(*seg, wr, li, &writ);
1914 *seg += sizeof(struct mlx5_mkey_seg);
1915 *size += sizeof(struct mlx5_mkey_seg) / 16;
1916 if (unlikely((*seg == qp->sq.qend)))
1917 *seg = mlx5_get_send_wqe(qp, 0);
1918 if (!li) {
1919 set_frwr_pages(*seg, wr, mdev, pd, writ);
1920 *seg += sizeof(struct mlx5_wqe_data_seg);
1921 *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
1922 }
1923 return 0;
1924}
1925
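/* Debug helper: dump a WQE 16 bytes per line, following SQ buffer wrap-around. */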
1926static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
1927{
1928 __be32 *p = NULL;
1929 int tidx = idx;
1930 int i, j;
1931
1932 pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx));
1933 for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
1934 if ((i & 0xf) == 0) {
1935 void *buf = mlx5_get_send_wqe(qp, tidx);
1936 tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1);
1937 p = buf;
1938 j = 0;
1939 }
1940 pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
1941 be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
1942 be32_to_cpu(p[j + 3]));
1943 }
1944}
1945
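/*
 * Copy a WQE to the BlueFlame register 64 bytes at a time, wrapping to
 * the start of the SQ buffer when the source reaches its end.
 */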
1946static void mlx5_bf_copy(u64 __iomem *dst, u64 *src,
1947 unsigned bytecnt, struct mlx5_ib_qp *qp)
1948{
1949 while (bytecnt > 0) {
1950 __iowrite64_copy(dst++, src++, 8);
1951 __iowrite64_copy(dst++, src++, 8);
1952 __iowrite64_copy(dst++, src++, 8);
1953 __iowrite64_copy(dst++, src++, 8);
1954 __iowrite64_copy(dst++, src++, 8);
1955 __iowrite64_copy(dst++, src++, 8);
1956 __iowrite64_copy(dst++, src++, 8);
1957 __iowrite64_copy(dst++, src++, 8);
1958 bytecnt -= 64;
1959 if (unlikely(src == qp->sq.qend))
1960 src = mlx5_get_send_wqe(qp, 0);
1961 }
1962}
1963
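/* Select the fence mode for this WR from the cached fence state and IB_SEND_FENCE. */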
1964static u8 get_fence(u8 fence, struct ib_send_wr *wr)
1965{
1966 if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
1967 wr->send_flags & IB_SEND_FENCE))
1968 return MLX5_FENCE_MODE_STRONG_ORDERING;
1969
1970 if (unlikely(fence)) {
1971 if (wr->send_flags & IB_SEND_FENCE)
1972 return MLX5_FENCE_MODE_SMALL_AND_FENCE;
1973 else
1974 return fence;
1975
1976 } else {
1977 return 0;
1978 }
1979}
1980
1981int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1982 struct ib_send_wr **bad_wr)
1983{
1984 struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
1985 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1986 struct mlx5_core_dev *mdev = &dev->mdev;
1987 struct mlx5_ib_qp *qp = to_mqp(ibqp);
1988 struct mlx5_wqe_data_seg *dpseg;
1989 struct mlx5_wqe_xrc_seg *xrc;
1990 struct mlx5_bf *bf = qp->bf;
1991 int uninitialized_var(size);
1992 void *qend = qp->sq.qend;
1993 unsigned long flags;
1994 u32 mlx5_opcode;
1995 unsigned idx;
1996 int err = 0;
1997 int inl = 0;
1998 int num_sge;
1999 void *seg;
2000 int nreq;
2001 int i;
2002 u8 next_fence = 0;
2003 u8 opmod = 0;
2004 u8 fence;
2005
2006 spin_lock_irqsave(&qp->sq.lock, flags);
2007
2008 for (nreq = 0; wr; nreq++, wr = wr->next) {
2009 if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
2010 mlx5_ib_warn(dev, "invalid send opcode 0x%x\n", wr->opcode);
2011 err = -EINVAL;
2012 *bad_wr = wr;
2013 goto out;
2014 }
2015
2016 if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
2017 mlx5_ib_warn(dev, "send queue overflow\n");
2018 err = -ENOMEM;
2019 *bad_wr = wr;
2020 goto out;
2021 }
2022
2023 fence = qp->fm_cache;
2024 num_sge = wr->num_sge;
2025 if (unlikely(num_sge > qp->sq.max_gs)) {
2026 mlx5_ib_warn(dev, "too many sges: %d (max %d)\n", num_sge, qp->sq.max_gs);
2027 err = -ENOMEM;
2028 *bad_wr = wr;
2029 goto out;
2030 }
2031
2032 idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
2033 seg = mlx5_get_send_wqe(qp, idx);
2034 ctrl = seg;
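 /* clear the signature, reserved and fm_ce_se bytes of the control segment */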
2035 *(uint32_t *)(seg + 8) = 0;
2036 ctrl->imm = send_ieth(wr);
2037 ctrl->fm_ce_se = qp->sq_signal_bits |
2038 (wr->send_flags & IB_SEND_SIGNALED ?
2039 MLX5_WQE_CTRL_CQ_UPDATE : 0) |
2040 (wr->send_flags & IB_SEND_SOLICITED ?
2041 MLX5_WQE_CTRL_SOLICITED : 0);
2042
2043 seg += sizeof(*ctrl);
2044 size = sizeof(*ctrl) / 16;
2045
2046 switch (ibqp->qp_type) {
2047 case IB_QPT_XRC_INI:
2048 xrc = seg;
2049 xrc->xrc_srqn = cpu_to_be32(wr->xrc_remote_srq_num);
2050 seg += sizeof(*xrc);
2051 size += sizeof(*xrc) / 16;
2052 /* fall through */
2053 case IB_QPT_RC:
2054 switch (wr->opcode) {
2055 case IB_WR_RDMA_READ:
2056 case IB_WR_RDMA_WRITE:
2057 case IB_WR_RDMA_WRITE_WITH_IMM:
2058 set_raddr_seg(seg, wr->wr.rdma.remote_addr,
2059 wr->wr.rdma.rkey);
2060 seg += sizeof(struct mlx5_wqe_raddr_seg);
2061 size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
2062 break;
2063
2064 case IB_WR_ATOMIC_CMP_AND_SWP:
2065 case IB_WR_ATOMIC_FETCH_AND_ADD:
2066 set_raddr_seg(seg, wr->wr.atomic.remote_addr,
2067 wr->wr.atomic.rkey);
2068 seg += sizeof(struct mlx5_wqe_raddr_seg);
2069
2070 set_atomic_seg(seg, wr);
2071 seg += sizeof(struct mlx5_wqe_atomic_seg);
2072
2073 size += (sizeof(struct mlx5_wqe_raddr_seg) +
2074 sizeof(struct mlx5_wqe_atomic_seg)) / 16;
2075 break;
2076
2077 case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
2078 set_raddr_seg(seg, wr->wr.atomic.remote_addr,
2079 wr->wr.atomic.rkey);
2080 seg += sizeof(struct mlx5_wqe_raddr_seg);
2081
2082 set_masked_atomic_seg(seg, wr);
2083 seg += sizeof(struct mlx5_wqe_masked_atomic_seg);
2084
2085 size += (sizeof(struct mlx5_wqe_raddr_seg) +
2086 sizeof(struct mlx5_wqe_masked_atomic_seg)) / 16;
2087 break;
2088
2089 case IB_WR_LOCAL_INV:
2090 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
2091 qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
2092 ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
2093 err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
2094 if (err) {
2095 mlx5_ib_warn(dev, "failed to build local invalidate WQE\n");
2096 *bad_wr = wr;
2097 goto out;
2098 }
2099 num_sge = 0;
2100 break;
2101
2102 case IB_WR_FAST_REG_MR:
2103 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
2104 qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR;
2105 ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
2106 err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
2107 if (err) {
2108 mlx5_ib_warn(dev, "failed to build fast register WQE\n");
2109 *bad_wr = wr;
2110 goto out;
2111 }
2112 num_sge = 0;
2113 break;
2114
2115 default:
2116 break;
2117 }
2118 break;
2119
2120 case IB_QPT_UC:
2121 switch (wr->opcode) {
2122 case IB_WR_RDMA_WRITE:
2123 case IB_WR_RDMA_WRITE_WITH_IMM:
2124 set_raddr_seg(seg, wr->wr.rdma.remote_addr,
2125 wr->wr.rdma.rkey);
2126 seg += sizeof(struct mlx5_wqe_raddr_seg);
2127 size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
2128 break;
2129
2130 default:
2131 break;
2132 }
2133 break;
2134
2135 case IB_QPT_UD:
2136 case IB_QPT_SMI:
2137 case IB_QPT_GSI:
2138 set_datagram_seg(seg, wr);
2139 seg += sizeof(struct mlx5_wqe_datagram_seg);
2140 size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
2141 if (unlikely((seg == qend)))
2142 seg = mlx5_get_send_wqe(qp, 0);
2143 break;
2144
2145 case MLX5_IB_QPT_REG_UMR:
2146 if (wr->opcode != MLX5_IB_WR_UMR) {
2147 err = -EINVAL;
2148 mlx5_ib_warn(dev, "bad opcode\n");
2149 goto out;
2150 }
2151 qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
2152 ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
2153 set_reg_umr_segment(seg, wr);
2154 seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
2155 size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
2156 if (unlikely((seg == qend)))
2157 seg = mlx5_get_send_wqe(qp, 0);
2158 set_reg_mkey_segment(seg, wr);
2159 seg += sizeof(struct mlx5_mkey_seg);
2160 size += sizeof(struct mlx5_mkey_seg) / 16;
2161 if (unlikely((seg == qend)))
2162 seg = mlx5_get_send_wqe(qp, 0);
2163 break;
2164
2165 default:
2166 break;
2167 }
2168
2169 if (wr->send_flags & IB_SEND_INLINE && num_sge) {
2170 int uninitialized_var(sz);
2171
2172 err = set_data_inl_seg(qp, wr, seg, &sz);
2173 if (unlikely(err)) {
2174 mlx5_ib_warn(dev, "failed to copy inline data, err %d\n", err);
2175 *bad_wr = wr;
2176 goto out;
2177 }
2178 inl = 1;
2179 size += sz;
2180 } else {
2181 dpseg = seg;
2182 for (i = 0; i < num_sge; i++) {
2183 if (unlikely(dpseg == qend)) {
2184 seg = mlx5_get_send_wqe(qp, 0);
2185 dpseg = seg;
2186 }
2187 if (likely(wr->sg_list[i].length)) {
2188 set_data_ptr_seg(dpseg, wr->sg_list + i);
2189 size += sizeof(struct mlx5_wqe_data_seg) / 16;
2190 dpseg++;
2191 }
2192 }
2193 }
2194
2195 mlx5_opcode = mlx5_ib_opcode[wr->opcode];
2196 ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
2197 mlx5_opcode |
2198 ((u32)opmod << 24));
2199 ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
2200 ctrl->fm_ce_se |= get_fence(fence, wr);
2201 qp->fm_cache = next_fence;
2202 if (unlikely(qp->wq_sig))
2203 ctrl->signature = wq_sig(ctrl);
2204
2205 qp->sq.wrid[idx] = wr->wr_id;
2206 qp->sq.w_list[idx].opcode = mlx5_opcode;
2207 qp->sq.wqe_head[idx] = qp->sq.head + nreq;
2208 qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
2209 qp->sq.w_list[idx].next = qp->sq.cur_post;
2210
2211 if (0)
2212 dump_wqe(qp, idx, size);
2213 }
2214
2215out:
2216 if (likely(nreq)) {
2217 qp->sq.head += nreq;
2218
2219 /* Make sure that descriptors are written before
2220 * updating doorbell record and ringing the doorbell
2221 */
2222 wmb();
2223
2224 qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
2225
2226 if (bf->need_lock)
2227 spin_lock(&bf->lock);
2228
2229 /* TBD enable WC */
2230 if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) {
2231 mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp);
2232 /* wc_wmb(); */
2233 } else {
2234 mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset,
2235 MLX5_GET_DOORBELL_LOCK(&bf->lock32));
2236 /* Make sure doorbells don't leak out of SQ spinlock
2237 * and reach the HCA out of order.
2238 */
2239 mmiowb();
2240 }
2241 bf->offset ^= bf->buf_size;
2242 if (bf->need_lock)
2243 spin_unlock(&bf->lock);
2244 }
2245
2246 spin_unlock_irqrestore(&qp->sq.lock, flags);
2247
2248 return err;
2249}
2250
2251static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
2252{
2253 sig->signature = calc_sig(sig, size);
2254}
2255
2256int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2257 struct ib_recv_wr **bad_wr)
2258{
2259 struct mlx5_ib_qp *qp = to_mqp(ibqp);
2260 struct mlx5_wqe_data_seg *scat;
2261 struct mlx5_rwqe_sig *sig;
2262 unsigned long flags;
2263 int err = 0;
2264 int nreq;
2265 int ind;
2266 int i;
2267
2268 spin_lock_irqsave(&qp->rq.lock, flags);
2269
2270 ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
2271
2272 for (nreq = 0; wr; nreq++, wr = wr->next) {
2273 if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
2274 err = -ENOMEM;
2275 *bad_wr = wr;
2276 goto out;
2277 }
2278
2279 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
2280 err = -EINVAL;
2281 *bad_wr = wr;
2282 goto out;
2283 }
2284
2285 scat = get_recv_wqe(qp, ind);
2286 if (qp->wq_sig)
2287 scat++;
2288
2289 for (i = 0; i < wr->num_sge; i++)
2290 set_data_ptr_seg(scat + i, wr->sg_list + i);
2291
2292 if (i < qp->rq.max_gs) {
2293 scat[i].byte_count = 0;
2294 scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
2295 scat[i].addr = 0;
2296 }
2297
2298 if (qp->wq_sig) {
2299 sig = (struct mlx5_rwqe_sig *)scat;
2300 set_sig_seg(sig, (qp->rq.max_gs + 1) << 2);
2301 }
2302
2303 qp->rq.wrid[ind] = wr->wr_id;
2304
2305 ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
2306 }
2307
2308out:
2309 if (likely(nreq)) {
2310 qp->rq.head += nreq;
2311
2312 /* Make sure that descriptors are written before
2313 * doorbell record.
2314 */
2315 wmb();
2316
2317 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
2318 }
2319
2320 spin_unlock_irqrestore(&qp->rq.lock, flags);
2321
2322 return err;
2323}
2324
2325static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
2326{
2327 switch (mlx5_state) {
2328 case MLX5_QP_STATE_RST: return IB_QPS_RESET;
2329 case MLX5_QP_STATE_INIT: return IB_QPS_INIT;
2330 case MLX5_QP_STATE_RTR: return IB_QPS_RTR;
2331 case MLX5_QP_STATE_RTS: return IB_QPS_RTS;
2332 case MLX5_QP_STATE_SQ_DRAINING:
2333 case MLX5_QP_STATE_SQD: return IB_QPS_SQD;
2334 case MLX5_QP_STATE_SQER: return IB_QPS_SQE;
2335 case MLX5_QP_STATE_ERR: return IB_QPS_ERR;
2336 default: return -1;
2337 }
2338}
2339
2340static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state)
2341{
2342 switch (mlx5_mig_state) {
2343 case MLX5_QP_PM_ARMED: return IB_MIG_ARMED;
2344 case MLX5_QP_PM_REARM: return IB_MIG_REARM;
2345 case MLX5_QP_PM_MIGRATED: return IB_MIG_MIGRATED;
2346 default: return -1;
2347 }
2348}
2349
2350static int to_ib_qp_access_flags(int mlx5_flags)
2351{
2352 int ib_flags = 0;
2353
2354 if (mlx5_flags & MLX5_QP_BIT_RRE)
2355 ib_flags |= IB_ACCESS_REMOTE_READ;
2356 if (mlx5_flags & MLX5_QP_BIT_RWE)
2357 ib_flags |= IB_ACCESS_REMOTE_WRITE;
2358 if (mlx5_flags & MLX5_QP_BIT_RAE)
2359 ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
2360
2361 return ib_flags;
2362}
2363
2364static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
2365 struct mlx5_qp_path *path)
2366{
2367 struct mlx5_core_dev *dev = &ibdev->mdev;
2368
2369 memset(ib_ah_attr, 0, sizeof(*ib_ah_attr));
2370 ib_ah_attr->port_num = path->port;
2371
2372 if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
2373 return;
2374
2375 ib_ah_attr->sl = path->sl & 0xf;
2376
2377 ib_ah_attr->dlid = be16_to_cpu(path->rlid);
2378 ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
2379 ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
2380 ib_ah_attr->ah_flags = (path->grh_mlid & (1 << 7)) ? IB_AH_GRH : 0;
2381 if (ib_ah_attr->ah_flags) {
2382 ib_ah_attr->grh.sgid_index = path->mgid_index;
2383 ib_ah_attr->grh.hop_limit = path->hop_limit;
2384 ib_ah_attr->grh.traffic_class =
2385 (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
2386 ib_ah_attr->grh.flow_label =
2387 be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
2388 memcpy(ib_ah_attr->grh.dgid.raw,
2389 path->rgid, sizeof(ib_ah_attr->grh.dgid.raw));
2390 }
2391}
2392
2393int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
2394 struct ib_qp_init_attr *qp_init_attr)
2395{
2396 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
2397 struct mlx5_ib_qp *qp = to_mqp(ibqp);
2398 struct mlx5_query_qp_mbox_out *outb;
2399 struct mlx5_qp_context *context;
2400 int mlx5_state;
2401 int err = 0;
2402
2403 mutex_lock(&qp->mutex);
2404 outb = kzalloc(sizeof(*outb), GFP_KERNEL);
2405 if (!outb) {
2406 err = -ENOMEM;
2407 goto out;
2408 }
2409 context = &outb->ctx;
2410 err = mlx5_core_qp_query(&dev->mdev, &qp->mqp, outb, sizeof(*outb));
2411 if (err)
2412 goto out_free;
2413
2414 mlx5_state = be32_to_cpu(context->flags) >> 28;
2415
2416 qp->state = to_ib_qp_state(mlx5_state);
2417 qp_attr->qp_state = qp->state;
2418 qp_attr->path_mtu = context->mtu_msgmax >> 5;
2419 qp_attr->path_mig_state =
2420 to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
2421 qp_attr->qkey = be32_to_cpu(context->qkey);
2422 qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
2423 qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff;
2424 qp_attr->dest_qp_num = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff;
2425 qp_attr->qp_access_flags =
2426 to_ib_qp_access_flags(be32_to_cpu(context->params2));
2427
2428 if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
2429 to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
2430 to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
2431 qp_attr->alt_pkey_index = context->alt_path.pkey_index & 0x7f;
2432 qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
2433 }
2434
2435 qp_attr->pkey_index = context->pri_path.pkey_index & 0x7f;
2436 qp_attr->port_num = context->pri_path.port;
2437
2438 /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
2439 qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING;
2440
2441 qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
2442
2443 qp_attr->max_dest_rd_atomic =
2444 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
2445 qp_attr->min_rnr_timer =
2446 (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
2447 qp_attr->timeout = context->pri_path.ackto_lt >> 3;
2448 qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
2449 qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7;
2450 qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3;
2451 qp_attr->cur_qp_state = qp_attr->qp_state;
2452 qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
2453 qp_attr->cap.max_recv_sge = qp->rq.max_gs;
2454
2455 if (!ibqp->uobject) {
2456 qp_attr->cap.max_send_wr = qp->sq.wqe_cnt;
2457 qp_attr->cap.max_send_sge = qp->sq.max_gs;
2458 } else {
2459 qp_attr->cap.max_send_wr = 0;
2460 qp_attr->cap.max_send_sge = 0;
2461 }
2462
2463 /* We don't support inline sends for kernel QPs (yet), and we
2464 * don't know what userspace's value should be.
2465 */
2466 qp_attr->cap.max_inline_data = 0;
2467
2468 qp_init_attr->cap = qp_attr->cap;
2469
2470 qp_init_attr->create_flags = 0;
2471 if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
2472 qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
2473
2474 qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
2475 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
2476
2477out_free:
2478 kfree(outb);
2479
2480out:
2481 mutex_unlock(&qp->mutex);
2482 return err;
2483}
2484
2485struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
2486 struct ib_ucontext *context,
2487 struct ib_udata *udata)
2488{
2489 struct mlx5_ib_dev *dev = to_mdev(ibdev);
2490 struct mlx5_ib_xrcd *xrcd;
2491 int err;
2492
2493 if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC))
2494 return ERR_PTR(-ENOSYS);
2495
2496 xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
2497 if (!xrcd)
2498 return ERR_PTR(-ENOMEM);
2499
2500 err = mlx5_core_xrcd_alloc(&dev->mdev, &xrcd->xrcdn);
2501 if (err) {
2502 kfree(xrcd);
2503 return ERR_PTR(err);
2504 }
2505
2506 return &xrcd->ibxrcd;
2507}
2508
2509int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
2510{
2511 struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
2512 u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
2513 int err;
2514
2515 err = mlx5_core_xrcd_dealloc(&dev->mdev, xrcdn);
2516 if (err) {
2517 mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
2518 return err;
2519 }
2520
2521 kfree(xrcd);
2522
2523 return 0;
2524}
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
new file mode 100644
index 000000000000..84d297afd6a9
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -0,0 +1,473 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/module.h>
34#include <linux/mlx5/qp.h>
35#include <linux/mlx5/srq.h>
36#include <linux/slab.h>
37#include <rdma/ib_umem.h>
38
39#include "mlx5_ib.h"
40#include "user.h"
41
42/* not supported currently */
43static int srq_signature;
44
45static void *get_wqe(struct mlx5_ib_srq *srq, int n)
46{
47 return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
48}
49
50static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
51{
52 struct ib_event event;
53 struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq;
54
55 if (ibsrq->event_handler) {
56 event.device = ibsrq->device;
57 event.element.srq = ibsrq;
58 switch (type) {
59 case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
60 event.event = IB_EVENT_SRQ_LIMIT_REACHED;
61 break;
62 case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
63 event.event = IB_EVENT_SRQ_ERR;
64 break;
65 default:
66 pr_warn("mlx5_ib: Unexpected event type %d on SRQ %06x\n",
67 type, srq->srqn);
68 return;
69 }
70
71 ibsrq->event_handler(&event, ibsrq->srq_context);
72 }
73}
74
75static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
76 struct mlx5_create_srq_mbox_in **in,
77 struct ib_udata *udata, int buf_size, int *inlen)
78{
79 struct mlx5_ib_dev *dev = to_mdev(pd->device);
80 struct mlx5_ib_create_srq ucmd;
81 int err;
82 int npages;
83 int page_shift;
84 int ncont;
85 u32 offset;
86
87 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
88 mlx5_ib_dbg(dev, "failed copy udata\n");
89 return -EFAULT;
90 }
91 srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
92
93 srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
94 0, 0);
95 if (IS_ERR(srq->umem)) {
96 mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
97 err = PTR_ERR(srq->umem);
98 return err;
99 }
100
101 mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, &npages,
102 &page_shift, &ncont, NULL);
103 err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift,
104 &offset);
105 if (err) {
106 mlx5_ib_warn(dev, "bad offset\n");
107 goto err_umem;
108 }
109
110 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
111 *in = mlx5_vzalloc(*inlen);
112 if (!(*in)) {
113 err = -ENOMEM;
114 goto err_umem;
115 }
116
117 mlx5_ib_populate_pas(dev, srq->umem, page_shift, (*in)->pas, 0);
118
119 err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context),
120 ucmd.db_addr, &srq->db);
121 if (err) {
122 mlx5_ib_dbg(dev, "map doorbell failed\n");
123 goto err_in;
124 }
125
126 (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
127 (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
128
129 return 0;
130
131err_in:
132 mlx5_vfree(*in);
133
134err_umem:
135 ib_umem_release(srq->umem);
136
137 return err;
138}
139
140static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
141 struct mlx5_create_srq_mbox_in **in, int buf_size,
142 int *inlen)
143{
144 int err;
145 int i;
146 struct mlx5_wqe_srq_next_seg *next;
147 int page_shift;
148 int npages;
149
150 err = mlx5_db_alloc(&dev->mdev, &srq->db);
151 if (err) {
152 mlx5_ib_warn(dev, "alloc dbell rec failed\n");
153 return err;
154 }
155
156 *srq->db.db = 0;
157
158 if (mlx5_buf_alloc(&dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
159 mlx5_ib_dbg(dev, "buf alloc failed\n");
160 err = -ENOMEM;
161 goto err_db;
162 }
163 page_shift = srq->buf.page_shift;
164
165 srq->head = 0;
166 srq->tail = srq->msrq.max - 1;
167 srq->wqe_ctr = 0;
168
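 /* chain all WQEs into a circular free list via next_wqe_index */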
169 for (i = 0; i < srq->msrq.max; i++) {
170 next = get_wqe(srq, i);
171 next->next_wqe_index =
172 cpu_to_be16((i + 1) & (srq->msrq.max - 1));
173 }
174
175 npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
176 mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
177 buf_size, page_shift, srq->buf.npages, npages);
178 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * npages;
179 *in = mlx5_vzalloc(*inlen);
180 if (!*in) {
181 err = -ENOMEM;
182 goto err_buf;
183 }
184 mlx5_fill_page_array(&srq->buf, (*in)->pas);
185
186 srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL);
187 if (!srq->wrid) {
188 mlx5_ib_dbg(dev, "kmalloc failed %lu\n",
189 (unsigned long)(srq->msrq.max * sizeof(u64)));
190 err = -ENOMEM;
191 goto err_in;
192 }
193 srq->wq_sig = !!srq_signature;
194
195 (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
196
197 return 0;
198
199err_in:
200 mlx5_vfree(*in);
201
202err_buf:
203 mlx5_buf_free(&dev->mdev, &srq->buf);
204
205err_db:
206 mlx5_db_free(&dev->mdev, &srq->db);
207 return err;
208}
209
210static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq)
211{
212 mlx5_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
213 ib_umem_release(srq->umem);
214}
215
216
217static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq)
218{
219 kfree(srq->wrid);
220 mlx5_buf_free(&dev->mdev, &srq->buf);
221 mlx5_db_free(&dev->mdev, &srq->db);
222}
223
224struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
225 struct ib_srq_init_attr *init_attr,
226 struct ib_udata *udata)
227{
228 struct mlx5_ib_dev *dev = to_mdev(pd->device);
229 struct mlx5_ib_srq *srq;
230 int desc_size;
231 int buf_size;
232 int err;
233 struct mlx5_create_srq_mbox_in *uninitialized_var(in);
234 int uninitialized_var(inlen);
235 int is_xrc;
236 u32 flgs, xrcdn;
237
238 /* Sanity check SRQ size before proceeding */
239 if (init_attr->attr.max_wr >= dev->mdev.caps.max_srq_wqes) {
240 mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
241 init_attr->attr.max_wr,
242 dev->mdev.caps.max_srq_wqes);
243 return ERR_PTR(-EINVAL);
244 }
245
246 srq = kmalloc(sizeof(*srq), GFP_KERNEL);
247 if (!srq)
248 return ERR_PTR(-ENOMEM);
249
250 mutex_init(&srq->mutex);
251 spin_lock_init(&srq->lock);
252 srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1);
253 srq->msrq.max_gs = init_attr->attr.max_sge;
254
255 desc_size = sizeof(struct mlx5_wqe_srq_next_seg) +
256 srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg);
257 desc_size = roundup_pow_of_two(desc_size);
258 desc_size = max_t(int, 32, desc_size);
259 srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) /
260 sizeof(struct mlx5_wqe_data_seg);
261 srq->msrq.wqe_shift = ilog2(desc_size);
262 buf_size = srq->msrq.max * desc_size;
263 mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n",
264 desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
265 srq->msrq.max_avail_gather);
266
267 if (pd->uobject)
268 err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen);
269 else
270 err = create_srq_kernel(dev, srq, &in, buf_size, &inlen);
271
272 if (err) {
273 mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
274 pd->uobject ? "user" : "kernel", err);
275 goto err_srq;
276 }
277
278 is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
279 in->ctx.state_log_sz = ilog2(srq->msrq.max);
280 flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
281 xrcdn = 0;
282 if (is_xrc) {
283 xrcdn = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
284 in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(init_attr->ext.xrc.cq)->mcq.cqn);
285 } else if (init_attr->srq_type == IB_SRQT_BASIC) {
286 xrcdn = to_mxrcd(dev->devr.x0)->xrcdn;
287 in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(dev->devr.c0)->mcq.cqn);
288 }
289
290 in->ctx.flags_xrcd = cpu_to_be32((flgs & 0xFF000000) | (xrcdn & 0xFFFFFF));
291
292 in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn);
293 in->ctx.db_record = cpu_to_be64(srq->db.dma);
294 err = mlx5_core_create_srq(&dev->mdev, &srq->msrq, in, inlen);
295 mlx5_vfree(in);
296 if (err) {
297 mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
298 goto err_srq;
299 }
300
301 mlx5_ib_dbg(dev, "create SRQ with srqn 0x%x\n", srq->msrq.srqn);
302
303 srq->msrq.event = mlx5_ib_srq_event;
304 srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
305
306 if (pd->uobject)
307 if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) {
308 mlx5_ib_dbg(dev, "copy to user failed\n");
309 err = -EFAULT;
310 goto err_core;
311 }
312
313 init_attr->attr.max_wr = srq->msrq.max - 1;
314
315 return &srq->ibsrq;
316
317err_core:
318 mlx5_core_destroy_srq(&dev->mdev, &srq->msrq);
319 if (pd->uobject)
320 destroy_srq_user(pd, srq);
321 else
322 destroy_srq_kernel(dev, srq);
323
324err_srq:
325 kfree(srq);
326
327 return ERR_PTR(err);
328}
329
330int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
331 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
332{
333 struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
334 struct mlx5_ib_srq *srq = to_msrq(ibsrq);
335 int ret;
336
337 /* We don't support resizing SRQs yet */
338 if (attr_mask & IB_SRQ_MAX_WR)
339 return -EINVAL;
340
341 if (attr_mask & IB_SRQ_LIMIT) {
342 if (attr->srq_limit >= srq->msrq.max)
343 return -EINVAL;
344
345 mutex_lock(&srq->mutex);
346 ret = mlx5_core_arm_srq(&dev->mdev, &srq->msrq, attr->srq_limit, 1);
347 mutex_unlock(&srq->mutex);
348
349 if (ret)
350 return ret;
351 }
352
353 return 0;
354}
355
356int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
357{
358 struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
359 struct mlx5_ib_srq *srq = to_msrq(ibsrq);
360 int ret;
361 struct mlx5_query_srq_mbox_out *out;
362
363 out = kzalloc(sizeof(*out), GFP_KERNEL);
364 if (!out)
365 return -ENOMEM;
366
367 ret = mlx5_core_query_srq(&dev->mdev, &srq->msrq, out);
368 if (ret)
369 goto out_box;
370
371 srq_attr->srq_limit = be16_to_cpu(out->ctx.lwm);
372 srq_attr->max_wr = srq->msrq.max - 1;
373 srq_attr->max_sge = srq->msrq.max_gs;
374
375out_box:
376 kfree(out);
377 return ret;
378}
379
380int mlx5_ib_destroy_srq(struct ib_srq *srq)
381{
382 struct mlx5_ib_dev *dev = to_mdev(srq->device);
383 struct mlx5_ib_srq *msrq = to_msrq(srq);
384
385 mlx5_core_destroy_srq(&dev->mdev, &msrq->msrq);
386
387 if (srq->uobject) {
388 mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
389 ib_umem_release(msrq->umem);
390 } else {
391 kfree(msrq->wrid);
392 mlx5_buf_free(&dev->mdev, &msrq->buf);
393 mlx5_db_free(&dev->mdev, &msrq->db);
394 }
395
396 kfree(srq);
397 return 0;
398}
399
400void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index)
401{
402 struct mlx5_wqe_srq_next_seg *next;
403
404 /* always called with interrupts disabled. */
405 spin_lock(&srq->lock);
406
407 next = get_wqe(srq, srq->tail);
408 next->next_wqe_index = cpu_to_be16(wqe_index);
409 srq->tail = wqe_index;
410
411 spin_unlock(&srq->lock);
412}
413
414int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
415 struct ib_recv_wr **bad_wr)
416{
417 struct mlx5_ib_srq *srq = to_msrq(ibsrq);
418 struct mlx5_wqe_srq_next_seg *next;
419 struct mlx5_wqe_data_seg *scat;
420 unsigned long flags;
421 int err = 0;
422 int nreq;
423 int i;
424
425 spin_lock_irqsave(&srq->lock, flags);
426
427 for (nreq = 0; wr; nreq++, wr = wr->next) {
428 if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
429 err = -EINVAL;
430 *bad_wr = wr;
431 break;
432 }
433
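 /* the free list is exhausted once head catches up with tail */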
434 if (unlikely(srq->head == srq->tail)) {
435 err = -ENOMEM;
436 *bad_wr = wr;
437 break;
438 }
439
440 srq->wrid[srq->head] = wr->wr_id;
441
442 next = get_wqe(srq, srq->head);
443 srq->head = be16_to_cpu(next->next_wqe_index);
444 scat = (struct mlx5_wqe_data_seg *)(next + 1);
445
446 for (i = 0; i < wr->num_sge; i++) {
447 scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
448 scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
449 scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
450 }
451
452 if (i < srq->msrq.max_avail_gather) {
453 scat[i].byte_count = 0;
454 scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
455 scat[i].addr = 0;
456 }
457 }
458
459 if (likely(nreq)) {
460 srq->wqe_ctr += nreq;
461
462 /* Make sure that descriptors are written before
463 * doorbell record.
464 */
465 wmb();
466
467 *srq->db.db = cpu_to_be32(srq->wqe_ctr);
468 }
469
470 spin_unlock_irqrestore(&srq->lock, flags);
471
472 return err;
473}
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
new file mode 100644
index 000000000000..a886de3e593c
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -0,0 +1,121 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX5_IB_USER_H
34#define MLX5_IB_USER_H
35
36#include <linux/types.h>
37
38enum {
39 MLX5_QP_FLAG_SIGNATURE = 1 << 0,
40 MLX5_QP_FLAG_SCATTER_CQE = 1 << 1,
41};
42
43enum {
44 MLX5_SRQ_FLAG_SIGNATURE = 1 << 0,
45};
46
47
48/* Increment this value if any changes that break userspace ABI
49 * compatibility are made.
50 */
51#define MLX5_IB_UVERBS_ABI_VERSION 1
52
53/* Make sure that all structs defined in this file remain laid out so
54 * that they pack the same way on 32-bit and 64-bit architectures (to
55 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
56 * In particular do not use pointer types -- pass pointers in __u64
57 * instead.
58 */
59
60struct mlx5_ib_alloc_ucontext_req {
61 __u32 total_num_uuars;
62 __u32 num_low_latency_uuars;
63};
64
65struct mlx5_ib_alloc_ucontext_resp {
66 __u32 qp_tab_size;
67 __u32 bf_reg_size;
68 __u32 tot_uuars;
69 __u32 cache_line_size;
70 __u16 max_sq_desc_sz;
71 __u16 max_rq_desc_sz;
72 __u32 max_send_wqebb;
73 __u32 max_recv_wr;
74 __u32 max_srq_recv_wr;
75 __u16 num_ports;
76 __u16 reserved;
77};
78
79struct mlx5_ib_alloc_pd_resp {
80 __u32 pdn;
81};
82
83struct mlx5_ib_create_cq {
84 __u64 buf_addr;
85 __u64 db_addr;
86 __u32 cqe_size;
87};
88
89struct mlx5_ib_create_cq_resp {
90 __u32 cqn;
91 __u32 reserved;
92};
93
94struct mlx5_ib_resize_cq {
95 __u64 buf_addr;
96};
97
98struct mlx5_ib_create_srq {
99 __u64 buf_addr;
100 __u64 db_addr;
101 __u32 flags;
102};
103
104struct mlx5_ib_create_srq_resp {
105 __u32 srqn;
106 __u32 reserved;
107};
108
109struct mlx5_ib_create_qp {
110 __u64 buf_addr;
111 __u64 db_addr;
112 __u32 sq_wqe_count;
113 __u32 rq_wqe_count;
114 __u32 rq_wqe_shift;
115 __u32 flags;
116};
117
118struct mlx5_ib_create_qp_resp {
119 __u32 uuar_index;
120};
121#endif /* MLX5_IB_USER_H */