aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/mlx4/qp.c
diff options
context:
space:
mode:
authorRoland Dreier <rolandd@cisco.com>2007-05-08 21:00:38 -0400
committerRoland Dreier <rolandd@cisco.com>2007-05-08 21:00:38 -0400
commit225c7b1feef1b41170f7037a5b10a65cd8a42c54 (patch)
tree702a0a2cbba7f1c5b2949d236b4463d486204fdc /drivers/infiniband/hw/mlx4/qp.c
parent1bf66a30421ca772820f489d88c16d0c430d6a67 (diff)
IB/mlx4: Add a driver for Mellanox ConnectX InfiniBand adapters
Add an InfiniBand driver for Mellanox ConnectX adapters. Because these adapters can also be used as ethernet NICs and Fibre Channel HBAs, the driver is split into two modules:
- mlx4_core: Handles low-level things like device initialization and processing firmware commands. Also controls resource allocation so that the InfiniBand, ethernet and FC functions can share a device without stepping on each other.
- mlx4_ib: Handles InfiniBand-specific things; plugs into the InfiniBand midlayer.
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/mlx4/qp.c')
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c1294
1 files changed, 1294 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
new file mode 100644
index 000000000000..5cd706908450
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -0,0 +1,1294 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_cache.h>
34#include <rdma/ib_pack.h>
35
36#include <linux/mlx4/qp.h>
37
38#include "mlx4_ib.h"
39#include "user.h"
40
/*
 * Value programmed (shifted left by 28) into the QP context's params1
 * word on every modify-QP command.
 */
enum {
	MLX4_IB_ACK_REQ_FREQ	= 8,
};
44
/*
 * Base sched_queue values for a QP's primary path.  The code that
 * fills in the path ORs the port number (and, for connected paths,
 * the service level) on top of these.  QP0 uses its own default.
 */
enum {
	MLX4_IB_DEFAULT_SCHED_QUEUE	= 0x83,
	MLX4_IB_DEFAULT_QP0_SCHED_QUEUE	= 0x3f
};
49
/*
 * Size in bytes of the largest UD header we may have to build in
 * software: a send with GRH and immediate data.
 */
enum {
	MLX4_IB_UD_HEADER_SIZE		= 72
};
56
57struct mlx4_ib_sqp {
58 struct mlx4_ib_qp qp;
59 int pkey_index;
60 u32 qkey;
61 u32 send_psn;
62 struct ib_ud_header ud_header;
63 u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
64};
65
66static const __be32 mlx4_ib_opcode[] = {
67 [IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND),
68 [IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
69 [IB_WR_RDMA_WRITE] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
70 [IB_WR_RDMA_WRITE_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
71 [IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
72 [IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
73 [IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
74};
75
76static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
77{
78 return container_of(mqp, struct mlx4_ib_sqp, qp);
79}
80
81static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
82{
83 return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
84 qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
85}
86
87static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
88{
89 return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
90 qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
91}
92
93static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
94{
95 if (qp->buf.nbufs == 1)
96 return qp->buf.u.direct.buf + offset;
97 else
98 return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +
99 (offset & (PAGE_SIZE - 1));
100}
101
102static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
103{
104 return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
105}
106
107static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
108{
109 return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
110}
111
112static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
113{
114 struct ib_event event;
115 struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
116
117 if (type == MLX4_EVENT_TYPE_PATH_MIG)
118 to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
119
120 if (ibqp->event_handler) {
121 event.device = ibqp->device;
122 event.element.qp = ibqp;
123 switch (type) {
124 case MLX4_EVENT_TYPE_PATH_MIG:
125 event.event = IB_EVENT_PATH_MIG;
126 break;
127 case MLX4_EVENT_TYPE_COMM_EST:
128 event.event = IB_EVENT_COMM_EST;
129 break;
130 case MLX4_EVENT_TYPE_SQ_DRAINED:
131 event.event = IB_EVENT_SQ_DRAINED;
132 break;
133 case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
134 event.event = IB_EVENT_QP_LAST_WQE_REACHED;
135 break;
136 case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
137 event.event = IB_EVENT_QP_FATAL;
138 break;
139 case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
140 event.event = IB_EVENT_PATH_MIG_ERR;
141 break;
142 case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
143 event.event = IB_EVENT_QP_REQ_ERR;
144 break;
145 case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
146 event.event = IB_EVENT_QP_ACCESS_ERR;
147 break;
148 default:
149 printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
150 "on QP %06x\n", type, qp->qpn);
151 return;
152 }
153
154 ibqp->event_handler(&event, ibqp->qp_context);
155 }
156}
157
158static int send_wqe_overhead(enum ib_qp_type type)
159{
160 /*
161 * UD WQEs must have a datagram segment.
162 * RC and UC WQEs might have a remote address segment.
163 * MLX WQEs need two extra inline data segments (for the UD
164 * header and space for the ICRC).
165 */
166 switch (type) {
167 case IB_QPT_UD:
168 return sizeof (struct mlx4_wqe_ctrl_seg) +
169 sizeof (struct mlx4_wqe_datagram_seg);
170 case IB_QPT_UC:
171 return sizeof (struct mlx4_wqe_ctrl_seg) +
172 sizeof (struct mlx4_wqe_raddr_seg);
173 case IB_QPT_RC:
174 return sizeof (struct mlx4_wqe_ctrl_seg) +
175 sizeof (struct mlx4_wqe_atomic_seg) +
176 sizeof (struct mlx4_wqe_raddr_seg);
177 case IB_QPT_SMI:
178 case IB_QPT_GSI:
179 return sizeof (struct mlx4_wqe_ctrl_seg) +
180 ALIGN(MLX4_IB_UD_HEADER_SIZE +
181 sizeof (struct mlx4_wqe_inline_seg),
182 sizeof (struct mlx4_wqe_data_seg)) +
183 ALIGN(4 +
184 sizeof (struct mlx4_wqe_inline_seg),
185 sizeof (struct mlx4_wqe_data_seg));
186 default:
187 return sizeof (struct mlx4_wqe_ctrl_seg);
188 }
189}
190
191static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
192 enum ib_qp_type type, struct mlx4_ib_qp *qp)
193{
194 /* Sanity check QP size before proceeding */
195 if (cap->max_send_wr > dev->dev->caps.max_wqes ||
196 cap->max_recv_wr > dev->dev->caps.max_wqes ||
197 cap->max_send_sge > dev->dev->caps.max_sq_sg ||
198 cap->max_recv_sge > dev->dev->caps.max_rq_sg ||
199 cap->max_inline_data + send_wqe_overhead(type) +
200 sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
201 return -EINVAL;
202
203 /*
204 * For MLX transport we need 2 extra S/G entries:
205 * one for the header and one for the checksum at the end
206 */
207 if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
208 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
209 return -EINVAL;
210
211 qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0;
212 qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 0;
213
214 qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge *
215 sizeof (struct mlx4_wqe_data_seg)));
216 qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg);
217
218 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
219 sizeof (struct mlx4_wqe_data_seg),
220 cap->max_inline_data +
221 sizeof (struct mlx4_wqe_inline_seg)) +
222 send_wqe_overhead(type)));
223 qp->sq.max_gs = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) /
224 sizeof (struct mlx4_wqe_data_seg);
225
226 qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) +
227 (qp->sq.max << qp->sq.wqe_shift);
228 if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
229 qp->rq.offset = 0;
230 qp->sq.offset = qp->rq.max << qp->rq.wqe_shift;
231 } else {
232 qp->rq.offset = qp->sq.max << qp->sq.wqe_shift;
233 qp->sq.offset = 0;
234 }
235
236 cap->max_send_wr = qp->sq.max;
237 cap->max_recv_wr = qp->rq.max;
238 cap->max_send_sge = qp->sq.max_gs;
239 cap->max_recv_sge = qp->rq.max_gs;
240 cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) -
241 sizeof (struct mlx4_wqe_inline_seg);
242
243 return 0;
244}
245
246static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
247 struct ib_qp_init_attr *init_attr,
248 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
249{
250 struct mlx4_wqe_ctrl_seg *ctrl;
251 int err;
252 int i;
253
254 mutex_init(&qp->mutex);
255 spin_lock_init(&qp->sq.lock);
256 spin_lock_init(&qp->rq.lock);
257
258 qp->state = IB_QPS_RESET;
259 qp->atomic_rd_en = 0;
260 qp->resp_depth = 0;
261
262 qp->rq.head = 0;
263 qp->rq.tail = 0;
264 qp->sq.head = 0;
265 qp->sq.tail = 0;
266
267 err = set_qp_size(dev, &init_attr->cap, init_attr->qp_type, qp);
268 if (err)
269 goto err;
270
271 if (pd->uobject) {
272 struct mlx4_ib_create_qp ucmd;
273
274 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
275 err = -EFAULT;
276 goto err;
277 }
278
279 qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
280 qp->buf_size, 0);
281 if (IS_ERR(qp->umem)) {
282 err = PTR_ERR(qp->umem);
283 goto err;
284 }
285
286 err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
287 ilog2(qp->umem->page_size), &qp->mtt);
288 if (err)
289 goto err_buf;
290
291 err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
292 if (err)
293 goto err_mtt;
294
295 err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
296 ucmd.db_addr, &qp->db);
297 if (err)
298 goto err_mtt;
299 } else {
300 err = mlx4_ib_db_alloc(dev, &qp->db, 0);
301 if (err)
302 goto err;
303
304 *qp->db.db = 0;
305
306 if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
307 err = -ENOMEM;
308 goto err_db;
309 }
310
311 err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
312 &qp->mtt);
313 if (err)
314 goto err_buf;
315
316 err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
317 if (err)
318 goto err_mtt;
319
320 for (i = 0; i < qp->sq.max; ++i) {
321 ctrl = get_send_wqe(qp, i);
322 ctrl->owner_opcode = cpu_to_be32(1 << 31);
323 }
324
325 qp->sq.wrid = kmalloc(qp->sq.max * sizeof (u64), GFP_KERNEL);
326 qp->rq.wrid = kmalloc(qp->rq.max * sizeof (u64), GFP_KERNEL);
327
328 if (!qp->sq.wrid || !qp->rq.wrid) {
329 err = -ENOMEM;
330 goto err_wrid;
331 }
332
333 /* We don't support inline sends for kernel QPs (yet) */
334 init_attr->cap.max_inline_data = 0;
335 }
336
337 err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
338 if (err)
339 goto err_wrid;
340
341 /*
342 * Hardware wants QPN written in big-endian order (after
343 * shifting) for send doorbell. Precompute this value to save
344 * a little bit when posting sends.
345 */
346 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
347
348 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
349 qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
350 else
351 qp->sq_signal_bits = 0;
352
353 qp->mqp.event = mlx4_ib_qp_event;
354
355 return 0;
356
357err_wrid:
358 if (pd->uobject)
359 mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
360 else {
361 kfree(qp->sq.wrid);
362 kfree(qp->rq.wrid);
363 }
364
365err_mtt:
366 mlx4_mtt_cleanup(dev->dev, &qp->mtt);
367
368err_buf:
369 if (pd->uobject)
370 ib_umem_release(qp->umem);
371 else
372 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
373
374err_db:
375 if (!pd->uobject)
376 mlx4_ib_db_free(dev, &qp->db);
377
378err:
379 return err;
380}
381
382static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
383{
384 switch (state) {
385 case IB_QPS_RESET: return MLX4_QP_STATE_RST;
386 case IB_QPS_INIT: return MLX4_QP_STATE_INIT;
387 case IB_QPS_RTR: return MLX4_QP_STATE_RTR;
388 case IB_QPS_RTS: return MLX4_QP_STATE_RTS;
389 case IB_QPS_SQD: return MLX4_QP_STATE_SQD;
390 case IB_QPS_SQE: return MLX4_QP_STATE_SQER;
391 case IB_QPS_ERR: return MLX4_QP_STATE_ERR;
392 default: return -1;
393 }
394}
395
396static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
397{
398 if (send_cq == recv_cq)
399 spin_lock_irq(&send_cq->lock);
400 else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
401 spin_lock_irq(&send_cq->lock);
402 spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
403 } else {
404 spin_lock_irq(&recv_cq->lock);
405 spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
406 }
407}
408
409static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
410{
411 if (send_cq == recv_cq)
412 spin_unlock_irq(&send_cq->lock);
413 else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
414 spin_unlock(&recv_cq->lock);
415 spin_unlock_irq(&send_cq->lock);
416 } else {
417 spin_unlock(&send_cq->lock);
418 spin_unlock_irq(&recv_cq->lock);
419 }
420}
421
422static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
423 int is_user)
424{
425 struct mlx4_ib_cq *send_cq, *recv_cq;
426
427 if (qp->state != IB_QPS_RESET)
428 if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
429 MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
430 printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",
431 qp->mqp.qpn);
432
433 send_cq = to_mcq(qp->ibqp.send_cq);
434 recv_cq = to_mcq(qp->ibqp.recv_cq);
435
436 mlx4_ib_lock_cqs(send_cq, recv_cq);
437
438 if (!is_user) {
439 __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
440 qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL);
441 if (send_cq != recv_cq)
442 __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
443 }
444
445 mlx4_qp_remove(dev->dev, &qp->mqp);
446
447 mlx4_ib_unlock_cqs(send_cq, recv_cq);
448
449 mlx4_qp_free(dev->dev, &qp->mqp);
450 mlx4_mtt_cleanup(dev->dev, &qp->mtt);
451
452 if (is_user) {
453 mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
454 &qp->db);
455 ib_umem_release(qp->umem);
456 } else {
457 kfree(qp->sq.wrid);
458 kfree(qp->rq.wrid);
459 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
460 mlx4_ib_db_free(dev, &qp->db);
461 }
462}
463
464struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
465 struct ib_qp_init_attr *init_attr,
466 struct ib_udata *udata)
467{
468 struct mlx4_ib_dev *dev = to_mdev(pd->device);
469 struct mlx4_ib_sqp *sqp;
470 struct mlx4_ib_qp *qp;
471 int err;
472
473 switch (init_attr->qp_type) {
474 case IB_QPT_RC:
475 case IB_QPT_UC:
476 case IB_QPT_UD:
477 {
478 qp = kmalloc(sizeof *qp, GFP_KERNEL);
479 if (!qp)
480 return ERR_PTR(-ENOMEM);
481
482 err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
483 if (err) {
484 kfree(qp);
485 return ERR_PTR(err);
486 }
487
488 qp->ibqp.qp_num = qp->mqp.qpn;
489
490 break;
491 }
492 case IB_QPT_SMI:
493 case IB_QPT_GSI:
494 {
495 /* Userspace is not allowed to create special QPs: */
496 if (pd->uobject)
497 return ERR_PTR(-EINVAL);
498
499 sqp = kmalloc(sizeof *sqp, GFP_KERNEL);
500 if (!sqp)
501 return ERR_PTR(-ENOMEM);
502
503 qp = &sqp->qp;
504
505 err = create_qp_common(dev, pd, init_attr, udata,
506 dev->dev->caps.sqp_start +
507 (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
508 init_attr->port_num - 1,
509 qp);
510 if (err) {
511 kfree(sqp);
512 return ERR_PTR(err);
513 }
514
515 qp->port = init_attr->port_num;
516 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
517
518 break;
519 }
520 default:
521 /* Don't support raw QPs */
522 return ERR_PTR(-EINVAL);
523 }
524
525 return &qp->ibqp;
526}
527
528int mlx4_ib_destroy_qp(struct ib_qp *qp)
529{
530 struct mlx4_ib_dev *dev = to_mdev(qp->device);
531 struct mlx4_ib_qp *mqp = to_mqp(qp);
532
533 if (is_qp0(dev, mqp))
534 mlx4_CLOSE_PORT(dev->dev, mqp->port);
535
536 destroy_qp_common(dev, mqp, !!qp->pd->uobject);
537
538 if (is_sqp(dev, mqp))
539 kfree(to_msqp(mqp));
540 else
541 kfree(mqp);
542
543 return 0;
544}
545
546static void init_port(struct mlx4_ib_dev *dev, int port)
547{
548 struct mlx4_init_port_param param;
549 int err;
550
551 memset(&param, 0, sizeof param);
552
553 param.port_width_cap = dev->dev->caps.port_width_cap;
554 param.vl_cap = dev->dev->caps.vl_cap;
555 param.mtu = ib_mtu_enum_to_int(dev->dev->caps.mtu_cap);
556 param.max_gid = dev->dev->caps.gid_table_len;
557 param.max_pkey = dev->dev->caps.pkey_table_len;
558
559 err = mlx4_INIT_PORT(dev->dev, &param, port);
560 if (err)
561 printk(KERN_WARNING "INIT_PORT failed, return code %d.\n", err);
562}
563
564static int to_mlx4_st(enum ib_qp_type type)
565{
566 switch (type) {
567 case IB_QPT_RC: return MLX4_QP_ST_RC;
568 case IB_QPT_UC: return MLX4_QP_ST_UC;
569 case IB_QPT_UD: return MLX4_QP_ST_UD;
570 case IB_QPT_SMI:
571 case IB_QPT_GSI: return MLX4_QP_ST_MLX;
572 default: return -1;
573 }
574}
575
576static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr *attr,
577 int attr_mask)
578{
579 u8 dest_rd_atomic;
580 u32 access_flags;
581 u32 hw_access_flags = 0;
582
583 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
584 dest_rd_atomic = attr->max_dest_rd_atomic;
585 else
586 dest_rd_atomic = qp->resp_depth;
587
588 if (attr_mask & IB_QP_ACCESS_FLAGS)
589 access_flags = attr->qp_access_flags;
590 else
591 access_flags = qp->atomic_rd_en;
592
593 if (!dest_rd_atomic)
594 access_flags &= IB_ACCESS_REMOTE_WRITE;
595
596 if (access_flags & IB_ACCESS_REMOTE_READ)
597 hw_access_flags |= MLX4_QP_BIT_RRE;
598 if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
599 hw_access_flags |= MLX4_QP_BIT_RAE;
600 if (access_flags & IB_ACCESS_REMOTE_WRITE)
601 hw_access_flags |= MLX4_QP_BIT_RWE;
602
603 return cpu_to_be32(hw_access_flags);
604}
605
606static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, struct ib_qp_attr *attr,
607 int attr_mask)
608{
609 if (attr_mask & IB_QP_PKEY_INDEX)
610 sqp->pkey_index = attr->pkey_index;
611 if (attr_mask & IB_QP_QKEY)
612 sqp->qkey = attr->qkey;
613 if (attr_mask & IB_QP_SQ_PSN)
614 sqp->send_psn = attr->sq_psn;
615}
616
617static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
618{
619 path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
620}
621
622static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah,
623 struct mlx4_qp_path *path, u8 port)
624{
625 path->grh_mylmc = ah->src_path_bits & 0x7f;
626 path->rlid = cpu_to_be16(ah->dlid);
627 if (ah->static_rate) {
628 path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
629 while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
630 !(1 << path->static_rate & dev->dev->caps.stat_rate_support))
631 --path->static_rate;
632 } else
633 path->static_rate = 0;
634 path->counter_index = 0xff;
635
636 if (ah->ah_flags & IB_AH_GRH) {
637 if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len) {
638 printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",
639 ah->grh.sgid_index, dev->dev->caps.gid_table_len - 1);
640 return -1;
641 }
642
643 path->grh_mylmc |= 1 << 7;
644 path->mgid_index = ah->grh.sgid_index;
645 path->hop_limit = ah->grh.hop_limit;
646 path->tclass_flowlabel =
647 cpu_to_be32((ah->grh.traffic_class << 20) |
648 (ah->grh.flow_label));
649 memcpy(path->rgid, ah->grh.dgid.raw, 16);
650 }
651
652 path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
653 ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
654
655 return 0;
656}
657
658int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
659 int attr_mask, struct ib_udata *udata)
660{
661 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
662 struct mlx4_ib_qp *qp = to_mqp(ibqp);
663 struct mlx4_qp_context *context;
664 enum mlx4_qp_optpar optpar = 0;
665 enum ib_qp_state cur_state, new_state;
666 int sqd_event;
667 int err = -EINVAL;
668
669 context = kzalloc(sizeof *context, GFP_KERNEL);
670 if (!context)
671 return -ENOMEM;
672
673 mutex_lock(&qp->mutex);
674
675 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
676 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
677
678 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
679 goto out;
680
681 if ((attr_mask & IB_QP_PKEY_INDEX) &&
682 attr->pkey_index >= dev->dev->caps.pkey_table_len) {
683 goto out;
684 }
685
686 if ((attr_mask & IB_QP_PORT) &&
687 (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
688 goto out;
689 }
690
691 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
692 attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
693 goto out;
694 }
695
696 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
697 attr->max_dest_rd_atomic > 1 << dev->dev->caps.max_qp_dest_rdma) {
698 goto out;
699 }
700
701 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
702 (to_mlx4_st(ibqp->qp_type) << 16));
703 context->flags |= cpu_to_be32(1 << 8); /* DE? */
704
705 if (!(attr_mask & IB_QP_PATH_MIG_STATE))
706 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
707 else {
708 optpar |= MLX4_QP_OPTPAR_PM_STATE;
709 switch (attr->path_mig_state) {
710 case IB_MIG_MIGRATED:
711 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
712 break;
713 case IB_MIG_REARM:
714 context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);
715 break;
716 case IB_MIG_ARMED:
717 context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);
718 break;
719 }
720 }
721
722 if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
723 ibqp->qp_type == IB_QPT_UD)
724 context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
725 else if (attr_mask & IB_QP_PATH_MTU) {
726 if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
727 printk(KERN_ERR "path MTU (%u) is invalid\n",
728 attr->path_mtu);
729 return -EINVAL;
730 }
731 context->mtu_msgmax = (attr->path_mtu << 5) | 31;
732 }
733
734 if (qp->rq.max)
735 context->rq_size_stride = ilog2(qp->rq.max) << 3;
736 context->rq_size_stride |= qp->rq.wqe_shift - 4;
737
738 if (qp->sq.max)
739 context->sq_size_stride = ilog2(qp->sq.max) << 3;
740 context->sq_size_stride |= qp->sq.wqe_shift - 4;
741
742 if (qp->ibqp.uobject)
743 context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
744 else
745 context->usr_page = cpu_to_be32(dev->priv_uar.index);
746
747 if (attr_mask & IB_QP_DEST_QPN)
748 context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
749
750 if (attr_mask & IB_QP_PORT) {
751 if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD &&
752 !(attr_mask & IB_QP_AV)) {
753 mlx4_set_sched(&context->pri_path, attr->port_num);
754 optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;
755 }
756 }
757
758 if (attr_mask & IB_QP_PKEY_INDEX) {
759 context->pri_path.pkey_index = attr->pkey_index;
760 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
761 }
762
763 if (attr_mask & IB_QP_RNR_RETRY) {
764 context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
765 optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
766 }
767
768 if (attr_mask & IB_QP_AV) {
769 if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
770 attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) {
771 err = -EINVAL;
772 goto out;
773 }
774
775 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
776 MLX4_QP_OPTPAR_SCHED_QUEUE);
777 }
778
779 if (attr_mask & IB_QP_TIMEOUT) {
780 context->pri_path.ackto = attr->timeout << 3;
781 optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
782 }
783
784 if (attr_mask & IB_QP_ALT_PATH) {
785 if (attr->alt_pkey_index >= dev->dev->caps.pkey_table_len)
786 return -EINVAL;
787
788 if (attr->alt_port_num == 0 ||
789 attr->alt_port_num > dev->dev->caps.num_ports)
790 return -EINVAL;
791
792 if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
793 attr->alt_port_num))
794 return -EINVAL;
795
796 context->alt_path.pkey_index = attr->alt_pkey_index;
797 context->alt_path.ackto = attr->alt_timeout << 3;
798 optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
799 }
800
801 context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
802 context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
803 if (attr_mask & IB_QP_RETRY_CNT) {
804 context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
805 optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;
806 }
807
808 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
809 if (attr->max_rd_atomic)
810 context->params1 |=
811 cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
812 optpar |= MLX4_QP_OPTPAR_SRA_MAX;
813 }
814
815 if (attr_mask & IB_QP_SQ_PSN)
816 context->next_send_psn = cpu_to_be32(attr->sq_psn);
817
818 context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);
819
820 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
821 if (attr->max_dest_rd_atomic)
822 context->params2 |=
823 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
824 optpar |= MLX4_QP_OPTPAR_RRA_MAX;
825 }
826
827 if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
828 context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);
829 optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
830 }
831
832 if (ibqp->srq)
833 context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
834
835 if (attr_mask & IB_QP_MIN_RNR_TIMER) {
836 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
837 optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;
838 }
839 if (attr_mask & IB_QP_RQ_PSN)
840 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
841
842 context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);
843
844 if (attr_mask & IB_QP_QKEY) {
845 context->qkey = cpu_to_be32(attr->qkey);
846 optpar |= MLX4_QP_OPTPAR_Q_KEY;
847 }
848
849 if (ibqp->srq)
850 context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
851
852 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
853 context->db_rec_addr = cpu_to_be64(qp->db.dma);
854
855 if (cur_state == IB_QPS_INIT &&
856 new_state == IB_QPS_RTR &&
857 (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
858 ibqp->qp_type == IB_QPT_UD)) {
859 context->pri_path.sched_queue = (qp->port - 1) << 6;
860 if (is_qp0(dev, qp))
861 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
862 else
863 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
864 }
865
866 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
867 attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
868 sqd_event = 1;
869 else
870 sqd_event = 0;
871
872 err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
873 to_mlx4_state(new_state), context, optpar,
874 sqd_event, &qp->mqp);
875 if (err)
876 goto out;
877
878 qp->state = new_state;
879
880 if (attr_mask & IB_QP_ACCESS_FLAGS)
881 qp->atomic_rd_en = attr->qp_access_flags;
882 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
883 qp->resp_depth = attr->max_dest_rd_atomic;
884 if (attr_mask & IB_QP_PORT)
885 qp->port = attr->port_num;
886 if (attr_mask & IB_QP_ALT_PATH)
887 qp->alt_port = attr->alt_port_num;
888
889 if (is_sqp(dev, qp))
890 store_sqp_attrs(to_msqp(qp), attr, attr_mask);
891
892 /*
893 * If we moved QP0 to RTR, bring the IB link up; if we moved
894 * QP0 to RESET or ERROR, bring the link back down.
895 */
896 if (is_qp0(dev, qp)) {
897 if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
898 init_port(dev, qp->port);
899
900 if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
901 (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
902 mlx4_CLOSE_PORT(dev->dev, qp->port);
903 }
904
905 /*
906 * If we moved a kernel QP to RESET, clean up all old CQ
907 * entries and reinitialize the QP.
908 */
909 if (new_state == IB_QPS_RESET && !ibqp->uobject) {
910 mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,
911 ibqp->srq ? to_msrq(ibqp->srq): NULL);
912 if (ibqp->send_cq != ibqp->recv_cq)
913 mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);
914
915 qp->rq.head = 0;
916 qp->rq.tail = 0;
917 qp->sq.head = 0;
918 qp->sq.tail = 0;
919 *qp->db.db = 0;
920 }
921
922out:
923 mutex_unlock(&qp->mutex);
924 kfree(context);
925 return err;
926}
927
928static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
929 void *wqe)
930{
931 struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;
932 struct mlx4_wqe_mlx_seg *mlx = wqe;
933 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
934 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
935 u16 pkey;
936 int send_size;
937 int header_size;
938 int i;
939
940 send_size = 0;
941 for (i = 0; i < wr->num_sge; ++i)
942 send_size += wr->sg_list[i].length;
943
944 ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header);
945
946 sqp->ud_header.lrh.service_level =
947 be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
948 sqp->ud_header.lrh.destination_lid = ah->av.dlid;
949 sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f);
950 if (mlx4_ib_ah_grh_present(ah)) {
951 sqp->ud_header.grh.traffic_class =
952 (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff;
953 sqp->ud_header.grh.flow_label =
954 ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
955 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24,
956 ah->av.gid_index, &sqp->ud_header.grh.source_gid);
957 memcpy(sqp->ud_header.grh.destination_gid.raw,
958 ah->av.dgid, 16);
959 }
960
961 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
962 mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
963 (sqp->ud_header.lrh.destination_lid ==
964 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
965 (sqp->ud_header.lrh.service_level << 8));
966 mlx->rlid = sqp->ud_header.lrh.destination_lid;
967
968 switch (wr->opcode) {
969 case IB_WR_SEND:
970 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
971 sqp->ud_header.immediate_present = 0;
972 break;
973 case IB_WR_SEND_WITH_IMM:
974 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
975 sqp->ud_header.immediate_present = 1;
976 sqp->ud_header.immediate_data = wr->imm_data;
977 break;
978 default:
979 return -EINVAL;
980 }
981
982 sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
983 if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
984 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
985 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
986 if (!sqp->qp.ibqp.qp_num)
987 ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
988 else
989 ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);
990 sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
991 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
992 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
993 sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
994 sqp->qkey : wr->wr.ud.remote_qkey);
995 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
996
997 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
998
999 if (0) {
1000 printk(KERN_ERR "built UD header of size %d:\n", header_size);
1001 for (i = 0; i < header_size / 4; ++i) {
1002 if (i % 8 == 0)
1003 printk(" [%02x] ", i * 4);
1004 printk(" %08x",
1005 be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
1006 if ((i + 1) % 8 == 0)
1007 printk("\n");
1008 }
1009 printk("\n");
1010 }
1011
1012 inl->byte_count = cpu_to_be32(1 << 31 | header_size);
1013 memcpy(inl + 1, sqp->header_buf, header_size);
1014
1015 return ALIGN(sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
1016}
1017
1018static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
1019{
1020 unsigned cur;
1021 struct mlx4_ib_cq *cq;
1022
1023 cur = wq->head - wq->tail;
1024 if (likely(cur + nreq < wq->max))
1025 return 0;
1026
1027 cq = to_mcq(ib_cq);
1028 spin_lock(&cq->lock);
1029 cur = wq->head - wq->tail;
1030 spin_unlock(&cq->lock);
1031
1032 return cur + nreq >= wq->max;
1033}
1034
1035int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1036 struct ib_send_wr **bad_wr)
1037{
1038 struct mlx4_ib_qp *qp = to_mqp(ibqp);
1039 void *wqe;
1040 struct mlx4_wqe_ctrl_seg *ctrl;
1041 unsigned long flags;
1042 int nreq;
1043 int err = 0;
1044 int ind;
1045 int size;
1046 int i;
1047
1048 spin_lock_irqsave(&qp->rq.lock, flags);
1049
1050 ind = qp->sq.head;
1051
1052 for (nreq = 0; wr; ++nreq, wr = wr->next) {
1053 if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
1054 err = -ENOMEM;
1055 *bad_wr = wr;
1056 goto out;
1057 }
1058
1059 if (unlikely(wr->num_sge > qp->sq.max_gs)) {
1060 err = -EINVAL;
1061 *bad_wr = wr;
1062 goto out;
1063 }
1064
1065 ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.max - 1));
1066 qp->sq.wrid[ind & (qp->sq.max - 1)] = wr->wr_id;
1067
1068 ctrl->srcrb_flags =
1069 (wr->send_flags & IB_SEND_SIGNALED ?
1070 cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
1071 (wr->send_flags & IB_SEND_SOLICITED ?
1072 cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
1073 qp->sq_signal_bits;
1074
1075 if (wr->opcode == IB_WR_SEND_WITH_IMM ||
1076 wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
1077 ctrl->imm = wr->imm_data;
1078 else
1079 ctrl->imm = 0;
1080
1081 wqe += sizeof *ctrl;
1082 size = sizeof *ctrl / 16;
1083
1084 switch (ibqp->qp_type) {
1085 case IB_QPT_RC:
1086 case IB_QPT_UC:
1087 switch (wr->opcode) {
1088 case IB_WR_ATOMIC_CMP_AND_SWP:
1089 case IB_WR_ATOMIC_FETCH_AND_ADD:
1090 ((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
1091 cpu_to_be64(wr->wr.atomic.remote_addr);
1092 ((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
1093 cpu_to_be32(wr->wr.atomic.rkey);
1094 ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
1095
1096 wqe += sizeof (struct mlx4_wqe_raddr_seg);
1097
1098 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
1099 ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
1100 cpu_to_be64(wr->wr.atomic.swap);
1101 ((struct mlx4_wqe_atomic_seg *) wqe)->compare =
1102 cpu_to_be64(wr->wr.atomic.compare_add);
1103 } else {
1104 ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
1105 cpu_to_be64(wr->wr.atomic.compare_add);
1106 ((struct mlx4_wqe_atomic_seg *) wqe)->compare = 0;
1107 }
1108
1109 wqe += sizeof (struct mlx4_wqe_atomic_seg);
1110 size += (sizeof (struct mlx4_wqe_raddr_seg) +
1111 sizeof (struct mlx4_wqe_atomic_seg)) / 16;
1112
1113 break;
1114
1115 case IB_WR_RDMA_READ:
1116 case IB_WR_RDMA_WRITE:
1117 case IB_WR_RDMA_WRITE_WITH_IMM:
1118 ((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
1119 cpu_to_be64(wr->wr.rdma.remote_addr);
1120 ((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
1121 cpu_to_be32(wr->wr.rdma.rkey);
1122 ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
1123
1124 wqe += sizeof (struct mlx4_wqe_raddr_seg);
1125 size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
1126
1127 break;
1128
1129 default:
1130 /* No extra segments required for sends */
1131 break;
1132 }
1133 break;
1134
1135 case IB_QPT_UD:
1136 memcpy(((struct mlx4_wqe_datagram_seg *) wqe)->av,
1137 &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
1138 ((struct mlx4_wqe_datagram_seg *) wqe)->dqpn =
1139 cpu_to_be32(wr->wr.ud.remote_qpn);
1140 ((struct mlx4_wqe_datagram_seg *) wqe)->qkey =
1141 cpu_to_be32(wr->wr.ud.remote_qkey);
1142
1143 wqe += sizeof (struct mlx4_wqe_datagram_seg);
1144 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
1145 break;
1146
1147 case IB_QPT_SMI:
1148 case IB_QPT_GSI:
1149 err = build_mlx_header(to_msqp(qp), wr, ctrl);
1150 if (err < 0) {
1151 *bad_wr = wr;
1152 goto out;
1153 }
1154 wqe += err;
1155 size += err / 16;
1156
1157 err = 0;
1158 break;
1159
1160 default:
1161 break;
1162 }
1163
1164 for (i = 0; i < wr->num_sge; ++i) {
1165 ((struct mlx4_wqe_data_seg *) wqe)->byte_count =
1166 cpu_to_be32(wr->sg_list[i].length);
1167 ((struct mlx4_wqe_data_seg *) wqe)->lkey =
1168 cpu_to_be32(wr->sg_list[i].lkey);
1169 ((struct mlx4_wqe_data_seg *) wqe)->addr =
1170 cpu_to_be64(wr->sg_list[i].addr);
1171
1172 wqe += sizeof (struct mlx4_wqe_data_seg);
1173 size += sizeof (struct mlx4_wqe_data_seg) / 16;
1174 }
1175
1176 /* Add one more inline data segment for ICRC for MLX sends */
1177 if (qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) {
1178 ((struct mlx4_wqe_inline_seg *) wqe)->byte_count =
1179 cpu_to_be32((1 << 31) | 4);
1180 ((u32 *) wqe)[1] = 0;
1181 wqe += sizeof (struct mlx4_wqe_data_seg);
1182 size += sizeof (struct mlx4_wqe_data_seg) / 16;
1183 }
1184
1185 ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
1186 MLX4_WQE_CTRL_FENCE : 0) | size;
1187
1188 /*
1189 * Make sure descriptor is fully written before
1190 * setting ownership bit (because HW can start
1191 * executing as soon as we do).
1192 */
1193 wmb();
1194
1195 if (wr->opcode < 0 || wr->opcode > ARRAY_SIZE(mlx4_ib_opcode)) {
1196 err = -EINVAL;
1197 goto out;
1198 }
1199
1200 ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
1201 (ind & qp->sq.max ? cpu_to_be32(1 << 31) : 0);
1202
1203 ++ind;
1204 }
1205
1206out:
1207 if (likely(nreq)) {
1208 qp->sq.head += nreq;
1209
1210 /*
1211 * Make sure that descriptors are written before
1212 * doorbell record.
1213 */
1214 wmb();
1215
1216 writel(qp->doorbell_qpn,
1217 to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
1218
1219 /*
1220 * Make sure doorbells don't leak out of SQ spinlock
1221 * and reach the HCA out of order.
1222 */
1223 mmiowb();
1224 }
1225
1226 spin_unlock_irqrestore(&qp->rq.lock, flags);
1227
1228 return err;
1229}
1230
1231int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1232 struct ib_recv_wr **bad_wr)
1233{
1234 struct mlx4_ib_qp *qp = to_mqp(ibqp);
1235 struct mlx4_wqe_data_seg *scat;
1236 unsigned long flags;
1237 int err = 0;
1238 int nreq;
1239 int ind;
1240 int i;
1241
1242 spin_lock_irqsave(&qp->rq.lock, flags);
1243
1244 ind = qp->rq.head & (qp->rq.max - 1);
1245
1246 for (nreq = 0; wr; ++nreq, wr = wr->next) {
1247 if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.send_cq)) {
1248 err = -ENOMEM;
1249 *bad_wr = wr;
1250 goto out;
1251 }
1252
1253 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
1254 err = -EINVAL;
1255 *bad_wr = wr;
1256 goto out;
1257 }
1258
1259 scat = get_recv_wqe(qp, ind);
1260
1261 for (i = 0; i < wr->num_sge; ++i) {
1262 scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
1263 scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
1264 scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
1265 }
1266
1267 if (i < qp->rq.max_gs) {
1268 scat[i].byte_count = 0;
1269 scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
1270 scat[i].addr = 0;
1271 }
1272
1273 qp->rq.wrid[ind] = wr->wr_id;
1274
1275 ind = (ind + 1) & (qp->rq.max - 1);
1276 }
1277
1278out:
1279 if (likely(nreq)) {
1280 qp->rq.head += nreq;
1281
1282 /*
1283 * Make sure that descriptors are written before
1284 * doorbell record.
1285 */
1286 wmb();
1287
1288 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
1289 }
1290
1291 spin_unlock_irqrestore(&qp->rq.lock, flags);
1292
1293 return err;
1294}