aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/mlx4/cq.c
diff options
context:
space:
mode:
authorRoland Dreier <rolandd@cisco.com>2007-05-08 21:00:38 -0400
committerRoland Dreier <rolandd@cisco.com>2007-05-08 21:00:38 -0400
commit225c7b1feef1b41170f7037a5b10a65cd8a42c54 (patch)
tree702a0a2cbba7f1c5b2949d236b4463d486204fdc /drivers/infiniband/hw/mlx4/cq.c
parent1bf66a30421ca772820f489d88c16d0c430d6a67 (diff)
IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters
Add an InfiniBand driver for Mellanox ConnectX adapters. Because these adapters can also be used as ethernet NICs and Fibre Channel HBAs, the driver is split into two modules: mlx4_core: Handles low-level things like device initialization and processing firmware commands. Also controls resource allocation so that the InfiniBand, ethernet and FC functions can share a device without stepping on each other. mlx4_ib: Handles InfiniBand-specific things; plugs into the InfiniBand midlayer. Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/mlx4/cq.c')
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c525
1 files changed, 525 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
new file mode 100644
index 000000000000..b2a290c6703a
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -0,0 +1,525 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/mlx4/cq.h>
34#include <linux/mlx4/qp.h>
35
36#include "mlx4_ib.h"
37#include "user.h"
38
39static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
40{
41 struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
42 ibcq->comp_handler(ibcq, ibcq->cq_context);
43}
44
45static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
46{
47 struct ib_event event;
48 struct ib_cq *ibcq;
49
50 if (type != MLX4_EVENT_TYPE_CQ_ERROR) {
51 printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
52 "on CQ %06x\n", type, cq->cqn);
53 return;
54 }
55
56 ibcq = &to_mibcq(cq)->ibcq;
57 if (ibcq->event_handler) {
58 event.device = ibcq->device;
59 event.event = IB_EVENT_CQ_ERR;
60 event.element.cq = ibcq;
61 ibcq->event_handler(&event, ibcq->cq_context);
62 }
63}
64
65static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
66{
67 int offset = n * sizeof (struct mlx4_cqe);
68
69 if (buf->buf.nbufs == 1)
70 return buf->buf.u.direct.buf + offset;
71 else
72 return buf->buf.u.page_list[offset >> PAGE_SHIFT].buf +
73 (offset & (PAGE_SIZE - 1));
74}
75
76static void *get_cqe(struct mlx4_ib_cq *cq, int n)
77{
78 return get_cqe_from_buf(&cq->buf, n);
79}
80
81static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
82{
83 struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
84
85 return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
86 !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
87}
88
89static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq)
90{
91 return get_sw_cqe(cq, cq->mcq.cons_index);
92}
93
94struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
95 struct ib_ucontext *context,
96 struct ib_udata *udata)
97{
98 struct mlx4_ib_dev *dev = to_mdev(ibdev);
99 struct mlx4_ib_cq *cq;
100 struct mlx4_uar *uar;
101 int buf_size;
102 int err;
103
104 if (entries < 1 || entries > dev->dev->caps.max_cqes)
105 return ERR_PTR(-EINVAL);
106
107 cq = kmalloc(sizeof *cq, GFP_KERNEL);
108 if (!cq)
109 return ERR_PTR(-ENOMEM);
110
111 entries = roundup_pow_of_two(entries + 1);
112 cq->ibcq.cqe = entries - 1;
113 buf_size = entries * sizeof (struct mlx4_cqe);
114 spin_lock_init(&cq->lock);
115
116 if (context) {
117 struct mlx4_ib_create_cq ucmd;
118
119 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
120 err = -EFAULT;
121 goto err_cq;
122 }
123
124 cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size,
125 IB_ACCESS_LOCAL_WRITE);
126 if (IS_ERR(cq->umem)) {
127 err = PTR_ERR(cq->umem);
128 goto err_cq;
129 }
130
131 err = mlx4_mtt_init(dev->dev, ib_umem_page_count(cq->umem),
132 ilog2(cq->umem->page_size), &cq->buf.mtt);
133 if (err)
134 goto err_buf;
135
136 err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem);
137 if (err)
138 goto err_mtt;
139
140 err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
141 &cq->db);
142 if (err)
143 goto err_mtt;
144
145 uar = &to_mucontext(context)->uar;
146 } else {
147 err = mlx4_ib_db_alloc(dev, &cq->db, 1);
148 if (err)
149 goto err_cq;
150
151 cq->mcq.set_ci_db = cq->db.db;
152 cq->mcq.arm_db = cq->db.db + 1;
153 *cq->mcq.set_ci_db = 0;
154 *cq->mcq.arm_db = 0;
155
156 if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &cq->buf.buf)) {
157 err = -ENOMEM;
158 goto err_db;
159 }
160
161 err = mlx4_mtt_init(dev->dev, cq->buf.buf.npages, cq->buf.buf.page_shift,
162 &cq->buf.mtt);
163 if (err)
164 goto err_buf;
165
166 err = mlx4_buf_write_mtt(dev->dev, &cq->buf.mtt, &cq->buf.buf);
167 if (err)
168 goto err_mtt;
169
170 uar = &dev->priv_uar;
171 }
172
173 err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
174 cq->db.dma, &cq->mcq);
175 if (err)
176 goto err_dbmap;
177
178 cq->mcq.comp = mlx4_ib_cq_comp;
179 cq->mcq.event = mlx4_ib_cq_event;
180
181 if (context)
182 if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
183 err = -EFAULT;
184 goto err_dbmap;
185 }
186
187 return &cq->ibcq;
188
189err_dbmap:
190 if (context)
191 mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);
192
193err_mtt:
194 mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);
195
196err_buf:
197 if (context)
198 ib_umem_release(cq->umem);
199 else
200 mlx4_buf_free(dev->dev, entries * sizeof (struct mlx4_cqe),
201 &cq->buf.buf);
202
203err_db:
204 if (!context)
205 mlx4_ib_db_free(dev, &cq->db);
206
207err_cq:
208 kfree(cq);
209
210 return ERR_PTR(err);
211}
212
213int mlx4_ib_destroy_cq(struct ib_cq *cq)
214{
215 struct mlx4_ib_dev *dev = to_mdev(cq->device);
216 struct mlx4_ib_cq *mcq = to_mcq(cq);
217
218 mlx4_cq_free(dev->dev, &mcq->mcq);
219 mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt);
220
221 if (cq->uobject) {
222 mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
223 ib_umem_release(mcq->umem);
224 } else {
225 mlx4_buf_free(dev->dev, (cq->cqe + 1) * sizeof (struct mlx4_cqe),
226 &mcq->buf.buf);
227 mlx4_ib_db_free(dev, &mcq->db);
228 }
229
230 kfree(mcq);
231
232 return 0;
233}
234
235static void dump_cqe(void *cqe)
236{
237 __be32 *buf = cqe;
238
239 printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
240 be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),
241 be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),
242 be32_to_cpu(buf[6]), be32_to_cpu(buf[7]));
243}
244
245static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
246 struct ib_wc *wc)
247{
248 if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {
249 printk(KERN_DEBUG "local QP operation err "
250 "(QPN %06x, WQE index %x, vendor syndrome %02x, "
251 "opcode = %02x)\n",
252 be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),
253 cqe->vendor_err_syndrome,
254 cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
255 dump_cqe(cqe);
256 }
257
258 switch (cqe->syndrome) {
259 case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR:
260 wc->status = IB_WC_LOC_LEN_ERR;
261 break;
262 case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR:
263 wc->status = IB_WC_LOC_QP_OP_ERR;
264 break;
265 case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR:
266 wc->status = IB_WC_LOC_PROT_ERR;
267 break;
268 case MLX4_CQE_SYNDROME_WR_FLUSH_ERR:
269 wc->status = IB_WC_WR_FLUSH_ERR;
270 break;
271 case MLX4_CQE_SYNDROME_MW_BIND_ERR:
272 wc->status = IB_WC_MW_BIND_ERR;
273 break;
274 case MLX4_CQE_SYNDROME_BAD_RESP_ERR:
275 wc->status = IB_WC_BAD_RESP_ERR;
276 break;
277 case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR:
278 wc->status = IB_WC_LOC_ACCESS_ERR;
279 break;
280 case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
281 wc->status = IB_WC_REM_INV_REQ_ERR;
282 break;
283 case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR:
284 wc->status = IB_WC_REM_ACCESS_ERR;
285 break;
286 case MLX4_CQE_SYNDROME_REMOTE_OP_ERR:
287 wc->status = IB_WC_REM_OP_ERR;
288 break;
289 case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
290 wc->status = IB_WC_RETRY_EXC_ERR;
291 break;
292 case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
293 wc->status = IB_WC_RNR_RETRY_EXC_ERR;
294 break;
295 case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR:
296 wc->status = IB_WC_REM_ABORT_ERR;
297 break;
298 default:
299 wc->status = IB_WC_GENERAL_ERR;
300 break;
301 }
302
303 wc->vendor_err = cqe->vendor_err_syndrome;
304}
305
306static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
307 struct mlx4_ib_qp **cur_qp,
308 struct ib_wc *wc)
309{
310 struct mlx4_cqe *cqe;
311 struct mlx4_qp *mqp;
312 struct mlx4_ib_wq *wq;
313 struct mlx4_ib_srq *srq;
314 int is_send;
315 int is_error;
316 u16 wqe_ctr;
317
318 cqe = next_cqe_sw(cq);
319 if (!cqe)
320 return -EAGAIN;
321
322 ++cq->mcq.cons_index;
323
324 /*
325 * Make sure we read CQ entry contents after we've checked the
326 * ownership bit.
327 */
328 rmb();
329
330 is_send = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
331 is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
332 MLX4_CQE_OPCODE_ERROR;
333
334 if (!*cur_qp ||
335 (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) {
336 /*
337 * We do not have to take the QP table lock here,
338 * because CQs will be locked while QPs are removed
339 * from the table.
340 */
341 mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
342 be32_to_cpu(cqe->my_qpn));
343 if (unlikely(!mqp)) {
344 printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
345 cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff);
346 return -EINVAL;
347 }
348
349 *cur_qp = to_mibqp(mqp);
350 }
351
352 wc->qp = &(*cur_qp)->ibqp;
353
354 if (is_send) {
355 wq = &(*cur_qp)->sq;
356 wqe_ctr = be16_to_cpu(cqe->wqe_index);
357 wq->tail += wqe_ctr - (u16) wq->tail;
358 wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)];
359 ++wq->tail;
360 } else if ((*cur_qp)->ibqp.srq) {
361 srq = to_msrq((*cur_qp)->ibqp.srq);
362 wqe_ctr = be16_to_cpu(cqe->wqe_index);
363 wc->wr_id = srq->wrid[wqe_ctr];
364 mlx4_ib_free_srq_wqe(srq, wqe_ctr);
365 } else {
366 wq = &(*cur_qp)->rq;
367 wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)];
368 ++wq->tail;
369 }
370
371 if (unlikely(is_error)) {
372 mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc);
373 return 0;
374 }
375
376 wc->status = IB_WC_SUCCESS;
377
378 if (is_send) {
379 wc->wc_flags = 0;
380 switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
381 case MLX4_OPCODE_RDMA_WRITE_IMM:
382 wc->wc_flags |= IB_WC_WITH_IMM;
383 case MLX4_OPCODE_RDMA_WRITE:
384 wc->opcode = IB_WC_RDMA_WRITE;
385 break;
386 case MLX4_OPCODE_SEND_IMM:
387 wc->wc_flags |= IB_WC_WITH_IMM;
388 case MLX4_OPCODE_SEND:
389 wc->opcode = IB_WC_SEND;
390 break;
391 case MLX4_OPCODE_RDMA_READ:
392 wc->opcode = IB_WC_SEND;
393 wc->byte_len = be32_to_cpu(cqe->byte_cnt);
394 break;
395 case MLX4_OPCODE_ATOMIC_CS:
396 wc->opcode = IB_WC_COMP_SWAP;
397 wc->byte_len = 8;
398 break;
399 case MLX4_OPCODE_ATOMIC_FA:
400 wc->opcode = IB_WC_FETCH_ADD;
401 wc->byte_len = 8;
402 break;
403 case MLX4_OPCODE_BIND_MW:
404 wc->opcode = IB_WC_BIND_MW;
405 break;
406 }
407 } else {
408 wc->byte_len = be32_to_cpu(cqe->byte_cnt);
409
410 switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
411 case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
412 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
413 wc->wc_flags = IB_WC_WITH_IMM;
414 wc->imm_data = cqe->immed_rss_invalid;
415 break;
416 case MLX4_RECV_OPCODE_SEND:
417 wc->opcode = IB_WC_RECV;
418 wc->wc_flags = 0;
419 break;
420 case MLX4_RECV_OPCODE_SEND_IMM:
421 wc->opcode = IB_WC_RECV;
422 wc->wc_flags = IB_WC_WITH_IMM;
423 wc->imm_data = cqe->immed_rss_invalid;
424 break;
425 }
426
427 wc->slid = be16_to_cpu(cqe->rlid);
428 wc->sl = cqe->sl >> 4;
429 wc->src_qp = be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff;
430 wc->dlid_path_bits = (be32_to_cpu(cqe->g_mlpath_rqpn) >> 24) & 0x7f;
431 wc->wc_flags |= be32_to_cpu(cqe->g_mlpath_rqpn) & 0x80000000 ?
432 IB_WC_GRH : 0;
433 wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) >> 16;
434 }
435
436 return 0;
437}
438
439int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
440{
441 struct mlx4_ib_cq *cq = to_mcq(ibcq);
442 struct mlx4_ib_qp *cur_qp = NULL;
443 unsigned long flags;
444 int npolled;
445 int err = 0;
446
447 spin_lock_irqsave(&cq->lock, flags);
448
449 for (npolled = 0; npolled < num_entries; ++npolled) {
450 err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
451 if (err)
452 break;
453 }
454
455 if (npolled)
456 mlx4_cq_set_ci(&cq->mcq);
457
458 spin_unlock_irqrestore(&cq->lock, flags);
459
460 if (err == 0 || err == -EAGAIN)
461 return npolled;
462 else
463 return err;
464}
465
466int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
467{
468 mlx4_cq_arm(&to_mcq(ibcq)->mcq,
469 (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
470 MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT,
471 to_mdev(ibcq->device)->uar_map,
472 MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->uar_lock));
473
474 return 0;
475}
476
477void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
478{
479 u32 prod_index;
480 int nfreed = 0;
481 struct mlx4_cqe *cqe;
482
483 /*
484 * First we need to find the current producer index, so we
485 * know where to start cleaning from. It doesn't matter if HW
486 * adds new entries after this loop -- the QP we're worried
487 * about is already in RESET, so the new entries won't come
488 * from our QP and therefore don't need to be checked.
489 */
490 for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
491 if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
492 break;
493
494 /*
495 * Now sweep backwards through the CQ, removing CQ entries
496 * that match our QP by copying older entries on top of them.
497 */
498 while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
499 cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
500 if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) {
501 if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
502 mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
503 ++nfreed;
504 } else if (nfreed)
505 memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe),
506 cqe, sizeof *cqe);
507 }
508
509 if (nfreed) {
510 cq->mcq.cons_index += nfreed;
511 /*
512 * Make sure update of buffer contents is done before
513 * updating consumer index.
514 */
515 wmb();
516 mlx4_cq_set_ci(&cq->mcq);
517 }
518}
519
520void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
521{
522 spin_lock_irq(&cq->lock);
523 __mlx4_ib_cq_clean(cq, qpn, srq);
524 spin_unlock_irq(&cq->lock);
525}