aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorBryan O'Sullivan <bos@pathscale.com>2006-03-29 18:23:38 -0500
committerRoland Dreier <rolandd@cisco.com>2006-03-31 16:14:21 -0500
commit6522108f19a998a5ded4d0c4d0f9eb9736398e31 (patch)
tree675eb475939abf9889edf7b76a49e383b7c6e7e5 /drivers/infiniband
parente28c00ad67164dba688c1d19c208c5fb554465f2 (diff)
IB/ipath: infiniband verbs support
The ipath_verbs.c file implements the driver-specific components of the kernel's Infiniband verbs layer. Signed-off-by: Bryan O'Sullivan <bos@pathscale.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c1222
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs_mcast.c333
2 files changed, 1555 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
new file mode 100644
index 000000000000..9f27fd35cdbb
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -0,0 +1,1222 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_mad.h>
34#include <rdma/ib_user_verbs.h>
35#include <linux/utsname.h>
36
37#include "ipath_kernel.h"
38#include "ipath_verbs.h"
39#include "ips_common.h"
40
41/* Not static, because we don't want the compiler removing it */
42const char ipath_verbs_version[] = "ipath_verbs " IPATH_IDSTR;
43
44unsigned int ib_ipath_qp_table_size = 251;
45module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
46MODULE_PARM_DESC(qp_table_size, "QP table size");
47
48unsigned int ib_ipath_lkey_table_size = 12;
49module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
50 S_IRUGO);
51MODULE_PARM_DESC(lkey_table_size,
52 "LKEY table size in bits (2^n, 1 <= n <= 23)");
53
54unsigned int ib_ipath_debug; /* debug mask */
55module_param_named(debug, ib_ipath_debug, uint, S_IWUSR | S_IRUGO);
56MODULE_PARM_DESC(debug, "Verbs debug mask");
57
58MODULE_LICENSE("GPL");
59MODULE_AUTHOR("PathScale <support@pathscale.com>");
60MODULE_DESCRIPTION("Pathscale InfiniPath driver");
61
62const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
63 [IB_QPS_RESET] = 0,
64 [IB_QPS_INIT] = IPATH_POST_RECV_OK,
65 [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
66 [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
67 IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
68 [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
69 IPATH_POST_SEND_OK,
70 [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
71 [IB_QPS_ERR] = 0,
72};
73
74/*
75 * Translate ib_wr_opcode into ib_wc_opcode.
76 */
77const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
78 [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
79 [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
80 [IB_WR_SEND] = IB_WC_SEND,
81 [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
82 [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
83 [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
84 [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
85};
86
87/*
88 * System image GUID.
89 */
90__be64 sys_image_guid;
91
92/**
93 * ipath_copy_sge - copy data to SGE memory
94 * @ss: the SGE state
95 * @data: the data to copy
96 * @length: the length of the data
97 */
98void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
99{
100 struct ipath_sge *sge = &ss->sge;
101
102 while (length) {
103 u32 len = sge->length;
104
105 BUG_ON(len == 0);
106 if (len > length)
107 len = length;
108 memcpy(sge->vaddr, data, len);
109 sge->vaddr += len;
110 sge->length -= len;
111 sge->sge_length -= len;
112 if (sge->sge_length == 0) {
113 if (--ss->num_sge)
114 *sge = *ss->sg_list++;
115 } else if (sge->length == 0 && sge->mr != NULL) {
116 if (++sge->n >= IPATH_SEGSZ) {
117 if (++sge->m >= sge->mr->mapsz)
118 break;
119 sge->n = 0;
120 }
121 sge->vaddr =
122 sge->mr->map[sge->m]->segs[sge->n].vaddr;
123 sge->length =
124 sge->mr->map[sge->m]->segs[sge->n].length;
125 }
126 data += len;
127 length -= len;
128 }
129}
130
131/**
132 * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
133 * @ss: the SGE state
134 * @length: the number of bytes to skip
135 */
136void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
137{
138 struct ipath_sge *sge = &ss->sge;
139
140 while (length > sge->sge_length) {
141 length -= sge->sge_length;
142 ss->sge = *ss->sg_list++;
143 }
144 while (length) {
145 u32 len = sge->length;
146
147 BUG_ON(len == 0);
148 if (len > length)
149 len = length;
150 sge->vaddr += len;
151 sge->length -= len;
152 sge->sge_length -= len;
153 if (sge->sge_length == 0) {
154 if (--ss->num_sge)
155 *sge = *ss->sg_list++;
156 } else if (sge->length == 0 && sge->mr != NULL) {
157 if (++sge->n >= IPATH_SEGSZ) {
158 if (++sge->m >= sge->mr->mapsz)
159 break;
160 sge->n = 0;
161 }
162 sge->vaddr =
163 sge->mr->map[sge->m]->segs[sge->n].vaddr;
164 sge->length =
165 sge->mr->map[sge->m]->segs[sge->n].length;
166 }
167 length -= len;
168 }
169}
170
171/**
172 * ipath_post_send - post a send on a QP
173 * @ibqp: the QP to post the send on
174 * @wr: the list of work requests to post
175 * @bad_wr: the first bad WR is put here
176 *
177 * This may be called from interrupt context.
178 */
179static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
180 struct ib_send_wr **bad_wr)
181{
182 struct ipath_qp *qp = to_iqp(ibqp);
183 int err = 0;
184
185 /* Check that state is OK to post send. */
186 if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)) {
187 *bad_wr = wr;
188 err = -EINVAL;
189 goto bail;
190 }
191
192 for (; wr; wr = wr->next) {
193 switch (qp->ibqp.qp_type) {
194 case IB_QPT_UC:
195 case IB_QPT_RC:
196 err = ipath_post_rc_send(qp, wr);
197 break;
198
199 case IB_QPT_SMI:
200 case IB_QPT_GSI:
201 case IB_QPT_UD:
202 err = ipath_post_ud_send(qp, wr);
203 break;
204
205 default:
206 err = -EINVAL;
207 }
208 if (err) {
209 *bad_wr = wr;
210 break;
211 }
212 }
213
214bail:
215 return err;
216}
217
218/**
219 * ipath_post_receive - post a receive on a QP
220 * @ibqp: the QP to post the receive on
221 * @wr: the WR to post
222 * @bad_wr: the first bad WR is put here
223 *
224 * This may be called from interrupt context.
225 */
226static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
227 struct ib_recv_wr **bad_wr)
228{
229 struct ipath_qp *qp = to_iqp(ibqp);
230 unsigned long flags;
231 int ret;
232
233 /* Check that state is OK to post receive. */
234 if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK)) {
235 *bad_wr = wr;
236 ret = -EINVAL;
237 goto bail;
238 }
239
240 for (; wr; wr = wr->next) {
241 struct ipath_rwqe *wqe;
242 u32 next;
243 int i, j;
244
245 if (wr->num_sge > qp->r_rq.max_sge) {
246 *bad_wr = wr;
247 ret = -ENOMEM;
248 goto bail;
249 }
250
251 spin_lock_irqsave(&qp->r_rq.lock, flags);
252 next = qp->r_rq.head + 1;
253 if (next >= qp->r_rq.size)
254 next = 0;
255 if (next == qp->r_rq.tail) {
256 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
257 *bad_wr = wr;
258 ret = -ENOMEM;
259 goto bail;
260 }
261
262 wqe = get_rwqe_ptr(&qp->r_rq, qp->r_rq.head);
263 wqe->wr_id = wr->wr_id;
264 wqe->sg_list[0].mr = NULL;
265 wqe->sg_list[0].vaddr = NULL;
266 wqe->sg_list[0].length = 0;
267 wqe->sg_list[0].sge_length = 0;
268 wqe->length = 0;
269 for (i = 0, j = 0; i < wr->num_sge; i++) {
270 /* Check LKEY */
271 if (to_ipd(qp->ibqp.pd)->user &&
272 wr->sg_list[i].lkey == 0) {
273 spin_unlock_irqrestore(&qp->r_rq.lock,
274 flags);
275 *bad_wr = wr;
276 ret = -EINVAL;
277 goto bail;
278 }
279 if (wr->sg_list[i].length == 0)
280 continue;
281 if (!ipath_lkey_ok(
282 &to_idev(qp->ibqp.device)->lk_table,
283 &wqe->sg_list[j], &wr->sg_list[i],
284 IB_ACCESS_LOCAL_WRITE)) {
285 spin_unlock_irqrestore(&qp->r_rq.lock,
286 flags);
287 *bad_wr = wr;
288 ret = -EINVAL;
289 goto bail;
290 }
291 wqe->length += wr->sg_list[i].length;
292 j++;
293 }
294 wqe->num_sge = j;
295 qp->r_rq.head = next;
296 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
297 }
298 ret = 0;
299
300bail:
301 return ret;
302}
303
304/**
305 * ipath_qp_rcv - processing an incoming packet on a QP
306 * @dev: the device the packet came on
307 * @hdr: the packet header
308 * @has_grh: true if the packet has a GRH
309 * @data: the packet data
310 * @tlen: the packet length
311 * @qp: the QP the packet came on
312 *
313 * This is called from ipath_ib_rcv() to process an incoming packet
314 * for the given QP.
315 * Called at interrupt level.
316 */
317static void ipath_qp_rcv(struct ipath_ibdev *dev,
318 struct ipath_ib_header *hdr, int has_grh,
319 void *data, u32 tlen, struct ipath_qp *qp)
320{
321 /* Check for valid receive state. */
322 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
323 dev->n_pkt_drops++;
324 return;
325 }
326
327 switch (qp->ibqp.qp_type) {
328 case IB_QPT_SMI:
329 case IB_QPT_GSI:
330 case IB_QPT_UD:
331 ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
332 break;
333
334 case IB_QPT_RC:
335 ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
336 break;
337
338 case IB_QPT_UC:
339 ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
340 break;
341
342 default:
343 break;
344 }
345}
346
347/**
348 * ipath_ib_rcv - process and incoming packet
349 * @arg: the device pointer
350 * @rhdr: the header of the packet
351 * @data: the packet data
352 * @tlen: the packet length
353 *
354 * This is called from ipath_kreceive() to process an incoming packet at
355 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
356 */
357static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
358{
359 struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
360 struct ipath_ib_header *hdr = rhdr;
361 struct ipath_other_headers *ohdr;
362 struct ipath_qp *qp;
363 u32 qp_num;
364 int lnh;
365 u8 opcode;
366 u16 lid;
367
368 if (unlikely(dev == NULL))
369 goto bail;
370
371 if (unlikely(tlen < 24)) { /* LRH+BTH+CRC */
372 dev->rcv_errors++;
373 goto bail;
374 }
375
376 /* Check for a valid destination LID (see ch. 7.11.1). */
377 lid = be16_to_cpu(hdr->lrh[1]);
378 if (lid < IPS_MULTICAST_LID_BASE) {
379 lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
380 if (unlikely(lid != ipath_layer_get_lid(dev->dd))) {
381 dev->rcv_errors++;
382 goto bail;
383 }
384 }
385
386 /* Check for GRH */
387 lnh = be16_to_cpu(hdr->lrh[0]) & 3;
388 if (lnh == IPS_LRH_BTH)
389 ohdr = &hdr->u.oth;
390 else if (lnh == IPS_LRH_GRH)
391 ohdr = &hdr->u.l.oth;
392 else {
393 dev->rcv_errors++;
394 goto bail;
395 }
396
397 opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
398 dev->opstats[opcode].n_bytes += tlen;
399 dev->opstats[opcode].n_packets++;
400
401 /* Get the destination QP number. */
402 qp_num = be32_to_cpu(ohdr->bth[1]) & IPS_QPN_MASK;
403 if (qp_num == IPS_MULTICAST_QPN) {
404 struct ipath_mcast *mcast;
405 struct ipath_mcast_qp *p;
406
407 mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
408 if (mcast == NULL) {
409 dev->n_pkt_drops++;
410 goto bail;
411 }
412 dev->n_multicast_rcv++;
413 list_for_each_entry_rcu(p, &mcast->qp_list, list)
414 ipath_qp_rcv(dev, hdr, lnh == IPS_LRH_GRH, data,
415 tlen, p->qp);
416 /*
417 * Notify ipath_multicast_detach() if it is waiting for us
418 * to finish.
419 */
420 if (atomic_dec_return(&mcast->refcount) <= 1)
421 wake_up(&mcast->wait);
422 } else {
423 qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
424 if (qp) {
425 dev->n_unicast_rcv++;
426 ipath_qp_rcv(dev, hdr, lnh == IPS_LRH_GRH, data,
427 tlen, qp);
428 /*
429 * Notify ipath_destroy_qp() if it is waiting
430 * for us to finish.
431 */
432 if (atomic_dec_and_test(&qp->refcount))
433 wake_up(&qp->wait);
434 } else
435 dev->n_pkt_drops++;
436 }
437
438bail:;
439}
440
441/**
442 * ipath_ib_timer - verbs timer
443 * @arg: the device pointer
444 *
445 * This is called from ipath_do_rcv_timer() at interrupt level to check for
446 * QPs which need retransmits and to collect performance numbers.
447 */
448static void ipath_ib_timer(void *arg)
449{
450 struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
451 struct ipath_qp *resend = NULL;
452 struct ipath_qp *rnr = NULL;
453 struct list_head *last;
454 struct ipath_qp *qp;
455 unsigned long flags;
456
457 if (dev == NULL)
458 return;
459
460 spin_lock_irqsave(&dev->pending_lock, flags);
461 /* Start filling the next pending queue. */
462 if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
463 dev->pending_index = 0;
464 /* Save any requests still in the new queue, they have timed out. */
465 last = &dev->pending[dev->pending_index];
466 while (!list_empty(last)) {
467 qp = list_entry(last->next, struct ipath_qp, timerwait);
468 if (last->next == LIST_POISON1 ||
469 last->next != &qp->timerwait ||
470 qp->timerwait.prev != last) {
471 INIT_LIST_HEAD(last);
472 } else {
473 list_del(&qp->timerwait);
474 qp->timerwait.prev = (struct list_head *) resend;
475 resend = qp;
476 atomic_inc(&qp->refcount);
477 }
478 }
479 last = &dev->rnrwait;
480 if (!list_empty(last)) {
481 qp = list_entry(last->next, struct ipath_qp, timerwait);
482 if (--qp->s_rnr_timeout == 0) {
483 do {
484 if (last->next == LIST_POISON1 ||
485 last->next != &qp->timerwait ||
486 qp->timerwait.prev != last) {
487 INIT_LIST_HEAD(last);
488 break;
489 }
490 list_del(&qp->timerwait);
491 qp->timerwait.prev =
492 (struct list_head *) rnr;
493 rnr = qp;
494 if (list_empty(last))
495 break;
496 qp = list_entry(last->next, struct ipath_qp,
497 timerwait);
498 } while (qp->s_rnr_timeout == 0);
499 }
500 }
501 /*
502 * We should only be in the started state if pma_sample_start != 0
503 */
504 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
505 --dev->pma_sample_start == 0) {
506 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
507 ipath_layer_snapshot_counters(dev->dd, &dev->ipath_sword,
508 &dev->ipath_rword,
509 &dev->ipath_spkts,
510 &dev->ipath_rpkts,
511 &dev->ipath_xmit_wait);
512 }
513 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
514 if (dev->pma_sample_interval == 0) {
515 u64 ta, tb, tc, td, te;
516
517 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
518 ipath_layer_snapshot_counters(dev->dd, &ta, &tb,
519 &tc, &td, &te);
520
521 dev->ipath_sword = ta - dev->ipath_sword;
522 dev->ipath_rword = tb - dev->ipath_rword;
523 dev->ipath_spkts = tc - dev->ipath_spkts;
524 dev->ipath_rpkts = td - dev->ipath_rpkts;
525 dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
526 }
527 else
528 dev->pma_sample_interval--;
529 }
530 spin_unlock_irqrestore(&dev->pending_lock, flags);
531
532 /* XXX What if timer fires again while this is running? */
533 for (qp = resend; qp != NULL;
534 qp = (struct ipath_qp *) qp->timerwait.prev) {
535 struct ib_wc wc;
536
537 spin_lock_irqsave(&qp->s_lock, flags);
538 if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) {
539 dev->n_timeouts++;
540 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
541 }
542 spin_unlock_irqrestore(&qp->s_lock, flags);
543
544 /* Notify ipath_destroy_qp() if it is waiting. */
545 if (atomic_dec_and_test(&qp->refcount))
546 wake_up(&qp->wait);
547 }
548 for (qp = rnr; qp != NULL;
549 qp = (struct ipath_qp *) qp->timerwait.prev)
550 tasklet_hi_schedule(&qp->s_task);
551}
552
553/**
554 * ipath_ib_piobufavail - callback when a PIO buffer is available
555 * @arg: the device pointer
556 *
557 * This is called from ipath_intr() at interrupt level when a PIO buffer is
558 * available after ipath_verbs_send() returned an error that no buffers were
559 * available. Return 0 if we consumed all the PIO buffers and we still have
560 * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
561 * return one).
562 */
563static int ipath_ib_piobufavail(void *arg)
564{
565 struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
566 struct ipath_qp *qp;
567 unsigned long flags;
568
569 if (dev == NULL)
570 goto bail;
571
572 spin_lock_irqsave(&dev->pending_lock, flags);
573 while (!list_empty(&dev->piowait)) {
574 qp = list_entry(dev->piowait.next, struct ipath_qp,
575 piowait);
576 list_del(&qp->piowait);
577 tasklet_hi_schedule(&qp->s_task);
578 }
579 spin_unlock_irqrestore(&dev->pending_lock, flags);
580
581bail:
582 return 1;
583}
584
585static int ipath_query_device(struct ib_device *ibdev,
586 struct ib_device_attr *props)
587{
588 struct ipath_ibdev *dev = to_idev(ibdev);
589 u32 vendor, boardrev, majrev, minrev;
590
591 memset(props, 0, sizeof(*props));
592
593 props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
594 IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
595 IB_DEVICE_SYS_IMAGE_GUID;
596 ipath_layer_query_device(dev->dd, &vendor, &boardrev,
597 &majrev, &minrev);
598 props->vendor_id = vendor;
599 props->vendor_part_id = boardrev;
600 props->hw_ver = boardrev << 16 | majrev << 8 | minrev;
601
602 props->sys_image_guid = dev->sys_image_guid;
603
604 props->max_mr_size = ~0ull;
605 props->max_qp = 0xffff;
606 props->max_qp_wr = 0xffff;
607 props->max_sge = 255;
608 props->max_cq = 0xffff;
609 props->max_cqe = 0xffff;
610 props->max_mr = 0xffff;
611 props->max_pd = 0xffff;
612 props->max_qp_rd_atom = 1;
613 props->max_qp_init_rd_atom = 1;
614 /* props->max_res_rd_atom */
615 props->max_srq = 0xffff;
616 props->max_srq_wr = 0xffff;
617 props->max_srq_sge = 255;
618 /* props->local_ca_ack_delay */
619 props->atomic_cap = IB_ATOMIC_HCA;
620 props->max_pkeys = ipath_layer_get_npkeys(dev->dd);
621 props->max_mcast_grp = 0xffff;
622 props->max_mcast_qp_attach = 0xffff;
623 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
624 props->max_mcast_grp;
625
626 return 0;
627}
628
629const u8 ipath_cvt_physportstate[16] = {
630 [INFINIPATH_IBCS_LT_STATE_DISABLED] = 3,
631 [INFINIPATH_IBCS_LT_STATE_LINKUP] = 5,
632 [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = 2,
633 [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = 2,
634 [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = 1,
635 [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = 1,
636 [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] = 4,
637 [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] = 4,
638 [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] = 4,
639 [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = 4,
640 [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] = 6,
641 [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] = 6,
642 [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6,
643};
644
645static int ipath_query_port(struct ib_device *ibdev,
646 u8 port, struct ib_port_attr *props)
647{
648 struct ipath_ibdev *dev = to_idev(ibdev);
649 enum ib_mtu mtu;
650 u16 lid = ipath_layer_get_lid(dev->dd);
651 u64 ibcstat;
652
653 memset(props, 0, sizeof(*props));
654 props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE);
655 props->lmc = dev->mkeyprot_resv_lmc & 7;
656 props->sm_lid = dev->sm_lid;
657 props->sm_sl = dev->sm_sl;
658 ibcstat = ipath_layer_get_lastibcstat(dev->dd);
659 props->state = ((ibcstat >> 4) & 0x3) + 1;
660 /* See phys_state_show() */
661 props->phys_state = ipath_cvt_physportstate[
662 ipath_layer_get_lastibcstat(dev->dd) & 0xf];
663 props->port_cap_flags = dev->port_cap_flags;
664 props->gid_tbl_len = 1;
665 props->max_msg_sz = 4096;
666 props->pkey_tbl_len = ipath_layer_get_npkeys(dev->dd);
667 props->bad_pkey_cntr = ipath_layer_get_cr_errpkey(dev->dd) -
668 dev->n_pkey_violations;
669 props->qkey_viol_cntr = dev->qkey_violations;
670 props->active_width = IB_WIDTH_4X;
671 /* See rate_show() */
672 props->active_speed = 1; /* Regular 10Mbs speed. */
673 props->max_vl_num = 1; /* VLCap = VL0 */
674 props->init_type_reply = 0;
675
676 props->max_mtu = IB_MTU_4096;
677 switch (ipath_layer_get_ibmtu(dev->dd)) {
678 case 4096:
679 mtu = IB_MTU_4096;
680 break;
681 case 2048:
682 mtu = IB_MTU_2048;
683 break;
684 case 1024:
685 mtu = IB_MTU_1024;
686 break;
687 case 512:
688 mtu = IB_MTU_512;
689 break;
690 case 256:
691 mtu = IB_MTU_256;
692 break;
693 default:
694 mtu = IB_MTU_2048;
695 }
696 props->active_mtu = mtu;
697 props->subnet_timeout = dev->subnet_timeout;
698
699 return 0;
700}
701
702static int ipath_modify_device(struct ib_device *device,
703 int device_modify_mask,
704 struct ib_device_modify *device_modify)
705{
706 int ret;
707
708 if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
709 IB_DEVICE_MODIFY_NODE_DESC)) {
710 ret = -EOPNOTSUPP;
711 goto bail;
712 }
713
714 if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
715 memcpy(device->node_desc, device_modify->node_desc, 64);
716
717 if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
718 to_idev(device)->sys_image_guid =
719 cpu_to_be64(device_modify->sys_image_guid);
720
721 ret = 0;
722
723bail:
724 return ret;
725}
726
727static int ipath_modify_port(struct ib_device *ibdev,
728 u8 port, int port_modify_mask,
729 struct ib_port_modify *props)
730{
731 struct ipath_ibdev *dev = to_idev(ibdev);
732
733 dev->port_cap_flags |= props->set_port_cap_mask;
734 dev->port_cap_flags &= ~props->clr_port_cap_mask;
735 if (port_modify_mask & IB_PORT_SHUTDOWN)
736 ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
737 if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
738 dev->qkey_violations = 0;
739 return 0;
740}
741
742static int ipath_query_gid(struct ib_device *ibdev, u8 port,
743 int index, union ib_gid *gid)
744{
745 struct ipath_ibdev *dev = to_idev(ibdev);
746 int ret;
747
748 if (index >= 1) {
749 ret = -EINVAL;
750 goto bail;
751 }
752 gid->global.subnet_prefix = dev->gid_prefix;
753 gid->global.interface_id = ipath_layer_get_guid(dev->dd);
754
755 ret = 0;
756
757bail:
758 return ret;
759}
760
761static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
762 struct ib_ucontext *context,
763 struct ib_udata *udata)
764{
765 struct ipath_pd *pd;
766 struct ib_pd *ret;
767
768 pd = kmalloc(sizeof *pd, GFP_KERNEL);
769 if (!pd) {
770 ret = ERR_PTR(-ENOMEM);
771 goto bail;
772 }
773
774 /* ib_alloc_pd() will initialize pd->ibpd. */
775 pd->user = udata != NULL;
776
777 ret = &pd->ibpd;
778
779bail:
780 return ret;
781}
782
/**
 * ipath_dealloc_pd - free a protection domain
 * @ibpd: the protection domain to free
 *
 * Always returns 0.
 */
static int ipath_dealloc_pd(struct ib_pd *ibpd)
{
	kfree(to_ipd(ibpd));
	return 0;
}
791
792/**
793 * ipath_create_ah - create an address handle
794 * @pd: the protection domain
795 * @ah_attr: the attributes of the AH
796 *
797 * This may be called from interrupt context.
798 */
799static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
800 struct ib_ah_attr *ah_attr)
801{
802 struct ipath_ah *ah;
803 struct ib_ah *ret;
804
805 /* A multicast address requires a GRH (see ch. 8.4.1). */
806 if (ah_attr->dlid >= IPS_MULTICAST_LID_BASE &&
807 ah_attr->dlid != IPS_PERMISSIVE_LID &&
808 !(ah_attr->ah_flags & IB_AH_GRH)) {
809 ret = ERR_PTR(-EINVAL);
810 goto bail;
811 }
812
813 ah = kmalloc(sizeof *ah, GFP_ATOMIC);
814 if (!ah) {
815 ret = ERR_PTR(-ENOMEM);
816 goto bail;
817 }
818
819 /* ib_create_ah() will initialize ah->ibah. */
820 ah->attr = *ah_attr;
821
822 ret = &ah->ibah;
823
824bail:
825 return ret;
826}
827
/**
 * ipath_destroy_ah - destroy an address handle
 * @ibah: the AH to destroy
 *
 * Always returns 0.
 *
 * This may be called from interrupt context.
 */
static int ipath_destroy_ah(struct ib_ah *ibah)
{
	kfree(to_iah(ibah));
	return 0;
}
842
843static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
844{
845 struct ipath_ah *ah = to_iah(ibah);
846
847 *ah_attr = ah->attr;
848
849 return 0;
850}
851
852static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
853 u16 *pkey)
854{
855 struct ipath_ibdev *dev = to_idev(ibdev);
856 int ret;
857
858 if (index >= ipath_layer_get_npkeys(dev->dd)) {
859 ret = -EINVAL;
860 goto bail;
861 }
862
863 *pkey = ipath_layer_get_pkey(dev->dd, index);
864 ret = 0;
865
866bail:
867 return ret;
868}
869
870
871/**
872 * ipath_alloc_ucontext - allocate a ucontest
873 * @ibdev: the infiniband device
874 * @udata: not used by the InfiniPath driver
875 */
876
877static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
878 struct ib_udata *udata)
879{
880 struct ipath_ucontext *context;
881 struct ib_ucontext *ret;
882
883 context = kmalloc(sizeof *context, GFP_KERNEL);
884 if (!context) {
885 ret = ERR_PTR(-ENOMEM);
886 goto bail;
887 }
888
889 ret = &context->ibucontext;
890
891bail:
892 return ret;
893}
894
/**
 * ipath_dealloc_ucontext - free a ucontext
 * @context: the context to free
 *
 * Always returns 0.
 */
static int ipath_dealloc_ucontext(struct ib_ucontext *context)
{
	struct ipath_ucontext *uctx = to_iucontext(context);

	kfree(uctx);

	return 0;
}
900
901static int ipath_verbs_register_sysfs(struct ib_device *dev);
902
903/**
904 * ipath_register_ib_device - register our device with the infiniband core
905 * @unit: the device number to register
906 * @dd: the device data structure
907 * Return the allocated ipath_ibdev pointer or NULL on error.
908 */
909static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
910{
911 struct ipath_ibdev *idev;
912 struct ib_device *dev;
913 int ret;
914
915 idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
916 if (idev == NULL)
917 goto bail;
918
919 dev = &idev->ibdev;
920
921 /* Only need to initialize non-zero fields. */
922 spin_lock_init(&idev->qp_table.lock);
923 spin_lock_init(&idev->lk_table.lock);
924 idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
925 /* Set the prefix to the default value (see ch. 4.1.1) */
926 idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL);
927
928 ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
929 if (ret)
930 goto err_qp;
931
932 /*
933 * The top ib_ipath_lkey_table_size bits are used to index the
934 * table. The lower 8 bits can be owned by the user (copied from
935 * the LKEY). The remaining bits act as a generation number or tag.
936 */
937 idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
938 idev->lk_table.table = kzalloc(idev->lk_table.max *
939 sizeof(*idev->lk_table.table),
940 GFP_KERNEL);
941 if (idev->lk_table.table == NULL) {
942 ret = -ENOMEM;
943 goto err_lk;
944 }
945 spin_lock_init(&idev->pending_lock);
946 INIT_LIST_HEAD(&idev->pending[0]);
947 INIT_LIST_HEAD(&idev->pending[1]);
948 INIT_LIST_HEAD(&idev->pending[2]);
949 INIT_LIST_HEAD(&idev->piowait);
950 INIT_LIST_HEAD(&idev->rnrwait);
951 idev->pending_index = 0;
952 idev->port_cap_flags =
953 IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
954 idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
955 idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
956 idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
957 idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
958 idev->pma_counter_select[5] = IB_PMA_PORT_XMIT_WAIT;
959 idev->link_width_enabled = 3; /* 1x or 4x */
960
961 /*
962 * The system image GUID is supposed to be the same for all
963 * IB HCAs in a single system but since there can be other
964 * device types in the system, we can't be sure this is unique.
965 */
966 if (!sys_image_guid)
967 sys_image_guid = ipath_layer_get_guid(dd);
968 idev->sys_image_guid = sys_image_guid;
969 idev->ib_unit = unit;
970 idev->dd = dd;
971
972 strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
973 dev->node_guid = ipath_layer_get_guid(dd);
974 dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
975 dev->uverbs_cmd_mask =
976 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
977 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
978 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
979 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
980 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
981 (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
982 (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
983 (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
984 (1ull << IB_USER_VERBS_CMD_REG_MR) |
985 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
986 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
987 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
988 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
989 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
990 (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
991 (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
992 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
993 (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
994 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
995 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
996 (1ull << IB_USER_VERBS_CMD_POST_SEND) |
997 (1ull << IB_USER_VERBS_CMD_POST_RECV) |
998 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
999 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
1000 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
1001 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
1002 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
1003 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
1004 (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
1005 dev->node_type = IB_NODE_CA;
1006 dev->phys_port_cnt = 1;
1007 dev->dma_device = ipath_layer_get_device(dd);
1008 dev->class_dev.dev = dev->dma_device;
1009 dev->query_device = ipath_query_device;
1010 dev->modify_device = ipath_modify_device;
1011 dev->query_port = ipath_query_port;
1012 dev->modify_port = ipath_modify_port;
1013 dev->query_pkey = ipath_query_pkey;
1014 dev->query_gid = ipath_query_gid;
1015 dev->alloc_ucontext = ipath_alloc_ucontext;
1016 dev->dealloc_ucontext = ipath_dealloc_ucontext;
1017 dev->alloc_pd = ipath_alloc_pd;
1018 dev->dealloc_pd = ipath_dealloc_pd;
1019 dev->create_ah = ipath_create_ah;
1020 dev->destroy_ah = ipath_destroy_ah;
1021 dev->query_ah = ipath_query_ah;
1022 dev->create_srq = ipath_create_srq;
1023 dev->modify_srq = ipath_modify_srq;
1024 dev->query_srq = ipath_query_srq;
1025 dev->destroy_srq = ipath_destroy_srq;
1026 dev->create_qp = ipath_create_qp;
1027 dev->modify_qp = ipath_modify_qp;
1028 dev->query_qp = ipath_query_qp;
1029 dev->destroy_qp = ipath_destroy_qp;
1030 dev->post_send = ipath_post_send;
1031 dev->post_recv = ipath_post_receive;
1032 dev->post_srq_recv = ipath_post_srq_receive;
1033 dev->create_cq = ipath_create_cq;
1034 dev->destroy_cq = ipath_destroy_cq;
1035 dev->resize_cq = ipath_resize_cq;
1036 dev->poll_cq = ipath_poll_cq;
1037 dev->req_notify_cq = ipath_req_notify_cq;
1038 dev->get_dma_mr = ipath_get_dma_mr;
1039 dev->reg_phys_mr = ipath_reg_phys_mr;
1040 dev->reg_user_mr = ipath_reg_user_mr;
1041 dev->dereg_mr = ipath_dereg_mr;
1042 dev->alloc_fmr = ipath_alloc_fmr;
1043 dev->map_phys_fmr = ipath_map_phys_fmr;
1044 dev->unmap_fmr = ipath_unmap_fmr;
1045 dev->dealloc_fmr = ipath_dealloc_fmr;
1046 dev->attach_mcast = ipath_multicast_attach;
1047 dev->detach_mcast = ipath_multicast_detach;
1048 dev->process_mad = ipath_process_mad;
1049
1050 snprintf(dev->node_desc, sizeof(dev->node_desc),
1051 IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename);
1052
1053 ret = ib_register_device(dev);
1054 if (ret)
1055 goto err_reg;
1056
1057 if (ipath_verbs_register_sysfs(dev))
1058 goto err_class;
1059
1060 ipath_layer_enable_timer(dd);
1061
1062 goto bail;
1063
1064err_class:
1065 ib_unregister_device(dev);
1066err_reg:
1067 kfree(idev->lk_table.table);
1068err_lk:
1069 kfree(idev->qp_table.table);
1070err_qp:
1071 ib_dealloc_device(dev);
1072 _VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n",
1073 unit, -ret);
1074 idev = NULL;
1075
1076bail:
1077 return idev;
1078}
1079
1080static void ipath_unregister_ib_device(void *arg)
1081{
1082 struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
1083 struct ib_device *ibdev = &dev->ibdev;
1084
1085 ipath_layer_disable_timer(dev->dd);
1086
1087 ib_unregister_device(ibdev);
1088
1089 if (!list_empty(&dev->pending[0]) ||
1090 !list_empty(&dev->pending[1]) ||
1091 !list_empty(&dev->pending[2]))
1092 _VERBS_ERROR("ipath%d pending list not empty!\n",
1093 dev->ib_unit);
1094 if (!list_empty(&dev->piowait))
1095 _VERBS_ERROR("ipath%d piowait list not empty!\n",
1096 dev->ib_unit);
1097 if (!list_empty(&dev->rnrwait))
1098 _VERBS_ERROR("ipath%d rnrwait list not empty!\n",
1099 dev->ib_unit);
1100 if (!ipath_mcast_tree_empty())
1101 _VERBS_ERROR("ipath%d multicast table memory leak!\n",
1102 dev->ib_unit);
1103 /*
1104 * Note that ipath_unregister_ib_device() can be called before all
1105 * the QPs are destroyed!
1106 */
1107 ipath_free_all_qps(&dev->qp_table);
1108 kfree(dev->qp_table.table);
1109 kfree(dev->lk_table.table);
1110 ib_dealloc_device(ibdev);
1111}
1112
1113int __init ipath_verbs_init(void)
1114{
1115 return ipath_verbs_register(ipath_register_ib_device,
1116 ipath_unregister_ib_device,
1117 ipath_ib_piobufavail, ipath_ib_rcv,
1118 ipath_ib_timer);
1119}
1120
1121void __exit ipath_verbs_cleanup(void)
1122{
1123 ipath_verbs_unregister();
1124}
1125
1126static ssize_t show_rev(struct class_device *cdev, char *buf)
1127{
1128 struct ipath_ibdev *dev =
1129 container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
1130 int vendor, boardrev, majrev, minrev;
1131
1132 ipath_layer_query_device(dev->dd, &vendor, &boardrev,
1133 &majrev, &minrev);
1134 return sprintf(buf, "%d.%d\n", majrev, minrev);
1135}
1136
1137static ssize_t show_hca(struct class_device *cdev, char *buf)
1138{
1139 struct ipath_ibdev *dev =
1140 container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
1141 int ret;
1142
1143 ret = ipath_layer_get_boardname(dev->dd, buf, 128);
1144 if (ret < 0)
1145 goto bail;
1146 strcat(buf, "\n");
1147 ret = strlen(buf);
1148
1149bail:
1150 return ret;
1151}
1152
1153static ssize_t show_stats(struct class_device *cdev, char *buf)
1154{
1155 struct ipath_ibdev *dev =
1156 container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
1157 int i;
1158 int len;
1159
1160 len = sprintf(buf,
1161 "RC resends %d\n"
1162 "RC QACKs %d\n"
1163 "RC ACKs %d\n"
1164 "RC SEQ NAKs %d\n"
1165 "RC RDMA seq %d\n"
1166 "RC RNR NAKs %d\n"
1167 "RC OTH NAKs %d\n"
1168 "RC timeouts %d\n"
1169 "RC RDMA dup %d\n"
1170 "piobuf wait %d\n"
1171 "no piobuf %d\n"
1172 "PKT drops %d\n"
1173 "WQE errs %d\n",
1174 dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
1175 dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
1176 dev->n_other_naks, dev->n_timeouts,
1177 dev->n_rdma_dup_busy, dev->n_piowait,
1178 dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs);
1179 for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
1180 const struct ipath_opcode_stats *si = &dev->opstats[i];
1181
1182 if (!si->n_packets && !si->n_bytes)
1183 continue;
1184 len += sprintf(buf + len, "%02x %llu/%llu\n", i,
1185 (unsigned long long) si->n_packets,
1186 (unsigned long long) si->n_bytes);
1187 }
1188 return len;
1189}
1190
/*
 * Read-only (S_IRUGO, no store handler) class device attributes.
 * "hca_type" and "board_id" share the show_hca() handler and therefore
 * report the same string.
 */
static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
static CLASS_DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);

/* Table walked by ipath_verbs_register_sysfs() to create each file. */
static struct class_device_attribute *ipath_class_attributes[] = {
	&class_device_attr_hw_rev,
	&class_device_attr_hca_type,
	&class_device_attr_board_id,
	&class_device_attr_stats
};
1202
1203static int ipath_verbs_register_sysfs(struct ib_device *dev)
1204{
1205 int i;
1206 int ret;
1207
1208 for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
1209 if (class_device_create_file(&dev->class_dev,
1210 ipath_class_attributes[i])) {
1211 ret = 1;
1212 goto bail;
1213 }
1214
1215 ret = 0;
1216
1217bail:
1218 return ret;
1219}
1220
/* Module load/unload hooks for the verbs layer. */
module_init(ipath_verbs_init);
module_exit(ipath_verbs_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
new file mode 100644
index 000000000000..10b31d2c4f20
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -0,0 +1,333 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/list.h>
34#include <linux/rcupdate.h>
35
36#include "ipath_verbs.h"
37
/*
 * Global table of GID to attached QPs.
 * The table is global to all ipath devices since a send from one QP/device
 * needs to be locally routed to any locally attached QPs on the same
 * or different device.
 *
 * The table is a red-black tree of struct ipath_mcast ordered by a
 * memcmp() of the raw GID.  Lookups, insertions and removals in this file
 * are done with mcast_lock held.
 */
static struct rb_root mcast_tree;
static DEFINE_SPINLOCK(mcast_lock);
46
47/**
48 * ipath_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
49 * @qp: the QP to link
50 */
51static struct ipath_mcast_qp *ipath_mcast_qp_alloc(struct ipath_qp *qp)
52{
53 struct ipath_mcast_qp *mqp;
54
55 mqp = kmalloc(sizeof *mqp, GFP_KERNEL);
56 if (!mqp)
57 goto bail;
58
59 mqp->qp = qp;
60 atomic_inc(&qp->refcount);
61
62bail:
63 return mqp;
64}
65
66static void ipath_mcast_qp_free(struct ipath_mcast_qp *mqp)
67{
68 struct ipath_qp *qp = mqp->qp;
69
70 /* Notify ipath_destroy_qp() if it is waiting. */
71 if (atomic_dec_and_test(&qp->refcount))
72 wake_up(&qp->wait);
73
74 kfree(mqp);
75}
76
77/**
78 * ipath_mcast_alloc - allocate the multicast GID structure
79 * @mgid: the multicast GID
80 *
81 * A list of QPs will be attached to this structure.
82 */
83static struct ipath_mcast *ipath_mcast_alloc(union ib_gid *mgid)
84{
85 struct ipath_mcast *mcast;
86
87 mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
88 if (!mcast)
89 goto bail;
90
91 mcast->mgid = *mgid;
92 INIT_LIST_HEAD(&mcast->qp_list);
93 init_waitqueue_head(&mcast->wait);
94 atomic_set(&mcast->refcount, 0);
95
96bail:
97 return mcast;
98}
99
100static void ipath_mcast_free(struct ipath_mcast *mcast)
101{
102 struct ipath_mcast_qp *p, *tmp;
103
104 list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
105 ipath_mcast_qp_free(p);
106
107 kfree(mcast);
108}
109
110/**
111 * ipath_mcast_find - search the global table for the given multicast GID
112 * @mgid: the multicast GID to search for
113 *
114 * Returns NULL if not found.
115 *
116 * The caller is responsible for decrementing the reference count if found.
117 */
118struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid)
119{
120 struct rb_node *n;
121 unsigned long flags;
122 struct ipath_mcast *mcast;
123
124 spin_lock_irqsave(&mcast_lock, flags);
125 n = mcast_tree.rb_node;
126 while (n) {
127 int ret;
128
129 mcast = rb_entry(n, struct ipath_mcast, rb_node);
130
131 ret = memcmp(mgid->raw, mcast->mgid.raw,
132 sizeof(union ib_gid));
133 if (ret < 0)
134 n = n->rb_left;
135 else if (ret > 0)
136 n = n->rb_right;
137 else {
138 atomic_inc(&mcast->refcount);
139 spin_unlock_irqrestore(&mcast_lock, flags);
140 goto bail;
141 }
142 }
143 spin_unlock_irqrestore(&mcast_lock, flags);
144
145 mcast = NULL;
146
147bail:
148 return mcast;
149}
150
151/**
152 * ipath_mcast_add - insert mcast GID into table and attach QP struct
153 * @mcast: the mcast GID table
154 * @mqp: the QP to attach
155 *
156 * Return zero if both were added. Return EEXIST if the GID was already in
157 * the table but the QP was added. Return ESRCH if the QP was already
158 * attached and neither structure was added.
159 */
160static int ipath_mcast_add(struct ipath_mcast *mcast,
161 struct ipath_mcast_qp *mqp)
162{
163 struct rb_node **n = &mcast_tree.rb_node;
164 struct rb_node *pn = NULL;
165 unsigned long flags;
166 int ret;
167
168 spin_lock_irqsave(&mcast_lock, flags);
169
170 while (*n) {
171 struct ipath_mcast *tmcast;
172 struct ipath_mcast_qp *p;
173
174 pn = *n;
175 tmcast = rb_entry(pn, struct ipath_mcast, rb_node);
176
177 ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
178 sizeof(union ib_gid));
179 if (ret < 0) {
180 n = &pn->rb_left;
181 continue;
182 }
183 if (ret > 0) {
184 n = &pn->rb_right;
185 continue;
186 }
187
188 /* Search the QP list to see if this is already there. */
189 list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
190 if (p->qp == mqp->qp) {
191 spin_unlock_irqrestore(&mcast_lock, flags);
192 ret = ESRCH;
193 goto bail;
194 }
195 }
196 list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
197 spin_unlock_irqrestore(&mcast_lock, flags);
198 ret = EEXIST;
199 goto bail;
200 }
201
202 list_add_tail_rcu(&mqp->list, &mcast->qp_list);
203
204 atomic_inc(&mcast->refcount);
205 rb_link_node(&mcast->rb_node, pn, n);
206 rb_insert_color(&mcast->rb_node, &mcast_tree);
207
208 spin_unlock_irqrestore(&mcast_lock, flags);
209
210 ret = 0;
211
212bail:
213 return ret;
214}
215
216int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
217{
218 struct ipath_qp *qp = to_iqp(ibqp);
219 struct ipath_mcast *mcast;
220 struct ipath_mcast_qp *mqp;
221 int ret;
222
223 /*
224 * Allocate data structures since its better to do this outside of
225 * spin locks and it will most likely be needed.
226 */
227 mcast = ipath_mcast_alloc(gid);
228 if (mcast == NULL) {
229 ret = -ENOMEM;
230 goto bail;
231 }
232 mqp = ipath_mcast_qp_alloc(qp);
233 if (mqp == NULL) {
234 ipath_mcast_free(mcast);
235 ret = -ENOMEM;
236 goto bail;
237 }
238 switch (ipath_mcast_add(mcast, mqp)) {
239 case ESRCH:
240 /* Neither was used: can't attach the same QP twice. */
241 ipath_mcast_qp_free(mqp);
242 ipath_mcast_free(mcast);
243 ret = -EINVAL;
244 goto bail;
245 case EEXIST: /* The mcast wasn't used */
246 ipath_mcast_free(mcast);
247 break;
248 default:
249 break;
250 }
251
252 ret = 0;
253
254bail:
255 return ret;
256}
257
258int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
259{
260 struct ipath_qp *qp = to_iqp(ibqp);
261 struct ipath_mcast *mcast = NULL;
262 struct ipath_mcast_qp *p, *tmp;
263 struct rb_node *n;
264 unsigned long flags;
265 int last = 0;
266 int ret;
267
268 spin_lock_irqsave(&mcast_lock, flags);
269
270 /* Find the GID in the mcast table. */
271 n = mcast_tree.rb_node;
272 while (1) {
273 if (n == NULL) {
274 spin_unlock_irqrestore(&mcast_lock, flags);
275 ret = 0;
276 goto bail;
277 }
278
279 mcast = rb_entry(n, struct ipath_mcast, rb_node);
280 ret = memcmp(gid->raw, mcast->mgid.raw,
281 sizeof(union ib_gid));
282 if (ret < 0)
283 n = n->rb_left;
284 else if (ret > 0)
285 n = n->rb_right;
286 else
287 break;
288 }
289
290 /* Search the QP list. */
291 list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
292 if (p->qp != qp)
293 continue;
294 /*
295 * We found it, so remove it, but don't poison the forward
296 * link until we are sure there are no list walkers.
297 */
298 list_del_rcu(&p->list);
299
300 /* If this was the last attached QP, remove the GID too. */
301 if (list_empty(&mcast->qp_list)) {
302 rb_erase(&mcast->rb_node, &mcast_tree);
303 last = 1;
304 }
305 break;
306 }
307
308 spin_unlock_irqrestore(&mcast_lock, flags);
309
310 if (p) {
311 /*
312 * Wait for any list walkers to finish before freeing the
313 * list element.
314 */
315 wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
316 ipath_mcast_qp_free(p);
317 }
318 if (last) {
319 atomic_dec(&mcast->refcount);
320 wait_event(mcast->wait, !atomic_read(&mcast->refcount));
321 ipath_mcast_free(mcast);
322 }
323
324 ret = 0;
325
326bail:
327 return ret;
328}
329
330int ipath_mcast_tree_empty(void)
331{
332 return mcast_tree.rb_node == NULL;
333}