aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
author: Bryan O'Sullivan <bos@pathscale.com> 2006-03-29 18:23:37 -0500
committer: Roland Dreier <rolandd@cisco.com> 2006-03-31 16:14:21 -0500
commit: e28c00ad67164dba688c1d19c208c5fb554465f2 (patch)
tree: b127fbb9799cebf2c1fe2fa08ad5a2e567033d84 /drivers/infiniband
parent: cef1cce5c87d84f76e44f0e7b4de72ab3818ac3a (diff)
IB/ipath: misc infiniband code, part 2
Management datagram support, queue pairs, and reliable and unreliable connections.

Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mad.c1352
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c913
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c552
3 files changed, 2817 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
new file mode 100644
index 000000000000..f7f8391fe43f
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -0,0 +1,1352 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_smi.h>
34
35#include "ipath_kernel.h"
36#include "ipath_verbs.h"
37#include "ips_common.h"
38
/*
 * Values for the status field of a management datagram, already swapped
 * to network byte order by __constant_htons() so they can be OR-ed
 * straight into smp->status / pmp->status.
 * NOTE(review): the names suggest the standard MAD status codes for
 * unsupported version / method / method-attribute combination and
 * invalid field -- confirm against the IBA spec.
 */
39#define IB_SMP_UNSUP_VERSION __constant_htons(0x0004)
40#define IB_SMP_UNSUP_METHOD __constant_htons(0x0008)
41#define IB_SMP_UNSUP_METH_ATTR __constant_htons(0x000C)
42#define IB_SMP_INVALID_FIELD __constant_htons(0x001C)
43
44static int reply(struct ib_smp *smp)
45{
46 /*
47 * The verbs framework will handle the directed/LID route
48 * packet changes.
49 */
50 smp->method = IB_MGMT_METHOD_GET_RESP;
51 if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
52 smp->status |= IB_SMP_DIRECTION;
53 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
54}
55
56static int recv_subn_get_nodedescription(struct ib_smp *smp,
57 struct ib_device *ibdev)
58{
59 if (smp->attr_mod)
60 smp->status |= IB_SMP_INVALID_FIELD;
61
62 strncpy(smp->data, ibdev->node_desc, sizeof(smp->data));
63
64 return reply(smp);
65}
66
/*
 * Layout of the NodeInfo attribute as written by recv_subn_get_nodeinfo(),
 * which overlays this struct on smp->data.  The __be16/__be32/__be64
 * members hold big-endian values; the struct is packed so the compiler
 * inserts no padding between members.
 */
67struct nodeinfo {
68 u8 base_version;
69 u8 class_version;
70 u8 node_type; /* recv_subn_get_nodeinfo() reports 1 (channel adapter) */
71 u8 num_ports;
72 __be64 sys_guid;
73 __be64 node_guid;
74 __be64 port_guid;
75 __be16 partition_cap; /* filled from ipath_layer_get_npkeys() */
76 __be16 device_id;
77 __be32 revision; /* (major << 16) | minor */
78 u8 local_port_num;
79 u8 vendor_id[3]; /* most significant byte first */
80} __attribute__ ((packed));
81
82static int recv_subn_get_nodeinfo(struct ib_smp *smp,
83 struct ib_device *ibdev, u8 port)
84{
85 struct nodeinfo *nip = (struct nodeinfo *)&smp->data;
86 struct ipath_devdata *dd = to_idev(ibdev)->dd;
87 u32 vendor, boardid, majrev, minrev;
88
89 if (smp->attr_mod)
90 smp->status |= IB_SMP_INVALID_FIELD;
91
92 nip->base_version = 1;
93 nip->class_version = 1;
94 nip->node_type = 1; /* channel adapter */
95 /*
96 * XXX The num_ports value will need a layer function to get
97 * the value if we ever have more than one IB port on a chip.
98 * We will also need to get the GUID for the port.
99 */
100 nip->num_ports = ibdev->phys_port_cnt;
101 /* This is already in network order */
102 nip->sys_guid = to_idev(ibdev)->sys_image_guid;
103 nip->node_guid = ipath_layer_get_guid(dd);
104 nip->port_guid = nip->sys_guid;
105 nip->partition_cap = cpu_to_be16(ipath_layer_get_npkeys(dd));
106 nip->device_id = cpu_to_be16(ipath_layer_get_deviceid(dd));
107 ipath_layer_query_device(dd, &vendor, &boardid, &majrev, &minrev);
108 nip->revision = cpu_to_be32((majrev << 16) | minrev);
109 nip->local_port_num = port;
110 nip->vendor_id[0] = 0;
111 nip->vendor_id[1] = vendor >> 8;
112 nip->vendor_id[2] = vendor;
113
114 return reply(smp);
115}
116
117static int recv_subn_get_guidinfo(struct ib_smp *smp,
118 struct ib_device *ibdev)
119{
120 u32 startgx = 8 * be32_to_cpu(smp->attr_mod);
121 __be64 *p = (__be64 *) smp->data;
122
123 /* 32 blocks of 8 64-bit GUIDs per block */
124
125 memset(smp->data, 0, sizeof(smp->data));
126
127 /*
128 * We only support one GUID for now. If this changes, the
129 * portinfo.guid_cap field needs to be updated too.
130 */
131 if (startgx == 0)
132 /* The first is a copy of the read-only HW GUID. */
133 *p = ipath_layer_get_guid(to_idev(ibdev)->dd);
134 else
135 smp->status |= IB_SMP_INVALID_FIELD;
136
137 return reply(smp);
138}
139
/*
 * Layout of the PortInfo attribute, overlaid on smp->data by
 * recv_subn_get_portinfo() and recv_subn_set_portinfo().  Several
 * members pack more than one sub-field into a byte; the trailing
 * comments give the bit widths from most to least significant.  The
 * __be16/__be32/__be64 members hold big-endian values and the struct is
 * packed, so no padding is inserted.
 */
140struct port_info {
141 __be64 mkey;
142 __be64 gid_prefix;
143 __be16 lid;
144 __be16 sm_lid;
145 __be32 cap_mask;
146 __be16 diag_code;
147 __be16 mkey_lease_period;
148 u8 local_port_num;
149 u8 link_width_enabled;
150 u8 link_width_supported;
151 u8 link_width_active;
152 u8 linkspeed_portstate; /* 4 bits, 4 bits */
153 u8 portphysstate_linkdown; /* 4 bits, 4 bits */
154 u8 mkeyprot_resv_lmc; /* 2 bits, 3, 3 */
155 u8 linkspeedactive_enabled; /* 4 bits, 4 bits */
156 u8 neighbormtu_mastersmsl; /* 4 bits, 4 bits */
157 u8 vlcap_inittype; /* 4 bits, 4 bits */
158 u8 vl_high_limit;
159 u8 vl_arb_high_cap;
160 u8 vl_arb_low_cap;
161 u8 inittypereply_mtucap; /* 4 bits, 4 bits */
162 u8 vlstallcnt_hoqlife; /* 3 bits, 5 bits */
163 u8 operationalvl_pei_peo_fpi_fpo; /* 4 bits, 1, 1, 1, 1 */
164 __be16 mkey_violations;
165 __be16 pkey_violations;
166 __be16 qkey_violations;
167 u8 guid_cap;
168 u8 clientrereg_resv_subnetto; /* 1 bit, 2 bits, 5 */
169 u8 resv_resptimevalue; /* 3 bits, 5 bits */
170 u8 localphyerrors_overrunerrors; /* 4 bits, 4 bits */
171 __be16 max_credit_hint;
172 u8 resv;
173 u8 link_roundtrip_latency[3];
174} __attribute__ ((packed));
175
176static int recv_subn_get_portinfo(struct ib_smp *smp,
177 struct ib_device *ibdev, u8 port)
178{
179 struct ipath_ibdev *dev;
180 struct port_info *pip = (struct port_info *)smp->data;
181 u16 lid;
182 u8 ibcstat;
183 u8 mtu;
184 int ret;
185
186 if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) {
187 smp->status |= IB_SMP_INVALID_FIELD;
188 ret = reply(smp);
189 goto bail;
190 }
191
192 dev = to_idev(ibdev);
193
194 /* Clear all fields. Only set the non-zero fields. */
195 memset(smp->data, 0, sizeof(smp->data));
196
197 /* Only return the mkey if the protection field allows it. */
198 if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey ||
199 (dev->mkeyprot_resv_lmc >> 6) == 0)
200 pip->mkey = dev->mkey;
201 pip->gid_prefix = dev->gid_prefix;
202 lid = ipath_layer_get_lid(dev->dd);
203 pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
204 pip->sm_lid = cpu_to_be16(dev->sm_lid);
205 pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
206 /* pip->diag_code; */
207 pip->mkey_lease_period = cpu_to_be16(dev->mkey_lease_period);
208 pip->local_port_num = port;
209 pip->link_width_enabled = dev->link_width_enabled;
210 pip->link_width_supported = 3; /* 1x or 4x */
211 pip->link_width_active = 2; /* 4x */
212 pip->linkspeed_portstate = 0x10; /* 2.5Gbps */
213 ibcstat = ipath_layer_get_lastibcstat(dev->dd);
214 pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1;
215 pip->portphysstate_linkdown =
216 (ipath_cvt_physportstate[ibcstat & 0xf] << 4) |
217 (ipath_layer_get_linkdowndefaultstate(dev->dd) ? 1 : 2);
218 pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc;
219 pip->linkspeedactive_enabled = 0x11; /* 2.5Gbps, 2.5Gbps */
220 switch (ipath_layer_get_ibmtu(dev->dd)) {
221 case 4096:
222 mtu = IB_MTU_4096;
223 break;
224 case 2048:
225 mtu = IB_MTU_2048;
226 break;
227 case 1024:
228 mtu = IB_MTU_1024;
229 break;
230 case 512:
231 mtu = IB_MTU_512;
232 break;
233 case 256:
234 mtu = IB_MTU_256;
235 break;
236 default: /* oops, something is wrong */
237 mtu = IB_MTU_2048;
238 break;
239 }
240 pip->neighbormtu_mastersmsl = (mtu << 4) | dev->sm_sl;
241 pip->vlcap_inittype = 0x10; /* VLCap = VL0, InitType = 0 */
242 pip->vl_high_limit = dev->vl_high_limit;
243 /* pip->vl_arb_high_cap; // only one VL */
244 /* pip->vl_arb_low_cap; // only one VL */
245 /* InitTypeReply = 0 */
246 pip->inittypereply_mtucap = IB_MTU_4096;
247 // HCAs ignore VLStallCount and HOQLife
248 /* pip->vlstallcnt_hoqlife; */
249 pip->operationalvl_pei_peo_fpi_fpo = 0x10; /* OVLs = 1 */
250 pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
251 /* P_KeyViolations are counted by hardware. */
252 pip->pkey_violations =
253 cpu_to_be16((ipath_layer_get_cr_errpkey(dev->dd) -
254 dev->n_pkey_violations) & 0xFFFF);
255 pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
256 /* Only the hardware GUID is supported for now */
257 pip->guid_cap = 1;
258 pip->clientrereg_resv_subnetto = dev->subnet_timeout;
259 /* 32.768 usec. response time (guessing) */
260 pip->resv_resptimevalue = 3;
261 pip->localphyerrors_overrunerrors =
262 (ipath_layer_get_phyerrthreshold(dev->dd) << 4) |
263 ipath_layer_get_overrunthreshold(dev->dd);
264 /* pip->max_credit_hint; */
265 /* pip->link_roundtrip_latency[3]; */
266
267 ret = reply(smp);
268
269bail:
270 return ret;
271}
272
273static int recv_subn_get_pkeytable(struct ib_smp *smp,
274 struct ib_device *ibdev)
275{
276 u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
277 u16 *p = (u16 *) smp->data;
278 __be16 *q = (__be16 *) smp->data;
279
280 /* 64 blocks of 32 16-bit P_Key entries */
281
282 memset(smp->data, 0, sizeof(smp->data));
283 if (startpx == 0) {
284 struct ipath_ibdev *dev = to_idev(ibdev);
285 unsigned i, n = ipath_layer_get_npkeys(dev->dd);
286
287 ipath_layer_get_pkeys(dev->dd, p);
288
289 for (i = 0; i < n; i++)
290 q[i] = cpu_to_be16(p[i]);
291 } else
292 smp->status |= IB_SMP_INVALID_FIELD;
293
294 return reply(smp);
295}
296
/*
 * SubnSet(GUIDInfo): nothing is written; the request is answered
 * exactly as the corresponding Get would be.
 */
static int recv_subn_set_guidinfo(struct ib_smp *smp,
				  struct ib_device *ibdev)
{
	int ret;

	/* The only GUID we support is the first read-only entry. */
	ret = recv_subn_get_guidinfo(smp, ibdev);

	return ret;
}
303
304/**
305 * recv_subn_set_portinfo - set port information
306 * @smp: the incoming SM packet
307 * @ibdev: the infiniband device
308 * @port: the port on the device
309 *
310 * Set Portinfo (see ch. 14.2.5.6).
 *
 * The fields of the received PortInfo are validated and applied one at
 * a time, in the order they appear below.  The first field that fails
 * validation jumps to the err label, which flags the reply with
 * IB_SMP_INVALID_FIELD; fields applied before that point are NOT rolled
 * back.  On both the success and the error path the response payload is
 * produced by recv_subn_get_portinfo(), so it reflects the device state
 * after whatever was applied.
 *
 * Returns the result of recv_subn_get_portinfo().
311 */
312static int recv_subn_set_portinfo(struct ib_smp *smp,
313 struct ib_device *ibdev, u8 port)
314{
315 struct port_info *pip = (struct port_info *)smp->data;
316 struct ib_event event;
317 struct ipath_ibdev *dev;
318 u32 flags;
319 char clientrereg = 0;
320 u16 lid, smlid;
321 u8 lwe;
322 u8 lse;
323 u8 state;
324 u16 lstate;
325 u32 mtu;
326 int ret;
327
 /* dev is not assigned yet here; the err path does not use it. */
328 if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
329 goto err;
330
331 dev = to_idev(ibdev);
 /* One event object is reused; only event.event changes per dispatch. */
332 event.device = ibdev;
333 event.element.port_num = port;
334
 /* These three are stored as received, without validation. */
335 dev->mkey = pip->mkey;
336 dev->gid_prefix = pip->gid_prefix;
337 dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
338
 /* The LID is only validated and applied when it differs from the current one. */
339 lid = be16_to_cpu(pip->lid);
340 if (lid != ipath_layer_get_lid(dev->dd)) {
341 /* Must be a valid unicast LID address. */
342 if (lid == 0 || lid >= IPS_MULTICAST_LID_BASE)
343 goto err;
344 ipath_set_sps_lid(dev->dd, lid, pip->mkeyprot_resv_lmc & 7);
345 event.event = IB_EVENT_LID_CHANGE;
346 ib_dispatch_event(&event);
347 }
348
349 smlid = be16_to_cpu(pip->sm_lid);
350 if (smlid != dev->sm_lid) {
351 /* Must be a valid unicast LID address. */
352 if (smlid == 0 || smlid >= IPS_MULTICAST_LID_BASE)
353 goto err;
354 dev->sm_lid = smlid;
355 event.event = IB_EVENT_SM_CHANGE;
356 ib_dispatch_event(&event);
357 }
358
359 /* Only 4x supported but allow 1x or 4x to be set (see 14.2.6.6). */
360 lwe = pip->link_width_enabled;
361 if ((lwe >= 4 && lwe <= 8) || (lwe >= 0xC && lwe <= 0xFE))
362 goto err;
 /* 0xFF selects "1x or 4x"; 0 leaves the current setting untouched. */
363 if (lwe == 0xFF)
364 dev->link_width_enabled = 3; /* 1x or 4x */
365 else if (lwe)
366 dev->link_width_enabled = lwe;
367
368 /* Only 2.5 Gbs supported. */
 /* lse is only range-checked here; nothing is stored from it. */
369 lse = pip->linkspeedactive_enabled & 0xF;
370 if (lse >= 2 && lse <= 0xE)
371 goto err;
372
373 /* Set link down default state. */
374 switch (pip->portphysstate_linkdown & 0xF) {
375 case 0: /* NOP */
376 break;
377 case 1: /* SLEEP */
378 if (ipath_layer_set_linkdowndefaultstate(dev->dd, 1))
379 goto err;
380 break;
381 case 2: /* POLL */
382 if (ipath_layer_set_linkdowndefaultstate(dev->dd, 0))
383 goto err;
384 break;
385 default:
386 goto err;
387 }
388
389 dev->mkeyprot_resv_lmc = pip->mkeyprot_resv_lmc;
390 dev->vl_high_limit = pip->vl_high_limit;
391
 /* Translate the NeighborMTU encoding (upper nibble) into bytes. */
392 switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) {
393 case IB_MTU_256:
394 mtu = 256;
395 break;
396 case IB_MTU_512:
397 mtu = 512;
398 break;
399 case IB_MTU_1024:
400 mtu = 1024;
401 break;
402 case IB_MTU_2048:
403 mtu = 2048;
404 break;
405 case IB_MTU_4096:
406 mtu = 4096;
407 break;
408 default:
409 /* XXX We have already partially updated our state! */
410 goto err;
411 }
 /* NOTE(review): any result of ipath_layer_set_mtu() is not checked here. */
412 ipath_layer_set_mtu(dev->dd, mtu);
413
414 dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
415
416 /* We only support VL0 */
417 if (((pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF) > 1)
418 goto err;
419
 /* Writing zero to a violation counter resets it; other values are ignored. */
420 if (pip->mkey_violations == 0)
421 dev->mkey_violations = 0;
422
423 /*
424 * Hardware counter can't be reset so snapshot and subtract
425 * later.
426 */
427 if (pip->pkey_violations == 0)
428 dev->n_pkey_violations =
429 ipath_layer_get_cr_errpkey(dev->dd);
430
431 if (pip->qkey_violations == 0)
432 dev->qkey_violations = 0;
433
434 if (ipath_layer_set_phyerrthreshold(
435 dev->dd,
436 (pip->localphyerrors_overrunerrors >> 4) & 0xF))
437 goto err;
438
439 if (ipath_layer_set_overrunthreshold(
440 dev->dd,
441 (pip->localphyerrors_overrunerrors & 0xF)))
442 goto err;
443
444 dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
445
 /*
  * Bit 7 is the ClientReregister request.  It is signalled with a
  * LID change event and remembered so it can be echoed in the reply.
  */
446 if (pip->clientrereg_resv_subnetto & 0x80) {
447 clientrereg = 1;
448 event.event = IB_EVENT_LID_CHANGE;
449 ib_dispatch_event(&event);
450 }
451
452 /*
453 * Do the port state change now that the other link parameters
454 * have been set.
455 * Changing the port physical state only makes sense if the link
456 * is down or is being set to down.
457 */
458 state = pip->linkspeed_portstate & 0xF;
 /* flags are sampled before any state change below is requested. */
459 flags = ipath_layer_get_flags(dev->dd);
460 lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
461 if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
462 goto err;
463
464 /*
465 * Only state changes of DOWN, ARM, and ACTIVE are valid
466 * and must be in the correct state to take effect (see 7.2.6).
467 */
468 switch (state) {
469 case IB_PORT_NOP:
470 if (lstate == 0)
471 break;
472 /* FALLTHROUGH */
473 case IB_PORT_DOWN:
 /*
  * Note the unbraced nesting: the first else below pairs with
  * the inner if, and the else-if chain pairs with the outer
  * "if (lstate == 0)".  lstate is rewritten from the requested
  * physical state (0 = use the link-down default, 1, 2, 3) to
  * the matching IPATH_IB_LINKDOWN* value.
  */
474 if (lstate == 0)
475 if (ipath_layer_get_linkdowndefaultstate(dev->dd))
476 lstate = IPATH_IB_LINKDOWN_SLEEP;
477 else
478 lstate = IPATH_IB_LINKDOWN;
479 else if (lstate == 1)
480 lstate = IPATH_IB_LINKDOWN_SLEEP;
481 else if (lstate == 2)
482 lstate = IPATH_IB_LINKDOWN;
483 else if (lstate == 3)
484 lstate = IPATH_IB_LINKDOWN_DISABLE;
485 else
486 goto err;
487 ipath_layer_set_linkstate(dev->dd, lstate);
 /* Report a port error only if the link was active beforehand. */
488 if (flags & IPATH_LINKACTIVE) {
489 event.event = IB_EVENT_PORT_ERR;
490 ib_dispatch_event(&event);
491 }
492 break;
493 case IB_PORT_ARMED:
 /* Silently ignored unless the link was in INIT or ACTIVE. */
494 if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE)))
495 break;
496 ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKARM);
497 if (flags & IPATH_LINKACTIVE) {
498 event.event = IB_EVENT_PORT_ERR;
499 ib_dispatch_event(&event);
500 }
501 break;
502 case IB_PORT_ACTIVE:
 /* Silently ignored unless the link was ARMED. */
503 if (!(flags & IPATH_LINKARMED))
504 break;
505 ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
506 event.event = IB_EVENT_PORT_ACTIVE;
507 ib_dispatch_event(&event);
508 break;
509 default:
510 /* XXX We have already partially updated our state! */
511 goto err;
512 }
513
514 ret = recv_subn_get_portinfo(smp, ibdev, port);
515
 /*
  * recv_subn_get_portinfo() rebuilt this byte from dev->subnet_timeout,
  * so the ClientReregister bit has to be put back by hand.
  */
516 if (clientrereg)
517 pip->clientrereg_resv_subnetto |= 0x80;
518
519 goto done;
520
521err:
 /* Flag the failure, but still answer with the current PortInfo. */
522 smp->status |= IB_SMP_INVALID_FIELD;
523 ret = recv_subn_get_portinfo(smp, ibdev, port);
524
525done:
526 return ret;
527}
528
529static int recv_subn_set_pkeytable(struct ib_smp *smp,
530 struct ib_device *ibdev)
531{
532 u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
533 __be16 *p = (__be16 *) smp->data;
534 u16 *q = (u16 *) smp->data;
535 struct ipath_ibdev *dev = to_idev(ibdev);
536 unsigned i, n = ipath_layer_get_npkeys(dev->dd);
537
538 for (i = 0; i < n; i++)
539 q[i] = be16_to_cpu(p[i]);
540
541 if (startpx != 0 ||
542 ipath_layer_set_pkeys(dev->dd, q) != 0)
543 smp->status |= IB_SMP_INVALID_FIELD;
544
545 return recv_subn_get_pkeytable(smp, ibdev);
546}
547
/*
 * 16-bit identifiers pre-swapped to network byte order by
 * __constant_htons().
 * NOTE(review): by their names these are the performance management
 * attribute IDs matched against the attr_id of a received packet --
 * the code that uses them is outside this excerpt, confirm there.
 */
548#define IB_PMA_CLASS_PORT_INFO __constant_htons(0x0001)
549#define IB_PMA_PORT_SAMPLES_CONTROL __constant_htons(0x0010)
550#define IB_PMA_PORT_SAMPLES_RESULT __constant_htons(0x0011)
551#define IB_PMA_PORT_COUNTERS __constant_htons(0x0012)
552#define IB_PMA_PORT_COUNTERS_EXT __constant_htons(0x001D)
553#define IB_PMA_PORT_SAMPLES_RESULT_EXT __constant_htons(0x001E)
554
/*
 * Layout of a performance management datagram: a header, 40 reserved
 * bytes and a 192-byte attribute payload.  The recv_pma_*() handlers
 * cast a pointer to this struct to struct ib_smp when calling reply(),
 * which touches method, mgmt_class and status.
 * NOTE(review): that cast assumes those members sit at the same offsets
 * in struct ib_smp -- confirm against rdma/ib_smi.h.
 */
555struct ib_perf {
556 u8 base_version;
557 u8 mgmt_class;
558 u8 class_version;
559 u8 method;
560 __be16 status;
561 __be16 unused;
562 __be64 tid;
563 __be16 attr_id;
564 __be16 resv;
565 __be32 attr_mod;
566 u8 reserved[40];
567 u8 data[192]; /* attribute payload; the ib_pma_* structs overlay this */
568} __attribute__ ((packed));
569
/*
 * Layout of the ClassPortInfo attribute, overlaid on ib_perf.data by
 * recv_pma_get_classportinfo().  That function only fills in
 * base_version, class_version, cap_mask and resp_time_value; all
 * redirect and trap fields are returned as zero.
 */
570struct ib_pma_classportinfo {
571 u8 base_version;
572 u8 class_version;
573 __be16 cap_mask;
574 u8 reserved[3];
575 u8 resp_time_value; /* only lower 5 bits */
576 union ib_gid redirect_gid;
577 __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */
578 __be16 redirect_lid;
579 __be16 redirect_pkey;
580 __be32 redirect_qp; /* only lower 24 bits */
581 __be32 redirect_qkey;
582 union ib_gid trap_gid;
583 __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */
584 __be16 trap_lid;
585 __be16 trap_pkey;
586 __be32 trap_hl_qp; /* 8, 24 bits respectively */
587 __be32 trap_qkey;
588} __attribute__ ((packed));
589
/*
 * Layout of the PortSamplesControl attribute, overlaid on ib_perf.data
 * by recv_pma_get_portsamplescontrol() and
 * recv_pma_set_portsamplescontrol().  There is room for 15 counter
 * selectors, of which those handlers use the first five.
 */
590struct ib_pma_portsamplescontrol {
591 u8 opcode;
592 u8 port_select;
593 u8 tick;
594 u8 counter_width; /* only lower 3 bits */
595 __be32 counter_mask0_9; /* 2, 10 * 3, bits */
596 __be16 counter_mask10_14; /* 1, 5 * 3, bits */
597 u8 sample_mechanisms;
598 u8 sample_status; /* only lower 2 bits */
599 __be64 option_mask;
600 __be64 vendor_mask;
601 __be32 sample_start;
602 __be32 sample_interval;
603 __be16 tag;
604 __be16 counter_select[15];
605} __attribute__ ((packed));
606
/*
 * Layout of the PortSamplesResult attribute (32-bit counters), overlaid
 * on ib_perf.data by recv_pma_get_portsamplesresult().
 */
607struct ib_pma_portsamplesresult {
608 __be16 tag;
609 __be16 sample_status; /* only lower 2 bits */
610 __be32 counter[15];
611} __attribute__ ((packed));
612
/*
 * Layout of the extended PortSamplesResult attribute (64-bit counters),
 * overlaid on ib_perf.data by recv_pma_get_portsamplesresult_ext().
 */
613struct ib_pma_portsamplesresult_ext {
614 __be16 tag;
615 __be16 sample_status; /* only lower 2 bits */
616 __be32 extended_width; /* only upper 2 bits */
617 __be64 counter[15];
618} __attribute__ ((packed));
619
/*
 * Layout of the PortCounters attribute, overlaid on ib_perf.data by
 * recv_pma_get_portcounters() and recv_pma_set_portcounters().  The
 * counters are 8, 16 or 32 bits wide; the Get handler saturates each
 * value it reports at the maximum its field can hold.
 */
620struct ib_pma_portcounters {
621 u8 reserved;
622 u8 port_select;
623 __be16 counter_select; /* IB_PMA_SEL_* bits, tested by the Set handler */
624 __be16 symbol_error_counter;
625 u8 link_error_recovery_counter;
626 u8 link_downed_counter;
627 __be16 port_rcv_errors;
628 __be16 port_rcv_remphys_errors;
629 __be16 port_rcv_switch_relay_errors;
630 __be16 port_xmit_discards;
631 u8 port_xmit_constraint_errors;
632 u8 port_rcv_constraint_errors;
633 u8 reserved1;
634 u8 lli_ebor_errors; /* 4, 4, bits */
635 __be16 reserved2;
636 __be16 vl15_dropped;
637 __be32 port_xmit_data;
638 __be32 port_rcv_data;
639 __be32 port_xmit_packets;
640 __be32 port_rcv_packets;
641} __attribute__ ((packed));
642
/*
 * Bit masks for ib_pma_portcounters.counter_select, pre-swapped to
 * network byte order so they can be tested directly against the
 * big-endian field.  recv_pma_set_portcounters() checks one bit per
 * counter to decide which counters to reset.
 */
643#define IB_PMA_SEL_SYMBOL_ERROR __constant_htons(0x0001)
644#define IB_PMA_SEL_LINK_ERROR_RECOVERY __constant_htons(0x0002)
645#define IB_PMA_SEL_LINK_DOWNED __constant_htons(0x0004)
646#define IB_PMA_SEL_PORT_RCV_ERRORS __constant_htons(0x0008)
647#define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS __constant_htons(0x0010)
648#define IB_PMA_SEL_PORT_XMIT_DISCARDS __constant_htons(0x0040)
649#define IB_PMA_SEL_PORT_XMIT_DATA __constant_htons(0x1000)
650#define IB_PMA_SEL_PORT_RCV_DATA __constant_htons(0x2000)
651#define IB_PMA_SEL_PORT_XMIT_PACKETS __constant_htons(0x4000)
652#define IB_PMA_SEL_PORT_RCV_PACKETS __constant_htons(0x8000)
653
/*
 * Layout of the extended PortCounters attribute (64-bit counters),
 * overlaid on ib_perf.data by recv_pma_get_portcounters_ext().
 */
654struct ib_pma_portcounters_ext {
655 u8 reserved;
656 u8 port_select;
657 __be16 counter_select;
658 __be32 reserved1;
659 __be64 port_xmit_data;
660 __be64 port_rcv_data;
661 __be64 port_xmit_packets;
662 __be64 port_rcv_packets;
663 __be64 port_unicast_xmit_packets;
664 __be64 port_unicast_rcv_packets;
665 __be64 port_multicast_xmit_packets;
666 __be64 port_multicast_rcv_packets;
667} __attribute__ ((packed));
668
/*
 * 16-bit masks pre-swapped to network byte order by __constant_htons().
 * NOTE(review): by their names these select individual counters in
 * ib_pma_portcounters_ext.counter_select -- their use is outside this
 * excerpt, confirm there.
 */
669#define IB_PMA_SELX_PORT_XMIT_DATA __constant_htons(0x0001)
670#define IB_PMA_SELX_PORT_RCV_DATA __constant_htons(0x0002)
671#define IB_PMA_SELX_PORT_XMIT_PACKETS __constant_htons(0x0004)
672#define IB_PMA_SELX_PORT_RCV_PACKETS __constant_htons(0x0008)
673#define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS __constant_htons(0x0010)
674#define IB_PMA_SELX_PORT_UNI_RCV_PACKETS __constant_htons(0x0020)
675#define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS __constant_htons(0x0040)
676#define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS __constant_htons(0x0080)
677
678static int recv_pma_get_classportinfo(struct ib_perf *pmp)
679{
680 struct ib_pma_classportinfo *p =
681 (struct ib_pma_classportinfo *)pmp->data;
682
683 memset(pmp->data, 0, sizeof(pmp->data));
684
685 if (pmp->attr_mod != 0)
686 pmp->status |= IB_SMP_INVALID_FIELD;
687
688 /* Indicate AllPortSelect is valid (only one port anyway) */
689 p->cap_mask = __constant_cpu_to_be16(1 << 8);
690 p->base_version = 1;
691 p->class_version = 1;
692 /*
693 * Expected response time is 4.096 usec. * 2^18 == 1.073741824
694 * sec.
695 */
696 p->resp_time_value = 18;
697
698 return reply((struct ib_smp *) pmp);
699}
700
/*
 * The PortSamplesControl.CounterMasks field is an array of 3 bit fields
 * which specify the N'th counter's capabilities. See ch. 16.1.3.2.
 * We support 5 counters which only count the mandatory quantities.
 *
 * COUNTER_MASK(q, n) places the 3-bit value q in the slot of counter n
 * (0..9); counter 0 occupies the most significant slot.  Both arguments
 * are parenthesized so that expression arguments expand correctly.
 */
#define COUNTER_MASK(q, n) ((q) << ((9 - (n)) * 3))
#define COUNTER_MASK0_9 \
	__constant_cpu_to_be32(COUNTER_MASK(1, 0) | \
			       COUNTER_MASK(1, 1) | \
			       COUNTER_MASK(1, 2) | \
			       COUNTER_MASK(1, 3) | \
			       COUNTER_MASK(1, 4))
713
714static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp,
715 struct ib_device *ibdev, u8 port)
716{
717 struct ib_pma_portsamplescontrol *p =
718 (struct ib_pma_portsamplescontrol *)pmp->data;
719 struct ipath_ibdev *dev = to_idev(ibdev);
720 unsigned long flags;
721 u8 port_select = p->port_select;
722
723 memset(pmp->data, 0, sizeof(pmp->data));
724
725 p->port_select = port_select;
726 if (pmp->attr_mod != 0 ||
727 (port_select != port && port_select != 0xFF))
728 pmp->status |= IB_SMP_INVALID_FIELD;
729 /*
730 * Ticks are 10x the link transfer period which for 2.5Gbs is 4
731 * nsec. 0 == 4 nsec., 1 == 8 nsec., ..., 255 == 1020 nsec. Sample
732 * intervals are counted in ticks. Since we use Linux timers, that
733 * count in jiffies, we can't sample for less than 1000 ticks if HZ
734 * == 1000 (4000 ticks if HZ is 250).
735 */
736 /* XXX This is WRONG. */
737 p->tick = 250; /* 1 usec. */
738 p->counter_width = 4; /* 32 bit counters */
739 p->counter_mask0_9 = COUNTER_MASK0_9;
740 spin_lock_irqsave(&dev->pending_lock, flags);
741 p->sample_status = dev->pma_sample_status;
742 p->sample_start = cpu_to_be32(dev->pma_sample_start);
743 p->sample_interval = cpu_to_be32(dev->pma_sample_interval);
744 p->tag = cpu_to_be16(dev->pma_tag);
745 p->counter_select[0] = dev->pma_counter_select[0];
746 p->counter_select[1] = dev->pma_counter_select[1];
747 p->counter_select[2] = dev->pma_counter_select[2];
748 p->counter_select[3] = dev->pma_counter_select[3];
749 p->counter_select[4] = dev->pma_counter_select[4];
750 spin_unlock_irqrestore(&dev->pending_lock, flags);
751
752 return reply((struct ib_smp *) pmp);
753}
754
755static int recv_pma_set_portsamplescontrol(struct ib_perf *pmp,
756 struct ib_device *ibdev, u8 port)
757{
758 struct ib_pma_portsamplescontrol *p =
759 (struct ib_pma_portsamplescontrol *)pmp->data;
760 struct ipath_ibdev *dev = to_idev(ibdev);
761 unsigned long flags;
762 u32 start;
763 int ret;
764
765 if (pmp->attr_mod != 0 ||
766 (p->port_select != port && p->port_select != 0xFF)) {
767 pmp->status |= IB_SMP_INVALID_FIELD;
768 ret = reply((struct ib_smp *) pmp);
769 goto bail;
770 }
771
772 start = be32_to_cpu(p->sample_start);
773 if (start != 0) {
774 spin_lock_irqsave(&dev->pending_lock, flags);
775 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_DONE) {
776 dev->pma_sample_status =
777 IB_PMA_SAMPLE_STATUS_STARTED;
778 dev->pma_sample_start = start;
779 dev->pma_sample_interval =
780 be32_to_cpu(p->sample_interval);
781 dev->pma_tag = be16_to_cpu(p->tag);
782 if (p->counter_select[0])
783 dev->pma_counter_select[0] =
784 p->counter_select[0];
785 if (p->counter_select[1])
786 dev->pma_counter_select[1] =
787 p->counter_select[1];
788 if (p->counter_select[2])
789 dev->pma_counter_select[2] =
790 p->counter_select[2];
791 if (p->counter_select[3])
792 dev->pma_counter_select[3] =
793 p->counter_select[3];
794 if (p->counter_select[4])
795 dev->pma_counter_select[4] =
796 p->counter_select[4];
797 }
798 spin_unlock_irqrestore(&dev->pending_lock, flags);
799 }
800 ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port);
801
802bail:
803 return ret;
804}
805
806static u64 get_counter(struct ipath_ibdev *dev, __be16 sel)
807{
808 u64 ret;
809
810 switch (sel) {
811 case IB_PMA_PORT_XMIT_DATA:
812 ret = dev->ipath_sword;
813 break;
814 case IB_PMA_PORT_RCV_DATA:
815 ret = dev->ipath_rword;
816 break;
817 case IB_PMA_PORT_XMIT_PKTS:
818 ret = dev->ipath_spkts;
819 break;
820 case IB_PMA_PORT_RCV_PKTS:
821 ret = dev->ipath_rpkts;
822 break;
823 case IB_PMA_PORT_XMIT_WAIT:
824 ret = dev->ipath_xmit_wait;
825 break;
826 default:
827 ret = 0;
828 }
829
830 return ret;
831}
832
833static int recv_pma_get_portsamplesresult(struct ib_perf *pmp,
834 struct ib_device *ibdev)
835{
836 struct ib_pma_portsamplesresult *p =
837 (struct ib_pma_portsamplesresult *)pmp->data;
838 struct ipath_ibdev *dev = to_idev(ibdev);
839 int i;
840
841 memset(pmp->data, 0, sizeof(pmp->data));
842 p->tag = cpu_to_be16(dev->pma_tag);
843 p->sample_status = cpu_to_be16(dev->pma_sample_status);
844 for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
845 p->counter[i] = cpu_to_be32(
846 get_counter(dev, dev->pma_counter_select[i]));
847
848 return reply((struct ib_smp *) pmp);
849}
850
851static int recv_pma_get_portsamplesresult_ext(struct ib_perf *pmp,
852 struct ib_device *ibdev)
853{
854 struct ib_pma_portsamplesresult_ext *p =
855 (struct ib_pma_portsamplesresult_ext *)pmp->data;
856 struct ipath_ibdev *dev = to_idev(ibdev);
857 int i;
858
859 memset(pmp->data, 0, sizeof(pmp->data));
860 p->tag = cpu_to_be16(dev->pma_tag);
861 p->sample_status = cpu_to_be16(dev->pma_sample_status);
862 /* 64 bits */
863 p->extended_width = __constant_cpu_to_be32(0x80000000);
864 for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
865 p->counter[i] = cpu_to_be64(
866 get_counter(dev, dev->pma_counter_select[i]));
867
868 return reply((struct ib_smp *) pmp);
869}
870
871static int recv_pma_get_portcounters(struct ib_perf *pmp,
872 struct ib_device *ibdev, u8 port)
873{
874 struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
875 pmp->data;
876 struct ipath_ibdev *dev = to_idev(ibdev);
877 struct ipath_layer_counters cntrs;
878 u8 port_select = p->port_select;
879
880 ipath_layer_get_counters(dev->dd, &cntrs);
881
882 /* Adjust counters for any resets done. */
883 cntrs.symbol_error_counter -= dev->n_symbol_error_counter;
884 cntrs.link_error_recovery_counter -=
885 dev->n_link_error_recovery_counter;
886 cntrs.link_downed_counter -= dev->n_link_downed_counter;
887 cntrs.port_rcv_errors += dev->rcv_errors;
888 cntrs.port_rcv_errors -= dev->n_port_rcv_errors;
889 cntrs.port_rcv_remphys_errors -= dev->n_port_rcv_remphys_errors;
890 cntrs.port_xmit_discards -= dev->n_port_xmit_discards;
891 cntrs.port_xmit_data -= dev->n_port_xmit_data;
892 cntrs.port_rcv_data -= dev->n_port_rcv_data;
893 cntrs.port_xmit_packets -= dev->n_port_xmit_packets;
894 cntrs.port_rcv_packets -= dev->n_port_rcv_packets;
895
896 memset(pmp->data, 0, sizeof(pmp->data));
897
898 p->port_select = port_select;
899 if (pmp->attr_mod != 0 ||
900 (port_select != port && port_select != 0xFF))
901 pmp->status |= IB_SMP_INVALID_FIELD;
902
903 if (cntrs.symbol_error_counter > 0xFFFFUL)
904 p->symbol_error_counter = __constant_cpu_to_be16(0xFFFF);
905 else
906 p->symbol_error_counter =
907 cpu_to_be16((u16)cntrs.symbol_error_counter);
908 if (cntrs.link_error_recovery_counter > 0xFFUL)
909 p->link_error_recovery_counter = 0xFF;
910 else
911 p->link_error_recovery_counter =
912 (u8)cntrs.link_error_recovery_counter;
913 if (cntrs.link_downed_counter > 0xFFUL)
914 p->link_downed_counter = 0xFF;
915 else
916 p->link_downed_counter = (u8)cntrs.link_downed_counter;
917 if (cntrs.port_rcv_errors > 0xFFFFUL)
918 p->port_rcv_errors = __constant_cpu_to_be16(0xFFFF);
919 else
920 p->port_rcv_errors =
921 cpu_to_be16((u16) cntrs.port_rcv_errors);
922 if (cntrs.port_rcv_remphys_errors > 0xFFFFUL)
923 p->port_rcv_remphys_errors = __constant_cpu_to_be16(0xFFFF);
924 else
925 p->port_rcv_remphys_errors =
926 cpu_to_be16((u16)cntrs.port_rcv_remphys_errors);
927 if (cntrs.port_xmit_discards > 0xFFFFUL)
928 p->port_xmit_discards = __constant_cpu_to_be16(0xFFFF);
929 else
930 p->port_xmit_discards =
931 cpu_to_be16((u16)cntrs.port_xmit_discards);
932 if (cntrs.port_xmit_data > 0xFFFFFFFFUL)
933 p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF);
934 else
935 p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data);
936 if (cntrs.port_rcv_data > 0xFFFFFFFFUL)
937 p->port_rcv_data = __constant_cpu_to_be32(0xFFFFFFFF);
938 else
939 p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data);
940 if (cntrs.port_xmit_packets > 0xFFFFFFFFUL)
941 p->port_xmit_packets = __constant_cpu_to_be32(0xFFFFFFFF);
942 else
943 p->port_xmit_packets =
944 cpu_to_be32((u32)cntrs.port_xmit_packets);
945 if (cntrs.port_rcv_packets > 0xFFFFFFFFUL)
946 p->port_rcv_packets = __constant_cpu_to_be32(0xFFFFFFFF);
947 else
948 p->port_rcv_packets =
949 cpu_to_be32((u32) cntrs.port_rcv_packets);
950
951 return reply((struct ib_smp *) pmp);
952}
953
954static int recv_pma_get_portcounters_ext(struct ib_perf *pmp,
955 struct ib_device *ibdev, u8 port)
956{
957 struct ib_pma_portcounters_ext *p =
958 (struct ib_pma_portcounters_ext *)pmp->data;
959 struct ipath_ibdev *dev = to_idev(ibdev);
960 u64 swords, rwords, spkts, rpkts, xwait;
961 u8 port_select = p->port_select;
962
963 ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
964 &rpkts, &xwait);
965
966 /* Adjust counters for any resets done. */
967 swords -= dev->n_port_xmit_data;
968 rwords -= dev->n_port_rcv_data;
969 spkts -= dev->n_port_xmit_packets;
970 rpkts -= dev->n_port_rcv_packets;
971
972 memset(pmp->data, 0, sizeof(pmp->data));
973
974 p->port_select = port_select;
975 if (pmp->attr_mod != 0 ||
976 (port_select != port && port_select != 0xFF))
977 pmp->status |= IB_SMP_INVALID_FIELD;
978
979 p->port_xmit_data = cpu_to_be64(swords);
980 p->port_rcv_data = cpu_to_be64(rwords);
981 p->port_xmit_packets = cpu_to_be64(spkts);
982 p->port_rcv_packets = cpu_to_be64(rpkts);
983 p->port_unicast_xmit_packets = cpu_to_be64(dev->n_unicast_xmit);
984 p->port_unicast_rcv_packets = cpu_to_be64(dev->n_unicast_rcv);
985 p->port_multicast_xmit_packets = cpu_to_be64(dev->n_multicast_xmit);
986 p->port_multicast_rcv_packets = cpu_to_be64(dev->n_multicast_rcv);
987
988 return reply((struct ib_smp *) pmp);
989}
990
991static int recv_pma_set_portcounters(struct ib_perf *pmp,
992 struct ib_device *ibdev, u8 port)
993{
994 struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
995 pmp->data;
996 struct ipath_ibdev *dev = to_idev(ibdev);
997 struct ipath_layer_counters cntrs;
998
999 /*
1000 * Since the HW doesn't support clearing counters, we save the
1001 * current count and subtract it from future responses.
1002 */
1003 ipath_layer_get_counters(dev->dd, &cntrs);
1004
1005 if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
1006 dev->n_symbol_error_counter = cntrs.symbol_error_counter;
1007
1008 if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY)
1009 dev->n_link_error_recovery_counter =
1010 cntrs.link_error_recovery_counter;
1011
1012 if (p->counter_select & IB_PMA_SEL_LINK_DOWNED)
1013 dev->n_link_downed_counter = cntrs.link_downed_counter;
1014
1015 if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS)
1016 dev->n_port_rcv_errors =
1017 cntrs.port_rcv_errors + dev->rcv_errors;
1018
1019 if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS)
1020 dev->n_port_rcv_remphys_errors =
1021 cntrs.port_rcv_remphys_errors;
1022
1023 if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS)
1024 dev->n_port_xmit_discards = cntrs.port_xmit_discards;
1025
1026 if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA)
1027 dev->n_port_xmit_data = cntrs.port_xmit_data;
1028
1029 if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA)
1030 dev->n_port_rcv_data = cntrs.port_rcv_data;
1031
1032 if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS)
1033 dev->n_port_xmit_packets = cntrs.port_xmit_packets;
1034
1035 if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS)
1036 dev->n_port_rcv_packets = cntrs.port_rcv_packets;
1037
1038 return recv_pma_get_portcounters(pmp, ibdev, port);
1039}
1040
1041static int recv_pma_set_portcounters_ext(struct ib_perf *pmp,
1042 struct ib_device *ibdev, u8 port)
1043{
1044 struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
1045 pmp->data;
1046 struct ipath_ibdev *dev = to_idev(ibdev);
1047 u64 swords, rwords, spkts, rpkts, xwait;
1048
1049 ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
1050 &rpkts, &xwait);
1051
1052 if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
1053 dev->n_port_xmit_data = swords;
1054
1055 if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA)
1056 dev->n_port_rcv_data = rwords;
1057
1058 if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS)
1059 dev->n_port_xmit_packets = spkts;
1060
1061 if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
1062 dev->n_port_rcv_packets = rpkts;
1063
1064 if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
1065 dev->n_unicast_xmit = 0;
1066
1067 if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS)
1068 dev->n_unicast_rcv = 0;
1069
1070 if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS)
1071 dev->n_multicast_xmit = 0;
1072
1073 if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS)
1074 dev->n_multicast_rcv = 0;
1075
1076 return recv_pma_get_portcounters_ext(pmp, ibdev, port);
1077}
1078
1079static int process_subn(struct ib_device *ibdev, int mad_flags,
1080 u8 port_num, struct ib_mad *in_mad,
1081 struct ib_mad *out_mad)
1082{
1083 struct ib_smp *smp = (struct ib_smp *)out_mad;
1084 struct ipath_ibdev *dev = to_idev(ibdev);
1085 int ret;
1086
1087 *out_mad = *in_mad;
1088 if (smp->class_version != 1) {
1089 smp->status |= IB_SMP_UNSUP_VERSION;
1090 ret = reply(smp);
1091 goto bail;
1092 }
1093
1094 /* Is the mkey in the process of expiring? */
1095 if (dev->mkey_lease_timeout && jiffies >= dev->mkey_lease_timeout) {
1096 /* Clear timeout and mkey protection field. */
1097 dev->mkey_lease_timeout = 0;
1098 dev->mkeyprot_resv_lmc &= 0x3F;
1099 }
1100
1101 /*
1102 * M_Key checking depends on
1103 * Portinfo:M_Key_protect_bits
1104 */
1105 if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && dev->mkey != 0 &&
1106 dev->mkey != smp->mkey &&
1107 (smp->method == IB_MGMT_METHOD_SET ||
1108 (smp->method == IB_MGMT_METHOD_GET &&
1109 (dev->mkeyprot_resv_lmc >> 7) != 0))) {
1110 if (dev->mkey_violations != 0xFFFF)
1111 ++dev->mkey_violations;
1112 if (dev->mkey_lease_timeout ||
1113 dev->mkey_lease_period == 0) {
1114 ret = IB_MAD_RESULT_SUCCESS |
1115 IB_MAD_RESULT_CONSUMED;
1116 goto bail;
1117 }
1118 dev->mkey_lease_timeout = jiffies +
1119 dev->mkey_lease_period * HZ;
1120 /* Future: Generate a trap notice. */
1121 ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
1122 goto bail;
1123 } else if (dev->mkey_lease_timeout)
1124 dev->mkey_lease_timeout = 0;
1125
1126 switch (smp->method) {
1127 case IB_MGMT_METHOD_GET:
1128 switch (smp->attr_id) {
1129 case IB_SMP_ATTR_NODE_DESC:
1130 ret = recv_subn_get_nodedescription(smp, ibdev);
1131 goto bail;
1132 case IB_SMP_ATTR_NODE_INFO:
1133 ret = recv_subn_get_nodeinfo(smp, ibdev, port_num);
1134 goto bail;
1135 case IB_SMP_ATTR_GUID_INFO:
1136 ret = recv_subn_get_guidinfo(smp, ibdev);
1137 goto bail;
1138 case IB_SMP_ATTR_PORT_INFO:
1139 ret = recv_subn_get_portinfo(smp, ibdev, port_num);
1140 goto bail;
1141 case IB_SMP_ATTR_PKEY_TABLE:
1142 ret = recv_subn_get_pkeytable(smp, ibdev);
1143 goto bail;
1144 case IB_SMP_ATTR_SM_INFO:
1145 if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
1146 ret = IB_MAD_RESULT_SUCCESS |
1147 IB_MAD_RESULT_CONSUMED;
1148 goto bail;
1149 }
1150 if (dev->port_cap_flags & IB_PORT_SM) {
1151 ret = IB_MAD_RESULT_SUCCESS;
1152 goto bail;
1153 }
1154 /* FALLTHROUGH */
1155 default:
1156 smp->status |= IB_SMP_UNSUP_METH_ATTR;
1157 ret = reply(smp);
1158 goto bail;
1159 }
1160
1161 case IB_MGMT_METHOD_SET:
1162 switch (smp->attr_id) {
1163 case IB_SMP_ATTR_GUID_INFO:
1164 ret = recv_subn_set_guidinfo(smp, ibdev);
1165 goto bail;
1166 case IB_SMP_ATTR_PORT_INFO:
1167 ret = recv_subn_set_portinfo(smp, ibdev, port_num);
1168 goto bail;
1169 case IB_SMP_ATTR_PKEY_TABLE:
1170 ret = recv_subn_set_pkeytable(smp, ibdev);
1171 goto bail;
1172 case IB_SMP_ATTR_SM_INFO:
1173 if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
1174 ret = IB_MAD_RESULT_SUCCESS |
1175 IB_MAD_RESULT_CONSUMED;
1176 goto bail;
1177 }
1178 if (dev->port_cap_flags & IB_PORT_SM) {
1179 ret = IB_MAD_RESULT_SUCCESS;
1180 goto bail;
1181 }
1182 /* FALLTHROUGH */
1183 default:
1184 smp->status |= IB_SMP_UNSUP_METH_ATTR;
1185 ret = reply(smp);
1186 goto bail;
1187 }
1188
1189 case IB_MGMT_METHOD_GET_RESP:
1190 /*
1191 * The ib_mad module will call us to process responses
1192 * before checking for other consumers.
1193 * Just tell the caller to process it normally.
1194 */
1195 ret = IB_MAD_RESULT_FAILURE;
1196 goto bail;
1197 default:
1198 smp->status |= IB_SMP_UNSUP_METHOD;
1199 ret = reply(smp);
1200 }
1201
1202bail:
1203 return ret;
1204}
1205
1206static int process_perf(struct ib_device *ibdev, u8 port_num,
1207 struct ib_mad *in_mad,
1208 struct ib_mad *out_mad)
1209{
1210 struct ib_perf *pmp = (struct ib_perf *)out_mad;
1211 int ret;
1212
1213 *out_mad = *in_mad;
1214 if (pmp->class_version != 1) {
1215 pmp->status |= IB_SMP_UNSUP_VERSION;
1216 ret = reply((struct ib_smp *) pmp);
1217 goto bail;
1218 }
1219
1220 switch (pmp->method) {
1221 case IB_MGMT_METHOD_GET:
1222 switch (pmp->attr_id) {
1223 case IB_PMA_CLASS_PORT_INFO:
1224 ret = recv_pma_get_classportinfo(pmp);
1225 goto bail;
1226 case IB_PMA_PORT_SAMPLES_CONTROL:
1227 ret = recv_pma_get_portsamplescontrol(pmp, ibdev,
1228 port_num);
1229 goto bail;
1230 case IB_PMA_PORT_SAMPLES_RESULT:
1231 ret = recv_pma_get_portsamplesresult(pmp, ibdev);
1232 goto bail;
1233 case IB_PMA_PORT_SAMPLES_RESULT_EXT:
1234 ret = recv_pma_get_portsamplesresult_ext(pmp,
1235 ibdev);
1236 goto bail;
1237 case IB_PMA_PORT_COUNTERS:
1238 ret = recv_pma_get_portcounters(pmp, ibdev,
1239 port_num);
1240 goto bail;
1241 case IB_PMA_PORT_COUNTERS_EXT:
1242 ret = recv_pma_get_portcounters_ext(pmp, ibdev,
1243 port_num);
1244 goto bail;
1245 default:
1246 pmp->status |= IB_SMP_UNSUP_METH_ATTR;
1247 ret = reply((struct ib_smp *) pmp);
1248 goto bail;
1249 }
1250
1251 case IB_MGMT_METHOD_SET:
1252 switch (pmp->attr_id) {
1253 case IB_PMA_PORT_SAMPLES_CONTROL:
1254 ret = recv_pma_set_portsamplescontrol(pmp, ibdev,
1255 port_num);
1256 goto bail;
1257 case IB_PMA_PORT_COUNTERS:
1258 ret = recv_pma_set_portcounters(pmp, ibdev,
1259 port_num);
1260 goto bail;
1261 case IB_PMA_PORT_COUNTERS_EXT:
1262 ret = recv_pma_set_portcounters_ext(pmp, ibdev,
1263 port_num);
1264 goto bail;
1265 default:
1266 pmp->status |= IB_SMP_UNSUP_METH_ATTR;
1267 ret = reply((struct ib_smp *) pmp);
1268 goto bail;
1269 }
1270
1271 case IB_MGMT_METHOD_GET_RESP:
1272 /*
1273 * The ib_mad module will call us to process responses
1274 * before checking for other consumers.
1275 * Just tell the caller to process it normally.
1276 */
1277 ret = IB_MAD_RESULT_FAILURE;
1278 goto bail;
1279 default:
1280 pmp->status |= IB_SMP_UNSUP_METHOD;
1281 ret = reply((struct ib_smp *) pmp);
1282 }
1283
1284bail:
1285 return ret;
1286}
1287
1288/**
1289 * ipath_process_mad - process an incoming MAD packet
1290 * @ibdev: the infiniband device this packet came in on
1291 * @mad_flags: MAD flags
1292 * @port_num: the port number this packet came in on
1293 * @in_wc: the work completion entry for this packet
1294 * @in_grh: the global route header for this packet
1295 * @in_mad: the incoming MAD
1296 * @out_mad: any outgoing MAD reply
1297 *
1298 * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
1299 * interested in processing.
1300 *
1301 * Note that the verbs framework has already done the MAD sanity checks,
1302 * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
1303 * MADs.
1304 *
1305 * This is called by the ib_mad module.
1306 */
1307int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
1308 struct ib_wc *in_wc, struct ib_grh *in_grh,
1309 struct ib_mad *in_mad, struct ib_mad *out_mad)
1310{
1311 struct ipath_ibdev *dev = to_idev(ibdev);
1312 int ret;
1313
1314 /*
1315 * Snapshot current HW counters to "clear" them.
1316 * This should be done when the driver is loaded except that for
1317 * some reason we get a zillion errors when brining up the link.
1318 */
1319 if (dev->rcv_errors == 0) {
1320 struct ipath_layer_counters cntrs;
1321
1322 ipath_layer_get_counters(to_idev(ibdev)->dd, &cntrs);
1323 dev->rcv_errors++;
1324 dev->n_symbol_error_counter = cntrs.symbol_error_counter;
1325 dev->n_link_error_recovery_counter =
1326 cntrs.link_error_recovery_counter;
1327 dev->n_link_downed_counter = cntrs.link_downed_counter;
1328 dev->n_port_rcv_errors = cntrs.port_rcv_errors + 1;
1329 dev->n_port_rcv_remphys_errors =
1330 cntrs.port_rcv_remphys_errors;
1331 dev->n_port_xmit_discards = cntrs.port_xmit_discards;
1332 dev->n_port_xmit_data = cntrs.port_xmit_data;
1333 dev->n_port_rcv_data = cntrs.port_rcv_data;
1334 dev->n_port_xmit_packets = cntrs.port_xmit_packets;
1335 dev->n_port_rcv_packets = cntrs.port_rcv_packets;
1336 }
1337 switch (in_mad->mad_hdr.mgmt_class) {
1338 case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
1339 case IB_MGMT_CLASS_SUBN_LID_ROUTED:
1340 ret = process_subn(ibdev, mad_flags, port_num,
1341 in_mad, out_mad);
1342 goto bail;
1343 case IB_MGMT_CLASS_PERF_MGMT:
1344 ret = process_perf(ibdev, port_num, in_mad, out_mad);
1345 goto bail;
1346 default:
1347 ret = IB_MAD_RESULT_SUCCESS;
1348 }
1349
1350bail:
1351 return ret;
1352}
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
new file mode 100644
index 000000000000..6058d70d7577
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -0,0 +1,913 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/err.h>
34#include <linux/vmalloc.h>
35
36#include "ipath_verbs.h"
37#include "ips_common.h"
38
/* Number of QPN bits held by one bitmap page, and the matching mask. */
#define BITS_PER_PAGE		(PAGE_SIZE*BITS_PER_BYTE)
#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)

/* Rebuild a QP number from a bitmap page pointer and a bit offset. */
#define mk_qpn(qpt, map, off)	(((map) - (qpt)->map) * BITS_PER_PAGE + \
				 (off))

/* Next clear bit in a bitmap page, searching from bit 'off'. */
#define find_next_offset(map, off) find_next_zero_bit((map)->page, \
						      BITS_PER_PAGE, off)

/*
 * Transition identifiers.
 * NOTE(review): none of these are referenced in the code visible in
 * this part of the file -- confirm they are still needed.
 */
#define TRANS_INVALID	0
#define TRANS_ANY2RST	1
#define TRANS_RST2INIT	2
#define TRANS_INIT2INIT	3
#define TRANS_INIT2RTR	4
#define TRANS_RTR2RTS	5
#define TRANS_RTS2RTS	6
#define TRANS_SQERR2RTS	7
#define TRANS_ANY2ERR	8
#define TRANS_RTS2SQD	9	/* XXX Wait for expected ACKs & signal event */
#define TRANS_SQD2SQD	10	/* error if not drained & parameter change */
#define TRANS_SQD2RTS	11	/* error if not drained */
58
59/*
60 * Convert the AETH credit code into the number of credits.
61 */
62static u32 credit_table[31] = {
63 0, /* 0 */
64 1, /* 1 */
65 2, /* 2 */
66 3, /* 3 */
67 4, /* 4 */
68 6, /* 5 */
69 8, /* 6 */
70 12, /* 7 */
71 16, /* 8 */
72 24, /* 9 */
73 32, /* A */
74 48, /* B */
75 64, /* C */
76 96, /* D */
77 128, /* E */
78 192, /* F */
79 256, /* 10 */
80 384, /* 11 */
81 512, /* 12 */
82 768, /* 13 */
83 1024, /* 14 */
84 1536, /* 15 */
85 2048, /* 16 */
86 3072, /* 17 */
87 4096, /* 18 */
88 6144, /* 19 */
89 8192, /* 1A */
90 12288, /* 1B */
91 16384, /* 1C */
92 24576, /* 1D */
93 32768 /* 1E */
94};
95
96static u32 alloc_qpn(struct ipath_qp_table *qpt)
97{
98 u32 i, offset, max_scan, qpn;
99 struct qpn_map *map;
100 u32 ret;
101
102 qpn = qpt->last + 1;
103 if (qpn >= QPN_MAX)
104 qpn = 2;
105 offset = qpn & BITS_PER_PAGE_MASK;
106 map = &qpt->map[qpn / BITS_PER_PAGE];
107 max_scan = qpt->nmaps - !offset;
108 for (i = 0;;) {
109 if (unlikely(!map->page)) {
110 unsigned long page = get_zeroed_page(GFP_KERNEL);
111 unsigned long flags;
112
113 /*
114 * Free the page if someone raced with us
115 * installing it:
116 */
117 spin_lock_irqsave(&qpt->lock, flags);
118 if (map->page)
119 free_page(page);
120 else
121 map->page = (void *)page;
122 spin_unlock_irqrestore(&qpt->lock, flags);
123 if (unlikely(!map->page))
124 break;
125 }
126 if (likely(atomic_read(&map->n_free))) {
127 do {
128 if (!test_and_set_bit(offset, map->page)) {
129 atomic_dec(&map->n_free);
130 qpt->last = qpn;
131 ret = qpn;
132 goto bail;
133 }
134 offset = find_next_offset(map, offset);
135 qpn = mk_qpn(qpt, map, offset);
136 /*
137 * This test differs from alloc_pidmap().
138 * If find_next_offset() does find a zero
139 * bit, we don't need to check for QPN
140 * wrapping around past our starting QPN.
141 * We just need to be sure we don't loop
142 * forever.
143 */
144 } while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
145 }
146 /*
147 * In order to keep the number of pages allocated to a
148 * minimum, we scan the all existing pages before increasing
149 * the size of the bitmap table.
150 */
151 if (++i > max_scan) {
152 if (qpt->nmaps == QPNMAP_ENTRIES)
153 break;
154 map = &qpt->map[qpt->nmaps++];
155 offset = 0;
156 } else if (map < &qpt->map[qpt->nmaps]) {
157 ++map;
158 offset = 0;
159 } else {
160 map = &qpt->map[0];
161 offset = 2;
162 }
163 qpn = mk_qpn(qpt, map, offset);
164 }
165
166 ret = 0;
167
168bail:
169 return ret;
170}
171
172static void free_qpn(struct ipath_qp_table *qpt, u32 qpn)
173{
174 struct qpn_map *map;
175
176 map = qpt->map + qpn / BITS_PER_PAGE;
177 if (map->page)
178 clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
179 atomic_inc(&map->n_free);
180}
181
182/**
183 * ipath_alloc_qpn - allocate a QP number
184 * @qpt: the QP table
185 * @qp: the QP
186 * @type: the QP type (IB_QPT_SMI and IB_QPT_GSI are special)
187 *
188 * Allocate the next available QPN and put the QP into the hash table.
189 * The hash table holds a reference to the QP.
190 */
191int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
192 enum ib_qp_type type)
193{
194 unsigned long flags;
195 u32 qpn;
196 int ret;
197
198 if (type == IB_QPT_SMI)
199 qpn = 0;
200 else if (type == IB_QPT_GSI)
201 qpn = 1;
202 else {
203 /* Allocate the next available QPN */
204 qpn = alloc_qpn(qpt);
205 if (qpn == 0) {
206 ret = -ENOMEM;
207 goto bail;
208 }
209 }
210 qp->ibqp.qp_num = qpn;
211
212 /* Add the QP to the hash table. */
213 spin_lock_irqsave(&qpt->lock, flags);
214
215 qpn %= qpt->max;
216 qp->next = qpt->table[qpn];
217 qpt->table[qpn] = qp;
218 atomic_inc(&qp->refcount);
219
220 spin_unlock_irqrestore(&qpt->lock, flags);
221 ret = 0;
222
223bail:
224 return ret;
225}
226
227/**
228 * ipath_free_qp - remove a QP from the QP table
229 * @qpt: the QP table
230 * @qp: the QP to remove
231 *
232 * Remove the QP from the table so it can't be found asynchronously by
233 * the receive interrupt routine.
234 */
235void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
236{
237 struct ipath_qp *q, **qpp;
238 unsigned long flags;
239 int fnd = 0;
240
241 spin_lock_irqsave(&qpt->lock, flags);
242
243 /* Remove QP from the hash table. */
244 qpp = &qpt->table[qp->ibqp.qp_num % qpt->max];
245 for (; (q = *qpp) != NULL; qpp = &q->next) {
246 if (q == qp) {
247 *qpp = qp->next;
248 qp->next = NULL;
249 atomic_dec(&qp->refcount);
250 fnd = 1;
251 break;
252 }
253 }
254
255 spin_unlock_irqrestore(&qpt->lock, flags);
256
257 if (!fnd)
258 return;
259
260 /* If QPN is not reserved, mark QPN free in the bitmap. */
261 if (qp->ibqp.qp_num > 1)
262 free_qpn(qpt, qp->ibqp.qp_num);
263
264 wait_event(qp->wait, !atomic_read(&qp->refcount));
265}
266
267/**
268 * ipath_free_all_qps - remove all QPs from the table
269 * @qpt: the QP table to empty
270 */
271void ipath_free_all_qps(struct ipath_qp_table *qpt)
272{
273 unsigned long flags;
274 struct ipath_qp *qp, *nqp;
275 u32 n;
276
277 for (n = 0; n < qpt->max; n++) {
278 spin_lock_irqsave(&qpt->lock, flags);
279 qp = qpt->table[n];
280 qpt->table[n] = NULL;
281 spin_unlock_irqrestore(&qpt->lock, flags);
282
283 while (qp) {
284 nqp = qp->next;
285 if (qp->ibqp.qp_num > 1)
286 free_qpn(qpt, qp->ibqp.qp_num);
287 if (!atomic_dec_and_test(&qp->refcount) ||
288 !ipath_destroy_qp(&qp->ibqp))
289 _VERBS_INFO("QP memory leak!\n");
290 qp = nqp;
291 }
292 }
293
294 for (n = 0; n < ARRAY_SIZE(qpt->map); n++) {
295 if (qpt->map[n].page)
296 free_page((unsigned long)qpt->map[n].page);
297 }
298}
299
300/**
301 * ipath_lookup_qpn - return the QP with the given QPN
302 * @qpt: the QP table
303 * @qpn: the QP number to look up
304 *
305 * The caller is responsible for decrementing the QP reference count
306 * when done.
307 */
308struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn)
309{
310 unsigned long flags;
311 struct ipath_qp *qp;
312
313 spin_lock_irqsave(&qpt->lock, flags);
314
315 for (qp = qpt->table[qpn % qpt->max]; qp; qp = qp->next) {
316 if (qp->ibqp.qp_num == qpn) {
317 atomic_inc(&qp->refcount);
318 break;
319 }
320 }
321
322 spin_unlock_irqrestore(&qpt->lock, flags);
323 return qp;
324}
325
326/**
327 * ipath_reset_qp - initialize the QP state to the reset state
328 * @qp: the QP to reset
329 */
330static void ipath_reset_qp(struct ipath_qp *qp)
331{
332 qp->remote_qpn = 0;
333 qp->qkey = 0;
334 qp->qp_access_flags = 0;
335 qp->s_hdrwords = 0;
336 qp->s_psn = 0;
337 qp->r_psn = 0;
338 atomic_set(&qp->msn, 0);
339 if (qp->ibqp.qp_type == IB_QPT_RC) {
340 qp->s_state = IB_OPCODE_RC_SEND_LAST;
341 qp->r_state = IB_OPCODE_RC_SEND_LAST;
342 } else {
343 qp->s_state = IB_OPCODE_UC_SEND_LAST;
344 qp->r_state = IB_OPCODE_UC_SEND_LAST;
345 }
346 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
347 qp->s_nak_state = 0;
348 qp->s_rnr_timeout = 0;
349 qp->s_head = 0;
350 qp->s_tail = 0;
351 qp->s_cur = 0;
352 qp->s_last = 0;
353 qp->s_ssn = 1;
354 qp->s_lsn = 0;
355 qp->r_rq.head = 0;
356 qp->r_rq.tail = 0;
357 qp->r_reuse_sge = 0;
358}
359
360/**
361 * ipath_modify_qp - modify the attributes of a queue pair
362 * @ibqp: the queue pair who's attributes we're modifying
363 * @attr: the new attributes
364 * @attr_mask: the mask of attributes to modify
365 *
366 * Returns 0 on success, otherwise returns an errno.
367 */
368int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
369 int attr_mask)
370{
371 struct ipath_qp *qp = to_iqp(ibqp);
372 enum ib_qp_state cur_state, new_state;
373 unsigned long flags;
374 int ret;
375
376 spin_lock_irqsave(&qp->r_rq.lock, flags);
377 spin_lock(&qp->s_lock);
378
379 cur_state = attr_mask & IB_QP_CUR_STATE ?
380 attr->cur_qp_state : qp->state;
381 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
382
383 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
384 attr_mask))
385 goto inval;
386
387 switch (new_state) {
388 case IB_QPS_RESET:
389 ipath_reset_qp(qp);
390 break;
391
392 case IB_QPS_ERR:
393 ipath_error_qp(qp);
394 break;
395
396 default:
397 break;
398
399 }
400
401 if (attr_mask & IB_QP_PKEY_INDEX) {
402 struct ipath_ibdev *dev = to_idev(ibqp->device);
403
404 if (attr->pkey_index >= ipath_layer_get_npkeys(dev->dd))
405 goto inval;
406 qp->s_pkey_index = attr->pkey_index;
407 }
408
409 if (attr_mask & IB_QP_DEST_QPN)
410 qp->remote_qpn = attr->dest_qp_num;
411
412 if (attr_mask & IB_QP_SQ_PSN) {
413 qp->s_next_psn = attr->sq_psn;
414 qp->s_last_psn = qp->s_next_psn - 1;
415 }
416
417 if (attr_mask & IB_QP_RQ_PSN)
418 qp->r_psn = attr->rq_psn;
419
420 if (attr_mask & IB_QP_ACCESS_FLAGS)
421 qp->qp_access_flags = attr->qp_access_flags;
422
423 if (attr_mask & IB_QP_AV) {
424 if (attr->ah_attr.dlid == 0 ||
425 attr->ah_attr.dlid >= IPS_MULTICAST_LID_BASE)
426 goto inval;
427 qp->remote_ah_attr = attr->ah_attr;
428 }
429
430 if (attr_mask & IB_QP_PATH_MTU)
431 qp->path_mtu = attr->path_mtu;
432
433 if (attr_mask & IB_QP_RETRY_CNT)
434 qp->s_retry = qp->s_retry_cnt = attr->retry_cnt;
435
436 if (attr_mask & IB_QP_RNR_RETRY) {
437 qp->s_rnr_retry = attr->rnr_retry;
438 if (qp->s_rnr_retry > 7)
439 qp->s_rnr_retry = 7;
440 qp->s_rnr_retry_cnt = qp->s_rnr_retry;
441 }
442
443 if (attr_mask & IB_QP_MIN_RNR_TIMER) {
444 if (attr->min_rnr_timer > 31)
445 goto inval;
446 qp->s_min_rnr_timer = attr->min_rnr_timer;
447 }
448
449 if (attr_mask & IB_QP_QKEY)
450 qp->qkey = attr->qkey;
451
452 if (attr_mask & IB_QP_PKEY_INDEX)
453 qp->s_pkey_index = attr->pkey_index;
454
455 qp->state = new_state;
456 spin_unlock(&qp->s_lock);
457 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
458
459 /*
460 * If QP1 changed to the RTS state, try to move to the link to INIT
461 * even if it was ACTIVE so the SM will reinitialize the SMA's
462 * state.
463 */
464 if (qp->ibqp.qp_num == 1 && new_state == IB_QPS_RTS) {
465 struct ipath_ibdev *dev = to_idev(ibqp->device);
466
467 ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
468 }
469 ret = 0;
470 goto bail;
471
472inval:
473 spin_unlock(&qp->s_lock);
474 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
475 ret = -EINVAL;
476
477bail:
478 return ret;
479}
480
481int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
482 int attr_mask, struct ib_qp_init_attr *init_attr)
483{
484 struct ipath_qp *qp = to_iqp(ibqp);
485
486 attr->qp_state = qp->state;
487 attr->cur_qp_state = attr->qp_state;
488 attr->path_mtu = qp->path_mtu;
489 attr->path_mig_state = 0;
490 attr->qkey = qp->qkey;
491 attr->rq_psn = qp->r_psn;
492 attr->sq_psn = qp->s_next_psn;
493 attr->dest_qp_num = qp->remote_qpn;
494 attr->qp_access_flags = qp->qp_access_flags;
495 attr->cap.max_send_wr = qp->s_size - 1;
496 attr->cap.max_recv_wr = qp->r_rq.size - 1;
497 attr->cap.max_send_sge = qp->s_max_sge;
498 attr->cap.max_recv_sge = qp->r_rq.max_sge;
499 attr->cap.max_inline_data = 0;
500 attr->ah_attr = qp->remote_ah_attr;
501 memset(&attr->alt_ah_attr, 0, sizeof(attr->alt_ah_attr));
502 attr->pkey_index = qp->s_pkey_index;
503 attr->alt_pkey_index = 0;
504 attr->en_sqd_async_notify = 0;
505 attr->sq_draining = 0;
506 attr->max_rd_atomic = 1;
507 attr->max_dest_rd_atomic = 1;
508 attr->min_rnr_timer = qp->s_min_rnr_timer;
509 attr->port_num = 1;
510 attr->timeout = 0;
511 attr->retry_cnt = qp->s_retry_cnt;
512 attr->rnr_retry = qp->s_rnr_retry;
513 attr->alt_port_num = 0;
514 attr->alt_timeout = 0;
515
516 init_attr->event_handler = qp->ibqp.event_handler;
517 init_attr->qp_context = qp->ibqp.qp_context;
518 init_attr->send_cq = qp->ibqp.send_cq;
519 init_attr->recv_cq = qp->ibqp.recv_cq;
520 init_attr->srq = qp->ibqp.srq;
521 init_attr->cap = attr->cap;
522 init_attr->sq_sig_type =
523 (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
524 ? IB_SIGNAL_REQ_WR : 0;
525 init_attr->qp_type = qp->ibqp.qp_type;
526 init_attr->port_num = 1;
527 return 0;
528}
529
530/**
531 * ipath_compute_aeth - compute the AETH (syndrome + MSN)
532 * @qp: the queue pair to compute the AETH for
533 *
534 * Returns the AETH.
535 *
536 * The QP s_lock should be held.
537 */
538__be32 ipath_compute_aeth(struct ipath_qp *qp)
539{
540 u32 aeth = atomic_read(&qp->msn) & IPS_MSN_MASK;
541
542 if (qp->s_nak_state) {
543 aeth |= qp->s_nak_state << IPS_AETH_CREDIT_SHIFT;
544 } else if (qp->ibqp.srq) {
545 /*
546 * Shared receive queues don't generate credits.
547 * Set the credit field to the invalid value.
548 */
549 aeth |= IPS_AETH_CREDIT_INVAL << IPS_AETH_CREDIT_SHIFT;
550 } else {
551 u32 min, max, x;
552 u32 credits;
553
554 /*
555 * Compute the number of credits available (RWQEs).
556 * XXX Not holding the r_rq.lock here so there is a small
557 * chance that the pair of reads are not atomic.
558 */
559 credits = qp->r_rq.head - qp->r_rq.tail;
560 if ((int)credits < 0)
561 credits += qp->r_rq.size;
562 /*
563 * Binary search the credit table to find the code to
564 * use.
565 */
566 min = 0;
567 max = 31;
568 for (;;) {
569 x = (min + max) / 2;
570 if (credit_table[x] == credits)
571 break;
572 if (credit_table[x] > credits)
573 max = x;
574 else if (min == x)
575 break;
576 else
577 min = x;
578 }
579 aeth |= x << IPS_AETH_CREDIT_SHIFT;
580 }
581 return cpu_to_be32(aeth);
582}
583
584/**
585 * ipath_create_qp - create a queue pair for a device
586 * @ibpd: the protection domain who's device we create the queue pair for
587 * @init_attr: the attributes of the queue pair
588 * @udata: unused by InfiniPath
589 *
590 * Returns the queue pair on success, otherwise returns an errno.
591 *
592 * Called by the ib_create_qp() core verbs function.
593 */
594struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
595 struct ib_qp_init_attr *init_attr,
596 struct ib_udata *udata)
597{
598 struct ipath_qp *qp;
599 int err;
600 struct ipath_swqe *swq = NULL;
601 struct ipath_ibdev *dev;
602 size_t sz;
603 struct ib_qp *ret;
604
605 if (init_attr->cap.max_send_sge > 255 ||
606 init_attr->cap.max_recv_sge > 255) {
607 ret = ERR_PTR(-ENOMEM);
608 goto bail;
609 }
610
611 switch (init_attr->qp_type) {
612 case IB_QPT_UC:
613 case IB_QPT_RC:
614 sz = sizeof(struct ipath_sge) *
615 init_attr->cap.max_send_sge +
616 sizeof(struct ipath_swqe);
617 swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
618 if (swq == NULL) {
619 ret = ERR_PTR(-ENOMEM);
620 goto bail;
621 }
622 /* FALLTHROUGH */
623 case IB_QPT_UD:
624 case IB_QPT_SMI:
625 case IB_QPT_GSI:
626 qp = kmalloc(sizeof(*qp), GFP_KERNEL);
627 if (!qp) {
628 ret = ERR_PTR(-ENOMEM);
629 goto bail;
630 }
631 qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
632 sz = sizeof(struct ipath_sge) *
633 init_attr->cap.max_recv_sge +
634 sizeof(struct ipath_rwqe);
635 qp->r_rq.wq = vmalloc(qp->r_rq.size * sz);
636 if (!qp->r_rq.wq) {
637 kfree(qp);
638 ret = ERR_PTR(-ENOMEM);
639 goto bail;
640 }
641
642 /*
643 * ib_create_qp() will initialize qp->ibqp
644 * except for qp->ibqp.qp_num.
645 */
646 spin_lock_init(&qp->s_lock);
647 spin_lock_init(&qp->r_rq.lock);
648 atomic_set(&qp->refcount, 0);
649 init_waitqueue_head(&qp->wait);
650 tasklet_init(&qp->s_task,
651 init_attr->qp_type == IB_QPT_RC ?
652 ipath_do_rc_send : ipath_do_uc_send,
653 (unsigned long)qp);
654 qp->piowait.next = LIST_POISON1;
655 qp->piowait.prev = LIST_POISON2;
656 qp->timerwait.next = LIST_POISON1;
657 qp->timerwait.prev = LIST_POISON2;
658 qp->state = IB_QPS_RESET;
659 qp->s_wq = swq;
660 qp->s_size = init_attr->cap.max_send_wr + 1;
661 qp->s_max_sge = init_attr->cap.max_send_sge;
662 qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
663 qp->s_flags = init_attr->sq_sig_type == IB_SIGNAL_REQ_WR ?
664 1 << IPATH_S_SIGNAL_REQ_WR : 0;
665 dev = to_idev(ibpd->device);
666 err = ipath_alloc_qpn(&dev->qp_table, qp,
667 init_attr->qp_type);
668 if (err) {
669 vfree(swq);
670 vfree(qp->r_rq.wq);
671 kfree(qp);
672 ret = ERR_PTR(err);
673 goto bail;
674 }
675 ipath_reset_qp(qp);
676
677 /* Tell the core driver that the kernel SMA is present. */
678 if (qp->ibqp.qp_type == IB_QPT_SMI)
679 ipath_layer_set_verbs_flags(dev->dd,
680 IPATH_VERBS_KERNEL_SMA);
681 break;
682
683 default:
684 /* Don't support raw QPs */
685 ret = ERR_PTR(-ENOSYS);
686 goto bail;
687 }
688
689 init_attr->cap.max_inline_data = 0;
690
691 ret = &qp->ibqp;
692
693bail:
694 return ret;
695}
696
697/**
698 * ipath_destroy_qp - destroy a queue pair
699 * @ibqp: the queue pair to destroy
700 *
701 * Returns 0 on success.
702 *
703 * Note that this can be called while the QP is actively sending or
704 * receiving!
705 */
706int ipath_destroy_qp(struct ib_qp *ibqp)
707{
708 struct ipath_qp *qp = to_iqp(ibqp);
709 struct ipath_ibdev *dev = to_idev(ibqp->device);
710 unsigned long flags;
711
712 /* Tell the core driver that the kernel SMA is gone. */
713 if (qp->ibqp.qp_type == IB_QPT_SMI)
714 ipath_layer_set_verbs_flags(dev->dd, 0);
715
716 spin_lock_irqsave(&qp->r_rq.lock, flags);
717 spin_lock(&qp->s_lock);
718 qp->state = IB_QPS_ERR;
719 spin_unlock(&qp->s_lock);
720 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
721
722 /* Stop the sending tasklet. */
723 tasklet_kill(&qp->s_task);
724
725 /* Make sure the QP isn't on the timeout list. */
726 spin_lock_irqsave(&dev->pending_lock, flags);
727 if (qp->timerwait.next != LIST_POISON1)
728 list_del(&qp->timerwait);
729 if (qp->piowait.next != LIST_POISON1)
730 list_del(&qp->piowait);
731 spin_unlock_irqrestore(&dev->pending_lock, flags);
732
733 /*
734 * Make sure that the QP is not in the QPN table so receive
735 * interrupts will discard packets for this QP. XXX Also remove QP
736 * from multicast table.
737 */
738 if (atomic_read(&qp->refcount) != 0)
739 ipath_free_qp(&dev->qp_table, qp);
740
741 vfree(qp->s_wq);
742 vfree(qp->r_rq.wq);
743 kfree(qp);
744 return 0;
745}
746
747/**
748 * ipath_init_qp_table - initialize the QP table for a device
749 * @idev: the device who's QP table we're initializing
750 * @size: the size of the QP table
751 *
752 * Returns 0 on success, otherwise returns an errno.
753 */
754int ipath_init_qp_table(struct ipath_ibdev *idev, int size)
755{
756 int i;
757 int ret;
758
759 idev->qp_table.last = 1; /* QPN 0 and 1 are special. */
760 idev->qp_table.max = size;
761 idev->qp_table.nmaps = 1;
762 idev->qp_table.table = kzalloc(size * sizeof(*idev->qp_table.table),
763 GFP_KERNEL);
764 if (idev->qp_table.table == NULL) {
765 ret = -ENOMEM;
766 goto bail;
767 }
768
769 for (i = 0; i < ARRAY_SIZE(idev->qp_table.map); i++) {
770 atomic_set(&idev->qp_table.map[i].n_free, BITS_PER_PAGE);
771 idev->qp_table.map[i].page = NULL;
772 }
773
774 ret = 0;
775
776bail:
777 return ret;
778}
779
780/**
781 * ipath_sqerror_qp - put a QP's send queue into an error state
782 * @qp: QP who's send queue will be put into an error state
783 * @wc: the WC responsible for putting the QP in this state
784 *
785 * Flushes the send work queue.
786 * The QP s_lock should be held.
787 */
788
789void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
790{
791 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
792 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
793
794 _VERBS_INFO("Send queue error on QP%d/%d: err: %d\n",
795 qp->ibqp.qp_num, qp->remote_qpn, wc->status);
796
797 spin_lock(&dev->pending_lock);
798 /* XXX What if its already removed by the timeout code? */
799 if (qp->timerwait.next != LIST_POISON1)
800 list_del(&qp->timerwait);
801 if (qp->piowait.next != LIST_POISON1)
802 list_del(&qp->piowait);
803 spin_unlock(&dev->pending_lock);
804
805 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
806 if (++qp->s_last >= qp->s_size)
807 qp->s_last = 0;
808
809 wc->status = IB_WC_WR_FLUSH_ERR;
810
811 while (qp->s_last != qp->s_head) {
812 wc->wr_id = wqe->wr.wr_id;
813 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
814 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
815 if (++qp->s_last >= qp->s_size)
816 qp->s_last = 0;
817 wqe = get_swqe_ptr(qp, qp->s_last);
818 }
819 qp->s_cur = qp->s_tail = qp->s_head;
820 qp->state = IB_QPS_SQE;
821}
822
823/**
824 * ipath_error_qp - put a QP into an error state
825 * @qp: the QP to put into an error state
826 *
827 * Flushes both send and receive work queues.
828 * QP r_rq.lock and s_lock should be held.
829 */
830
831void ipath_error_qp(struct ipath_qp *qp)
832{
833 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
834 struct ib_wc wc;
835
836 _VERBS_INFO("QP%d/%d in error state\n",
837 qp->ibqp.qp_num, qp->remote_qpn);
838
839 spin_lock(&dev->pending_lock);
840 /* XXX What if its already removed by the timeout code? */
841 if (qp->timerwait.next != LIST_POISON1)
842 list_del(&qp->timerwait);
843 if (qp->piowait.next != LIST_POISON1)
844 list_del(&qp->piowait);
845 spin_unlock(&dev->pending_lock);
846
847 wc.status = IB_WC_WR_FLUSH_ERR;
848 wc.vendor_err = 0;
849 wc.byte_len = 0;
850 wc.imm_data = 0;
851 wc.qp_num = qp->ibqp.qp_num;
852 wc.src_qp = 0;
853 wc.wc_flags = 0;
854 wc.pkey_index = 0;
855 wc.slid = 0;
856 wc.sl = 0;
857 wc.dlid_path_bits = 0;
858 wc.port_num = 0;
859
860 while (qp->s_last != qp->s_head) {
861 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
862
863 wc.wr_id = wqe->wr.wr_id;
864 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
865 if (++qp->s_last >= qp->s_size)
866 qp->s_last = 0;
867 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
868 }
869 qp->s_cur = qp->s_tail = qp->s_head;
870 qp->s_hdrwords = 0;
871 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
872
873 wc.opcode = IB_WC_RECV;
874 while (qp->r_rq.tail != qp->r_rq.head) {
875 wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id;
876 if (++qp->r_rq.tail >= qp->r_rq.size)
877 qp->r_rq.tail = 0;
878 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
879 }
880}
881
882/**
883 * ipath_get_credit - flush the send work queue of a QP
884 * @qp: the qp who's send work queue to flush
885 * @aeth: the Acknowledge Extended Transport Header
886 *
887 * The QP s_lock should be held.
888 */
889void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
890{
891 u32 credit = (aeth >> IPS_AETH_CREDIT_SHIFT) & IPS_AETH_CREDIT_MASK;
892
893 /*
894 * If the credit is invalid, we can send
895 * as many packets as we like. Otherwise, we have to
896 * honor the credit field.
897 */
898 if (credit == IPS_AETH_CREDIT_INVAL) {
899 qp->s_lsn = (u32) -1;
900 } else if (qp->s_lsn != (u32) -1) {
901 /* Compute new LSN (i.e., MSN + credit) */
902 credit = (aeth + credit_table[credit]) & IPS_MSN_MASK;
903 if (ipath_cmp24(credit, qp->s_lsn) > 0)
904 qp->s_lsn = credit;
905 }
906
907 /* Restart sending if it was blocked due to lack of credits. */
908 if (qp->s_cur != qp->s_head &&
909 (qp->s_lsn == (u32) -1 ||
910 ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
911 qp->s_lsn + 1) <= 0))
912 tasklet_hi_schedule(&qp->s_task);
913}
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
new file mode 100644
index 000000000000..f232e77b78ee
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -0,0 +1,552 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "ipath_verbs.h"
34
35/*
36 * Convert the AETH RNR timeout code into the number of milliseconds.
37 */
38const u32 ib_ipath_rnr_table[32] = {
39 656, /* 0 */
40 1, /* 1 */
41 1, /* 2 */
42 1, /* 3 */
43 1, /* 4 */
44 1, /* 5 */
45 1, /* 6 */
46 1, /* 7 */
47 1, /* 8 */
48 1, /* 9 */
49 1, /* A */
50 1, /* B */
51 1, /* C */
52 1, /* D */
53 2, /* E */
54 2, /* F */
55 3, /* 10 */
56 4, /* 11 */
57 6, /* 12 */
58 8, /* 13 */
59 11, /* 14 */
60 16, /* 15 */
61 21, /* 16 */
62 31, /* 17 */
63 41, /* 18 */
64 62, /* 19 */
65 82, /* 1A */
66 123, /* 1B */
67 164, /* 1C */
68 246, /* 1D */
69 328, /* 1E */
70 492 /* 1F */
71};
72
73/**
74 * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
75 * @qp: the QP
76 *
77 * XXX Use a simple list for now. We might need a priority
78 * queue if we have lots of QPs waiting for RNR timeouts
79 * but that should be rare.
80 */
81void ipath_insert_rnr_queue(struct ipath_qp *qp)
82{
83 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
84 unsigned long flags;
85
86 spin_lock_irqsave(&dev->pending_lock, flags);
87 if (list_empty(&dev->rnrwait))
88 list_add(&qp->timerwait, &dev->rnrwait);
89 else {
90 struct list_head *l = &dev->rnrwait;
91 struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp,
92 timerwait);
93
94 while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
95 qp->s_rnr_timeout -= nqp->s_rnr_timeout;
96 l = l->next;
97 if (l->next == &dev->rnrwait)
98 break;
99 nqp = list_entry(l->next, struct ipath_qp,
100 timerwait);
101 }
102 list_add(&qp->timerwait, l);
103 }
104 spin_unlock_irqrestore(&dev->pending_lock, flags);
105}
106
107/**
108 * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
109 * @qp: the QP
110 * @wr_id_only: update wr_id only, not SGEs
111 *
112 * Return 0 if no RWQE is available, otherwise return 1.
113 *
114 * Called at interrupt level with the QP r_rq.lock held.
115 */
116int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
117{
118 struct ipath_rq *rq;
119 struct ipath_srq *srq;
120 struct ipath_rwqe *wqe;
121 int ret;
122
123 if (!qp->ibqp.srq) {
124 rq = &qp->r_rq;
125 if (unlikely(rq->tail == rq->head)) {
126 ret = 0;
127 goto bail;
128 }
129 wqe = get_rwqe_ptr(rq, rq->tail);
130 qp->r_wr_id = wqe->wr_id;
131 if (!wr_id_only) {
132 qp->r_sge.sge = wqe->sg_list[0];
133 qp->r_sge.sg_list = wqe->sg_list + 1;
134 qp->r_sge.num_sge = wqe->num_sge;
135 qp->r_len = wqe->length;
136 }
137 if (++rq->tail >= rq->size)
138 rq->tail = 0;
139 ret = 1;
140 goto bail;
141 }
142
143 srq = to_isrq(qp->ibqp.srq);
144 rq = &srq->rq;
145 spin_lock(&rq->lock);
146 if (unlikely(rq->tail == rq->head)) {
147 spin_unlock(&rq->lock);
148 ret = 0;
149 goto bail;
150 }
151 wqe = get_rwqe_ptr(rq, rq->tail);
152 qp->r_wr_id = wqe->wr_id;
153 if (!wr_id_only) {
154 qp->r_sge.sge = wqe->sg_list[0];
155 qp->r_sge.sg_list = wqe->sg_list + 1;
156 qp->r_sge.num_sge = wqe->num_sge;
157 qp->r_len = wqe->length;
158 }
159 if (++rq->tail >= rq->size)
160 rq->tail = 0;
161 if (srq->ibsrq.event_handler) {
162 struct ib_event ev;
163 u32 n;
164
165 if (rq->head < rq->tail)
166 n = rq->size + rq->head - rq->tail;
167 else
168 n = rq->head - rq->tail;
169 if (n < srq->limit) {
170 srq->limit = 0;
171 spin_unlock(&rq->lock);
172 ev.device = qp->ibqp.device;
173 ev.element.srq = qp->ibqp.srq;
174 ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
175 srq->ibsrq.event_handler(&ev,
176 srq->ibsrq.srq_context);
177 } else
178 spin_unlock(&rq->lock);
179 } else
180 spin_unlock(&rq->lock);
181 ret = 1;
182
183bail:
184 return ret;
185}
186
187/**
188 * ipath_ruc_loopback - handle UC and RC lookback requests
189 * @sqp: the loopback QP
190 * @wc: the work completion entry
191 *
192 * This is called from ipath_do_uc_send() or ipath_do_rc_send() to
193 * forward a WQE addressed to the same HCA.
194 * Note that although we are single threaded due to the tasklet, we still
195 * have to protect against post_send(). We don't have to worry about
196 * receive interrupts since this is a connected protocol and all packets
197 * will pass through here.
198 */
199void ipath_ruc_loopback(struct ipath_qp *sqp, struct ib_wc *wc)
200{
201 struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
202 struct ipath_qp *qp;
203 struct ipath_swqe *wqe;
204 struct ipath_sge *sge;
205 unsigned long flags;
206 u64 sdata;
207
208 qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
209 if (!qp) {
210 dev->n_pkt_drops++;
211 return;
212 }
213
214again:
215 spin_lock_irqsave(&sqp->s_lock, flags);
216
217 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) {
218 spin_unlock_irqrestore(&sqp->s_lock, flags);
219 goto done;
220 }
221
222 /* Get the next send request. */
223 if (sqp->s_last == sqp->s_head) {
224 /* Send work queue is empty. */
225 spin_unlock_irqrestore(&sqp->s_lock, flags);
226 goto done;
227 }
228
229 /*
230 * We can rely on the entry not changing without the s_lock
231 * being held until we update s_last.
232 */
233 wqe = get_swqe_ptr(sqp, sqp->s_last);
234 spin_unlock_irqrestore(&sqp->s_lock, flags);
235
236 wc->wc_flags = 0;
237 wc->imm_data = 0;
238
239 sqp->s_sge.sge = wqe->sg_list[0];
240 sqp->s_sge.sg_list = wqe->sg_list + 1;
241 sqp->s_sge.num_sge = wqe->wr.num_sge;
242 sqp->s_len = wqe->length;
243 switch (wqe->wr.opcode) {
244 case IB_WR_SEND_WITH_IMM:
245 wc->wc_flags = IB_WC_WITH_IMM;
246 wc->imm_data = wqe->wr.imm_data;
247 /* FALLTHROUGH */
248 case IB_WR_SEND:
249 spin_lock_irqsave(&qp->r_rq.lock, flags);
250 if (!ipath_get_rwqe(qp, 0)) {
251 rnr_nak:
252 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
253 /* Handle RNR NAK */
254 if (qp->ibqp.qp_type == IB_QPT_UC)
255 goto send_comp;
256 if (sqp->s_rnr_retry == 0) {
257 wc->status = IB_WC_RNR_RETRY_EXC_ERR;
258 goto err;
259 }
260 if (sqp->s_rnr_retry_cnt < 7)
261 sqp->s_rnr_retry--;
262 dev->n_rnr_naks++;
263 sqp->s_rnr_timeout =
264 ib_ipath_rnr_table[sqp->s_min_rnr_timer];
265 ipath_insert_rnr_queue(sqp);
266 goto done;
267 }
268 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
269 break;
270
271 case IB_WR_RDMA_WRITE_WITH_IMM:
272 wc->wc_flags = IB_WC_WITH_IMM;
273 wc->imm_data = wqe->wr.imm_data;
274 spin_lock_irqsave(&qp->r_rq.lock, flags);
275 if (!ipath_get_rwqe(qp, 1))
276 goto rnr_nak;
277 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
278 /* FALLTHROUGH */
279 case IB_WR_RDMA_WRITE:
280 if (wqe->length == 0)
281 break;
282 if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length,
283 wqe->wr.wr.rdma.remote_addr,
284 wqe->wr.wr.rdma.rkey,
285 IB_ACCESS_REMOTE_WRITE))) {
286 acc_err:
287 wc->status = IB_WC_REM_ACCESS_ERR;
288 err:
289 wc->wr_id = wqe->wr.wr_id;
290 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
291 wc->vendor_err = 0;
292 wc->byte_len = 0;
293 wc->qp_num = sqp->ibqp.qp_num;
294 wc->src_qp = sqp->remote_qpn;
295 wc->pkey_index = 0;
296 wc->slid = sqp->remote_ah_attr.dlid;
297 wc->sl = sqp->remote_ah_attr.sl;
298 wc->dlid_path_bits = 0;
299 wc->port_num = 0;
300 ipath_sqerror_qp(sqp, wc);
301 goto done;
302 }
303 break;
304
305 case IB_WR_RDMA_READ:
306 if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length,
307 wqe->wr.wr.rdma.remote_addr,
308 wqe->wr.wr.rdma.rkey,
309 IB_ACCESS_REMOTE_READ)))
310 goto acc_err;
311 if (unlikely(!(qp->qp_access_flags &
312 IB_ACCESS_REMOTE_READ)))
313 goto acc_err;
314 qp->r_sge.sge = wqe->sg_list[0];
315 qp->r_sge.sg_list = wqe->sg_list + 1;
316 qp->r_sge.num_sge = wqe->wr.num_sge;
317 break;
318
319 case IB_WR_ATOMIC_CMP_AND_SWP:
320 case IB_WR_ATOMIC_FETCH_AND_ADD:
321 if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64),
322 wqe->wr.wr.rdma.remote_addr,
323 wqe->wr.wr.rdma.rkey,
324 IB_ACCESS_REMOTE_ATOMIC)))
325 goto acc_err;
326 /* Perform atomic OP and save result. */
327 sdata = wqe->wr.wr.atomic.swap;
328 spin_lock_irqsave(&dev->pending_lock, flags);
329 qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
330 if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
331 *(u64 *) qp->r_sge.sge.vaddr =
332 qp->r_atomic_data + sdata;
333 else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add)
334 *(u64 *) qp->r_sge.sge.vaddr = sdata;
335 spin_unlock_irqrestore(&dev->pending_lock, flags);
336 *(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data;
337 goto send_comp;
338
339 default:
340 goto done;
341 }
342
343 sge = &sqp->s_sge.sge;
344 while (sqp->s_len) {
345 u32 len = sqp->s_len;
346
347 if (len > sge->length)
348 len = sge->length;
349 BUG_ON(len == 0);
350 ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
351 sge->vaddr += len;
352 sge->length -= len;
353 sge->sge_length -= len;
354 if (sge->sge_length == 0) {
355 if (--sqp->s_sge.num_sge)
356 *sge = *sqp->s_sge.sg_list++;
357 } else if (sge->length == 0 && sge->mr != NULL) {
358 if (++sge->n >= IPATH_SEGSZ) {
359 if (++sge->m >= sge->mr->mapsz)
360 break;
361 sge->n = 0;
362 }
363 sge->vaddr =
364 sge->mr->map[sge->m]->segs[sge->n].vaddr;
365 sge->length =
366 sge->mr->map[sge->m]->segs[sge->n].length;
367 }
368 sqp->s_len -= len;
369 }
370
371 if (wqe->wr.opcode == IB_WR_RDMA_WRITE ||
372 wqe->wr.opcode == IB_WR_RDMA_READ)
373 goto send_comp;
374
375 if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
376 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
377 else
378 wc->opcode = IB_WC_RECV;
379 wc->wr_id = qp->r_wr_id;
380 wc->status = IB_WC_SUCCESS;
381 wc->vendor_err = 0;
382 wc->byte_len = wqe->length;
383 wc->qp_num = qp->ibqp.qp_num;
384 wc->src_qp = qp->remote_qpn;
385 /* XXX do we know which pkey matched? Only needed for GSI. */
386 wc->pkey_index = 0;
387 wc->slid = qp->remote_ah_attr.dlid;
388 wc->sl = qp->remote_ah_attr.sl;
389 wc->dlid_path_bits = 0;
390 /* Signal completion event if the solicited bit is set. */
391 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
392 wqe->wr.send_flags & IB_SEND_SOLICITED);
393
394send_comp:
395 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
396
397 if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) ||
398 (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
399 wc->wr_id = wqe->wr.wr_id;
400 wc->status = IB_WC_SUCCESS;
401 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
402 wc->vendor_err = 0;
403 wc->byte_len = wqe->length;
404 wc->qp_num = sqp->ibqp.qp_num;
405 wc->src_qp = 0;
406 wc->pkey_index = 0;
407 wc->slid = 0;
408 wc->sl = 0;
409 wc->dlid_path_bits = 0;
410 wc->port_num = 0;
411 ipath_cq_enter(to_icq(sqp->ibqp.send_cq), wc, 0);
412 }
413
414 /* Update s_last now that we are finished with the SWQE */
415 spin_lock_irqsave(&sqp->s_lock, flags);
416 if (++sqp->s_last >= sqp->s_size)
417 sqp->s_last = 0;
418 spin_unlock_irqrestore(&sqp->s_lock, flags);
419 goto again;
420
421done:
422 if (atomic_dec_and_test(&qp->refcount))
423 wake_up(&qp->wait);
424}
425
426/**
427 * ipath_no_bufs_available - tell the layer driver we need buffers
428 * @qp: the QP that caused the problem
429 * @dev: the device we ran out of buffers on
430 *
431 * Called when we run out of PIO buffers.
432 */
433void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
434{
435 unsigned long flags;
436
437 spin_lock_irqsave(&dev->pending_lock, flags);
438 if (qp->piowait.next == LIST_POISON1)
439 list_add_tail(&qp->piowait, &dev->piowait);
440 spin_unlock_irqrestore(&dev->pending_lock, flags);
441 /*
442 * Note that as soon as ipath_layer_want_buffer() is called and
443 * possibly before it returns, ipath_ib_piobufavail()
444 * could be called. If we are still in the tasklet function,
445 * tasklet_hi_schedule() will not call us until the next time
446 * tasklet_hi_schedule() is called.
447 * We clear the tasklet flag now since we are committing to return
448 * from the tasklet function.
449 */
450 clear_bit(IPATH_S_BUSY, &qp->s_flags);
451 tasklet_unlock(&qp->s_task);
452 ipath_layer_want_buffer(dev->dd);
453 dev->n_piowait++;
454}
455
456/**
457 * ipath_post_rc_send - post RC and UC sends
458 * @qp: the QP to post on
459 * @wr: the work request to send
460 */
461int ipath_post_rc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
462{
463 struct ipath_swqe *wqe;
464 unsigned long flags;
465 u32 next;
466 int i, j;
467 int acc;
468 int ret;
469
470 /*
471 * Don't allow RDMA reads or atomic operations on UC or
472 * undefined operations.
473 * Make sure buffer is large enough to hold the result for atomics.
474 */
475 if (qp->ibqp.qp_type == IB_QPT_UC) {
476 if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) {
477 ret = -EINVAL;
478 goto bail;
479 }
480 } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
481 ret = -EINVAL;
482 goto bail;
483 } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
484 (wr->num_sge == 0 ||
485 wr->sg_list[0].length < sizeof(u64) ||
486 wr->sg_list[0].addr & (sizeof(u64) - 1))) {
487 ret = -EINVAL;
488 goto bail;
489 }
490 /* IB spec says that num_sge == 0 is OK. */
491 if (wr->num_sge > qp->s_max_sge) {
492 ret = -ENOMEM;
493 goto bail;
494 }
495 spin_lock_irqsave(&qp->s_lock, flags);
496 next = qp->s_head + 1;
497 if (next >= qp->s_size)
498 next = 0;
499 if (next == qp->s_last) {
500 spin_unlock_irqrestore(&qp->s_lock, flags);
501 ret = -EINVAL;
502 goto bail;
503 }
504
505 wqe = get_swqe_ptr(qp, qp->s_head);
506 wqe->wr = *wr;
507 wqe->ssn = qp->s_ssn++;
508 wqe->sg_list[0].mr = NULL;
509 wqe->sg_list[0].vaddr = NULL;
510 wqe->sg_list[0].length = 0;
511 wqe->sg_list[0].sge_length = 0;
512 wqe->length = 0;
513 acc = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0;
514 for (i = 0, j = 0; i < wr->num_sge; i++) {
515 if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) {
516 spin_unlock_irqrestore(&qp->s_lock, flags);
517 ret = -EINVAL;
518 goto bail;
519 }
520 if (wr->sg_list[i].length == 0)
521 continue;
522 if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table,
523 &wqe->sg_list[j], &wr->sg_list[i],
524 acc)) {
525 spin_unlock_irqrestore(&qp->s_lock, flags);
526 ret = -EINVAL;
527 goto bail;
528 }
529 wqe->length += wr->sg_list[i].length;
530 j++;
531 }
532 wqe->wr.num_sge = j;
533 qp->s_head = next;
534 /*
535 * Wake up the send tasklet if the QP is not waiting
536 * for an RNR timeout.
537 */
538 next = qp->s_rnr_timeout;
539 spin_unlock_irqrestore(&qp->s_lock, flags);
540
541 if (next == 0) {
542 if (qp->ibqp.qp_type == IB_QPT_UC)
543 ipath_do_uc_send((unsigned long) qp);
544 else
545 ipath_do_rc_send((unsigned long) qp);
546 }
547
548 ret = 0;
549
550bail:
551 return ret;
552}