author     Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
commit     1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree       0bba044c4ce775e45a88a51686b5d9f90697ea9d /drivers/infiniband
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/Kconfig  14
-rw-r--r--  drivers/infiniband/Makefile  3
-rw-r--r--  drivers/infiniband/core/Makefile  12
-rw-r--r--  drivers/infiniband/core/agent.c  373
-rw-r--r--  drivers/infiniband/core/agent.h  55
-rw-r--r--  drivers/infiniband/core/agent_priv.h  63
-rw-r--r--  drivers/infiniband/core/cache.c  365
-rw-r--r--  drivers/infiniband/core/core_priv.h  52
-rw-r--r--  drivers/infiniband/core/device.c  614
-rw-r--r--  drivers/infiniband/core/fmr_pool.c  507
-rw-r--r--  drivers/infiniband/core/mad.c  2714
-rw-r--r--  drivers/infiniband/core/mad_priv.h  199
-rw-r--r--  drivers/infiniband/core/packer.c  201
-rw-r--r--  drivers/infiniband/core/sa_query.c  866
-rw-r--r--  drivers/infiniband/core/smi.c  234
-rw-r--r--  drivers/infiniband/core/smi.h  67
-rw-r--r--  drivers/infiniband/core/sysfs.c  762
-rw-r--r--  drivers/infiniband/core/ud_header.c  365
-rw-r--r--  drivers/infiniband/core/user_mad.c  840
-rw-r--r--  drivers/infiniband/core/verbs.c  434
-rw-r--r--  drivers/infiniband/hw/mthca/Kconfig  16
-rw-r--r--  drivers/infiniband/hw/mthca/Makefile  12
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_allocator.c  179
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_av.c  241
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.c  1767
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.h  310
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_config_reg.h  51
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cq.c  918
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_dev.h  437
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_doorbell.h  95
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_eq.c  964
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mad.c  323
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_main.c  1123
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mcg.c  376
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.c  465
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.h  161
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mr.c  416
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_pd.c  80
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_profile.c  266
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_profile.h  58
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c  660
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.h  251
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c  2056
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_reset.c  232
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_uar.c  78
-rw-r--r--  drivers/infiniband/include/ib_cache.h  103
-rw-r--r--  drivers/infiniband/include/ib_fmr_pool.h  92
-rw-r--r--  drivers/infiniband/include/ib_mad.h  404
-rw-r--r--  drivers/infiniband/include/ib_pack.h  245
-rw-r--r--  drivers/infiniband/include/ib_sa.h  308
-rw-r--r--  drivers/infiniband/include/ib_smi.h  96
-rw-r--r--  drivers/infiniband/include/ib_user_mad.h  123
-rw-r--r--  drivers/infiniband/include/ib_verbs.h  1252
-rw-r--r--  drivers/infiniband/ulp/ipoib/Kconfig  33
-rw-r--r--  drivers/infiniband/ulp/ipoib/Makefile  11
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h  353
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_fs.c  287
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c  668
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c  1103
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c  991
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c  260
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_vlan.c  177
62 files changed, 26781 insertions, 0 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
new file mode 100644
index 000000000000..3cc3ff0cccb1
--- /dev/null
+++ b/drivers/infiniband/Kconfig
@@ -0,0 +1,14 @@
1menu "InfiniBand support"
2
3config INFINIBAND
4 tristate "InfiniBand support"
5 ---help---
6 Core support for InfiniBand (IB). Make sure to also select
7 any protocols you wish to use as well as drivers for your
8 InfiniBand hardware.
9
10source "drivers/infiniband/hw/mthca/Kconfig"
11
12source "drivers/infiniband/ulp/ipoib/Kconfig"
13
14endmenu
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
new file mode 100644
index 000000000000..d256cf798218
--- /dev/null
+++ b/drivers/infiniband/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_INFINIBAND) += core/
2obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/
3obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
new file mode 100644
index 000000000000..d2dbfb52c0a3
--- /dev/null
+++ b/drivers/infiniband/core/Makefile
@@ -0,0 +1,12 @@
1EXTRA_CFLAGS += -Idrivers/infiniband/include
2
3obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o ib_umad.o
4
5ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
6 device.o fmr_pool.o cache.o
7
8ib_mad-y := mad.o smi.o agent.o
9
10ib_sa-y := sa_query.o
11
12ib_umad-y := user_mad.o
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
new file mode 100644
index 000000000000..7aee5ebf3f01
--- /dev/null
+++ b/drivers/infiniband/core/agent.c
@@ -0,0 +1,373 @@
1/*
2 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $
37 */
38
39#include <linux/dma-mapping.h>
40
41#include <asm/bug.h>
42
43#include <ib_smi.h>
44
45#include "smi.h"
46#include "agent_priv.h"
47#include "mad_priv.h"
48#include "agent.h"
49
50spinlock_t ib_agent_port_list_lock;
51static LIST_HEAD(ib_agent_port_list);
52
53/*
54 * Caller must hold ib_agent_port_list_lock
55 */
56static inline struct ib_agent_port_private *
57__ib_get_agent_port(struct ib_device *device, int port_num,
58 struct ib_mad_agent *mad_agent)
59{
60 struct ib_agent_port_private *entry;
61
62 BUG_ON(!(!!device ^ !!mad_agent)); /* Exactly one MUST be (!NULL) */
63
64 if (device) {
65 list_for_each_entry(entry, &ib_agent_port_list, port_list) {
66 if (entry->smp_agent->device == device &&
67 entry->port_num == port_num)
68 return entry;
69 }
70 } else {
71 list_for_each_entry(entry, &ib_agent_port_list, port_list) {
72 if ((entry->smp_agent == mad_agent) ||
73 (entry->perf_mgmt_agent == mad_agent))
74 return entry;
75 }
76 }
77 return NULL;
78}
79
80static inline struct ib_agent_port_private *
81ib_get_agent_port(struct ib_device *device, int port_num,
82 struct ib_mad_agent *mad_agent)
83{
84 struct ib_agent_port_private *entry;
85 unsigned long flags;
86
87 spin_lock_irqsave(&ib_agent_port_list_lock, flags);
88 entry = __ib_get_agent_port(device, port_num, mad_agent);
89 spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
90
91 return entry;
92}
93
94int smi_check_local_dr_smp(struct ib_smp *smp,
95 struct ib_device *device,
96 int port_num)
97{
98 struct ib_agent_port_private *port_priv;
99
100 if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
101 return 1;
102 port_priv = ib_get_agent_port(device, port_num, NULL);
103 if (!port_priv) {
104 printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d "
105 "not open\n",
106 device->name, port_num);
107 return 1;
108 }
109
110 return smi_check_local_smp(port_priv->smp_agent, smp);
111}
112
113static int agent_mad_send(struct ib_mad_agent *mad_agent,
114 struct ib_agent_port_private *port_priv,
115 struct ib_mad_private *mad_priv,
116 struct ib_grh *grh,
117 struct ib_wc *wc)
118{
119 struct ib_agent_send_wr *agent_send_wr;
120 struct ib_sge gather_list;
121 struct ib_send_wr send_wr;
122 struct ib_send_wr *bad_send_wr;
123 struct ib_ah_attr ah_attr;
124 unsigned long flags;
125 int ret = 1;
126
127 agent_send_wr = kmalloc(sizeof(*agent_send_wr), GFP_KERNEL);
128 if (!agent_send_wr)
129 goto out;
130 agent_send_wr->mad = mad_priv;
131
132 /* PCI mapping */
133 gather_list.addr = dma_map_single(mad_agent->device->dma_device,
134 &mad_priv->mad,
135 sizeof(mad_priv->mad),
136 DMA_TO_DEVICE);
137 gather_list.length = sizeof(mad_priv->mad);
138 gather_list.lkey = (*port_priv->mr).lkey;
139
140 send_wr.next = NULL;
141 send_wr.opcode = IB_WR_SEND;
142 send_wr.sg_list = &gather_list;
143 send_wr.num_sge = 1;
144 send_wr.wr.ud.remote_qpn = wc->src_qp; /* DQPN */
145 send_wr.wr.ud.timeout_ms = 0;
146 send_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
147
148 ah_attr.dlid = wc->slid;
149 ah_attr.port_num = mad_agent->port_num;
150 ah_attr.src_path_bits = wc->dlid_path_bits;
151 ah_attr.sl = wc->sl;
152 ah_attr.static_rate = 0;
153 ah_attr.ah_flags = 0; /* No GRH */
154 if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
155 if (wc->wc_flags & IB_WC_GRH) {
156 ah_attr.ah_flags = IB_AH_GRH;
157 /* Should sgid be looked up ? */
158 ah_attr.grh.sgid_index = 0;
159 ah_attr.grh.hop_limit = grh->hop_limit;
160 ah_attr.grh.flow_label = be32_to_cpup(
161 &grh->version_tclass_flow) & 0xfffff;
162 ah_attr.grh.traffic_class = (be32_to_cpup(
163 &grh->version_tclass_flow) >> 20) & 0xff;
164 memcpy(ah_attr.grh.dgid.raw,
165 grh->sgid.raw,
166 sizeof(ah_attr.grh.dgid));
167 }
168 }
169
170 agent_send_wr->ah = ib_create_ah(mad_agent->qp->pd, &ah_attr);
171 if (IS_ERR(agent_send_wr->ah)) {
172 printk(KERN_ERR SPFX "No memory for address handle\n");
173 kfree(agent_send_wr);
174 goto out;
175 }
176
177 send_wr.wr.ud.ah = agent_send_wr->ah;
178 if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
179 send_wr.wr.ud.pkey_index = wc->pkey_index;
180 send_wr.wr.ud.remote_qkey = IB_QP1_QKEY;
181 } else { /* for SMPs */
182 send_wr.wr.ud.pkey_index = 0;
183 send_wr.wr.ud.remote_qkey = 0;
184 }
185 send_wr.wr.ud.mad_hdr = &mad_priv->mad.mad.mad_hdr;
186 send_wr.wr_id = (unsigned long)agent_send_wr;
187
188 pci_unmap_addr_set(agent_send_wr, mapping, gather_list.addr);
189
190 /* Send */
191 spin_lock_irqsave(&port_priv->send_list_lock, flags);
192 if (ib_post_send_mad(mad_agent, &send_wr, &bad_send_wr)) {
193 spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
194 dma_unmap_single(mad_agent->device->dma_device,
195 pci_unmap_addr(agent_send_wr, mapping),
196 sizeof(mad_priv->mad),
197 DMA_TO_DEVICE);
198 ib_destroy_ah(agent_send_wr->ah);
199 kfree(agent_send_wr);
200 } else {
201 list_add_tail(&agent_send_wr->send_list,
202 &port_priv->send_posted_list);
203 spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
204 ret = 0;
205 }
206
207out:
208 return ret;
209}
210
211int agent_send(struct ib_mad_private *mad,
212 struct ib_grh *grh,
213 struct ib_wc *wc,
214 struct ib_device *device,
215 int port_num)
216{
217 struct ib_agent_port_private *port_priv;
218 struct ib_mad_agent *mad_agent;
219
220 port_priv = ib_get_agent_port(device, port_num, NULL);
221 if (!port_priv) {
222 printk(KERN_DEBUG SPFX "agent_send %s port %d not open\n",
223 device->name, port_num);
224 return 1;
225 }
226
227 /* Get mad agent based on mgmt_class in MAD */
228 switch (mad->mad.mad.mad_hdr.mgmt_class) {
229 case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
230 case IB_MGMT_CLASS_SUBN_LID_ROUTED:
231 mad_agent = port_priv->smp_agent;
232 break;
233 case IB_MGMT_CLASS_PERF_MGMT:
234 mad_agent = port_priv->perf_mgmt_agent;
235 break;
236 default:
237 return 1;
238 }
239
240 return agent_mad_send(mad_agent, port_priv, mad, grh, wc);
241}
242
243static void agent_send_handler(struct ib_mad_agent *mad_agent,
244 struct ib_mad_send_wc *mad_send_wc)
245{
246 struct ib_agent_port_private *port_priv;
247 struct ib_agent_send_wr *agent_send_wr;
248 unsigned long flags;
249
250 /* Find matching MAD agent */
251 port_priv = ib_get_agent_port(NULL, 0, mad_agent);
252 if (!port_priv) {
253 printk(KERN_ERR SPFX "agent_send_handler: no matching MAD "
254 "agent %p\n", mad_agent);
255 return;
256 }
257
258 agent_send_wr = (struct ib_agent_send_wr *)(unsigned long)mad_send_wc->wr_id;
259 spin_lock_irqsave(&port_priv->send_list_lock, flags);
260 /* Remove completed send from posted send MAD list */
261 list_del(&agent_send_wr->send_list);
262 spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
263
264 /* Unmap PCI */
265 dma_unmap_single(mad_agent->device->dma_device,
266 pci_unmap_addr(agent_send_wr, mapping),
267 sizeof(agent_send_wr->mad->mad),
268 DMA_TO_DEVICE);
269
270 ib_destroy_ah(agent_send_wr->ah);
271
272 /* Release allocated memory */
273 kmem_cache_free(ib_mad_cache, agent_send_wr->mad);
274 kfree(agent_send_wr);
275}
276
277int ib_agent_port_open(struct ib_device *device, int port_num)
278{
279 int ret;
280 struct ib_agent_port_private *port_priv;
281 unsigned long flags;
282
283 /* First, check if port already open for SMI */
284 port_priv = ib_get_agent_port(device, port_num, NULL);
285 if (port_priv) {
286 printk(KERN_DEBUG SPFX "%s port %d already open\n",
287 device->name, port_num);
288 return 0;
289 }
290
291 /* Create new device info */
292 port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL);
293 if (!port_priv) {
294 printk(KERN_ERR SPFX "No memory for ib_agent_port_private\n");
295 ret = -ENOMEM;
296 goto error1;
297 }
298
299 memset(port_priv, 0, sizeof *port_priv);
300 port_priv->port_num = port_num;
301 spin_lock_init(&port_priv->send_list_lock);
302 INIT_LIST_HEAD(&port_priv->send_posted_list);
303
304 /* Obtain send only MAD agent for SM class (SMI QP) */
305 port_priv->smp_agent = ib_register_mad_agent(device, port_num,
306 IB_QPT_SMI,
307 NULL, 0,
308 &agent_send_handler,
309 NULL, NULL);
310
311 if (IS_ERR(port_priv->smp_agent)) {
312 ret = PTR_ERR(port_priv->smp_agent);
313 goto error2;
314 }
315
316 /* Obtain send only MAD agent for PerfMgmt class (GSI QP) */
317 port_priv->perf_mgmt_agent = ib_register_mad_agent(device, port_num,
318 IB_QPT_GSI,
319 NULL, 0,
320 &agent_send_handler,
321 NULL, NULL);
322 if (IS_ERR(port_priv->perf_mgmt_agent)) {
323 ret = PTR_ERR(port_priv->perf_mgmt_agent);
324 goto error3;
325 }
326
327 port_priv->mr = ib_get_dma_mr(port_priv->smp_agent->qp->pd,
328 IB_ACCESS_LOCAL_WRITE);
329 if (IS_ERR(port_priv->mr)) {
330 printk(KERN_ERR SPFX "Couldn't get DMA MR\n");
331 ret = PTR_ERR(port_priv->mr);
332 goto error4;
333 }
334
335 spin_lock_irqsave(&ib_agent_port_list_lock, flags);
336 list_add_tail(&port_priv->port_list, &ib_agent_port_list);
337 spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
338
339 return 0;
340
341error4:
342 ib_unregister_mad_agent(port_priv->perf_mgmt_agent);
343error3:
344 ib_unregister_mad_agent(port_priv->smp_agent);
345error2:
346 kfree(port_priv);
347error1:
348 return ret;
349}
350
351int ib_agent_port_close(struct ib_device *device, int port_num)
352{
353 struct ib_agent_port_private *port_priv;
354 unsigned long flags;
355
356 spin_lock_irqsave(&ib_agent_port_list_lock, flags);
357 port_priv = __ib_get_agent_port(device, port_num, NULL);
358 if (port_priv == NULL) {
359 spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
360 printk(KERN_ERR SPFX "Port %d not found\n", port_num);
361 return -ENODEV;
362 }
363 list_del(&port_priv->port_list);
364 spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
365
366 ib_dereg_mr(port_priv->mr);
367
368 ib_unregister_mad_agent(port_priv->perf_mgmt_agent);
369 ib_unregister_mad_agent(port_priv->smp_agent);
370 kfree(port_priv);
371
372 return 0;
373}
diff --git a/drivers/infiniband/core/agent.h b/drivers/infiniband/core/agent.h
new file mode 100644
index 000000000000..d9426842254a
--- /dev/null
+++ b/drivers/infiniband/core/agent.h
@@ -0,0 +1,55 @@
1/*
2 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 * $Id: agent.h 1389 2004-12-27 22:56:47Z roland $
37 */
38
39#ifndef __AGENT_H_
40#define __AGENT_H_
41
42extern spinlock_t ib_agent_port_list_lock;
43
44extern int ib_agent_port_open(struct ib_device *device,
45 int port_num);
46
47extern int ib_agent_port_close(struct ib_device *device, int port_num);
48
49extern int agent_send(struct ib_mad_private *mad,
50 struct ib_grh *grh,
51 struct ib_wc *wc,
52 struct ib_device *device,
53 int port_num);
54
55#endif /* __AGENT_H_ */
diff --git a/drivers/infiniband/core/agent_priv.h b/drivers/infiniband/core/agent_priv.h
new file mode 100644
index 000000000000..17a0cce5813c
--- /dev/null
+++ b/drivers/infiniband/core/agent_priv.h
@@ -0,0 +1,63 @@
1/*
2 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 * $Id: agent_priv.h 1389 2004-12-27 22:56:47Z roland $
37 */
38
39#ifndef __IB_AGENT_PRIV_H__
40#define __IB_AGENT_PRIV_H__
41
42#include <linux/pci.h>
43
44#define SPFX "ib_agent: "
45
46struct ib_agent_send_wr {
47 struct list_head send_list;
48 struct ib_ah *ah;
49 struct ib_mad_private *mad;
50 DECLARE_PCI_UNMAP_ADDR(mapping)
51};
52
53struct ib_agent_port_private {
54 struct list_head port_list;
55 struct list_head send_posted_list;
56 spinlock_t send_list_lock;
57 int port_num;
58 struct ib_mad_agent *smp_agent; /* SM class */
59 struct ib_mad_agent *perf_mgmt_agent; /* PerfMgmt class */
60 struct ib_mr *mr;
61};
62
63#endif /* __IB_AGENT_PRIV_H__ */
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
new file mode 100644
index 000000000000..3042360c97e1
--- /dev/null
+++ b/drivers/infiniband/core/cache.c
@@ -0,0 +1,365 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: cache.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <linux/version.h>
36#include <linux/module.h>
37#include <linux/errno.h>
38#include <linux/slab.h>
39
40#include <ib_cache.h>
41
42#include "core_priv.h"
43
44struct ib_pkey_cache {
45 int table_len;
46 u16 table[0];
47};
48
49struct ib_gid_cache {
50 int table_len;
51 union ib_gid table[0];
52};
53
54struct ib_update_work {
55 struct work_struct work;
56 struct ib_device *device;
57 u8 port_num;
58};
59
60static inline int start_port(struct ib_device *device)
61{
62 return device->node_type == IB_NODE_SWITCH ? 0 : 1;
63}
64
65static inline int end_port(struct ib_device *device)
66{
67 return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt;
68}
69
70int ib_get_cached_gid(struct ib_device *device,
71 u8 port_num,
72 int index,
73 union ib_gid *gid)
74{
75 struct ib_gid_cache *cache;
76 unsigned long flags;
77 int ret = 0;
78
79 if (port_num < start_port(device) || port_num > end_port(device))
80 return -EINVAL;
81
82 read_lock_irqsave(&device->cache.lock, flags);
83
84 cache = device->cache.gid_cache[port_num - start_port(device)];
85
86 if (index < 0 || index >= cache->table_len)
87 ret = -EINVAL;
88 else
89 *gid = cache->table[index];
90
91 read_unlock_irqrestore(&device->cache.lock, flags);
92
93 return ret;
94}
95EXPORT_SYMBOL(ib_get_cached_gid);
96
97int ib_find_cached_gid(struct ib_device *device,
98 union ib_gid *gid,
99 u8 *port_num,
100 u16 *index)
101{
102 struct ib_gid_cache *cache;
103 unsigned long flags;
104 int p, i;
105 int ret = -ENOENT;
106
107 *port_num = -1;
108 if (index)
109 *index = -1;
110
111 read_lock_irqsave(&device->cache.lock, flags);
112
113 for (p = 0; p <= end_port(device) - start_port(device); ++p) {
114 cache = device->cache.gid_cache[p];
115 for (i = 0; i < cache->table_len; ++i) {
116 if (!memcmp(gid, &cache->table[i], sizeof *gid)) {
117 *port_num = p + start_port(device);
118 if (index)
119 *index = i;
120 ret = 0;
121 goto found;
122 }
123 }
124 }
125found:
126 read_unlock_irqrestore(&device->cache.lock, flags);
127
128 return ret;
129}
130EXPORT_SYMBOL(ib_find_cached_gid);
131
132int ib_get_cached_pkey(struct ib_device *device,
133 u8 port_num,
134 int index,
135 u16 *pkey)
136{
137 struct ib_pkey_cache *cache;
138 unsigned long flags;
139 int ret = 0;
140
141 if (port_num < start_port(device) || port_num > end_port(device))
142 return -EINVAL;
143
144 read_lock_irqsave(&device->cache.lock, flags);
145
146 cache = device->cache.pkey_cache[port_num - start_port(device)];
147
148 if (index < 0 || index >= cache->table_len)
149 ret = -EINVAL;
150 else
151 *pkey = cache->table[index];
152
153 read_unlock_irqrestore(&device->cache.lock, flags);
154
155 return ret;
156}
157EXPORT_SYMBOL(ib_get_cached_pkey);
158
159int ib_find_cached_pkey(struct ib_device *device,
160 u8 port_num,
161 u16 pkey,
162 u16 *index)
163{
164 struct ib_pkey_cache *cache;
165 unsigned long flags;
166 int i;
167 int ret = -ENOENT;
168
169 if (port_num < start_port(device) || port_num > end_port(device))
170 return -EINVAL;
171
172 read_lock_irqsave(&device->cache.lock, flags);
173
174 cache = device->cache.pkey_cache[port_num - start_port(device)];
175
176 *index = -1;
177
178 for (i = 0; i < cache->table_len; ++i)
179 if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
180 *index = i;
181 ret = 0;
182 break;
183 }
184
185 read_unlock_irqrestore(&device->cache.lock, flags);
186
187 return ret;
188}
189EXPORT_SYMBOL(ib_find_cached_pkey);
190
191static void ib_cache_update(struct ib_device *device,
192 u8 port)
193{
194 struct ib_port_attr *tprops = NULL;
195 struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
196 struct ib_gid_cache *gid_cache = NULL, *old_gid_cache;
197 int i;
198 int ret;
199
200 tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
201 if (!tprops)
202 return;
203
204 ret = ib_query_port(device, port, tprops);
205 if (ret) {
206 printk(KERN_WARNING "ib_query_port failed (%d) for %s\n",
207 ret, device->name);
208 goto err;
209 }
210
211 pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
212 sizeof *pkey_cache->table, GFP_KERNEL);
213 if (!pkey_cache)
214 goto err;
215
216 pkey_cache->table_len = tprops->pkey_tbl_len;
217
218 gid_cache = kmalloc(sizeof *gid_cache + tprops->gid_tbl_len *
219 sizeof *gid_cache->table, GFP_KERNEL);
220 if (!gid_cache)
221 goto err;
222
223 gid_cache->table_len = tprops->gid_tbl_len;
224
225 for (i = 0; i < pkey_cache->table_len; ++i) {
226 ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
227 if (ret) {
228 printk(KERN_WARNING "ib_query_pkey failed (%d) for %s (index %d)\n",
229 ret, device->name, i);
230 goto err;
231 }
232 }
233
234 for (i = 0; i < gid_cache->table_len; ++i) {
235 ret = ib_query_gid(device, port, i, gid_cache->table + i);
236 if (ret) {
237 printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
238 ret, device->name, i);
239 goto err;
240 }
241 }
242
243 write_lock_irq(&device->cache.lock);
244
245 old_pkey_cache = device->cache.pkey_cache[port - start_port(device)];
246 old_gid_cache = device->cache.gid_cache [port - start_port(device)];
247
248 device->cache.pkey_cache[port - start_port(device)] = pkey_cache;
249 device->cache.gid_cache [port - start_port(device)] = gid_cache;
250
251 write_unlock_irq(&device->cache.lock);
252
253 kfree(old_pkey_cache);
254 kfree(old_gid_cache);
255 kfree(tprops);
256 return;
257
258err:
259 kfree(pkey_cache);
260 kfree(gid_cache);
261 kfree(tprops);
262}
263
264static void ib_cache_task(void *work_ptr)
265{
266 struct ib_update_work *work = work_ptr;
267
268 ib_cache_update(work->device, work->port_num);
269 kfree(work);
270}
271
272static void ib_cache_event(struct ib_event_handler *handler,
273 struct ib_event *event)
274{
275 struct ib_update_work *work;
276
277 if (event->event == IB_EVENT_PORT_ERR ||
278 event->event == IB_EVENT_PORT_ACTIVE ||
279 event->event == IB_EVENT_LID_CHANGE ||
280 event->event == IB_EVENT_PKEY_CHANGE ||
281 event->event == IB_EVENT_SM_CHANGE) {
282 work = kmalloc(sizeof *work, GFP_ATOMIC);
283 if (work) {
284 INIT_WORK(&work->work, ib_cache_task, work);
285 work->device = event->device;
286 work->port_num = event->element.port_num;
287 schedule_work(&work->work);
288 }
289 }
290}
291
292static void ib_cache_setup_one(struct ib_device *device)
293{
294 int p;
295
296 rwlock_init(&device->cache.lock);
297
298 device->cache.pkey_cache =
299 kmalloc(sizeof *device->cache.pkey_cache *
300 (end_port(device) - start_port(device) + 1), GFP_KERNEL);
301 device->cache.gid_cache =
302 kmalloc(sizeof *device->cache.pkey_cache *
303 (end_port(device) - start_port(device) + 1), GFP_KERNEL);
304
305 if (!device->cache.pkey_cache || !device->cache.gid_cache) {
306 printk(KERN_WARNING "Couldn't allocate cache "
307 "for %s\n", device->name);
308 goto err;
309 }
310
311 for (p = 0; p <= end_port(device) - start_port(device); ++p) {
312 device->cache.pkey_cache[p] = NULL;
313 device->cache.gid_cache [p] = NULL;
314 ib_cache_update(device, p + start_port(device));
315 }
316
317 INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
318 device, ib_cache_event);
319 if (ib_register_event_handler(&device->cache.event_handler))
320 goto err_cache;
321
322 return;
323
324err_cache:
325 for (p = 0; p <= end_port(device) - start_port(device); ++p) {
326 kfree(device->cache.pkey_cache[p]);
327 kfree(device->cache.gid_cache[p]);
328 }
329
330err:
331 kfree(device->cache.pkey_cache);
332 kfree(device->cache.gid_cache);
333}
334
335static void ib_cache_cleanup_one(struct ib_device *device)
336{
337 int p;
338
339 ib_unregister_event_handler(&device->cache.event_handler);
340 flush_scheduled_work();
341
342 for (p = 0; p <= end_port(device) - start_port(device); ++p) {
343 kfree(device->cache.pkey_cache[p]);
344 kfree(device->cache.gid_cache[p]);
345 }
346
347 kfree(device->cache.pkey_cache);
348 kfree(device->cache.gid_cache);
349}
350
351static struct ib_client cache_client = {
352 .name = "cache",
353 .add = ib_cache_setup_one,
354 .remove = ib_cache_cleanup_one
355};
356
357int __init ib_cache_setup(void)
358{
359 return ib_register_client(&cache_client);
360}
361
362void __exit ib_cache_cleanup(void)
363{
364 ib_unregister_client(&cache_client);
365}
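
[Editor's sketch, not part of the patch] cache.c above exports ib_get_cached_pkey() and ib_get_cached_gid() for other kernel modules; the sketch below is a minimal, hypothetical consumer showing how those accessors are called, assuming it is built with this tree's -Idrivers/infiniband/include setup and that "device" and "port" come from the caller (for example an ib_client add callback):

/*
 * Illustration only (not from this commit): read the first P_Key and
 * GID table entries for a port from the cache built by cache.c.
 */
#include <linux/kernel.h>
#include <ib_cache.h>

static void example_read_cache(struct ib_device *device, u8 port)
{
	u16 pkey;
	union ib_gid gid;

	/* index 0 is the default P_Key slot */
	if (!ib_get_cached_pkey(device, port, 0, &pkey))
		printk(KERN_INFO "%s port %d P_Key[0] 0x%04x\n",
		       device->name, port, pkey);

	/* index 0 is the port GID */
	if (!ib_get_cached_gid(device, port, 0, &gid))
		printk(KERN_INFO "%s port %d GID[0] starts with %02x\n",
		       device->name, port, gid.raw[0]);
}

Both helpers return 0 on success and -EINVAL for an out-of-range port or index, mirroring the checks in the functions above.
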
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
new file mode 100644
index 000000000000..797049626ff6
--- /dev/null
+++ b/drivers/infiniband/core/core_priv.h
@@ -0,0 +1,52 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: core_priv.h 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#ifndef _CORE_PRIV_H
36#define _CORE_PRIV_H
37
38#include <linux/list.h>
39#include <linux/spinlock.h>
40
41#include <ib_verbs.h>
42
43int ib_device_register_sysfs(struct ib_device *device);
44void ib_device_unregister_sysfs(struct ib_device *device);
45
46int ib_sysfs_setup(void);
47void ib_sysfs_cleanup(void);
48
49int ib_cache_setup(void);
50void ib_cache_cleanup(void);
51
52#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
new file mode 100644
index 000000000000..9197e92d708a
--- /dev/null
+++ b/drivers/infiniband/core/device.c
@@ -0,0 +1,614 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: device.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <linux/module.h>
36#include <linux/string.h>
37#include <linux/errno.h>
38#include <linux/slab.h>
39#include <linux/init.h>
40
41#include <asm/semaphore.h>
42
43#include "core_priv.h"
44
45MODULE_AUTHOR("Roland Dreier");
46MODULE_DESCRIPTION("core kernel InfiniBand API");
47MODULE_LICENSE("Dual BSD/GPL");
48
49struct ib_client_data {
50 struct list_head list;
51 struct ib_client *client;
52 void * data;
53};
54
55static LIST_HEAD(device_list);
56static LIST_HEAD(client_list);
57
58/*
59 * device_sem protects access to both device_list and client_list.
60 * There's no real point to using multiple locks or something fancier
61 * like an rwsem: we always access both lists, and we're always
62 * modifying one list or the other list. In any case this is not a
63 * hot path so there's no point in trying to optimize.
64 */
65static DECLARE_MUTEX(device_sem);
66
67static int ib_device_check_mandatory(struct ib_device *device)
68{
69#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x }
70 static const struct {
71 size_t offset;
72 char *name;
73 } mandatory_table[] = {
74 IB_MANDATORY_FUNC(query_device),
75 IB_MANDATORY_FUNC(query_port),
76 IB_MANDATORY_FUNC(query_pkey),
77 IB_MANDATORY_FUNC(query_gid),
78 IB_MANDATORY_FUNC(alloc_pd),
79 IB_MANDATORY_FUNC(dealloc_pd),
80 IB_MANDATORY_FUNC(create_ah),
81 IB_MANDATORY_FUNC(destroy_ah),
82 IB_MANDATORY_FUNC(create_qp),
83 IB_MANDATORY_FUNC(modify_qp),
84 IB_MANDATORY_FUNC(destroy_qp),
85 IB_MANDATORY_FUNC(post_send),
86 IB_MANDATORY_FUNC(post_recv),
87 IB_MANDATORY_FUNC(create_cq),
88 IB_MANDATORY_FUNC(destroy_cq),
89 IB_MANDATORY_FUNC(poll_cq),
90 IB_MANDATORY_FUNC(req_notify_cq),
91 IB_MANDATORY_FUNC(get_dma_mr),
92 IB_MANDATORY_FUNC(dereg_mr)
93 };
94 int i;
95
96 for (i = 0; i < sizeof mandatory_table / sizeof mandatory_table[0]; ++i) {
97 if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
98 printk(KERN_WARNING "Device %s is missing mandatory function %s\n",
99 device->name, mandatory_table[i].name);
100 return -EINVAL;
101 }
102 }
103
104 return 0;
105}
106
107static struct ib_device *__ib_device_get_by_name(const char *name)
108{
109 struct ib_device *device;
110
111 list_for_each_entry(device, &device_list, core_list)
112 if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX))
113 return device;
114
115 return NULL;
116}
117
118
119static int alloc_name(char *name)
120{
121 long *inuse;
122 char buf[IB_DEVICE_NAME_MAX];
123 struct ib_device *device;
124 int i;
125
126 inuse = (long *) get_zeroed_page(GFP_KERNEL);
127 if (!inuse)
128 return -ENOMEM;
129
130 list_for_each_entry(device, &device_list, core_list) {
131 if (!sscanf(device->name, name, &i))
132 continue;
133 if (i < 0 || i >= PAGE_SIZE * 8)
134 continue;
135 snprintf(buf, sizeof buf, name, i);
136 if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX))
137 set_bit(i, inuse);
138 }
139
140 i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
141 free_page((unsigned long) inuse);
142 snprintf(buf, sizeof buf, name, i);
143
144 if (__ib_device_get_by_name(buf))
145 return -ENFILE;
146
147 strlcpy(name, buf, IB_DEVICE_NAME_MAX);
148 return 0;
149}
150
151/**
152 * ib_alloc_device - allocate an IB device struct
153 * @size:size of structure to allocate
154 *
155 * Low-level drivers should use ib_alloc_device() to allocate &struct
156 * ib_device. @size is the size of the structure to be allocated,
157 * including any private data used by the low-level driver.
158 * ib_dealloc_device() must be used to free structures allocated with
159 * ib_alloc_device().
160 */
161struct ib_device *ib_alloc_device(size_t size)
162{
163 void *dev;
164
165 BUG_ON(size < sizeof (struct ib_device));
166
167 dev = kmalloc(size, GFP_KERNEL);
168 if (!dev)
169 return NULL;
170
171 memset(dev, 0, size);
172
173 return dev;
174}
175EXPORT_SYMBOL(ib_alloc_device);
176
177/**
178 * ib_dealloc_device - free an IB device struct
179 * @device:structure to free
180 *
181 * Free a structure allocated with ib_alloc_device().
182 */
183void ib_dealloc_device(struct ib_device *device)
184{
185 if (device->reg_state == IB_DEV_UNINITIALIZED) {
186 kfree(device);
187 return;
188 }
189
190 BUG_ON(device->reg_state != IB_DEV_UNREGISTERED);
191
192 ib_device_unregister_sysfs(device);
193}
194EXPORT_SYMBOL(ib_dealloc_device);
195
196static int add_client_context(struct ib_device *device, struct ib_client *client)
197{
198 struct ib_client_data *context;
199 unsigned long flags;
200
201 context = kmalloc(sizeof *context, GFP_KERNEL);
202 if (!context) {
203 printk(KERN_WARNING "Couldn't allocate client context for %s/%s\n",
204 device->name, client->name);
205 return -ENOMEM;
206 }
207
208 context->client = client;
209 context->data = NULL;
210
211 spin_lock_irqsave(&device->client_data_lock, flags);
212 list_add(&context->list, &device->client_data_list);
213 spin_unlock_irqrestore(&device->client_data_lock, flags);
214
215 return 0;
216}
217
218/**
219 * ib_register_device - Register an IB device with IB core
220 * @device:Device to register
221 *
222 * Low-level drivers use ib_register_device() to register their
223 * devices with the IB core. All registered clients will receive a
224 * callback for each device that is added. @device must be allocated
225 * with ib_alloc_device().
226 */
227int ib_register_device(struct ib_device *device)
228{
229 int ret;
230
231 down(&device_sem);
232
233 if (strchr(device->name, '%')) {
234 ret = alloc_name(device->name);
235 if (ret)
236 goto out;
237 }
238
239 if (ib_device_check_mandatory(device)) {
240 ret = -EINVAL;
241 goto out;
242 }
243
244 INIT_LIST_HEAD(&device->event_handler_list);
245 INIT_LIST_HEAD(&device->client_data_list);
246 spin_lock_init(&device->event_handler_lock);
247 spin_lock_init(&device->client_data_lock);
248
249 ret = ib_device_register_sysfs(device);
250 if (ret) {
251 printk(KERN_WARNING "Couldn't register device %s with driver model\n",
252 device->name);
253 goto out;
254 }
255
256 list_add_tail(&device->core_list, &device_list);
257
258 device->reg_state = IB_DEV_REGISTERED;
259
260 {
261 struct ib_client *client;
262
263 list_for_each_entry(client, &client_list, list)
264 if (client->add && !add_client_context(device, client))
265 client->add(device);
266 }
267
268 out:
269 up(&device_sem);
270 return ret;
271}
272EXPORT_SYMBOL(ib_register_device);
273
274/**
275 * ib_unregister_device - Unregister an IB device
276 * @device:Device to unregister
277 *
278 * Unregister an IB device. All clients will receive a remove callback.
279 */
280void ib_unregister_device(struct ib_device *device)
281{
282 struct ib_client *client;
283 struct ib_client_data *context, *tmp;
284 unsigned long flags;
285
286 down(&device_sem);
287
288 list_for_each_entry_reverse(client, &client_list, list)
289 if (client->remove)
290 client->remove(device);
291
292 list_del(&device->core_list);
293
294 up(&device_sem);
295
296 spin_lock_irqsave(&device->client_data_lock, flags);
297 list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
298 kfree(context);
299 spin_unlock_irqrestore(&device->client_data_lock, flags);
300
301 device->reg_state = IB_DEV_UNREGISTERED;
302}
303EXPORT_SYMBOL(ib_unregister_device);
304
305/**
306 * ib_register_client - Register an IB client
307 * @client:Client to register
308 *
309 * Upper level users of the IB drivers can use ib_register_client() to
310 * register callbacks for IB device addition and removal. When an IB
311 * device is added, each registered client's add method will be called
312 * (in the order the clients were registered), and when a device is
313 * removed, each client's remove method will be called (in the reverse
314 * order that clients were registered). In addition, when
315 * ib_register_client() is called, the client will receive an add
316 * callback for all devices already registered.
317 */
318int ib_register_client(struct ib_client *client)
319{
320 struct ib_device *device;
321
322 down(&device_sem);
323
324 list_add_tail(&client->list, &client_list);
325 list_for_each_entry(device, &device_list, core_list)
326 if (client->add && !add_client_context(device, client))
327 client->add(device);
328
329 up(&device_sem);
330
331 return 0;
332}
333EXPORT_SYMBOL(ib_register_client);
334
335/**
336 * ib_unregister_client - Unregister an IB client
337 * @client:Client to unregister
338 *
339 * Upper level users use ib_unregister_client() to remove their client
340 * registration. When ib_unregister_client() is called, the client
341 * will receive a remove callback for each IB device still registered.
342 */
343void ib_unregister_client(struct ib_client *client)
344{
345 struct ib_client_data *context, *tmp;
346 struct ib_device *device;
347 unsigned long flags;
348
349 down(&device_sem);
350
351 list_for_each_entry(device, &device_list, core_list) {
352 if (client->remove)
353 client->remove(device);
354
355 spin_lock_irqsave(&device->client_data_lock, flags);
356 list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
357 if (context->client == client) {
358 list_del(&context->list);
359 kfree(context);
360 }
361 spin_unlock_irqrestore(&device->client_data_lock, flags);
362 }
363 list_del(&client->list);
364
365 up(&device_sem);
366}
367EXPORT_SYMBOL(ib_unregister_client);
368
369/**
370 * ib_get_client_data - Get IB client context
371 * @device:Device to get context for
372 * @client:Client to get context for
373 *
374 * ib_get_client_data() returns client context set with
375 * ib_set_client_data().
376 */
377void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
378{
379 struct ib_client_data *context;
380 void *ret = NULL;
381 unsigned long flags;
382
383 spin_lock_irqsave(&device->client_data_lock, flags);
384 list_for_each_entry(context, &device->client_data_list, list)
385 if (context->client == client) {
386 ret = context->data;
387 break;
388 }
389 spin_unlock_irqrestore(&device->client_data_lock, flags);
390
391 return ret;
392}
393EXPORT_SYMBOL(ib_get_client_data);
394
395/**
396 * ib_set_client_data - Get IB client context
397 * @device:Device to set context for
398 * @client:Client to set context for
399 * @data:Context to set
400 *
401 * ib_set_client_data() sets client context that can be retrieved with
402 * ib_get_client_data().
403 */
404void ib_set_client_data(struct ib_device *device, struct ib_client *client,
405 void *data)
406{
407 struct ib_client_data *context;
408 unsigned long flags;
409
410 spin_lock_irqsave(&device->client_data_lock, flags);
411 list_for_each_entry(context, &device->client_data_list, list)
412 if (context->client == client) {
413 context->data = data;
414 goto out;
415 }
416
417 printk(KERN_WARNING "No client context found for %s/%s\n",
418 device->name, client->name);
419
420out:
421 spin_unlock_irqrestore(&device->client_data_lock, flags);
422}
423EXPORT_SYMBOL(ib_set_client_data);
424
425/**
426 * ib_register_event_handler - Register an IB event handler
427 * @event_handler:Handler to register
428 *
429 * ib_register_event_handler() registers an event handler that will be
430 * called back when asynchronous IB events occur (as defined in
431 * chapter 11 of the InfiniBand Architecture Specification). This
432 * callback may occur in interrupt context.
433 */
434int ib_register_event_handler (struct ib_event_handler *event_handler)
435{
436 unsigned long flags;
437
438 spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
439 list_add_tail(&event_handler->list,
440 &event_handler->device->event_handler_list);
441 spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
442
443 return 0;
444}
445EXPORT_SYMBOL(ib_register_event_handler);
446
447/**
448 * ib_unregister_event_handler - Unregister an event handler
449 * @event_handler:Handler to unregister
450 *
451 * Unregister an event handler registered with
452 * ib_register_event_handler().
453 */
454int ib_unregister_event_handler(struct ib_event_handler *event_handler)
455{
456 unsigned long flags;
457
458 spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
459 list_del(&event_handler->list);
460 spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
461
462 return 0;
463}
464EXPORT_SYMBOL(ib_unregister_event_handler);
465
466/**
467 * ib_dispatch_event - Dispatch an asynchronous event
468 * @event:Event to dispatch
469 *
470 * Low-level drivers must call ib_dispatch_event() to dispatch the
471 * event to all registered event handlers when an asynchronous event
472 * occurs.
473 */
474void ib_dispatch_event(struct ib_event *event)
475{
476 unsigned long flags;
477 struct ib_event_handler *handler;
478
479 spin_lock_irqsave(&event->device->event_handler_lock, flags);
480
481 list_for_each_entry(handler, &event->device->event_handler_list, list)
482 handler->handler(handler, event);
483
484 spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
485}
486EXPORT_SYMBOL(ib_dispatch_event);
487
488/**
489 * ib_query_device - Query IB device attributes
490 * @device:Device to query
491 * @device_attr:Device attributes
492 *
493 * ib_query_device() returns the attributes of a device through the
494 * @device_attr pointer.
495 */
496int ib_query_device(struct ib_device *device,
497 struct ib_device_attr *device_attr)
498{
499 return device->query_device(device, device_attr);
500}
501EXPORT_SYMBOL(ib_query_device);
502
503/**
504 * ib_query_port - Query IB port attributes
505 * @device:Device to query
506 * @port_num:Port number to query
507 * @port_attr:Port attributes
508 *
509 * ib_query_port() returns the attributes of a port through the
510 * @port_attr pointer.
511 */
512int ib_query_port(struct ib_device *device,
513 u8 port_num,
514 struct ib_port_attr *port_attr)
515{
516 return device->query_port(device, port_num, port_attr);
517}
518EXPORT_SYMBOL(ib_query_port);
519
520/**
521 * ib_query_gid - Get GID table entry
522 * @device:Device to query
523 * @port_num:Port number to query
524 * @index:GID table index to query
525 * @gid:Returned GID
526 *
527 * ib_query_gid() fetches the specified GID table entry.
528 */
529int ib_query_gid(struct ib_device *device,
530 u8 port_num, int index, union ib_gid *gid)
531{
532 return device->query_gid(device, port_num, index, gid);
533}
534EXPORT_SYMBOL(ib_query_gid);
535
536/**
537 * ib_query_pkey - Get P_Key table entry
538 * @device:Device to query
539 * @port_num:Port number to query
540 * @index:P_Key table index to query
541 * @pkey:Returned P_Key
542 *
543 * ib_query_pkey() fetches the specified P_Key table entry.
544 */
545int ib_query_pkey(struct ib_device *device,
546 u8 port_num, u16 index, u16 *pkey)
547{
548 return device->query_pkey(device, port_num, index, pkey);
549}
550EXPORT_SYMBOL(ib_query_pkey);
551
552/**
553 * ib_modify_device - Change IB device attributes
554 * @device:Device to modify
555 * @device_modify_mask:Mask of attributes to change
556 * @device_modify:New attribute values
557 *
558 * ib_modify_device() changes a device's attributes as specified by
559 * the @device_modify_mask and @device_modify structure.
560 */
561int ib_modify_device(struct ib_device *device,
562 int device_modify_mask,
563 struct ib_device_modify *device_modify)
564{
565 return device->modify_device(device, device_modify_mask,
566 device_modify);
567}
568EXPORT_SYMBOL(ib_modify_device);
569
570/**
571 * ib_modify_port - Modifies the attributes for the specified port.
572 * @device: The device to modify.
573 * @port_num: The number of the port to modify.
574 * @port_modify_mask: Mask used to specify which attributes of the port
575 * to change.
576 * @port_modify: New attribute values for the port.
577 *
578 * ib_modify_port() changes a port's attributes as specified by the
579 * @port_modify_mask and @port_modify structure.
580 */
581int ib_modify_port(struct ib_device *device,
582 u8 port_num, int port_modify_mask,
583 struct ib_port_modify *port_modify)
584{
585 return device->modify_port(device, port_num, port_modify_mask,
586 port_modify);
587}
588EXPORT_SYMBOL(ib_modify_port);
589
590static int __init ib_core_init(void)
591{
592 int ret;
593
594 ret = ib_sysfs_setup();
595 if (ret)
596 printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
597
598 ret = ib_cache_setup();
599 if (ret) {
600 printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
601 ib_sysfs_cleanup();
602 }
603
604 return ret;
605}
606
607static void __exit ib_core_cleanup(void)
608{
609 ib_cache_cleanup();
610 ib_sysfs_cleanup();
611}
612
613module_init(ib_core_init);
614module_exit(ib_core_cleanup);
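
[Editor's sketch, not part of the patch] The kernel-doc for ib_register_client() above describes the add/remove protocol that upper layers follow, and cache_client in cache.c is the in-tree example. The sketch below is a hypothetical client modelled on it; the names example_client, example_add, example_remove and struct example_data are invented for illustration:

/*
 * Illustration only (not from this commit): an ib_client that attaches
 * per-device private data on add and frees it on remove.
 */
#include <linux/slab.h>
#include <ib_verbs.h>

static struct ib_client example_client;	/* forward declaration */

struct example_data {
	int dummy;		/* per-device state would live here */
};

static void example_add(struct ib_device *device)
{
	struct example_data *data = kmalloc(sizeof *data, GFP_KERNEL);

	if (!data)
		return;
	data->dummy = 0;
	ib_set_client_data(device, &example_client, data);
}

static void example_remove(struct ib_device *device)
{
	kfree(ib_get_client_data(device, &example_client));
}

static struct ib_client example_client = {
	.name   = "example",
	.add    = example_add,
	.remove = example_remove
};

/*
 * Module init/exit would call ib_register_client(&example_client) and
 * ib_unregister_client(&example_client); registration immediately
 * delivers an add callback for every device already registered.
 */
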
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
new file mode 100644
index 000000000000..2e9469f18926
--- /dev/null
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -0,0 +1,507 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: fmr_pool.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <linux/errno.h>
36#include <linux/spinlock.h>
37#include <linux/slab.h>
38#include <linux/jhash.h>
39#include <linux/kthread.h>
40
41#include <ib_fmr_pool.h>
42
43#include "core_priv.h"
44
45enum {
46 IB_FMR_MAX_REMAPS = 32,
47
48 IB_FMR_HASH_BITS = 8,
49 IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS,
50 IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1
51};
52
53/*
54 * If an FMR is not in use, then the list member will point to either
55 * its pool's free_list (if the FMR can be mapped again; that is,
56 * remap_count < IB_FMR_MAX_REMAPS) or its pool's dirty_list (if the
57 * FMR needs to be unmapped before being remapped). In either of
58 * these cases it is a bug if the ref_count is not 0. In other words,
59 * if ref_count is > 0, then the list member must not be linked into
60 * either free_list or dirty_list.
61 *
62 * The cache_node member is used to link the FMR into a cache bucket
63 * (if caching is enabled). This is independent of the reference
64 * count of the FMR. When a valid FMR is released, its ref_count is
65 * decremented, and if ref_count reaches 0, the FMR is placed in
66 * either free_list or dirty_list as appropriate. However, it is not
67 * removed from the cache and may be "revived" if a call to
68 * ib_fmr_register_physical() occurs before the FMR is remapped. In
69 * this case we just increment the ref_count and remove the FMR from
70 * free_list/dirty_list.
71 *
72 * Before we remap an FMR from free_list, we remove it from the cache
73 * (to prevent another user from obtaining a stale FMR). When an FMR
74 * is released, we add it to the tail of the free list, so that our
75 * cache eviction policy is "least recently used."
76 *
77 * All manipulation of ref_count, list and cache_node is protected by
78 * pool_lock to maintain consistency.
79 */
80
81struct ib_fmr_pool {
82 spinlock_t pool_lock;
83
84 int pool_size;
85 int max_pages;
86 int dirty_watermark;
87 int dirty_len;
88 struct list_head free_list;
89 struct list_head dirty_list;
90 struct hlist_head *cache_bucket;
91
92 void (*flush_function)(struct ib_fmr_pool *pool,
93 void * arg);
94 void *flush_arg;
95
96 struct task_struct *thread;
97
98 atomic_t req_ser;
99 atomic_t flush_ser;
100
101 wait_queue_head_t force_wait;
102};
103
104static inline u32 ib_fmr_hash(u64 first_page)
105{
106 return jhash_2words((u32) first_page,
107 (u32) (first_page >> 32),
108 0);
109}
110
111/* Caller must hold pool_lock */
112static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
113 u64 *page_list,
114 int page_list_len,
115 u64 io_virtual_address)
116{
117 struct hlist_head *bucket;
118 struct ib_pool_fmr *fmr;
119 struct hlist_node *pos;
120
121 if (!pool->cache_bucket)
122 return NULL;
123
124 bucket = pool->cache_bucket + ib_fmr_hash(*page_list);
125
126 hlist_for_each_entry(fmr, pos, bucket, cache_node)
127 if (io_virtual_address == fmr->io_virtual_address &&
128 page_list_len == fmr->page_list_len &&
129 !memcmp(page_list, fmr->page_list,
130 page_list_len * sizeof *page_list))
131 return fmr;
132
133 return NULL;
134}
135
136static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
137{
138 int ret;
139 struct ib_pool_fmr *fmr;
140 LIST_HEAD(unmap_list);
141 LIST_HEAD(fmr_list);
142
143 spin_lock_irq(&pool->pool_lock);
144
145 list_for_each_entry(fmr, &pool->dirty_list, list) {
146 hlist_del_init(&fmr->cache_node);
147 fmr->remap_count = 0;
148 list_add_tail(&fmr->fmr->list, &fmr_list);
149
150#ifdef DEBUG
151 if (fmr->ref_count != 0) {
152 printk(KERN_WARNING "Unmapping FMR %p with ref count %d\n",
153 fmr, fmr->ref_count);
154 }
155#endif
156 }
157
158 list_splice(&pool->dirty_list, &unmap_list);
159 INIT_LIST_HEAD(&pool->dirty_list);
160 pool->dirty_len = 0;
161
162 spin_unlock_irq(&pool->pool_lock);
163
164 if (list_empty(&unmap_list)) {
165 return;
166 }
167
168 ret = ib_unmap_fmr(&fmr_list);
169 if (ret)
170 printk(KERN_WARNING "ib_unmap_fmr returned %d\n", ret);
171
172 spin_lock_irq(&pool->pool_lock);
173 list_splice(&unmap_list, &pool->free_list);
174 spin_unlock_irq(&pool->pool_lock);
175}
176
177static int ib_fmr_cleanup_thread(void *pool_ptr)
178{
179 struct ib_fmr_pool *pool = pool_ptr;
180
181 do {
182 if (pool->dirty_len >= pool->dirty_watermark ||
183 atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
184 ib_fmr_batch_release(pool);
185
186 atomic_inc(&pool->flush_ser);
187 wake_up_interruptible(&pool->force_wait);
188
189 if (pool->flush_function)
190 pool->flush_function(pool, pool->flush_arg);
191 }
192
193 set_current_state(TASK_INTERRUPTIBLE);
194 if (pool->dirty_len < pool->dirty_watermark &&
195 atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
196 !kthread_should_stop())
197 schedule();
198 __set_current_state(TASK_RUNNING);
199 } while (!kthread_should_stop());
200
201 return 0;
202}
203
204/**
205 * ib_create_fmr_pool - Create an FMR pool
206 * @pd:Protection domain for FMRs
207 * @params:FMR pool parameters
208 *
 209 * Create a pool of FMRs.  Returns a pointer to the new pool on success,
 210 * or an ERR_PTR-encoded error code if creation failed.
211 */
212struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
213 struct ib_fmr_pool_param *params)
214{
215 struct ib_device *device;
216 struct ib_fmr_pool *pool;
217 int i;
218 int ret;
219
220 if (!params)
221 return ERR_PTR(-EINVAL);
222
223 device = pd->device;
224 if (!device->alloc_fmr || !device->dealloc_fmr ||
225 !device->map_phys_fmr || !device->unmap_fmr) {
226 printk(KERN_WARNING "Device %s does not support fast memory regions\n",
227 device->name);
228 return ERR_PTR(-ENOSYS);
229 }
230
231 pool = kmalloc(sizeof *pool, GFP_KERNEL);
232 if (!pool) {
233 printk(KERN_WARNING "couldn't allocate pool struct\n");
234 return ERR_PTR(-ENOMEM);
235 }
236
237 pool->cache_bucket = NULL;
238
239 pool->flush_function = params->flush_function;
240 pool->flush_arg = params->flush_arg;
241
242 INIT_LIST_HEAD(&pool->free_list);
243 INIT_LIST_HEAD(&pool->dirty_list);
244
245 if (params->cache) {
246 pool->cache_bucket =
247 kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
248 GFP_KERNEL);
249 if (!pool->cache_bucket) {
250 printk(KERN_WARNING "Failed to allocate cache in pool\n");
251 ret = -ENOMEM;
252 goto out_free_pool;
253 }
254
255 for (i = 0; i < IB_FMR_HASH_SIZE; ++i)
256 INIT_HLIST_HEAD(pool->cache_bucket + i);
257 }
258
259 pool->pool_size = 0;
260 pool->max_pages = params->max_pages_per_fmr;
261 pool->dirty_watermark = params->dirty_watermark;
262 pool->dirty_len = 0;
263 spin_lock_init(&pool->pool_lock);
264 atomic_set(&pool->req_ser, 0);
265 atomic_set(&pool->flush_ser, 0);
266 init_waitqueue_head(&pool->force_wait);
267
268 pool->thread = kthread_create(ib_fmr_cleanup_thread,
269 pool,
270 "ib_fmr(%s)",
271 device->name);
272 if (IS_ERR(pool->thread)) {
273 printk(KERN_WARNING "couldn't start cleanup thread\n");
274 ret = PTR_ERR(pool->thread);
275 goto out_free_pool;
276 }
277
278 {
279 struct ib_pool_fmr *fmr;
280 struct ib_fmr_attr attr = {
281 .max_pages = params->max_pages_per_fmr,
282 .max_maps = IB_FMR_MAX_REMAPS,
283 .page_size = PAGE_SHIFT
284 };
285
286 for (i = 0; i < params->pool_size; ++i) {
287 fmr = kmalloc(sizeof *fmr + params->max_pages_per_fmr * sizeof (u64),
288 GFP_KERNEL);
289 if (!fmr) {
290 printk(KERN_WARNING "failed to allocate fmr struct "
291 "for FMR %d\n", i);
292 goto out_fail;
293 }
294
295 fmr->pool = pool;
296 fmr->remap_count = 0;
297 fmr->ref_count = 0;
298 INIT_HLIST_NODE(&fmr->cache_node);
299
300 fmr->fmr = ib_alloc_fmr(pd, params->access, &attr);
301 if (IS_ERR(fmr->fmr)) {
302 printk(KERN_WARNING "fmr_create failed for FMR %d\n", i);
303 kfree(fmr);
304 goto out_fail;
305 }
306
307 list_add_tail(&fmr->list, &pool->free_list);
308 ++pool->pool_size;
309 }
310 }
311
312 return pool;
313
314 out_free_pool:
315 kfree(pool->cache_bucket);
316 kfree(pool);
317
318 return ERR_PTR(ret);
319
320 out_fail:
321 ib_destroy_fmr_pool(pool);
322
323 return ERR_PTR(-ENOMEM);
324}
325EXPORT_SYMBOL(ib_create_fmr_pool);
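/*
 * A minimal creation sketch for consumers of this API, assuming the caller
 * already holds a valid protection domain "pd"; the parameter values and
 * variable names below are illustrative, not recommendations:
 *
 *	struct ib_fmr_pool_param params = {
 *		.max_pages_per_fmr = 64,
 *		.access            = IB_ACCESS_LOCAL_WRITE |
 *				     IB_ACCESS_REMOTE_WRITE,
 *		.pool_size         = 32,
 *		.dirty_watermark   = 8,
 *		.cache             = 1,
 *	};
 *	struct ib_fmr_pool *fmr_pool;
 *
 *	fmr_pool = ib_create_fmr_pool(pd, &params);
 *	if (IS_ERR(fmr_pool))
 *		return PTR_ERR(fmr_pool);
 *	// ... map and unmap FMRs ...
 *	ib_destroy_fmr_pool(fmr_pool);
 */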
326
327/**
328 * ib_destroy_fmr_pool - Free FMR pool
329 * @pool:FMR pool to free
330 *
331 * Destroy an FMR pool and free all associated resources.
332 */
333int ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
334{
335 struct ib_pool_fmr *fmr;
336 struct ib_pool_fmr *tmp;
337 int i;
338
339 kthread_stop(pool->thread);
340 ib_fmr_batch_release(pool);
341
342 i = 0;
343 list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
344 ib_dealloc_fmr(fmr->fmr);
345 list_del(&fmr->list);
346 kfree(fmr);
347 ++i;
348 }
349
350 if (i < pool->pool_size)
351 printk(KERN_WARNING "pool still has %d regions registered\n",
352 pool->pool_size - i);
353
354 kfree(pool->cache_bucket);
355 kfree(pool);
356
357 return 0;
358}
359EXPORT_SYMBOL(ib_destroy_fmr_pool);
360
361/**
362 * ib_flush_fmr_pool - Invalidate all unmapped FMRs
363 * @pool:FMR pool to flush
364 *
365 * Ensure that all unmapped FMRs are fully invalidated.
366 */
367int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
368{
369 int serial;
370
371 atomic_inc(&pool->req_ser);
372 /*
373 * It's OK if someone else bumps req_ser again here -- we'll
374 * just wait a little longer.
375 */
376 serial = atomic_read(&pool->req_ser);
377
378 wake_up_process(pool->thread);
379
380 if (wait_event_interruptible(pool->force_wait,
381 atomic_read(&pool->flush_ser) -
382 serial >= 0))
383 return -EINTR;
384
385 return 0;
386}
387EXPORT_SYMBOL(ib_flush_fmr_pool);
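/*
 * A brief usage sketch, assuming a consumer that must guarantee that access
 * through FMRs it has already unmapped is revoked before it reuses or frees
 * the underlying buffers ("f" and "fmr_pool" are illustrative names, with f
 * previously returned by ib_fmr_pool_map_phys()):
 *
 *	ib_fmr_pool_unmap(f);
 *	if (ib_flush_fmr_pool(fmr_pool))
 *		return -EINTR;		// interrupted; mappings may still be live
 */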
388
389/**
 390 * ib_fmr_pool_map_phys - Map an FMR from an FMR pool
 391 * @pool_handle:FMR pool to allocate FMR from
392 * @page_list:List of pages to map
393 * @list_len:Number of pages in @page_list
394 * @io_virtual_address:I/O virtual address for new FMR
395 *
396 * Map an FMR from an FMR pool.
397 */
398struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
399 u64 *page_list,
400 int list_len,
401 u64 *io_virtual_address)
402{
403 struct ib_fmr_pool *pool = pool_handle;
404 struct ib_pool_fmr *fmr;
405 unsigned long flags;
406 int result;
407
408 if (list_len < 1 || list_len > pool->max_pages)
409 return ERR_PTR(-EINVAL);
410
411 spin_lock_irqsave(&pool->pool_lock, flags);
412 fmr = ib_fmr_cache_lookup(pool,
413 page_list,
414 list_len,
415 *io_virtual_address);
416 if (fmr) {
417 /* found in cache */
418 ++fmr->ref_count;
419 if (fmr->ref_count == 1) {
420 list_del(&fmr->list);
421 }
422
423 spin_unlock_irqrestore(&pool->pool_lock, flags);
424
425 return fmr;
426 }
427
428 if (list_empty(&pool->free_list)) {
429 spin_unlock_irqrestore(&pool->pool_lock, flags);
430 return ERR_PTR(-EAGAIN);
431 }
432
433 fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list);
434 list_del(&fmr->list);
435 hlist_del_init(&fmr->cache_node);
436 spin_unlock_irqrestore(&pool->pool_lock, flags);
437
438 result = ib_map_phys_fmr(fmr->fmr, page_list, list_len,
439 *io_virtual_address);
440
441 if (result) {
442 spin_lock_irqsave(&pool->pool_lock, flags);
443 list_add(&fmr->list, &pool->free_list);
444 spin_unlock_irqrestore(&pool->pool_lock, flags);
445
446 printk(KERN_WARNING "fmr_map returns %d\n",
447 result);
448
449 return ERR_PTR(result);
450 }
451
452 ++fmr->remap_count;
453 fmr->ref_count = 1;
454
455 if (pool->cache_bucket) {
456 fmr->io_virtual_address = *io_virtual_address;
457 fmr->page_list_len = list_len;
458 memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list));
459
460 spin_lock_irqsave(&pool->pool_lock, flags);
461 hlist_add_head(&fmr->cache_node,
462 pool->cache_bucket + ib_fmr_hash(fmr->page_list[0]));
463 spin_unlock_irqrestore(&pool->pool_lock, flags);
464 }
465
466 return fmr;
467}
468EXPORT_SYMBOL(ib_fmr_pool_map_phys);
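/*
 * A minimal map/use/unmap sketch, assuming pages[] holds the DMA addresses
 * of npages pages and iova is the desired I/O virtual address; all
 * consumer-side names here are illustrative:
 *
 *	struct ib_pool_fmr *f;
 *
 *	f = ib_fmr_pool_map_phys(fmr_pool, pages, npages, &iova);
 *	if (IS_ERR(f))
 *		return PTR_ERR(f);	// e.g. -EAGAIN when the free list is empty
 *	// ... post work requests using f->fmr ...
 *	ib_fmr_pool_unmap(f);		// back to free_list, or to dirty_list
 *					// once IB_FMR_MAX_REMAPS is reached
 */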
469
470/**
471 * ib_fmr_pool_unmap - Unmap FMR
472 * @fmr:FMR to unmap
473 *
474 * Unmap an FMR. The FMR mapping may remain valid until the FMR is
475 * reused (or until ib_flush_fmr_pool() is called).
476 */
477int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
478{
479 struct ib_fmr_pool *pool;
480 unsigned long flags;
481
482 pool = fmr->pool;
483
484 spin_lock_irqsave(&pool->pool_lock, flags);
485
486 --fmr->ref_count;
487 if (!fmr->ref_count) {
488 if (fmr->remap_count < IB_FMR_MAX_REMAPS) {
489 list_add_tail(&fmr->list, &pool->free_list);
490 } else {
491 list_add_tail(&fmr->list, &pool->dirty_list);
492 ++pool->dirty_len;
493 wake_up_process(pool->thread);
494 }
495 }
496
497#ifdef DEBUG
498 if (fmr->ref_count < 0)
499 printk(KERN_WARNING "FMR %p has ref count %d < 0\n",
500 fmr, fmr->ref_count);
501#endif
502
503 spin_unlock_irqrestore(&pool->pool_lock, flags);
504
505 return 0;
506}
507EXPORT_SYMBOL(ib_fmr_pool_unmap);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
new file mode 100644
index 000000000000..4ec7fff29b5d
--- /dev/null
+++ b/drivers/infiniband/core/mad.c
@@ -0,0 +1,2714 @@
1/*
2 * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mad.c 1389 2004-12-27 22:56:47Z roland $
33 */
34
35#include <linux/dma-mapping.h>
36#include <linux/interrupt.h>
37
38#include <ib_mad.h>
39
40#include "mad_priv.h"
41#include "smi.h"
42#include "agent.h"
43
44MODULE_LICENSE("Dual BSD/GPL");
45MODULE_DESCRIPTION("kernel IB MAD API");
46MODULE_AUTHOR("Hal Rosenstock");
47MODULE_AUTHOR("Sean Hefty");
48
49
50kmem_cache_t *ib_mad_cache;
51static struct list_head ib_mad_port_list;
52static u32 ib_mad_client_id = 0;
53
54/* Port list lock */
55static spinlock_t ib_mad_port_list_lock;
56
57
58/* Forward declarations */
59static int method_in_use(struct ib_mad_mgmt_method_table **method,
60 struct ib_mad_reg_req *mad_reg_req);
61static void remove_mad_reg_req(struct ib_mad_agent_private *priv);
62static struct ib_mad_agent_private *find_mad_agent(
63 struct ib_mad_port_private *port_priv,
64 struct ib_mad *mad, int solicited);
65static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
66 struct ib_mad_private *mad);
67static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
68static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
69 struct ib_mad_send_wc *mad_send_wc);
70static void timeout_sends(void *data);
71static void cancel_sends(void *data);
72static void local_completions(void *data);
73static int solicited_mad(struct ib_mad *mad);
74static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
75 struct ib_mad_agent_private *agent_priv,
76 u8 mgmt_class);
77static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
78 struct ib_mad_agent_private *agent_priv);
79
80/*
 81 * Returns an ib_mad_port_private structure or NULL for a device/port
82 * Assumes ib_mad_port_list_lock is being held
83 */
84static inline struct ib_mad_port_private *
85__ib_get_mad_port(struct ib_device *device, int port_num)
86{
87 struct ib_mad_port_private *entry;
88
89 list_for_each_entry(entry, &ib_mad_port_list, port_list) {
90 if (entry->device == device && entry->port_num == port_num)
91 return entry;
92 }
93 return NULL;
94}
95
96/*
 97 * Wrapper function to return an ib_mad_port_private structure or NULL
98 * for a device/port
99 */
100static inline struct ib_mad_port_private *
101ib_get_mad_port(struct ib_device *device, int port_num)
102{
103 struct ib_mad_port_private *entry;
104 unsigned long flags;
105
106 spin_lock_irqsave(&ib_mad_port_list_lock, flags);
107 entry = __ib_get_mad_port(device, port_num);
108 spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
109
110 return entry;
111}
112
113static inline u8 convert_mgmt_class(u8 mgmt_class)
114{
115 /* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
116 return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ?
117 0 : mgmt_class;
118}
119
120static int get_spl_qp_index(enum ib_qp_type qp_type)
121{
122 switch (qp_type)
123 {
124 case IB_QPT_SMI:
125 return 0;
126 case IB_QPT_GSI:
127 return 1;
128 default:
129 return -1;
130 }
131}
132
133static int vendor_class_index(u8 mgmt_class)
134{
135 return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START;
136}
137
138static int is_vendor_class(u8 mgmt_class)
139{
140 if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) ||
141 (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END))
142 return 0;
143 return 1;
144}
145
146static int is_vendor_oui(char *oui)
147{
148 if (oui[0] || oui[1] || oui[2])
149 return 1;
150 return 0;
151}
152
153static int is_vendor_method_in_use(
154 struct ib_mad_mgmt_vendor_class *vendor_class,
155 struct ib_mad_reg_req *mad_reg_req)
156{
157 struct ib_mad_mgmt_method_table *method;
158 int i;
159
160 for (i = 0; i < MAX_MGMT_OUI; i++) {
161 if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) {
162 method = vendor_class->method_table[i];
163 if (method) {
164 if (method_in_use(&method, mad_reg_req))
165 return 1;
166 else
167 break;
168 }
169 }
170 }
171 return 0;
172}
173
174/*
175 * ib_register_mad_agent - Register to send/receive MADs
176 */
177struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
178 u8 port_num,
179 enum ib_qp_type qp_type,
180 struct ib_mad_reg_req *mad_reg_req,
181 u8 rmpp_version,
182 ib_mad_send_handler send_handler,
183 ib_mad_recv_handler recv_handler,
184 void *context)
185{
186 struct ib_mad_port_private *port_priv;
187 struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
188 struct ib_mad_agent_private *mad_agent_priv;
189 struct ib_mad_reg_req *reg_req = NULL;
190 struct ib_mad_mgmt_class_table *class;
191 struct ib_mad_mgmt_vendor_class_table *vendor;
192 struct ib_mad_mgmt_vendor_class *vendor_class;
193 struct ib_mad_mgmt_method_table *method;
194 int ret2, qpn;
195 unsigned long flags;
196 u8 mgmt_class, vclass;
197
198 /* Validate parameters */
199 qpn = get_spl_qp_index(qp_type);
200 if (qpn == -1)
201 goto error1;
202
203 if (rmpp_version)
204 goto error1; /* XXX: until RMPP implemented */
205
206 /* Validate MAD registration request if supplied */
207 if (mad_reg_req) {
208 if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION)
209 goto error1;
210 if (!recv_handler)
211 goto error1;
212 if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
213 /*
214 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
215 * one in this range currently allowed
216 */
217 if (mad_reg_req->mgmt_class !=
218 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
219 goto error1;
220 } else if (mad_reg_req->mgmt_class == 0) {
221 /*
222 * Class 0 is reserved in IBA and is used for
223 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
224 */
225 goto error1;
226 } else if (is_vendor_class(mad_reg_req->mgmt_class)) {
227 /*
228 * If class is in "new" vendor range,
229 * ensure supplied OUI is not zero
230 */
231 if (!is_vendor_oui(mad_reg_req->oui))
232 goto error1;
233 }
234 /* Make sure class supplied is consistent with QP type */
235 if (qp_type == IB_QPT_SMI) {
236 if ((mad_reg_req->mgmt_class !=
237 IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
238 (mad_reg_req->mgmt_class !=
239 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
240 goto error1;
241 } else {
242 if ((mad_reg_req->mgmt_class ==
243 IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
244 (mad_reg_req->mgmt_class ==
245 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
246 goto error1;
247 }
248 } else {
249 /* No registration request supplied */
250 if (!send_handler)
251 goto error1;
252 }
253
254 /* Validate device and port */
255 port_priv = ib_get_mad_port(device, port_num);
256 if (!port_priv) {
257 ret = ERR_PTR(-ENODEV);
258 goto error1;
259 }
260
261 /* Allocate structures */
262 mad_agent_priv = kmalloc(sizeof *mad_agent_priv, GFP_KERNEL);
263 if (!mad_agent_priv) {
264 ret = ERR_PTR(-ENOMEM);
265 goto error1;
266 }
267
268 if (mad_reg_req) {
269 reg_req = kmalloc(sizeof *reg_req, GFP_KERNEL);
270 if (!reg_req) {
271 ret = ERR_PTR(-ENOMEM);
272 goto error2;
273 }
274 /* Make a copy of the MAD registration request */
275 memcpy(reg_req, mad_reg_req, sizeof *reg_req);
276 }
277
278 /* Now, fill in the various structures */
279 memset(mad_agent_priv, 0, sizeof *mad_agent_priv);
280 mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
281 mad_agent_priv->reg_req = reg_req;
282 mad_agent_priv->rmpp_version = rmpp_version;
283 mad_agent_priv->agent.device = device;
284 mad_agent_priv->agent.recv_handler = recv_handler;
285 mad_agent_priv->agent.send_handler = send_handler;
286 mad_agent_priv->agent.context = context;
287 mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
288 mad_agent_priv->agent.port_num = port_num;
289
290 spin_lock_irqsave(&port_priv->reg_lock, flags);
291 mad_agent_priv->agent.hi_tid = ++ib_mad_client_id;
292
293 /*
294 * Make sure MAD registration (if supplied)
 295 * does not overlap with any existing ones
296 */
297 if (mad_reg_req) {
298 mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
299 if (!is_vendor_class(mgmt_class)) {
300 class = port_priv->version[mad_reg_req->
301 mgmt_class_version].class;
302 if (class) {
303 method = class->method_table[mgmt_class];
304 if (method) {
305 if (method_in_use(&method,
306 mad_reg_req))
307 goto error3;
308 }
309 }
310 ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
311 mgmt_class);
312 } else {
313 /* "New" vendor class range */
314 vendor = port_priv->version[mad_reg_req->
315 mgmt_class_version].vendor;
316 if (vendor) {
317 vclass = vendor_class_index(mgmt_class);
318 vendor_class = vendor->vendor_class[vclass];
319 if (vendor_class) {
320 if (is_vendor_method_in_use(
321 vendor_class,
322 mad_reg_req))
323 goto error3;
324 }
325 }
326 ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
327 }
328 if (ret2) {
329 ret = ERR_PTR(ret2);
330 goto error3;
331 }
332 }
333
334 /* Add mad agent into port's agent list */
335 list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list);
336 spin_unlock_irqrestore(&port_priv->reg_lock, flags);
337
338 spin_lock_init(&mad_agent_priv->lock);
339 INIT_LIST_HEAD(&mad_agent_priv->send_list);
340 INIT_LIST_HEAD(&mad_agent_priv->wait_list);
341 INIT_WORK(&mad_agent_priv->timed_work, timeout_sends, mad_agent_priv);
342 INIT_LIST_HEAD(&mad_agent_priv->local_list);
343 INIT_WORK(&mad_agent_priv->local_work, local_completions,
344 mad_agent_priv);
345 INIT_LIST_HEAD(&mad_agent_priv->canceled_list);
346 INIT_WORK(&mad_agent_priv->canceled_work, cancel_sends, mad_agent_priv);
347 atomic_set(&mad_agent_priv->refcount, 1);
348 init_waitqueue_head(&mad_agent_priv->wait);
349
350 return &mad_agent_priv->agent;
351
352error3:
353 spin_unlock_irqrestore(&port_priv->reg_lock, flags);
354 kfree(reg_req);
355error2:
356 kfree(mad_agent_priv);
357error1:
358 return ret;
359}
360EXPORT_SYMBOL(ib_register_mad_agent);
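/*
 * A minimal registration sketch, assuming the caller supplies its own send
 * and receive handlers; the management class, class version, method and all
 * names below are illustrative and depend on the client (rmpp_version is 0
 * since RMPP is not implemented yet):
 *
 *	struct ib_mad_reg_req reg_req = {
 *		.mgmt_class         = IB_MGMT_CLASS_PERF_MGMT,
 *		.mgmt_class_version = 1,
 *	};
 *	struct ib_mad_agent *agent;
 *
 *	set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
 *	agent = ib_register_mad_agent(device, port_num, IB_QPT_GSI,
 *				      &reg_req, 0, my_send_handler,
 *				      my_recv_handler, my_context);
 *	if (IS_ERR(agent))
 *		return PTR_ERR(agent);
 *	// ... post sends, handle receives ...
 *	ib_unregister_mad_agent(agent);
 */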
361
362static inline int is_snooping_sends(int mad_snoop_flags)
363{
364 return (mad_snoop_flags &
365 (/*IB_MAD_SNOOP_POSTED_SENDS |
366 IB_MAD_SNOOP_RMPP_SENDS |*/
367 IB_MAD_SNOOP_SEND_COMPLETIONS /*|
368 IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/));
369}
370
371static inline int is_snooping_recvs(int mad_snoop_flags)
372{
373 return (mad_snoop_flags &
374 (IB_MAD_SNOOP_RECVS /*|
375 IB_MAD_SNOOP_RMPP_RECVS*/));
376}
377
378static int register_snoop_agent(struct ib_mad_qp_info *qp_info,
379 struct ib_mad_snoop_private *mad_snoop_priv)
380{
381 struct ib_mad_snoop_private **new_snoop_table;
382 unsigned long flags;
383 int i;
384
385 spin_lock_irqsave(&qp_info->snoop_lock, flags);
386 /* Check for empty slot in array. */
387 for (i = 0; i < qp_info->snoop_table_size; i++)
388 if (!qp_info->snoop_table[i])
389 break;
390
391 if (i == qp_info->snoop_table_size) {
392 /* Grow table. */
393 new_snoop_table = kmalloc(sizeof mad_snoop_priv *
394 (qp_info->snoop_table_size + 1),
395 GFP_ATOMIC);
396 if (!new_snoop_table) {
397 i = -ENOMEM;
398 goto out;
399 }
400 if (qp_info->snoop_table) {
401 memcpy(new_snoop_table, qp_info->snoop_table,
402 sizeof mad_snoop_priv *
403 qp_info->snoop_table_size);
404 kfree(qp_info->snoop_table);
405 }
406 qp_info->snoop_table = new_snoop_table;
407 qp_info->snoop_table_size++;
408 }
409 qp_info->snoop_table[i] = mad_snoop_priv;
410 atomic_inc(&qp_info->snoop_count);
411out:
412 spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
413 return i;
414}
415
416struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
417 u8 port_num,
418 enum ib_qp_type qp_type,
419 int mad_snoop_flags,
420 ib_mad_snoop_handler snoop_handler,
421 ib_mad_recv_handler recv_handler,
422 void *context)
423{
424 struct ib_mad_port_private *port_priv;
425 struct ib_mad_agent *ret;
426 struct ib_mad_snoop_private *mad_snoop_priv;
427 int qpn;
428
429 /* Validate parameters */
430 if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
431 (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) {
432 ret = ERR_PTR(-EINVAL);
433 goto error1;
434 }
435 qpn = get_spl_qp_index(qp_type);
436 if (qpn == -1) {
437 ret = ERR_PTR(-EINVAL);
438 goto error1;
439 }
440 port_priv = ib_get_mad_port(device, port_num);
441 if (!port_priv) {
442 ret = ERR_PTR(-ENODEV);
443 goto error1;
444 }
445 /* Allocate structures */
446 mad_snoop_priv = kmalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
447 if (!mad_snoop_priv) {
448 ret = ERR_PTR(-ENOMEM);
449 goto error1;
450 }
451
452 /* Now, fill in the various structures */
453 memset(mad_snoop_priv, 0, sizeof *mad_snoop_priv);
454 mad_snoop_priv->qp_info = &port_priv->qp_info[qpn];
455 mad_snoop_priv->agent.device = device;
456 mad_snoop_priv->agent.recv_handler = recv_handler;
457 mad_snoop_priv->agent.snoop_handler = snoop_handler;
458 mad_snoop_priv->agent.context = context;
459 mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp;
460 mad_snoop_priv->agent.port_num = port_num;
461 mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
462 init_waitqueue_head(&mad_snoop_priv->wait);
463 mad_snoop_priv->snoop_index = register_snoop_agent(
464 &port_priv->qp_info[qpn],
465 mad_snoop_priv);
466 if (mad_snoop_priv->snoop_index < 0) {
467 ret = ERR_PTR(mad_snoop_priv->snoop_index);
468 goto error2;
469 }
470
471 atomic_set(&mad_snoop_priv->refcount, 1);
472 return &mad_snoop_priv->agent;
473
474error2:
475 kfree(mad_snoop_priv);
476error1:
477 return ret;
478}
479EXPORT_SYMBOL(ib_register_mad_snoop);
480
481static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
482{
483 struct ib_mad_port_private *port_priv;
484 unsigned long flags;
485
486 /* Note that we could still be handling received MADs */
487
488 /*
489 * Canceling all sends results in dropping received response
490 * MADs, preventing us from queuing additional work
491 */
492 cancel_mads(mad_agent_priv);
493
494 port_priv = mad_agent_priv->qp_info->port_priv;
495
496 cancel_delayed_work(&mad_agent_priv->timed_work);
497 flush_workqueue(port_priv->wq);
498
499 spin_lock_irqsave(&port_priv->reg_lock, flags);
500 remove_mad_reg_req(mad_agent_priv);
501 list_del(&mad_agent_priv->agent_list);
502 spin_unlock_irqrestore(&port_priv->reg_lock, flags);
503
504 /* XXX: Cleanup pending RMPP receives for this agent */
505
506 atomic_dec(&mad_agent_priv->refcount);
507 wait_event(mad_agent_priv->wait,
508 !atomic_read(&mad_agent_priv->refcount));
509
510 if (mad_agent_priv->reg_req)
511 kfree(mad_agent_priv->reg_req);
512 kfree(mad_agent_priv);
513}
514
515static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
516{
517 struct ib_mad_qp_info *qp_info;
518 unsigned long flags;
519
520 qp_info = mad_snoop_priv->qp_info;
521 spin_lock_irqsave(&qp_info->snoop_lock, flags);
522 qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL;
523 atomic_dec(&qp_info->snoop_count);
524 spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
525
526 atomic_dec(&mad_snoop_priv->refcount);
527 wait_event(mad_snoop_priv->wait,
528 !atomic_read(&mad_snoop_priv->refcount));
529
530 kfree(mad_snoop_priv);
531}
532
533/*
534 * ib_unregister_mad_agent - Unregisters a client from using MAD services
535 */
536int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
537{
538 struct ib_mad_agent_private *mad_agent_priv;
539 struct ib_mad_snoop_private *mad_snoop_priv;
540
541 /* If the TID is zero, the agent can only snoop. */
542 if (mad_agent->hi_tid) {
543 mad_agent_priv = container_of(mad_agent,
544 struct ib_mad_agent_private,
545 agent);
546 unregister_mad_agent(mad_agent_priv);
547 } else {
548 mad_snoop_priv = container_of(mad_agent,
549 struct ib_mad_snoop_private,
550 agent);
551 unregister_mad_snoop(mad_snoop_priv);
552 }
553 return 0;
554}
555EXPORT_SYMBOL(ib_unregister_mad_agent);
556
557static void dequeue_mad(struct ib_mad_list_head *mad_list)
558{
559 struct ib_mad_queue *mad_queue;
560 unsigned long flags;
561
562 BUG_ON(!mad_list->mad_queue);
563 mad_queue = mad_list->mad_queue;
564 spin_lock_irqsave(&mad_queue->lock, flags);
565 list_del(&mad_list->list);
566 mad_queue->count--;
567 spin_unlock_irqrestore(&mad_queue->lock, flags);
568}
569
570static void snoop_send(struct ib_mad_qp_info *qp_info,
571 struct ib_send_wr *send_wr,
572 struct ib_mad_send_wc *mad_send_wc,
573 int mad_snoop_flags)
574{
575 struct ib_mad_snoop_private *mad_snoop_priv;
576 unsigned long flags;
577 int i;
578
579 spin_lock_irqsave(&qp_info->snoop_lock, flags);
580 for (i = 0; i < qp_info->snoop_table_size; i++) {
581 mad_snoop_priv = qp_info->snoop_table[i];
582 if (!mad_snoop_priv ||
583 !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
584 continue;
585
586 atomic_inc(&mad_snoop_priv->refcount);
587 spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
588 mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
589 send_wr, mad_send_wc);
590 if (atomic_dec_and_test(&mad_snoop_priv->refcount))
591 wake_up(&mad_snoop_priv->wait);
592 spin_lock_irqsave(&qp_info->snoop_lock, flags);
593 }
594 spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
595}
596
597static void snoop_recv(struct ib_mad_qp_info *qp_info,
598 struct ib_mad_recv_wc *mad_recv_wc,
599 int mad_snoop_flags)
600{
601 struct ib_mad_snoop_private *mad_snoop_priv;
602 unsigned long flags;
603 int i;
604
605 spin_lock_irqsave(&qp_info->snoop_lock, flags);
606 for (i = 0; i < qp_info->snoop_table_size; i++) {
607 mad_snoop_priv = qp_info->snoop_table[i];
608 if (!mad_snoop_priv ||
609 !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
610 continue;
611
612 atomic_inc(&mad_snoop_priv->refcount);
613 spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
614 mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent,
615 mad_recv_wc);
616 if (atomic_dec_and_test(&mad_snoop_priv->refcount))
617 wake_up(&mad_snoop_priv->wait);
618 spin_lock_irqsave(&qp_info->snoop_lock, flags);
619 }
620 spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
621}
622
623static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
624 struct ib_wc *wc)
625{
626 memset(wc, 0, sizeof *wc);
627 wc->wr_id = wr_id;
628 wc->status = IB_WC_SUCCESS;
629 wc->opcode = IB_WC_RECV;
630 wc->pkey_index = pkey_index;
631 wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
632 wc->src_qp = IB_QP0;
633 wc->qp_num = IB_QP0;
634 wc->slid = slid;
635 wc->sl = 0;
636 wc->dlid_path_bits = 0;
637 wc->port_num = port_num;
638}
639
640/*
641 * Return 0 if SMP is to be sent
642 * Return 1 if SMP was consumed locally (whether or not solicited)
643 * Return < 0 if error
644 */
645static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
646 struct ib_smp *smp,
647 struct ib_send_wr *send_wr)
648{
649 int ret, solicited;
650 unsigned long flags;
651 struct ib_mad_local_private *local;
652 struct ib_mad_private *mad_priv;
653 struct ib_mad_port_private *port_priv;
654 struct ib_mad_agent_private *recv_mad_agent = NULL;
655 struct ib_device *device = mad_agent_priv->agent.device;
656 u8 port_num = mad_agent_priv->agent.port_num;
657 struct ib_wc mad_wc;
658
659 if (!smi_handle_dr_smp_send(smp, device->node_type, port_num)) {
660 ret = -EINVAL;
661 printk(KERN_ERR PFX "Invalid directed route\n");
662 goto out;
663 }
664 /* Check to post send on QP or process locally */
665 ret = smi_check_local_dr_smp(smp, device, port_num);
666 if (!ret || !device->process_mad)
667 goto out;
668
669 local = kmalloc(sizeof *local, GFP_ATOMIC);
670 if (!local) {
671 ret = -ENOMEM;
672 printk(KERN_ERR PFX "No memory for ib_mad_local_private\n");
673 goto out;
674 }
675 local->mad_priv = NULL;
676 local->recv_mad_agent = NULL;
677 mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC);
678 if (!mad_priv) {
679 ret = -ENOMEM;
680 printk(KERN_ERR PFX "No memory for local response MAD\n");
681 kfree(local);
682 goto out;
683 }
684
685 build_smp_wc(send_wr->wr_id, smp->dr_slid, send_wr->wr.ud.pkey_index,
686 send_wr->wr.ud.port_num, &mad_wc);
687
688 /* No GRH for DR SMP */
689 ret = device->process_mad(device, 0, port_num, &mad_wc, NULL,
690 (struct ib_mad *)smp,
691 (struct ib_mad *)&mad_priv->mad);
692 switch (ret)
693 {
694 case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
695 /*
696 * See if response is solicited and
697 * there is a recv handler
698 */
699 if (solicited_mad(&mad_priv->mad.mad) &&
700 mad_agent_priv->agent.recv_handler) {
701 local->mad_priv = mad_priv;
702 local->recv_mad_agent = mad_agent_priv;
703 /*
704 * Reference MAD agent until receive
705 * side of local completion handled
706 */
707 atomic_inc(&mad_agent_priv->refcount);
708 } else
709 kmem_cache_free(ib_mad_cache, mad_priv);
710 break;
711 case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED:
712 kmem_cache_free(ib_mad_cache, mad_priv);
713 break;
714 case IB_MAD_RESULT_SUCCESS:
715 /* Treat like an incoming receive MAD */
716 solicited = solicited_mad(&mad_priv->mad.mad);
717 port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
718 mad_agent_priv->agent.port_num);
719 if (port_priv) {
720 mad_priv->mad.mad.mad_hdr.tid =
721 ((struct ib_mad *)smp)->mad_hdr.tid;
722 recv_mad_agent = find_mad_agent(port_priv,
723 &mad_priv->mad.mad,
724 solicited);
725 }
726 if (!port_priv || !recv_mad_agent) {
727 kmem_cache_free(ib_mad_cache, mad_priv);
728 kfree(local);
729 ret = 0;
730 goto out;
731 }
732 local->mad_priv = mad_priv;
733 local->recv_mad_agent = recv_mad_agent;
734 break;
735 default:
736 kmem_cache_free(ib_mad_cache, mad_priv);
737 kfree(local);
738 ret = -EINVAL;
739 goto out;
740 }
741
742 local->send_wr = *send_wr;
743 local->send_wr.sg_list = local->sg_list;
744 memcpy(local->sg_list, send_wr->sg_list,
745 sizeof *send_wr->sg_list * send_wr->num_sge);
746 local->send_wr.next = NULL;
747 local->tid = send_wr->wr.ud.mad_hdr->tid;
748 local->wr_id = send_wr->wr_id;
749 /* Reference MAD agent until send side of local completion handled */
750 atomic_inc(&mad_agent_priv->refcount);
751 /* Queue local completion to local list */
752 spin_lock_irqsave(&mad_agent_priv->lock, flags);
753 list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
754 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
755 queue_work(mad_agent_priv->qp_info->port_priv->wq,
756 &mad_agent_priv->local_work);
757 ret = 1;
758out:
759 return ret;
760}
761
762static int ib_send_mad(struct ib_mad_agent_private *mad_agent_priv,
763 struct ib_mad_send_wr_private *mad_send_wr)
764{
765 struct ib_mad_qp_info *qp_info;
766 struct ib_send_wr *bad_send_wr;
767 unsigned long flags;
768 int ret;
769
770 /* Replace user's WR ID with our own to find WR upon completion */
771 qp_info = mad_agent_priv->qp_info;
772 mad_send_wr->wr_id = mad_send_wr->send_wr.wr_id;
773 mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
774 mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
775
776 spin_lock_irqsave(&qp_info->send_queue.lock, flags);
777 if (qp_info->send_queue.count++ < qp_info->send_queue.max_active) {
778 list_add_tail(&mad_send_wr->mad_list.list,
779 &qp_info->send_queue.list);
780 spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
781 ret = ib_post_send(mad_agent_priv->agent.qp,
782 &mad_send_wr->send_wr, &bad_send_wr);
783 if (ret) {
784 printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
785 dequeue_mad(&mad_send_wr->mad_list);
786 }
787 } else {
788 list_add_tail(&mad_send_wr->mad_list.list,
789 &qp_info->overflow_list);
790 spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
791 ret = 0;
792 }
793 return ret;
794}
795
796/*
797 * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
798 * with the registered client
799 */
800int ib_post_send_mad(struct ib_mad_agent *mad_agent,
801 struct ib_send_wr *send_wr,
802 struct ib_send_wr **bad_send_wr)
803{
804 int ret = -EINVAL;
805 struct ib_mad_agent_private *mad_agent_priv;
806
807 /* Validate supplied parameters */
808 if (!bad_send_wr)
809 goto error1;
810
811 if (!mad_agent || !send_wr)
812 goto error2;
813
814 if (!mad_agent->send_handler)
815 goto error2;
816
817 mad_agent_priv = container_of(mad_agent,
818 struct ib_mad_agent_private,
819 agent);
820
821 /* Walk list of send WRs and post each on send list */
822 while (send_wr) {
823 unsigned long flags;
824 struct ib_send_wr *next_send_wr;
825 struct ib_mad_send_wr_private *mad_send_wr;
826 struct ib_smp *smp;
827
828 /* Validate more parameters */
829 if (send_wr->num_sge > IB_MAD_SEND_REQ_MAX_SG)
830 goto error2;
831
832 if (send_wr->wr.ud.timeout_ms && !mad_agent->recv_handler)
833 goto error2;
834
835 if (!send_wr->wr.ud.mad_hdr) {
836 printk(KERN_ERR PFX "MAD header must be supplied "
837 "in WR %p\n", send_wr);
838 goto error2;
839 }
840
841 /*
842 * Save pointer to next work request to post in case the
843 * current one completes, and the user modifies the work
844 * request associated with the completion
845 */
846 next_send_wr = (struct ib_send_wr *)send_wr->next;
847
848 smp = (struct ib_smp *)send_wr->wr.ud.mad_hdr;
849 if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
850 ret = handle_outgoing_dr_smp(mad_agent_priv, smp,
851 send_wr);
852 if (ret < 0) /* error */
853 goto error2;
854 else if (ret == 1) /* locally consumed */
855 goto next;
856 }
857
858 /* Allocate MAD send WR tracking structure */
859 mad_send_wr = kmalloc(sizeof *mad_send_wr, GFP_ATOMIC);
860 if (!mad_send_wr) {
861 printk(KERN_ERR PFX "No memory for "
862 "ib_mad_send_wr_private\n");
863 ret = -ENOMEM;
864 goto error2;
865 }
866
867 mad_send_wr->send_wr = *send_wr;
868 mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
869 memcpy(mad_send_wr->sg_list, send_wr->sg_list,
870 sizeof *send_wr->sg_list * send_wr->num_sge);
871 mad_send_wr->send_wr.next = NULL;
872 mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid;
873 mad_send_wr->agent = mad_agent;
874 /* Timeout will be updated after send completes */
875 mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr.
876 ud.timeout_ms);
877 mad_send_wr->retry = 0;
878 /* One reference for each work request to QP + response */
879 mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
880 mad_send_wr->status = IB_WC_SUCCESS;
881
882 /* Reference MAD agent until send completes */
883 atomic_inc(&mad_agent_priv->refcount);
884 spin_lock_irqsave(&mad_agent_priv->lock, flags);
885 list_add_tail(&mad_send_wr->agent_list,
886 &mad_agent_priv->send_list);
887 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
888
889 ret = ib_send_mad(mad_agent_priv, mad_send_wr);
890 if (ret) {
891 /* Fail send request */
892 spin_lock_irqsave(&mad_agent_priv->lock, flags);
893 list_del(&mad_send_wr->agent_list);
894 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
895 atomic_dec(&mad_agent_priv->refcount);
896 goto error2;
897 }
898next:
899 send_wr = next_send_wr;
900 }
901 return 0;
902
903error2:
904 *bad_send_wr = send_wr;
905error1:
906 return ret;
907}
908EXPORT_SYMBOL(ib_post_send_mad);
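/*
 * A heavily abbreviated send sketch showing only the fields this routine
 * itself examines; MAD construction, address handle creation and DMA mapping
 * are assumed to have been done already, and every name below is
 * illustrative:
 *
 *	struct ib_sge sge = {
 *		.addr   = mad_dma_addr,
 *		.length = sizeof (struct ib_mad),
 *		.lkey   = lkey,
 *	};
 *	struct ib_send_wr wr = {
 *		.wr_id      = (unsigned long) my_request,
 *		.sg_list    = &sge,
 *		.num_sge    = 1,
 *		.opcode     = IB_WR_SEND,
 *		.send_flags = IB_SEND_SIGNALED,
 *	};
 *	struct ib_send_wr *bad_wr;
 *
 *	wr.wr.ud.mad_hdr    = &my_mad->mad_hdr;
 *	wr.wr.ud.timeout_ms = 100;	// nonzero, so a recv_handler is required
 *	wr.wr.ud.ah         = ah;
 *	wr.wr.ud.port_num   = port_num;
 *
 *	if (ib_post_send_mad(agent, &wr, &bad_wr))
 *		// handle the error: bad_wr points at the first WR not posted
 *		return -EIO;
 */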
909
910/*
911 * ib_free_recv_mad - Returns data buffers used to receive
912 * a MAD to the access layer
913 */
914void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
915{
916 struct ib_mad_recv_buf *entry;
917 struct ib_mad_private_header *mad_priv_hdr;
918 struct ib_mad_private *priv;
919
920 mad_priv_hdr = container_of(mad_recv_wc,
921 struct ib_mad_private_header,
922 recv_wc);
923 priv = container_of(mad_priv_hdr, struct ib_mad_private, header);
924
925 /*
926 * Walk receive buffer list associated with this WC
927 * No need to remove them from list of receive buffers
928 */
929 list_for_each_entry(entry, &mad_recv_wc->recv_buf.list, list) {
930 /* Free previous receive buffer */
931 kmem_cache_free(ib_mad_cache, priv);
932 mad_priv_hdr = container_of(mad_recv_wc,
933 struct ib_mad_private_header,
934 recv_wc);
935 priv = container_of(mad_priv_hdr, struct ib_mad_private,
936 header);
937 }
938
939 /* Free last buffer */
940 kmem_cache_free(ib_mad_cache, priv);
941}
942EXPORT_SYMBOL(ib_free_recv_mad);
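/*
 * A minimal receive handler sketch, assuming a client that only inspects the
 * received MAD before returning ownership of the buffers; the handler name
 * and the processing step are illustrative:
 *
 *	static void my_recv_handler(struct ib_mad_agent *agent,
 *				    struct ib_mad_recv_wc *mad_recv_wc)
 *	{
 *		// look at mad_recv_wc->recv_buf.mad, ->wc, ->mad_len ...
 *		ib_free_recv_mad(mad_recv_wc);	// buffers return to ib_mad_cache
 *	}
 */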
943
944void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc,
945 void *buf)
946{
947 printk(KERN_ERR PFX "ib_coalesce_recv_mad() not implemented yet\n");
948}
949EXPORT_SYMBOL(ib_coalesce_recv_mad);
950
951struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
952 u8 rmpp_version,
953 ib_mad_send_handler send_handler,
954 ib_mad_recv_handler recv_handler,
955 void *context)
956{
957 return ERR_PTR(-EINVAL); /* XXX: for now */
958}
959EXPORT_SYMBOL(ib_redirect_mad_qp);
960
961int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
962 struct ib_wc *wc)
963{
964 printk(KERN_ERR PFX "ib_process_mad_wc() not implemented yet\n");
965 return 0;
966}
967EXPORT_SYMBOL(ib_process_mad_wc);
968
969static int method_in_use(struct ib_mad_mgmt_method_table **method,
970 struct ib_mad_reg_req *mad_reg_req)
971{
972 int i;
973
974 for (i = find_first_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS);
975 i < IB_MGMT_MAX_METHODS;
976 i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
977 1+i)) {
978 if ((*method)->agent[i]) {
979 printk(KERN_ERR PFX "Method %d already in use\n", i);
980 return -EINVAL;
981 }
982 }
983 return 0;
984}
985
986static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
987{
988 /* Allocate management method table */
989 *method = kmalloc(sizeof **method, GFP_ATOMIC);
990 if (!*method) {
991 printk(KERN_ERR PFX "No memory for "
992 "ib_mad_mgmt_method_table\n");
993 return -ENOMEM;
994 }
995 /* Clear management method table */
996 memset(*method, 0, sizeof **method);
997
998 return 0;
999}
1000
1001/*
1002 * Check to see if there are any methods still in use
1003 */
1004static int check_method_table(struct ib_mad_mgmt_method_table *method)
1005{
1006 int i;
1007
1008 for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
1009 if (method->agent[i])
1010 return 1;
1011 return 0;
1012}
1013
1014/*
1015 * Check to see if there are any method tables for this class still in use
1016 */
1017static int check_class_table(struct ib_mad_mgmt_class_table *class)
1018{
1019 int i;
1020
1021 for (i = 0; i < MAX_MGMT_CLASS; i++)
1022 if (class->method_table[i])
1023 return 1;
1024 return 0;
1025}
1026
1027static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class)
1028{
1029 int i;
1030
1031 for (i = 0; i < MAX_MGMT_OUI; i++)
1032 if (vendor_class->method_table[i])
1033 return 1;
1034 return 0;
1035}
1036
1037static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
1038 char *oui)
1039{
1040 int i;
1041
1042 for (i = 0; i < MAX_MGMT_OUI; i++)
1043 /* Is there matching OUI for this vendor class ? */
1044 if (!memcmp(vendor_class->oui[i], oui, 3))
1045 return i;
1046
1047 return -1;
1048}
1049
1050static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor)
1051{
1052 int i;
1053
1054 for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++)
1055 if (vendor->vendor_class[i])
1056 return 1;
1057
1058 return 0;
1059}
1060
1061static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
1062 struct ib_mad_agent_private *agent)
1063{
1064 int i;
1065
1066 /* Remove any methods for this mad agent */
1067 for (i = 0; i < IB_MGMT_MAX_METHODS; i++) {
1068 if (method->agent[i] == agent) {
1069 method->agent[i] = NULL;
1070 }
1071 }
1072}
1073
1074static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1075 struct ib_mad_agent_private *agent_priv,
1076 u8 mgmt_class)
1077{
1078 struct ib_mad_port_private *port_priv;
1079 struct ib_mad_mgmt_class_table **class;
1080 struct ib_mad_mgmt_method_table **method;
1081 int i, ret;
1082
1083 port_priv = agent_priv->qp_info->port_priv;
1084 class = &port_priv->version[mad_reg_req->mgmt_class_version].class;
1085 if (!*class) {
1086 /* Allocate management class table for "new" class version */
1087 *class = kmalloc(sizeof **class, GFP_ATOMIC);
1088 if (!*class) {
1089 printk(KERN_ERR PFX "No memory for "
1090 "ib_mad_mgmt_class_table\n");
1091 ret = -ENOMEM;
1092 goto error1;
1093 }
1094 /* Clear management class table */
1095 memset(*class, 0, sizeof(**class));
1096 /* Allocate method table for this management class */
1097 method = &(*class)->method_table[mgmt_class];
1098 if ((ret = allocate_method_table(method)))
1099 goto error2;
1100 } else {
1101 method = &(*class)->method_table[mgmt_class];
1102 if (!*method) {
1103 /* Allocate method table for this management class */
1104 if ((ret = allocate_method_table(method)))
1105 goto error1;
1106 }
1107 }
1108
1109 /* Now, make sure methods are not already in use */
1110 if (method_in_use(method, mad_reg_req))
1111 goto error3;
1112
1113 /* Finally, add in methods being registered */
1114 for (i = find_first_bit(mad_reg_req->method_mask,
1115 IB_MGMT_MAX_METHODS);
1116 i < IB_MGMT_MAX_METHODS;
1117 i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
1118 1+i)) {
1119 (*method)->agent[i] = agent_priv;
1120 }
1121 return 0;
1122
1123error3:
1124 /* Remove any methods for this mad agent */
1125 remove_methods_mad_agent(*method, agent_priv);
1126 /* Now, check to see if there are any methods in use */
1127 if (!check_method_table(*method)) {
1128 /* If not, release management method table */
1129 kfree(*method);
1130 *method = NULL;
1131 }
1132 ret = -EINVAL;
1133 goto error1;
1134error2:
1135 kfree(*class);
1136 *class = NULL;
1137error1:
1138 return ret;
1139}
1140
1141static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1142 struct ib_mad_agent_private *agent_priv)
1143{
1144 struct ib_mad_port_private *port_priv;
1145 struct ib_mad_mgmt_vendor_class_table **vendor_table;
1146 struct ib_mad_mgmt_vendor_class_table *vendor = NULL;
1147 struct ib_mad_mgmt_vendor_class *vendor_class = NULL;
1148 struct ib_mad_mgmt_method_table **method;
1149 int i, ret = -ENOMEM;
1150 u8 vclass;
1151
1152 /* "New" vendor (with OUI) class */
1153 vclass = vendor_class_index(mad_reg_req->mgmt_class);
1154 port_priv = agent_priv->qp_info->port_priv;
1155 vendor_table = &port_priv->version[
1156 mad_reg_req->mgmt_class_version].vendor;
1157 if (!*vendor_table) {
1158 /* Allocate mgmt vendor class table for "new" class version */
1159 vendor = kmalloc(sizeof *vendor, GFP_ATOMIC);
1160 if (!vendor) {
1161 printk(KERN_ERR PFX "No memory for "
1162 "ib_mad_mgmt_vendor_class_table\n");
1163 goto error1;
1164 }
1165 /* Clear management vendor class table */
1166 memset(vendor, 0, sizeof(*vendor));
1167 *vendor_table = vendor;
1168 }
1169 if (!(*vendor_table)->vendor_class[vclass]) {
1170 /* Allocate table for this management vendor class */
1171 vendor_class = kmalloc(sizeof *vendor_class, GFP_ATOMIC);
1172 if (!vendor_class) {
1173 printk(KERN_ERR PFX "No memory for "
1174 "ib_mad_mgmt_vendor_class\n");
1175 goto error2;
1176 }
1177 memset(vendor_class, 0, sizeof(*vendor_class));
1178 (*vendor_table)->vendor_class[vclass] = vendor_class;
1179 }
1180 for (i = 0; i < MAX_MGMT_OUI; i++) {
1181 /* Is there matching OUI for this vendor class ? */
1182 if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i],
1183 mad_reg_req->oui, 3)) {
1184 method = &(*vendor_table)->vendor_class[
1185 vclass]->method_table[i];
1186 BUG_ON(!*method);
1187 goto check_in_use;
1188 }
1189 }
1190 for (i = 0; i < MAX_MGMT_OUI; i++) {
1191 /* OUI slot available ? */
1192 if (!is_vendor_oui((*vendor_table)->vendor_class[
1193 vclass]->oui[i])) {
1194 method = &(*vendor_table)->vendor_class[
1195 vclass]->method_table[i];
1196 BUG_ON(*method);
1197 /* Allocate method table for this OUI */
1198 if ((ret = allocate_method_table(method)))
1199 goto error3;
1200 memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
1201 mad_reg_req->oui, 3);
1202 goto check_in_use;
1203 }
1204 }
1205 printk(KERN_ERR PFX "All OUI slots in use\n");
1206 goto error3;
1207
1208check_in_use:
1209 /* Now, make sure methods are not already in use */
1210 if (method_in_use(method, mad_reg_req))
1211 goto error4;
1212
1213 /* Finally, add in methods being registered */
1214 for (i = find_first_bit(mad_reg_req->method_mask,
1215 IB_MGMT_MAX_METHODS);
1216 i < IB_MGMT_MAX_METHODS;
1217 i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
1218 1+i)) {
1219 (*method)->agent[i] = agent_priv;
1220 }
1221 return 0;
1222
1223error4:
1224 /* Remove any methods for this mad agent */
1225 remove_methods_mad_agent(*method, agent_priv);
1226 /* Now, check to see if there are any methods in use */
1227 if (!check_method_table(*method)) {
1228 /* If not, release management method table */
1229 kfree(*method);
1230 *method = NULL;
1231 }
1232 ret = -EINVAL;
1233error3:
1234 if (vendor_class) {
1235 (*vendor_table)->vendor_class[vclass] = NULL;
1236 kfree(vendor_class);
1237 }
1238error2:
1239 if (vendor) {
1240 *vendor_table = NULL;
1241 kfree(vendor);
1242 }
1243error1:
1244 return ret;
1245}
1246
1247static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
1248{
1249 struct ib_mad_port_private *port_priv;
1250 struct ib_mad_mgmt_class_table *class;
1251 struct ib_mad_mgmt_method_table *method;
1252 struct ib_mad_mgmt_vendor_class_table *vendor;
1253 struct ib_mad_mgmt_vendor_class *vendor_class;
1254 int index;
1255 u8 mgmt_class;
1256
1257 /*
1258 * Was MAD registration request supplied
1259 * with original registration ?
1260 */
1261 if (!agent_priv->reg_req) {
1262 goto out;
1263 }
1264
1265 port_priv = agent_priv->qp_info->port_priv;
1266 mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
1267 class = port_priv->version[
1268 agent_priv->reg_req->mgmt_class_version].class;
1269 if (!class)
1270 goto vendor_check;
1271
1272 method = class->method_table[mgmt_class];
1273 if (method) {
1274 /* Remove any methods for this mad agent */
1275 remove_methods_mad_agent(method, agent_priv);
1276 /* Now, check to see if there are any methods still in use */
1277 if (!check_method_table(method)) {
1278 /* If not, release management method table */
1279 kfree(method);
1280 class->method_table[mgmt_class] = NULL;
1281 /* Any management classes left ? */
1282 if (!check_class_table(class)) {
1283 /* If not, release management class table */
1284 kfree(class);
1285 port_priv->version[
1286 agent_priv->reg_req->
1287 mgmt_class_version].class = NULL;
1288 }
1289 }
1290 }
1291
1292vendor_check:
1293 if (!is_vendor_class(mgmt_class))
1294 goto out;
1295
1296 /* normalize mgmt_class to vendor range 2 */
1297 mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class);
1298 vendor = port_priv->version[
1299 agent_priv->reg_req->mgmt_class_version].vendor;
1300
1301 if (!vendor)
1302 goto out;
1303
1304 vendor_class = vendor->vendor_class[mgmt_class];
1305 if (vendor_class) {
1306 index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui);
1307 if (index < 0)
1308 goto out;
1309 method = vendor_class->method_table[index];
1310 if (method) {
1311 /* Remove any methods for this mad agent */
1312 remove_methods_mad_agent(method, agent_priv);
1313 /*
1314 * Now, check to see if there are
1315 * any methods still in use
1316 */
1317 if (!check_method_table(method)) {
1318 /* If not, release management method table */
1319 kfree(method);
1320 vendor_class->method_table[index] = NULL;
1321 memset(vendor_class->oui[index], 0, 3);
1322 /* Any OUIs left ? */
1323 if (!check_vendor_class(vendor_class)) {
1324 /* If not, release vendor class table */
1325 kfree(vendor_class);
1326 vendor->vendor_class[mgmt_class] = NULL;
1327 /* Any other vendor classes left ? */
1328 if (!check_vendor_table(vendor)) {
1329 kfree(vendor);
1330 port_priv->version[
1331 agent_priv->reg_req->
1332 mgmt_class_version].
1333 vendor = NULL;
1334 }
1335 }
1336 }
1337 }
1338 }
1339
1340out:
1341 return;
1342}
1343
1344static int response_mad(struct ib_mad *mad)
1345{
1346 /* Trap represses are responses although response bit is reset */
1347 return ((mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) ||
1348 (mad->mad_hdr.method & IB_MGMT_METHOD_RESP));
1349}
1350
1351static int solicited_mad(struct ib_mad *mad)
1352{
1353 /* CM MADs are never solicited */
1354 if (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CM) {
1355 return 0;
1356 }
1357
1358 /* XXX: Determine whether MAD is using RMPP */
1359
1360 /* Not using RMPP */
1361 /* Is this MAD a response to a previous MAD ? */
1362 return response_mad(mad);
1363}
1364
1365static struct ib_mad_agent_private *
1366find_mad_agent(struct ib_mad_port_private *port_priv,
1367 struct ib_mad *mad,
1368 int solicited)
1369{
1370 struct ib_mad_agent_private *mad_agent = NULL;
1371 unsigned long flags;
1372
1373 spin_lock_irqsave(&port_priv->reg_lock, flags);
1374
1375 /*
1376 * Whether MAD was solicited determines type of routing to
1377 * MAD client.
1378 */
1379 if (solicited) {
1380 u32 hi_tid;
1381 struct ib_mad_agent_private *entry;
1382
1383 /*
1384 * Routing is based on high 32 bits of transaction ID
1385 * of MAD.
1386 */
1387 hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32;
1388 list_for_each_entry(entry, &port_priv->agent_list,
1389 agent_list) {
1390 if (entry->agent.hi_tid == hi_tid) {
1391 mad_agent = entry;
1392 break;
1393 }
1394 }
1395 } else {
1396 struct ib_mad_mgmt_class_table *class;
1397 struct ib_mad_mgmt_method_table *method;
1398 struct ib_mad_mgmt_vendor_class_table *vendor;
1399 struct ib_mad_mgmt_vendor_class *vendor_class;
1400 struct ib_vendor_mad *vendor_mad;
1401 int index;
1402
1403 /*
1404 * Routing is based on version, class, and method
1405 * For "newer" vendor MADs, also based on OUI
1406 */
1407 if (mad->mad_hdr.class_version >= MAX_MGMT_VERSION)
1408 goto out;
1409 if (!is_vendor_class(mad->mad_hdr.mgmt_class)) {
1410 class = port_priv->version[
1411 mad->mad_hdr.class_version].class;
1412 if (!class)
1413 goto out;
1414 method = class->method_table[convert_mgmt_class(
1415 mad->mad_hdr.mgmt_class)];
1416 if (method)
1417 mad_agent = method->agent[mad->mad_hdr.method &
1418 ~IB_MGMT_METHOD_RESP];
1419 } else {
1420 vendor = port_priv->version[
1421 mad->mad_hdr.class_version].vendor;
1422 if (!vendor)
1423 goto out;
1424 vendor_class = vendor->vendor_class[vendor_class_index(
1425 mad->mad_hdr.mgmt_class)];
1426 if (!vendor_class)
1427 goto out;
1428 /* Find matching OUI */
1429 vendor_mad = (struct ib_vendor_mad *)mad;
1430 index = find_vendor_oui(vendor_class, vendor_mad->oui);
1431 if (index == -1)
1432 goto out;
1433 method = vendor_class->method_table[index];
1434 if (method) {
1435 mad_agent = method->agent[mad->mad_hdr.method &
1436 ~IB_MGMT_METHOD_RESP];
1437 }
1438 }
1439 }
1440
1441 if (mad_agent) {
1442 if (mad_agent->agent.recv_handler)
1443 atomic_inc(&mad_agent->refcount);
1444 else {
1445 printk(KERN_NOTICE PFX "No receive handler for client "
1446 "%p on port %d\n",
1447 &mad_agent->agent, port_priv->port_num);
1448 mad_agent = NULL;
1449 }
1450 }
1451out:
1452 spin_unlock_irqrestore(&port_priv->reg_lock, flags);
1453
1454 return mad_agent;
1455}
1456
1457static int validate_mad(struct ib_mad *mad, u32 qp_num)
1458{
1459 int valid = 0;
1460
1461 /* Make sure MAD base version is understood */
1462 if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) {
1463 printk(KERN_ERR PFX "MAD received with unsupported base "
1464 "version %d\n", mad->mad_hdr.base_version);
1465 goto out;
1466 }
1467
1468 /* Filter SMI packets sent to other than QP0 */
1469 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
1470 (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
1471 if (qp_num == 0)
1472 valid = 1;
1473 } else {
1474 /* Filter GSI packets sent to QP0 */
1475 if (qp_num != 0)
1476 valid = 1;
1477 }
1478
1479out:
1480 return valid;
1481}
1482
1483/*
1484 * Return start of fully reassembled MAD, or NULL, if MAD isn't assembled yet
1485 */
1486static struct ib_mad_private *
1487reassemble_recv(struct ib_mad_agent_private *mad_agent_priv,
1488 struct ib_mad_private *recv)
1489{
1490 /* Until we have RMPP, all receives are reassembled!... */
1491 INIT_LIST_HEAD(&recv->header.recv_wc.recv_buf.list);
1492 return recv;
1493}
1494
1495static struct ib_mad_send_wr_private*
1496find_send_req(struct ib_mad_agent_private *mad_agent_priv,
1497 u64 tid)
1498{
1499 struct ib_mad_send_wr_private *mad_send_wr;
1500
1501 list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
1502 agent_list) {
1503 if (mad_send_wr->tid == tid)
1504 return mad_send_wr;
1505 }
1506
1507 /*
1508 * It's possible to receive the response before we've
1509 * been notified that the send has completed
1510 */
1511 list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
1512 agent_list) {
1513 if (mad_send_wr->tid == tid && mad_send_wr->timeout) {
1514 /* Verify request has not been canceled */
1515 return (mad_send_wr->status == IB_WC_SUCCESS) ?
1516 mad_send_wr : NULL;
1517 }
1518 }
1519 return NULL;
1520}
1521
1522static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1523 struct ib_mad_private *recv,
1524 int solicited)
1525{
1526 struct ib_mad_send_wr_private *mad_send_wr;
1527 struct ib_mad_send_wc mad_send_wc;
1528 unsigned long flags;
1529
1530 /* Fully reassemble receive before processing */
1531 recv = reassemble_recv(mad_agent_priv, recv);
1532 if (!recv) {
1533 if (atomic_dec_and_test(&mad_agent_priv->refcount))
1534 wake_up(&mad_agent_priv->wait);
1535 return;
1536 }
1537
1538 /* Complete corresponding request */
1539 if (solicited) {
1540 spin_lock_irqsave(&mad_agent_priv->lock, flags);
1541 mad_send_wr = find_send_req(mad_agent_priv,
1542 recv->mad.mad.mad_hdr.tid);
1543 if (!mad_send_wr) {
1544 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1545 ib_free_recv_mad(&recv->header.recv_wc);
1546 if (atomic_dec_and_test(&mad_agent_priv->refcount))
1547 wake_up(&mad_agent_priv->wait);
1548 return;
1549 }
1550 /* Timeout = 0 means that we won't wait for a response */
1551 mad_send_wr->timeout = 0;
1552 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1553
1554 /* Defined behavior is to complete response before request */
1555 recv->header.recv_wc.wc->wr_id = mad_send_wr->wr_id;
1556 mad_agent_priv->agent.recv_handler(
1557 &mad_agent_priv->agent,
1558 &recv->header.recv_wc);
1559 atomic_dec(&mad_agent_priv->refcount);
1560
1561 mad_send_wc.status = IB_WC_SUCCESS;
1562 mad_send_wc.vendor_err = 0;
1563 mad_send_wc.wr_id = mad_send_wr->wr_id;
1564 ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
1565 } else {
1566 mad_agent_priv->agent.recv_handler(
1567 &mad_agent_priv->agent,
1568 &recv->header.recv_wc);
1569 if (atomic_dec_and_test(&mad_agent_priv->refcount))
1570 wake_up(&mad_agent_priv->wait);
1571 }
1572}
1573
1574static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
1575 struct ib_wc *wc)
1576{
1577 struct ib_mad_qp_info *qp_info;
1578 struct ib_mad_private_header *mad_priv_hdr;
1579 struct ib_mad_private *recv, *response;
1580 struct ib_mad_list_head *mad_list;
1581 struct ib_mad_agent_private *mad_agent;
1582 int solicited;
1583
1584 response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
1585 if (!response)
1586 printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory "
1587 "for response buffer\n");
1588
1589 mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
1590 qp_info = mad_list->mad_queue->qp_info;
1591 dequeue_mad(mad_list);
1592
1593 mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
1594 mad_list);
1595 recv = container_of(mad_priv_hdr, struct ib_mad_private, header);
1596 dma_unmap_single(port_priv->device->dma_device,
1597 pci_unmap_addr(&recv->header, mapping),
1598 sizeof(struct ib_mad_private) -
1599 sizeof(struct ib_mad_private_header),
1600 DMA_FROM_DEVICE);
1601
1602 /* Setup MAD receive work completion from "normal" work completion */
1603 recv->header.recv_wc.wc = wc;
1604 recv->header.recv_wc.mad_len = sizeof(struct ib_mad);
1605 recv->header.recv_wc.recv_buf.mad = &recv->mad.mad;
1606 recv->header.recv_wc.recv_buf.grh = &recv->grh;
1607
1608 if (atomic_read(&qp_info->snoop_count))
1609 snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS);
1610
1611 /* Validate MAD */
1612 if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num))
1613 goto out;
1614
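	/*
	 * Directed route SMPs need extra handling: the SMI code validates
	 * and updates the hop fields, then decides whether the SMP should
	 * be forwarded, handled locally, or dropped.
	 */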
1615 if (recv->mad.mad.mad_hdr.mgmt_class ==
1616 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
1617 if (!smi_handle_dr_smp_recv(&recv->mad.smp,
1618 port_priv->device->node_type,
1619 port_priv->port_num,
1620 port_priv->device->phys_port_cnt))
1621 goto out;
1622 if (!smi_check_forward_dr_smp(&recv->mad.smp))
1623 goto local;
1624 if (!smi_handle_dr_smp_send(&recv->mad.smp,
1625 port_priv->device->node_type,
1626 port_priv->port_num))
1627 goto out;
1628 if (!smi_check_local_dr_smp(&recv->mad.smp,
1629 port_priv->device,
1630 port_priv->port_num))
1631 goto out;
1632 }
1633
1634local:
1635 /* Give driver "right of first refusal" on incoming MAD */
1636 if (port_priv->device->process_mad) {
1637 int ret;
1638
1639 if (!response) {
1640 printk(KERN_ERR PFX "No memory for response MAD\n");
1641 /*
1642 * Is it better to assume that
1643			 * it wouldn't be processed?
1644 */
1645 goto out;
1646 }
1647
1648 ret = port_priv->device->process_mad(port_priv->device, 0,
1649 port_priv->port_num,
1650 wc, &recv->grh,
1651 &recv->mad.mad,
1652 &response->mad.mad);
1653 if (ret & IB_MAD_RESULT_SUCCESS) {
1654 if (ret & IB_MAD_RESULT_CONSUMED)
1655 goto out;
1656 if (ret & IB_MAD_RESULT_REPLY) {
1657 /* Send response */
1658 if (!agent_send(response, &recv->grh, wc,
1659 port_priv->device,
1660 port_priv->port_num))
1661 response = NULL;
1662 goto out;
1663 }
1664 }
1665 }
1666
1667 /* Determine corresponding MAD agent for incoming receive MAD */
1668 solicited = solicited_mad(&recv->mad.mad);
1669 mad_agent = find_mad_agent(port_priv, &recv->mad.mad, solicited);
1670 if (mad_agent) {
1671 ib_mad_complete_recv(mad_agent, recv, solicited);
1672 /*
1673		 * recv is now owned by ib_mad_complete_recv(): it is freed
1674		 * there on error paths or released via the client's recv_handler
1675 */
1676 recv = NULL;
1677 }
1678
1679out:
1680 /* Post another receive request for this QP */
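	/*
	 * Reuse the unused response buffer if one was allocated, freeing
	 * recv unless an agent consumed it; otherwise repost recv (or a
	 * newly allocated buffer if recv was handed to an agent).
	 */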
1681 if (response) {
1682 ib_mad_post_receive_mads(qp_info, response);
1683 if (recv)
1684 kmem_cache_free(ib_mad_cache, recv);
1685 } else
1686 ib_mad_post_receive_mads(qp_info, recv);
1687}
1688
1689static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
1690{
1691 struct ib_mad_send_wr_private *mad_send_wr;
1692 unsigned long delay;
1693
1694 if (list_empty(&mad_agent_priv->wait_list)) {
1695 cancel_delayed_work(&mad_agent_priv->timed_work);
1696 } else {
1697 mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
1698 struct ib_mad_send_wr_private,
1699 agent_list);
1700
1701 if (time_after(mad_agent_priv->timeout,
1702 mad_send_wr->timeout)) {
1703 mad_agent_priv->timeout = mad_send_wr->timeout;
1704 cancel_delayed_work(&mad_agent_priv->timed_work);
1705 delay = mad_send_wr->timeout - jiffies;
1706 if ((long)delay <= 0)
1707 delay = 1;
1708 queue_delayed_work(mad_agent_priv->qp_info->
1709 port_priv->wq,
1710 &mad_agent_priv->timed_work, delay);
1711 }
1712 }
1713}
1714
1715static void wait_for_response(struct ib_mad_agent_private *mad_agent_priv,
1716 struct ib_mad_send_wr_private *mad_send_wr )
1717{
1718 struct ib_mad_send_wr_private *temp_mad_send_wr;
1719 struct list_head *list_item;
1720 unsigned long delay;
1721
1722 list_del(&mad_send_wr->agent_list);
1723
1724 delay = mad_send_wr->timeout;
1725 mad_send_wr->timeout += jiffies;
1726
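	/* Keep wait_list sorted by absolute timeout, earliest entry first */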
1727 list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
1728 temp_mad_send_wr = list_entry(list_item,
1729 struct ib_mad_send_wr_private,
1730 agent_list);
1731 if (time_after(mad_send_wr->timeout,
1732 temp_mad_send_wr->timeout))
1733 break;
1734 }
1735 list_add(&mad_send_wr->agent_list, list_item);
1736
1737 /* Reschedule a work item if we have a shorter timeout */
1738 if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) {
1739 cancel_delayed_work(&mad_agent_priv->timed_work);
1740 queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
1741 &mad_agent_priv->timed_work, delay);
1742 }
1743}
1744
1745/*
1746 * Process a send work completion
1747 */
1748static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
1749 struct ib_mad_send_wc *mad_send_wc)
1750{
1751 struct ib_mad_agent_private *mad_agent_priv;
1752 unsigned long flags;
1753
1754 mad_agent_priv = container_of(mad_send_wr->agent,
1755 struct ib_mad_agent_private, agent);
1756
1757 spin_lock_irqsave(&mad_agent_priv->lock, flags);
1758 if (mad_send_wc->status != IB_WC_SUCCESS &&
1759 mad_send_wr->status == IB_WC_SUCCESS) {
1760 mad_send_wr->status = mad_send_wc->status;
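		/*
		 * The send failed, so no response will arrive; drop the
		 * extra reference held while awaiting one.
		 */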
1761 mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
1762 }
1763
1764 if (--mad_send_wr->refcount > 0) {
1765 if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
1766 mad_send_wr->status == IB_WC_SUCCESS) {
1767 wait_for_response(mad_agent_priv, mad_send_wr);
1768 }
1769 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1770 return;
1771 }
1772
1773 /* Remove send from MAD agent and notify client of completion */
1774 list_del(&mad_send_wr->agent_list);
1775 adjust_timeout(mad_agent_priv);
1776 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1777
1778 if (mad_send_wr->status != IB_WC_SUCCESS )
1779 mad_send_wc->status = mad_send_wr->status;
1780 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
1781 mad_send_wc);
1782
1783 /* Release reference on agent taken when sending */
1784 if (atomic_dec_and_test(&mad_agent_priv->refcount))
1785 wake_up(&mad_agent_priv->wait);
1786
1787 kfree(mad_send_wr);
1788}
1789
1790static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
1791 struct ib_wc *wc)
1792{
1793 struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr;
1794 struct ib_mad_list_head *mad_list;
1795 struct ib_mad_qp_info *qp_info;
1796 struct ib_mad_queue *send_queue;
1797 struct ib_send_wr *bad_send_wr;
1798 unsigned long flags;
1799 int ret;
1800
1801 mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
1802 mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
1803 mad_list);
1804 send_queue = mad_list->mad_queue;
1805 qp_info = send_queue->qp_info;
1806
1807retry:
1808 queued_send_wr = NULL;
1809 spin_lock_irqsave(&send_queue->lock, flags);
1810 list_del(&mad_list->list);
1811
1812	/* Move a queued send from the overflow list to the active send queue */
1813 if (send_queue->count-- > send_queue->max_active) {
1814 mad_list = container_of(qp_info->overflow_list.next,
1815 struct ib_mad_list_head, list);
1816 queued_send_wr = container_of(mad_list,
1817 struct ib_mad_send_wr_private,
1818 mad_list);
1819 list_del(&mad_list->list);
1820 list_add_tail(&mad_list->list, &send_queue->list);
1821 }
1822 spin_unlock_irqrestore(&send_queue->lock, flags);
1823
1824 /* Restore client wr_id in WC and complete send */
1825 wc->wr_id = mad_send_wr->wr_id;
1826 if (atomic_read(&qp_info->snoop_count))
1827 snoop_send(qp_info, &mad_send_wr->send_wr,
1828 (struct ib_mad_send_wc *)wc,
1829 IB_MAD_SNOOP_SEND_COMPLETIONS);
1830 ib_mad_complete_send_wr(mad_send_wr, (struct ib_mad_send_wc *)wc);
1831
1832 if (queued_send_wr) {
1833 ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
1834 &bad_send_wr);
1835 if (ret) {
1836 printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
1837 mad_send_wr = queued_send_wr;
1838 wc->status = IB_WC_LOC_QP_OP_ERR;
1839 goto retry;
1840 }
1841 }
1842}
1843
1844static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
1845{
1846 struct ib_mad_send_wr_private *mad_send_wr;
1847 struct ib_mad_list_head *mad_list;
1848 unsigned long flags;
1849
1850 spin_lock_irqsave(&qp_info->send_queue.lock, flags);
1851 list_for_each_entry(mad_list, &qp_info->send_queue.list, list) {
1852 mad_send_wr = container_of(mad_list,
1853 struct ib_mad_send_wr_private,
1854 mad_list);
1855 mad_send_wr->retry = 1;
1856 }
1857 spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
1858}
1859
1860static void mad_error_handler(struct ib_mad_port_private *port_priv,
1861 struct ib_wc *wc)
1862{
1863 struct ib_mad_list_head *mad_list;
1864 struct ib_mad_qp_info *qp_info;
1865 struct ib_mad_send_wr_private *mad_send_wr;
1866 int ret;
1867
1868 /* Determine if failure was a send or receive */
1869 mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
1870 qp_info = mad_list->mad_queue->qp_info;
1871 if (mad_list->mad_queue == &qp_info->recv_queue)
1872 /*
1873 * Receive errors indicate that the QP has entered the error
1874 * state - error handling/shutdown code will cleanup
1875 */
1876 return;
1877
1878 /*
1879 * Send errors will transition the QP to SQE - move
1880 * QP to RTS and repost flushed work requests
1881 */
1882 mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
1883 mad_list);
1884 if (wc->status == IB_WC_WR_FLUSH_ERR) {
1885 if (mad_send_wr->retry) {
1886 /* Repost send */
1887 struct ib_send_wr *bad_send_wr;
1888
1889 mad_send_wr->retry = 0;
1890 ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr,
1891 &bad_send_wr);
1892 if (ret)
1893 ib_mad_send_done_handler(port_priv, wc);
1894 } else
1895 ib_mad_send_done_handler(port_priv, wc);
1896 } else {
1897 struct ib_qp_attr *attr;
1898
1899 /* Transition QP to RTS and fail offending send */
1900 attr = kmalloc(sizeof *attr, GFP_KERNEL);
1901 if (attr) {
1902 attr->qp_state = IB_QPS_RTS;
1903 attr->cur_qp_state = IB_QPS_SQE;
1904 ret = ib_modify_qp(qp_info->qp, attr,
1905 IB_QP_STATE | IB_QP_CUR_STATE);
1906 kfree(attr);
1907 if (ret)
1908 printk(KERN_ERR PFX "mad_error_handler - "
1909 "ib_modify_qp to RTS : %d\n", ret);
1910 else
1911 mark_sends_for_retry(qp_info);
1912 }
1913 ib_mad_send_done_handler(port_priv, wc);
1914 }
1915}
1916
1917/*
1918 * IB MAD completion callback
1919 */
1920static void ib_mad_completion_handler(void *data)
1921{
1922 struct ib_mad_port_private *port_priv;
1923 struct ib_wc wc;
1924
1925 port_priv = (struct ib_mad_port_private *)data;
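	/*
	 * Re-arm the CQ before polling so that completions arriving while
	 * we drain it trigger another callback rather than being missed.
	 */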
1926 ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
1927
1928 while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
1929 if (wc.status == IB_WC_SUCCESS) {
1930 switch (wc.opcode) {
1931 case IB_WC_SEND:
1932 ib_mad_send_done_handler(port_priv, &wc);
1933 break;
1934 case IB_WC_RECV:
1935 ib_mad_recv_done_handler(port_priv, &wc);
1936 break;
1937 default:
1938 BUG_ON(1);
1939 break;
1940 }
1941 } else
1942 mad_error_handler(port_priv, &wc);
1943 }
1944}
1945
1946static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
1947{
1948 unsigned long flags;
1949 struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
1950 struct ib_mad_send_wc mad_send_wc;
1951 struct list_head cancel_list;
1952
1953 INIT_LIST_HEAD(&cancel_list);
1954
1955 spin_lock_irqsave(&mad_agent_priv->lock, flags);
1956 list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
1957 &mad_agent_priv->send_list, agent_list) {
1958 if (mad_send_wr->status == IB_WC_SUCCESS) {
1959 mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
1960 mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
1961 }
1962 }
1963
1964 /* Empty wait list to prevent receives from finding a request */
1965 list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
1966 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1967
1968 /* Report all cancelled requests */
1969 mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
1970 mad_send_wc.vendor_err = 0;
1971
1972 list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
1973 &cancel_list, agent_list) {
1974 mad_send_wc.wr_id = mad_send_wr->wr_id;
1975 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
1976 &mad_send_wc);
1977
1978 list_del(&mad_send_wr->agent_list);
1979 kfree(mad_send_wr);
1980 atomic_dec(&mad_agent_priv->refcount);
1981 }
1982}
1983
1984static struct ib_mad_send_wr_private*
1985find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv,
1986 u64 wr_id)
1987{
1988 struct ib_mad_send_wr_private *mad_send_wr;
1989
1990 list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
1991 agent_list) {
1992 if (mad_send_wr->wr_id == wr_id)
1993 return mad_send_wr;
1994 }
1995
1996 list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
1997 agent_list) {
1998 if (mad_send_wr->wr_id == wr_id)
1999 return mad_send_wr;
2000 }
2001 return NULL;
2002}
2003
2004void cancel_sends(void *data)
2005{
2006 struct ib_mad_agent_private *mad_agent_priv;
2007 struct ib_mad_send_wr_private *mad_send_wr;
2008 struct ib_mad_send_wc mad_send_wc;
2009 unsigned long flags;
2010
2011 mad_agent_priv = data;
2012
2013 mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
2014 mad_send_wc.vendor_err = 0;
2015
2016 spin_lock_irqsave(&mad_agent_priv->lock, flags);
2017 while (!list_empty(&mad_agent_priv->canceled_list)) {
2018 mad_send_wr = list_entry(mad_agent_priv->canceled_list.next,
2019 struct ib_mad_send_wr_private,
2020 agent_list);
2021
2022 list_del(&mad_send_wr->agent_list);
2023 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2024
2025 mad_send_wc.wr_id = mad_send_wr->wr_id;
2026 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2027 &mad_send_wc);
2028
2029 kfree(mad_send_wr);
2030 if (atomic_dec_and_test(&mad_agent_priv->refcount))
2031 wake_up(&mad_agent_priv->wait);
2032 spin_lock_irqsave(&mad_agent_priv->lock, flags);
2033 }
2034 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2035}
2036
2037void ib_cancel_mad(struct ib_mad_agent *mad_agent,
2038 u64 wr_id)
2039{
2040 struct ib_mad_agent_private *mad_agent_priv;
2041 struct ib_mad_send_wr_private *mad_send_wr;
2042 unsigned long flags;
2043
2044 mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
2045 agent);
2046 spin_lock_irqsave(&mad_agent_priv->lock, flags);
2047 mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id);
2048 if (!mad_send_wr) {
2049 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2050 goto out;
2051 }
2052
2053 if (mad_send_wr->status == IB_WC_SUCCESS)
2054 mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2055
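	/*
	 * If references remain, completion processing will observe the
	 * canceled status and report the flush; otherwise move the request
	 * to the canceled list so canceled_work can report it to the client.
	 */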
2056 if (mad_send_wr->refcount != 0) {
2057 mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2058 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2059 goto out;
2060 }
2061
2062 list_del(&mad_send_wr->agent_list);
2063 list_add_tail(&mad_send_wr->agent_list, &mad_agent_priv->canceled_list);
2064 adjust_timeout(mad_agent_priv);
2065 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2066
2067 queue_work(mad_agent_priv->qp_info->port_priv->wq,
2068 &mad_agent_priv->canceled_work);
2069out:
2070 return;
2071}
2072EXPORT_SYMBOL(ib_cancel_mad);
2073
2074static void local_completions(void *data)
2075{
2076 struct ib_mad_agent_private *mad_agent_priv;
2077 struct ib_mad_local_private *local;
2078 struct ib_mad_agent_private *recv_mad_agent;
2079 unsigned long flags;
2080 struct ib_wc wc;
2081 struct ib_mad_send_wc mad_send_wc;
2082
2083 mad_agent_priv = (struct ib_mad_agent_private *)data;
2084
2085 spin_lock_irqsave(&mad_agent_priv->lock, flags);
2086 while (!list_empty(&mad_agent_priv->local_list)) {
2087 local = list_entry(mad_agent_priv->local_list.next,
2088 struct ib_mad_local_private,
2089 completion_list);
2090 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2091 if (local->mad_priv) {
2092 recv_mad_agent = local->recv_mad_agent;
2093 if (!recv_mad_agent) {
2094 printk(KERN_ERR PFX "No receive MAD agent for local completion\n");
2095 kmem_cache_free(ib_mad_cache, local->mad_priv);
2096 goto local_send_completion;
2097 }
2098
2099 /*
2100 * Defined behavior is to complete response
2101 * before request
2102 */
2103 build_smp_wc(local->wr_id, IB_LID_PERMISSIVE,
2104 0 /* pkey index */,
2105 recv_mad_agent->agent.port_num, &wc);
2106
2107 local->mad_priv->header.recv_wc.wc = &wc;
2108 local->mad_priv->header.recv_wc.mad_len =
2109 sizeof(struct ib_mad);
2110 INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.recv_buf.list);
2111 local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
2112 local->mad_priv->header.recv_wc.recv_buf.mad =
2113 &local->mad_priv->mad.mad;
2114 if (atomic_read(&recv_mad_agent->qp_info->snoop_count))
2115 snoop_recv(recv_mad_agent->qp_info,
2116 &local->mad_priv->header.recv_wc,
2117 IB_MAD_SNOOP_RECVS);
2118 recv_mad_agent->agent.recv_handler(
2119 &recv_mad_agent->agent,
2120 &local->mad_priv->header.recv_wc);
2121 spin_lock_irqsave(&recv_mad_agent->lock, flags);
2122 atomic_dec(&recv_mad_agent->refcount);
2123 spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
2124 }
2125
2126local_send_completion:
2127 /* Complete send */
2128 mad_send_wc.status = IB_WC_SUCCESS;
2129 mad_send_wc.vendor_err = 0;
2130 mad_send_wc.wr_id = local->wr_id;
2131 if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
2132 snoop_send(mad_agent_priv->qp_info, &local->send_wr,
2133 &mad_send_wc,
2134 IB_MAD_SNOOP_SEND_COMPLETIONS);
2135 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2136 &mad_send_wc);
2137
2138 spin_lock_irqsave(&mad_agent_priv->lock, flags);
2139 list_del(&local->completion_list);
2140 atomic_dec(&mad_agent_priv->refcount);
2141 kfree(local);
2142 }
2143 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2144}
2145
2146static void timeout_sends(void *data)
2147{
2148 struct ib_mad_agent_private *mad_agent_priv;
2149 struct ib_mad_send_wr_private *mad_send_wr;
2150 struct ib_mad_send_wc mad_send_wc;
2151 unsigned long flags, delay;
2152
2153 mad_agent_priv = (struct ib_mad_agent_private *)data;
2154
2155 mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
2156 mad_send_wc.vendor_err = 0;
2157
2158 spin_lock_irqsave(&mad_agent_priv->lock, flags);
2159 while (!list_empty(&mad_agent_priv->wait_list)) {
2160 mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2161 struct ib_mad_send_wr_private,
2162 agent_list);
2163
2164 if (time_after(mad_send_wr->timeout, jiffies)) {
2165 delay = mad_send_wr->timeout - jiffies;
2166 if ((long)delay <= 0)
2167 delay = 1;
2168 queue_delayed_work(mad_agent_priv->qp_info->
2169 port_priv->wq,
2170 &mad_agent_priv->timed_work, delay);
2171 break;
2172 }
2173
2174 list_del(&mad_send_wr->agent_list);
2175 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2176
2177 mad_send_wc.wr_id = mad_send_wr->wr_id;
2178 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2179 &mad_send_wc);
2180
2181 kfree(mad_send_wr);
2182 atomic_dec(&mad_agent_priv->refcount);
2183 spin_lock_irqsave(&mad_agent_priv->lock, flags);
2184 }
2185 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2186}
2187
2188static void ib_mad_thread_completion_handler(struct ib_cq *cq)
2189{
2190 struct ib_mad_port_private *port_priv = cq->cq_context;
2191
2192 queue_work(port_priv->wq, &port_priv->work);
2193}
2194
2195/*
2196 * Allocate receive MADs and post receive WRs for them until the receive queue is full
2197 */
2198static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
2199 struct ib_mad_private *mad)
2200{
2201 unsigned long flags;
2202 int post, ret;
2203 struct ib_mad_private *mad_priv;
2204 struct ib_sge sg_list;
2205 struct ib_recv_wr recv_wr, *bad_recv_wr;
2206 struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
2207
2208 /* Initialize common scatter list fields */
2209 sg_list.length = sizeof *mad_priv - sizeof mad_priv->header;
2210 sg_list.lkey = (*qp_info->port_priv->mr).lkey;
2211
2212 /* Initialize common receive WR fields */
2213 recv_wr.next = NULL;
2214 recv_wr.sg_list = &sg_list;
2215 recv_wr.num_sge = 1;
2216
2217 do {
2218 /* Allocate and map receive buffer */
2219 if (mad) {
2220 mad_priv = mad;
2221 mad = NULL;
2222 } else {
2223 mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
2224 if (!mad_priv) {
2225 printk(KERN_ERR PFX "No memory for receive buffer\n");
2226 ret = -ENOMEM;
2227 break;
2228 }
2229 }
2230 sg_list.addr = dma_map_single(qp_info->port_priv->
2231 device->dma_device,
2232 &mad_priv->grh,
2233 sizeof *mad_priv -
2234 sizeof mad_priv->header,
2235 DMA_FROM_DEVICE);
2236 pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr);
2237 recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
2238 mad_priv->header.mad_list.mad_queue = recv_queue;
2239
2240 /* Post receive WR */
2241 spin_lock_irqsave(&recv_queue->lock, flags);
2242 post = (++recv_queue->count < recv_queue->max_active);
2243 list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
2244 spin_unlock_irqrestore(&recv_queue->lock, flags);
2245 ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr);
2246 if (ret) {
2247 spin_lock_irqsave(&recv_queue->lock, flags);
2248 list_del(&mad_priv->header.mad_list.list);
2249 recv_queue->count--;
2250 spin_unlock_irqrestore(&recv_queue->lock, flags);
2251 dma_unmap_single(qp_info->port_priv->device->dma_device,
2252 pci_unmap_addr(&mad_priv->header,
2253 mapping),
2254 sizeof *mad_priv -
2255 sizeof mad_priv->header,
2256 DMA_FROM_DEVICE);
2257 kmem_cache_free(ib_mad_cache, mad_priv);
2258 printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
2259 break;
2260 }
2261 } while (post);
2262
2263 return ret;
2264}
2265
2266/*
2267 * Return all the posted receive MADs
2268 */
2269static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info)
2270{
2271 struct ib_mad_private_header *mad_priv_hdr;
2272 struct ib_mad_private *recv;
2273 struct ib_mad_list_head *mad_list;
2274
2275 while (!list_empty(&qp_info->recv_queue.list)) {
2276
2277 mad_list = list_entry(qp_info->recv_queue.list.next,
2278 struct ib_mad_list_head, list);
2279 mad_priv_hdr = container_of(mad_list,
2280 struct ib_mad_private_header,
2281 mad_list);
2282 recv = container_of(mad_priv_hdr, struct ib_mad_private,
2283 header);
2284
2285 /* Remove from posted receive MAD list */
2286 list_del(&mad_list->list);
2287
2288 /* Undo PCI mapping */
2289 dma_unmap_single(qp_info->port_priv->device->dma_device,
2290 pci_unmap_addr(&recv->header, mapping),
2291 sizeof(struct ib_mad_private) -
2292 sizeof(struct ib_mad_private_header),
2293 DMA_FROM_DEVICE);
2294 kmem_cache_free(ib_mad_cache, recv);
2295 }
2296
2297 qp_info->recv_queue.count = 0;
2298}
2299
2300/*
2301 * Start the port
2302 */
2303static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
2304{
2305 int ret, i;
2306 struct ib_qp_attr *attr;
2307 struct ib_qp *qp;
2308
2309 attr = kmalloc(sizeof *attr, GFP_KERNEL);
2310 if (!attr) {
2311 printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
2312 return -ENOMEM;
2313 }
2314
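	/*
	 * Bring QP0 and QP1 through the INIT -> RTR -> RTS transitions.
	 * As special QPs they need no address or path attributes beyond
	 * the PKey index, QKey, and send queue PSN set below.
	 */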
2315 for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2316 qp = port_priv->qp_info[i].qp;
2317 /*
2318 * PKey index for QP1 is irrelevant but
2319 * one is needed for the Reset to Init transition
2320 */
2321 attr->qp_state = IB_QPS_INIT;
2322 attr->pkey_index = 0;
2323 attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
2324 ret = ib_modify_qp(qp, attr, IB_QP_STATE |
2325 IB_QP_PKEY_INDEX | IB_QP_QKEY);
2326 if (ret) {
2327 printk(KERN_ERR PFX "Couldn't change QP%d state to "
2328 "INIT: %d\n", i, ret);
2329 goto out;
2330 }
2331
2332 attr->qp_state = IB_QPS_RTR;
2333 ret = ib_modify_qp(qp, attr, IB_QP_STATE);
2334 if (ret) {
2335 printk(KERN_ERR PFX "Couldn't change QP%d state to "
2336 "RTR: %d\n", i, ret);
2337 goto out;
2338 }
2339
2340 attr->qp_state = IB_QPS_RTS;
2341 attr->sq_psn = IB_MAD_SEND_Q_PSN;
2342 ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
2343 if (ret) {
2344 printk(KERN_ERR PFX "Couldn't change QP%d state to "
2345 "RTS: %d\n", i, ret);
2346 goto out;
2347 }
2348 }
2349
2350 ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
2351 if (ret) {
2352 printk(KERN_ERR PFX "Failed to request completion "
2353 "notification: %d\n", ret);
2354 goto out;
2355 }
2356
2357 for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2358 ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
2359 if (ret) {
2360 printk(KERN_ERR PFX "Couldn't post receive WRs\n");
2361 goto out;
2362 }
2363 }
2364out:
2365 kfree(attr);
2366 return ret;
2367}
2368
2369static void qp_event_handler(struct ib_event *event, void *qp_context)
2370{
2371 struct ib_mad_qp_info *qp_info = qp_context;
2372
2373 /* It's worse than that! He's dead, Jim! */
2374 printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n",
2375 event->event, qp_info->qp->qp_num);
2376}
2377
2378static void init_mad_queue(struct ib_mad_qp_info *qp_info,
2379 struct ib_mad_queue *mad_queue)
2380{
2381 mad_queue->qp_info = qp_info;
2382 mad_queue->count = 0;
2383 spin_lock_init(&mad_queue->lock);
2384 INIT_LIST_HEAD(&mad_queue->list);
2385}
2386
2387static void init_mad_qp(struct ib_mad_port_private *port_priv,
2388 struct ib_mad_qp_info *qp_info)
2389{
2390 qp_info->port_priv = port_priv;
2391 init_mad_queue(qp_info, &qp_info->send_queue);
2392 init_mad_queue(qp_info, &qp_info->recv_queue);
2393 INIT_LIST_HEAD(&qp_info->overflow_list);
2394 spin_lock_init(&qp_info->snoop_lock);
2395 qp_info->snoop_table = NULL;
2396 qp_info->snoop_table_size = 0;
2397 atomic_set(&qp_info->snoop_count, 0);
2398}
2399
2400static int create_mad_qp(struct ib_mad_qp_info *qp_info,
2401 enum ib_qp_type qp_type)
2402{
2403 struct ib_qp_init_attr qp_init_attr;
2404 int ret;
2405
2406 memset(&qp_init_attr, 0, sizeof qp_init_attr);
2407 qp_init_attr.send_cq = qp_info->port_priv->cq;
2408 qp_init_attr.recv_cq = qp_info->port_priv->cq;
2409 qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
2410 qp_init_attr.cap.max_send_wr = IB_MAD_QP_SEND_SIZE;
2411 qp_init_attr.cap.max_recv_wr = IB_MAD_QP_RECV_SIZE;
2412 qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
2413 qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
2414 qp_init_attr.qp_type = qp_type;
2415 qp_init_attr.port_num = qp_info->port_priv->port_num;
2416 qp_init_attr.qp_context = qp_info;
2417 qp_init_attr.event_handler = qp_event_handler;
2418 qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
2419 if (IS_ERR(qp_info->qp)) {
2420 printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n",
2421 get_spl_qp_index(qp_type));
2422 ret = PTR_ERR(qp_info->qp);
2423 goto error;
2424 }
2425 /* Use minimum queue sizes unless the CQ is resized */
2426 qp_info->send_queue.max_active = IB_MAD_QP_SEND_SIZE;
2427 qp_info->recv_queue.max_active = IB_MAD_QP_RECV_SIZE;
2428 return 0;
2429
2430error:
2431 return ret;
2432}
2433
2434static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
2435{
2436 ib_destroy_qp(qp_info->qp);
2437 if (qp_info->snoop_table)
2438 kfree(qp_info->snoop_table);
2439}
2440
2441/*
2442 * Open the port
2443 * Create the QP, PD, MR, and CQ if needed
2444 */
2445static int ib_mad_port_open(struct ib_device *device,
2446 int port_num)
2447{
2448 int ret, cq_size;
2449 struct ib_mad_port_private *port_priv;
2450 unsigned long flags;
2451 char name[sizeof "ib_mad123"];
2452
2453 /* First, check if port already open at MAD layer */
2454 port_priv = ib_get_mad_port(device, port_num);
2455 if (port_priv) {
2456 printk(KERN_DEBUG PFX "%s port %d already open\n",
2457 device->name, port_num);
2458 return 0;
2459 }
2460
2461 /* Create new device info */
2462 port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL);
2463 if (!port_priv) {
2464 printk(KERN_ERR PFX "No memory for ib_mad_port_private\n");
2465 return -ENOMEM;
2466 }
2467 memset(port_priv, 0, sizeof *port_priv);
2468 port_priv->device = device;
2469 port_priv->port_num = port_num;
2470 spin_lock_init(&port_priv->reg_lock);
2471 INIT_LIST_HEAD(&port_priv->agent_list);
2472 init_mad_qp(port_priv, &port_priv->qp_info[0]);
2473 init_mad_qp(port_priv, &port_priv->qp_info[1]);
2474
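	/* The single CQ is shared by the send and receive queues of both QPs */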
2475 cq_size = (IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE) * 2;
2476 port_priv->cq = ib_create_cq(port_priv->device,
2477 (ib_comp_handler)
2478 ib_mad_thread_completion_handler,
2479 NULL, port_priv, cq_size);
2480 if (IS_ERR(port_priv->cq)) {
2481 printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n");
2482 ret = PTR_ERR(port_priv->cq);
2483 goto error3;
2484 }
2485
2486 port_priv->pd = ib_alloc_pd(device);
2487 if (IS_ERR(port_priv->pd)) {
2488 printk(KERN_ERR PFX "Couldn't create ib_mad PD\n");
2489 ret = PTR_ERR(port_priv->pd);
2490 goto error4;
2491 }
2492
2493 port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE);
2494 if (IS_ERR(port_priv->mr)) {
2495 printk(KERN_ERR PFX "Couldn't get ib_mad DMA MR\n");
2496 ret = PTR_ERR(port_priv->mr);
2497 goto error5;
2498 }
2499
2500 ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI);
2501 if (ret)
2502 goto error6;
2503 ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
2504 if (ret)
2505 goto error7;
2506
2507 snprintf(name, sizeof name, "ib_mad%d", port_num);
2508 port_priv->wq = create_singlethread_workqueue(name);
2509 if (!port_priv->wq) {
2510 ret = -ENOMEM;
2511 goto error8;
2512 }
2513 INIT_WORK(&port_priv->work, ib_mad_completion_handler, port_priv);
2514
2515 ret = ib_mad_port_start(port_priv);
2516 if (ret) {
2517 printk(KERN_ERR PFX "Couldn't start port\n");
2518 goto error9;
2519 }
2520
2521 spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2522 list_add_tail(&port_priv->port_list, &ib_mad_port_list);
2523 spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2524 return 0;
2525
2526error9:
2527 destroy_workqueue(port_priv->wq);
2528error8:
2529 destroy_mad_qp(&port_priv->qp_info[1]);
2530error7:
2531 destroy_mad_qp(&port_priv->qp_info[0]);
2532error6:
2533 ib_dereg_mr(port_priv->mr);
2534error5:
2535 ib_dealloc_pd(port_priv->pd);
2536error4:
2537 ib_destroy_cq(port_priv->cq);
2538 cleanup_recv_queue(&port_priv->qp_info[1]);
2539 cleanup_recv_queue(&port_priv->qp_info[0]);
2540error3:
2541 kfree(port_priv);
2542
2543 return ret;
2544}
2545
2546/*
2547 * Close the port
2548 * If there are no classes using the port, free the port
2549 * resources (CQ, MR, PD, QP) and remove the port's info structure
2550 */
2551static int ib_mad_port_close(struct ib_device *device, int port_num)
2552{
2553 struct ib_mad_port_private *port_priv;
2554 unsigned long flags;
2555
2556 spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2557 port_priv = __ib_get_mad_port(device, port_num);
2558 if (port_priv == NULL) {
2559 spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2560 printk(KERN_ERR PFX "Port %d not found\n", port_num);
2561 return -ENODEV;
2562 }
2563 list_del(&port_priv->port_list);
2564 spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2565
2566 /* Stop processing completions. */
2567 flush_workqueue(port_priv->wq);
2568 destroy_workqueue(port_priv->wq);
2569 destroy_mad_qp(&port_priv->qp_info[1]);
2570 destroy_mad_qp(&port_priv->qp_info[0]);
2571 ib_dereg_mr(port_priv->mr);
2572 ib_dealloc_pd(port_priv->pd);
2573 ib_destroy_cq(port_priv->cq);
2574 cleanup_recv_queue(&port_priv->qp_info[1]);
2575 cleanup_recv_queue(&port_priv->qp_info[0]);
2576 /* XXX: Handle deallocation of MAD registration tables */
2577
2578 kfree(port_priv);
2579
2580 return 0;
2581}
2582
2583static void ib_mad_init_device(struct ib_device *device)
2584{
2585 int ret, num_ports, cur_port, i, ret2;
2586
2587 if (device->node_type == IB_NODE_SWITCH) {
2588 num_ports = 1;
2589 cur_port = 0;
2590 } else {
2591 num_ports = device->phys_port_cnt;
2592 cur_port = 1;
2593 }
2594 for (i = 0; i < num_ports; i++, cur_port++) {
2595 ret = ib_mad_port_open(device, cur_port);
2596 if (ret) {
2597 printk(KERN_ERR PFX "Couldn't open %s port %d\n",
2598 device->name, cur_port);
2599 goto error_device_open;
2600 }
2601 ret = ib_agent_port_open(device, cur_port);
2602 if (ret) {
2603 printk(KERN_ERR PFX "Couldn't open %s port %d "
2604 "for agents\n",
2605 device->name, cur_port);
2606 goto error_device_open;
2607 }
2608 }
2609
2610 goto error_device_query;
2611
2612error_device_open:
2613 while (i > 0) {
2614 cur_port--;
2615 ret2 = ib_agent_port_close(device, cur_port);
2616 if (ret2) {
2617 printk(KERN_ERR PFX "Couldn't close %s port %d "
2618 "for agents\n",
2619 device->name, cur_port);
2620 }
2621 ret2 = ib_mad_port_close(device, cur_port);
2622 if (ret2) {
2623 printk(KERN_ERR PFX "Couldn't close %s port %d\n",
2624 device->name, cur_port);
2625 }
2626 i--;
2627 }
2628
2629error_device_query:
2630 return;
2631}
2632
2633static void ib_mad_remove_device(struct ib_device *device)
2634{
2635 int ret = 0, i, num_ports, cur_port, ret2;
2636
2637 if (device->node_type == IB_NODE_SWITCH) {
2638 num_ports = 1;
2639 cur_port = 0;
2640 } else {
2641 num_ports = device->phys_port_cnt;
2642 cur_port = 1;
2643 }
2644 for (i = 0; i < num_ports; i++, cur_port++) {
2645 ret2 = ib_agent_port_close(device, cur_port);
2646 if (ret2) {
2647 printk(KERN_ERR PFX "Couldn't close %s port %d "
2648 "for agents\n",
2649 device->name, cur_port);
2650 if (!ret)
2651 ret = ret2;
2652 }
2653 ret2 = ib_mad_port_close(device, cur_port);
2654 if (ret2) {
2655 printk(KERN_ERR PFX "Couldn't close %s port %d\n",
2656 device->name, cur_port);
2657 if (!ret)
2658 ret = ret2;
2659 }
2660 }
2661}
2662
2663static struct ib_client mad_client = {
2664 .name = "mad",
2665 .add = ib_mad_init_device,
2666 .remove = ib_mad_remove_device
2667};
2668
2669static int __init ib_mad_init_module(void)
2670{
2671 int ret;
2672
2673 spin_lock_init(&ib_mad_port_list_lock);
2674 spin_lock_init(&ib_agent_port_list_lock);
2675
2676 ib_mad_cache = kmem_cache_create("ib_mad",
2677 sizeof(struct ib_mad_private),
2678 0,
2679 SLAB_HWCACHE_ALIGN,
2680 NULL,
2681 NULL);
2682 if (!ib_mad_cache) {
2683 printk(KERN_ERR PFX "Couldn't create ib_mad cache\n");
2684 ret = -ENOMEM;
2685 goto error1;
2686 }
2687
2688 INIT_LIST_HEAD(&ib_mad_port_list);
2689
2690 if (ib_register_client(&mad_client)) {
2691 printk(KERN_ERR PFX "Couldn't register ib_mad client\n");
2692 ret = -EINVAL;
2693 goto error2;
2694 }
2695
2696 return 0;
2697
2698error2:
2699 kmem_cache_destroy(ib_mad_cache);
2700error1:
2701 return ret;
2702}
2703
2704static void __exit ib_mad_cleanup_module(void)
2705{
2706 ib_unregister_client(&mad_client);
2707
2708 if (kmem_cache_destroy(ib_mad_cache)) {
2709 printk(KERN_DEBUG PFX "Failed to destroy ib_mad cache\n");
2710 }
2711}
2712
2713module_init(ib_mad_init_module);
2714module_exit(ib_mad_cleanup_module);
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
new file mode 100644
index 000000000000..4ba9f726bf1d
--- /dev/null
+++ b/drivers/infiniband/core/mad_priv.h
@@ -0,0 +1,199 @@
1/*
2 * Copyright (c) 2004, 2005, Voltaire, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mad_priv.h 1389 2004-12-27 22:56:47Z roland $
33 */
34
35#ifndef __IB_MAD_PRIV_H__
36#define __IB_MAD_PRIV_H__
37
38#include <linux/pci.h>
39#include <linux/kthread.h>
40#include <linux/workqueue.h>
41#include <ib_mad.h>
42#include <ib_smi.h>
43
44
45#define PFX "ib_mad: "
46
47#define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */
48
49/* QP and CQ parameters */
50#define IB_MAD_QP_SEND_SIZE 128
51#define IB_MAD_QP_RECV_SIZE 512
52#define IB_MAD_SEND_REQ_MAX_SG 2
53#define IB_MAD_RECV_REQ_MAX_SG 1
54
55#define IB_MAD_SEND_Q_PSN 0
56
57/* Registration table sizes */
58#define MAX_MGMT_CLASS 80
59#define MAX_MGMT_VERSION 8
60#define MAX_MGMT_OUI 8
61#define MAX_MGMT_VENDOR_RANGE2 (IB_MGMT_CLASS_VENDOR_RANGE2_END - \
62 IB_MGMT_CLASS_VENDOR_RANGE2_START + 1)
63
64struct ib_mad_list_head {
65 struct list_head list;
66 struct ib_mad_queue *mad_queue;
67};
68
69struct ib_mad_private_header {
70 struct ib_mad_list_head mad_list;
71 struct ib_mad_recv_wc recv_wc;
72 DECLARE_PCI_UNMAP_ADDR(mapping)
73} __attribute__ ((packed));
74
75struct ib_mad_private {
76 struct ib_mad_private_header header;
77 struct ib_grh grh;
78 union {
79 struct ib_mad mad;
80 struct ib_rmpp_mad rmpp_mad;
81 struct ib_smp smp;
82 } mad;
83} __attribute__ ((packed));
84
85struct ib_mad_agent_private {
86 struct list_head agent_list;
87 struct ib_mad_agent agent;
88 struct ib_mad_reg_req *reg_req;
89 struct ib_mad_qp_info *qp_info;
90
91 spinlock_t lock;
92 struct list_head send_list;
93 struct list_head wait_list;
94 struct work_struct timed_work;
95 unsigned long timeout;
96 struct list_head local_list;
97 struct work_struct local_work;
98 struct list_head canceled_list;
99 struct work_struct canceled_work;
100
101 atomic_t refcount;
102 wait_queue_head_t wait;
103 u8 rmpp_version;
104};
105
106struct ib_mad_snoop_private {
107 struct ib_mad_agent agent;
108 struct ib_mad_qp_info *qp_info;
109 int snoop_index;
110 int mad_snoop_flags;
111 atomic_t refcount;
112 wait_queue_head_t wait;
113};
114
115struct ib_mad_send_wr_private {
116 struct ib_mad_list_head mad_list;
117 struct list_head agent_list;
118 struct ib_mad_agent *agent;
119 struct ib_send_wr send_wr;
120 struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
121 u64 wr_id; /* client WR ID */
122 u64 tid;
123 unsigned long timeout;
124 int retry;
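	/*
	 * Freed when refcount drops to zero; an extra reference is held
	 * while a response is awaited (see ib_mad_complete_send_wr()).
	 */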
125 int refcount;
126 enum ib_wc_status status;
127};
128
129struct ib_mad_local_private {
130 struct list_head completion_list;
131 struct ib_mad_private *mad_priv;
132 struct ib_mad_agent_private *recv_mad_agent;
133 struct ib_send_wr send_wr;
134 struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
135 u64 wr_id; /* client WR ID */
136 u64 tid;
137};
138
139struct ib_mad_mgmt_method_table {
140 struct ib_mad_agent_private *agent[IB_MGMT_MAX_METHODS];
141};
142
143struct ib_mad_mgmt_class_table {
144 struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_CLASS];
145};
146
147struct ib_mad_mgmt_vendor_class {
148 u8 oui[MAX_MGMT_OUI][3];
149 struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_OUI];
150};
151
152struct ib_mad_mgmt_vendor_class_table {
153 struct ib_mad_mgmt_vendor_class *vendor_class[MAX_MGMT_VENDOR_RANGE2];
154};
155
156struct ib_mad_mgmt_version_table {
157 struct ib_mad_mgmt_class_table *class;
158 struct ib_mad_mgmt_vendor_class_table *vendor;
159};
160
161struct ib_mad_queue {
162 spinlock_t lock;
163 struct list_head list;
164 int count;
165 int max_active;
166 struct ib_mad_qp_info *qp_info;
167};
168
169struct ib_mad_qp_info {
170 struct ib_mad_port_private *port_priv;
171 struct ib_qp *qp;
172 struct ib_mad_queue send_queue;
173 struct ib_mad_queue recv_queue;
174 struct list_head overflow_list;
175 spinlock_t snoop_lock;
176 struct ib_mad_snoop_private **snoop_table;
177 int snoop_table_size;
178 atomic_t snoop_count;
179};
180
181struct ib_mad_port_private {
182 struct list_head port_list;
183 struct ib_device *device;
184 int port_num;
185 struct ib_cq *cq;
186 struct ib_pd *pd;
187 struct ib_mr *mr;
188
189 spinlock_t reg_lock;
190 struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
191 struct list_head agent_list;
192 struct workqueue_struct *wq;
193 struct work_struct work;
194 struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
195};
196
197extern kmem_cache_t *ib_mad_cache;
198
199#endif /* __IB_MAD_PRIV_H__ */
diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c
new file mode 100644
index 000000000000..5f15feffeae2
--- /dev/null
+++ b/drivers/infiniband/core/packer.c
@@ -0,0 +1,201 @@
1/*
2 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: packer.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <ib_pack.h>
36
37static u64 value_read(int offset, int size, void *structure)
38{
39 switch (size) {
40 case 1: return *(u8 *) (structure + offset);
41 case 2: return be16_to_cpup((__be16 *) (structure + offset));
42 case 4: return be32_to_cpup((__be32 *) (structure + offset));
43 case 8: return be64_to_cpup((__be64 *) (structure + offset));
44 default:
45 printk(KERN_WARNING "Field size %d bits not handled\n", size * 8);
46 return 0;
47 }
48}
49
50/**
51 * ib_pack - Pack a structure into a buffer
52 * @desc:Array of structure field descriptions
53 * @desc_len:Number of entries in @desc
54 * @structure:Structure to pack from
55 * @buf:Buffer to pack into
56 *
57 * ib_pack() packs a list of structure fields into a buffer,
58 * controlled by the array of fields in @desc.
59 */
60void ib_pack(const struct ib_field *desc,
61 int desc_len,
62 void *structure,
63 void *buf)
64{
65 int i;
66
67 for (i = 0; i < desc_len; ++i) {
68 if (desc[i].size_bits <= 32) {
69 int shift;
70 u32 val;
71 __be32 mask;
72 __be32 *addr;
73
74 shift = 32 - desc[i].offset_bits - desc[i].size_bits;
75 if (desc[i].struct_size_bytes)
76 val = value_read(desc[i].struct_offset_bytes,
77 desc[i].struct_size_bytes,
78 structure) << shift;
79 else
80 val = 0;
81
82 mask = cpu_to_be32(((1ull << desc[i].size_bits) - 1) << shift);
83 addr = (__be32 *) buf + desc[i].offset_words;
84 *addr = (*addr & ~mask) | (cpu_to_be32(val) & mask);
85 } else if (desc[i].size_bits <= 64) {
86 int shift;
87 u64 val;
88 __be64 mask;
89 __be64 *addr;
90
91 shift = 64 - desc[i].offset_bits - desc[i].size_bits;
92 if (desc[i].struct_size_bytes)
93 val = value_read(desc[i].struct_offset_bytes,
94 desc[i].struct_size_bytes,
95 structure) << shift;
96 else
97 val = 0;
98
99 mask = cpu_to_be64(((1ull << desc[i].size_bits) - 1) << shift);
100 addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words);
101 *addr = (*addr & ~mask) | (cpu_to_be64(val) & mask);
102 } else {
103 if (desc[i].offset_bits % 8 ||
104 desc[i].size_bits % 8) {
105 printk(KERN_WARNING "Structure field %s of size %d "
106 "bits is not byte-aligned\n",
107 desc[i].field_name, desc[i].size_bits);
108 }
109
110 if (desc[i].struct_size_bytes)
111 memcpy(buf + desc[i].offset_words * 4 +
112 desc[i].offset_bits / 8,
113 structure + desc[i].struct_offset_bytes,
114 desc[i].size_bits / 8);
115 else
116 memset(buf + desc[i].offset_words * 4 +
117 desc[i].offset_bits / 8,
118 0,
119 desc[i].size_bits / 8);
120 }
121 }
122}
123EXPORT_SYMBOL(ib_pack);
124
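/*
 * Illustrative usage sketch (an addition, not part of the original code):
 * sa_query.c in this same patch packs a struct ib_sa_path_rec into the
 * data area of an SA MAD using a field table, roughly as follows; the
 * variable names here are hypothetical.
 *
 *	struct ib_sa_path_rec rec;	// filled in by the caller
 *	struct ib_sa_mad mad;		// zeroed wire-format MAD
 *
 *	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), &rec, mad.data);
 *
 * ib_unpack() performs the inverse transformation, from mad.data back
 * into the structure.
 */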
125static void value_write(int offset, int size, u64 val, void *structure)
126{
127 switch (size * 8) {
128 case 8: *( u8 *) (structure + offset) = val; break;
129 case 16: *(__be16 *) (structure + offset) = cpu_to_be16(val); break;
130 case 32: *(__be32 *) (structure + offset) = cpu_to_be32(val); break;
131 case 64: *(__be64 *) (structure + offset) = cpu_to_be64(val); break;
132 default:
133 printk(KERN_WARNING "Field size %d bits not handled\n", size * 8);
134 }
135}
136
137/**
138 * ib_unpack - Unpack a buffer into a structure
139 * @desc:Array of structure field descriptions
140 * @desc_len:Number of entries in @desc
141 * @buf:Buffer to unpack from
142 * @structure:Structure to unpack into
143 *
144 * ib_unpack() unpacks a list of structure fields from a buffer,
145 * controlled by the array of fields in @desc.
146 */
147void ib_unpack(const struct ib_field *desc,
148 int desc_len,
149 void *buf,
150 void *structure)
151{
152 int i;
153
154 for (i = 0; i < desc_len; ++i) {
155 if (!desc[i].struct_size_bytes)
156 continue;
157
158 if (desc[i].size_bits <= 32) {
159 int shift;
160 u32 val;
161 u32 mask;
162 __be32 *addr;
163
164 shift = 32 - desc[i].offset_bits - desc[i].size_bits;
165 mask = ((1ull << desc[i].size_bits) - 1) << shift;
166 addr = (__be32 *) buf + desc[i].offset_words;
167 val = (be32_to_cpup(addr) & mask) >> shift;
168 value_write(desc[i].struct_offset_bytes,
169 desc[i].struct_size_bytes,
170 val,
171 structure);
172 } else if (desc[i].size_bits <= 64) {
173 int shift;
174 u64 val;
175 u64 mask;
176 __be64 *addr;
177
178 shift = 64 - desc[i].offset_bits - desc[i].size_bits;
179 mask = ((1ull << desc[i].size_bits) - 1) << shift;
180 addr = (__be64 *) buf + desc[i].offset_words;
181 val = (be64_to_cpup(addr) & mask) >> shift;
182 value_write(desc[i].struct_offset_bytes,
183 desc[i].struct_size_bytes,
184 val,
185 structure);
186 } else {
187 if (desc[i].offset_bits % 8 ||
188 desc[i].size_bits % 8) {
189 printk(KERN_WARNING "Structure field %s of size %d "
190 "bits is not byte-aligned\n",
191 desc[i].field_name, desc[i].size_bits);
192 }
193
194 memcpy(structure + desc[i].struct_offset_bytes,
195 buf + desc[i].offset_words * 4 +
196 desc[i].offset_bits / 8,
197 desc[i].size_bits / 8);
198 }
199 }
200}
201EXPORT_SYMBOL(ib_unpack);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
new file mode 100644
index 000000000000..d4233ee61c35
--- /dev/null
+++ b/drivers/infiniband/core/sa_query.c
@@ -0,0 +1,866 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: sa_query.c 1389 2004-12-27 22:56:47Z roland $
33 */
34
35#include <linux/module.h>
36#include <linux/init.h>
37#include <linux/err.h>
38#include <linux/random.h>
39#include <linux/spinlock.h>
40#include <linux/slab.h>
41#include <linux/pci.h>
42#include <linux/dma-mapping.h>
43#include <linux/kref.h>
44#include <linux/idr.h>
45
46#include <ib_pack.h>
47#include <ib_sa.h>
48
49MODULE_AUTHOR("Roland Dreier");
50MODULE_DESCRIPTION("InfiniBand subnet administration query support");
51MODULE_LICENSE("Dual BSD/GPL");
52
53/*
54 * These two structures must be packed because they have 64-bit fields
55 * that are only 32-bit aligned. 64-bit architectures will lay them
56 * out wrong otherwise. (And unfortunately they are sent on the wire
57 * so we can't change the layout)
58 */
59struct ib_sa_hdr {
60 u64 sm_key;
61 u16 attr_offset;
62 u16 reserved;
63 ib_sa_comp_mask comp_mask;
64} __attribute__ ((packed));
65
66struct ib_sa_mad {
67 struct ib_mad_hdr mad_hdr;
68 struct ib_rmpp_hdr rmpp_hdr;
69 struct ib_sa_hdr sa_hdr;
70 u8 data[200];
71} __attribute__ ((packed));
72
73struct ib_sa_sm_ah {
74 struct ib_ah *ah;
75 struct kref ref;
76};
77
78struct ib_sa_port {
79 struct ib_mad_agent *agent;
80 struct ib_mr *mr;
81 struct ib_sa_sm_ah *sm_ah;
82 struct work_struct update_task;
83 spinlock_t ah_lock;
84 u8 port_num;
85};
86
87struct ib_sa_device {
88 int start_port, end_port;
89 struct ib_event_handler event_handler;
90 struct ib_sa_port port[0];
91};
92
93struct ib_sa_query {
94 void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
95 void (*release)(struct ib_sa_query *);
96 struct ib_sa_port *port;
97 struct ib_sa_mad *mad;
98 struct ib_sa_sm_ah *sm_ah;
99 DECLARE_PCI_UNMAP_ADDR(mapping)
100 int id;
101};
102
103struct ib_sa_path_query {
104 void (*callback)(int, struct ib_sa_path_rec *, void *);
105 void *context;
106 struct ib_sa_query sa_query;
107};
108
109struct ib_sa_mcmember_query {
110 void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
111 void *context;
112 struct ib_sa_query sa_query;
113};
114
115static void ib_sa_add_one(struct ib_device *device);
116static void ib_sa_remove_one(struct ib_device *device);
117
118static struct ib_client sa_client = {
119 .name = "sa",
120 .add = ib_sa_add_one,
121 .remove = ib_sa_remove_one
122};
123
124static spinlock_t idr_lock;
125static DEFINE_IDR(query_idr);
126
127static spinlock_t tid_lock;
128static u32 tid;
129
130enum {
131 IB_SA_ATTR_CLASS_PORTINFO = 0x01,
132 IB_SA_ATTR_NOTICE = 0x02,
133 IB_SA_ATTR_INFORM_INFO = 0x03,
134 IB_SA_ATTR_NODE_REC = 0x11,
135 IB_SA_ATTR_PORT_INFO_REC = 0x12,
136 IB_SA_ATTR_SL2VL_REC = 0x13,
137 IB_SA_ATTR_SWITCH_REC = 0x14,
138 IB_SA_ATTR_LINEAR_FDB_REC = 0x15,
139 IB_SA_ATTR_RANDOM_FDB_REC = 0x16,
140 IB_SA_ATTR_MCAST_FDB_REC = 0x17,
141 IB_SA_ATTR_SM_INFO_REC = 0x18,
142 IB_SA_ATTR_LINK_REC = 0x20,
143 IB_SA_ATTR_GUID_INFO_REC = 0x30,
144 IB_SA_ATTR_SERVICE_REC = 0x31,
145 IB_SA_ATTR_PARTITION_REC = 0x33,
146 IB_SA_ATTR_RANGE_REC = 0x34,
147 IB_SA_ATTR_PATH_REC = 0x35,
148 IB_SA_ATTR_VL_ARB_REC = 0x36,
149 IB_SA_ATTR_MC_GROUP_REC = 0x37,
150 IB_SA_ATTR_MC_MEMBER_REC = 0x38,
151 IB_SA_ATTR_TRACE_REC = 0x39,
152 IB_SA_ATTR_MULTI_PATH_REC = 0x3a,
153 IB_SA_ATTR_SERVICE_ASSOC_REC = 0x3b
154};
155
156#define PATH_REC_FIELD(field) \
157 .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \
158 .struct_size_bytes = sizeof ((struct ib_sa_path_rec *) 0)->field, \
159 .field_name = "sa_path_rec:" #field
160
161static const struct ib_field path_rec_table[] = {
162 { RESERVED,
163 .offset_words = 0,
164 .offset_bits = 0,
165 .size_bits = 32 },
166 { RESERVED,
167 .offset_words = 1,
168 .offset_bits = 0,
169 .size_bits = 32 },
170 { PATH_REC_FIELD(dgid),
171 .offset_words = 2,
172 .offset_bits = 0,
173 .size_bits = 128 },
174 { PATH_REC_FIELD(sgid),
175 .offset_words = 6,
176 .offset_bits = 0,
177 .size_bits = 128 },
178 { PATH_REC_FIELD(dlid),
179 .offset_words = 10,
180 .offset_bits = 0,
181 .size_bits = 16 },
182 { PATH_REC_FIELD(slid),
183 .offset_words = 10,
184 .offset_bits = 16,
185 .size_bits = 16 },
186 { PATH_REC_FIELD(raw_traffic),
187 .offset_words = 11,
188 .offset_bits = 0,
189 .size_bits = 1 },
190 { RESERVED,
191 .offset_words = 11,
192 .offset_bits = 1,
193 .size_bits = 3 },
194 { PATH_REC_FIELD(flow_label),
195 .offset_words = 11,
196 .offset_bits = 4,
197 .size_bits = 20 },
198 { PATH_REC_FIELD(hop_limit),
199 .offset_words = 11,
200 .offset_bits = 24,
201 .size_bits = 8 },
202 { PATH_REC_FIELD(traffic_class),
203 .offset_words = 12,
204 .offset_bits = 0,
205 .size_bits = 8 },
206 { PATH_REC_FIELD(reversible),
207 .offset_words = 12,
208 .offset_bits = 8,
209 .size_bits = 1 },
210 { PATH_REC_FIELD(numb_path),
211 .offset_words = 12,
212 .offset_bits = 9,
213 .size_bits = 7 },
214 { PATH_REC_FIELD(pkey),
215 .offset_words = 12,
216 .offset_bits = 16,
217 .size_bits = 16 },
218 { RESERVED,
219 .offset_words = 13,
220 .offset_bits = 0,
221 .size_bits = 12 },
222 { PATH_REC_FIELD(sl),
223 .offset_words = 13,
224 .offset_bits = 12,
225 .size_bits = 4 },
226 { PATH_REC_FIELD(mtu_selector),
227 .offset_words = 13,
228 .offset_bits = 16,
229 .size_bits = 2 },
230 { PATH_REC_FIELD(mtu),
231 .offset_words = 13,
232 .offset_bits = 18,
233 .size_bits = 6 },
234 { PATH_REC_FIELD(rate_selector),
235 .offset_words = 13,
236 .offset_bits = 24,
237 .size_bits = 2 },
238 { PATH_REC_FIELD(rate),
239 .offset_words = 13,
240 .offset_bits = 26,
241 .size_bits = 6 },
242 { PATH_REC_FIELD(packet_life_time_selector),
243 .offset_words = 14,
244 .offset_bits = 0,
245 .size_bits = 2 },
246 { PATH_REC_FIELD(packet_life_time),
247 .offset_words = 14,
248 .offset_bits = 2,
249 .size_bits = 6 },
250 { PATH_REC_FIELD(preference),
251 .offset_words = 14,
252 .offset_bits = 8,
253 .size_bits = 8 },
254 { RESERVED,
255 .offset_words = 14,
256 .offset_bits = 16,
257 .size_bits = 48 },
258};
259
260#define MCMEMBER_REC_FIELD(field) \
261 .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \
262 .struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \
263 .field_name = "sa_mcmember_rec:" #field
264
265static const struct ib_field mcmember_rec_table[] = {
266 { MCMEMBER_REC_FIELD(mgid),
267 .offset_words = 0,
268 .offset_bits = 0,
269 .size_bits = 128 },
270 { MCMEMBER_REC_FIELD(port_gid),
271 .offset_words = 4,
272 .offset_bits = 0,
273 .size_bits = 128 },
274 { MCMEMBER_REC_FIELD(qkey),
275 .offset_words = 8,
276 .offset_bits = 0,
277 .size_bits = 32 },
278 { MCMEMBER_REC_FIELD(mlid),
279 .offset_words = 9,
280 .offset_bits = 0,
281 .size_bits = 16 },
282 { MCMEMBER_REC_FIELD(mtu_selector),
283 .offset_words = 9,
284 .offset_bits = 16,
285 .size_bits = 2 },
286 { MCMEMBER_REC_FIELD(mtu),
287 .offset_words = 9,
288 .offset_bits = 18,
289 .size_bits = 6 },
290 { MCMEMBER_REC_FIELD(traffic_class),
291 .offset_words = 9,
292 .offset_bits = 24,
293 .size_bits = 8 },
294 { MCMEMBER_REC_FIELD(pkey),
295 .offset_words = 10,
296 .offset_bits = 0,
297 .size_bits = 16 },
298 { MCMEMBER_REC_FIELD(rate_selector),
299 .offset_words = 10,
300 .offset_bits = 16,
301 .size_bits = 2 },
302 { MCMEMBER_REC_FIELD(rate),
303 .offset_words = 10,
304 .offset_bits = 18,
305 .size_bits = 6 },
306 { MCMEMBER_REC_FIELD(packet_life_time_selector),
307 .offset_words = 10,
308 .offset_bits = 24,
309 .size_bits = 2 },
310 { MCMEMBER_REC_FIELD(packet_life_time),
311 .offset_words = 10,
312 .offset_bits = 26,
313 .size_bits = 6 },
314 { MCMEMBER_REC_FIELD(sl),
315 .offset_words = 11,
316 .offset_bits = 0,
317 .size_bits = 4 },
318 { MCMEMBER_REC_FIELD(flow_label),
319 .offset_words = 11,
320 .offset_bits = 4,
321 .size_bits = 20 },
322 { MCMEMBER_REC_FIELD(hop_limit),
323 .offset_words = 11,
324 .offset_bits = 24,
325 .size_bits = 8 },
326 { MCMEMBER_REC_FIELD(scope),
327 .offset_words = 12,
328 .offset_bits = 0,
329 .size_bits = 4 },
330 { MCMEMBER_REC_FIELD(join_state),
331 .offset_words = 12,
332 .offset_bits = 4,
333 .size_bits = 4 },
334 { MCMEMBER_REC_FIELD(proxy_join),
335 .offset_words = 12,
336 .offset_bits = 8,
337 .size_bits = 1 },
338 { RESERVED,
339 .offset_words = 12,
340 .offset_bits = 9,
341 .size_bits = 23 },
342};
343
344static void free_sm_ah(struct kref *kref)
345{
346 struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
347
348 ib_destroy_ah(sm_ah->ah);
349 kfree(sm_ah);
350}
351
352static void update_sm_ah(void *port_ptr)
353{
354 struct ib_sa_port *port = port_ptr;
355 struct ib_sa_sm_ah *new_ah, *old_ah;
356 struct ib_port_attr port_attr;
357 struct ib_ah_attr ah_attr;
358
359 if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
360 printk(KERN_WARNING "Couldn't query port\n");
361 return;
362 }
363
364 new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
365 if (!new_ah) {
366 printk(KERN_WARNING "Couldn't allocate new SM AH\n");
367 return;
368 }
369
370 kref_init(&new_ah->ref);
371
372 memset(&ah_attr, 0, sizeof ah_attr);
373 ah_attr.dlid = port_attr.sm_lid;
374 ah_attr.sl = port_attr.sm_sl;
375 ah_attr.port_num = port->port_num;
376
377 new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
378 if (IS_ERR(new_ah->ah)) {
379 printk(KERN_WARNING "Couldn't create new SM AH\n");
380 kfree(new_ah);
381 return;
382 }
383
384 spin_lock_irq(&port->ah_lock);
385 old_ah = port->sm_ah;
386 port->sm_ah = new_ah;
387 spin_unlock_irq(&port->ah_lock);
388
389 if (old_ah)
390 kref_put(&old_ah->ref, free_sm_ah);
391}
392
393static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
394{
395 if (event->event == IB_EVENT_PORT_ERR ||
396 event->event == IB_EVENT_PORT_ACTIVE ||
397 event->event == IB_EVENT_LID_CHANGE ||
398 event->event == IB_EVENT_PKEY_CHANGE ||
399 event->event == IB_EVENT_SM_CHANGE) {
400 struct ib_sa_device *sa_dev =
401 ib_get_client_data(event->device, &sa_client);
402
403 schedule_work(&sa_dev->port[event->element.port_num -
404 sa_dev->start_port].update_task);
405 }
406}
407
408/**
409 * ib_sa_cancel_query - try to cancel an SA query
410 * @id:ID of query to cancel
411 * @query:query pointer to cancel
412 *
413 * Try to cancel an SA query. If the id and query don't match up or
414 * the query has already completed, nothing is done. Otherwise the
415 * query is canceled and will complete with a status of -EINTR.
416 */
417void ib_sa_cancel_query(int id, struct ib_sa_query *query)
418{
419 unsigned long flags;
420 struct ib_mad_agent *agent;
421
422 spin_lock_irqsave(&idr_lock, flags);
423 if (idr_find(&query_idr, id) != query) {
424 spin_unlock_irqrestore(&idr_lock, flags);
425 return;
426 }
427 agent = query->port->agent;
428 spin_unlock_irqrestore(&idr_lock, flags);
429
430 ib_cancel_mad(agent, id);
431}
432EXPORT_SYMBOL(ib_sa_cancel_query);
433
434static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
435{
436 unsigned long flags;
437
438 memset(mad, 0, sizeof *mad);
439
440 mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
441 mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
442 mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
443
444 spin_lock_irqsave(&tid_lock, flags);
445 mad->mad_hdr.tid =
446 cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
447 spin_unlock_irqrestore(&tid_lock, flags);
448}
449
450static int send_mad(struct ib_sa_query *query, int timeout_ms)
451{
452 struct ib_sa_port *port = query->port;
453 unsigned long flags;
454 int ret;
455 struct ib_sge gather_list;
456 struct ib_send_wr *bad_wr, wr = {
457 .opcode = IB_WR_SEND,
458 .sg_list = &gather_list,
459 .num_sge = 1,
460 .send_flags = IB_SEND_SIGNALED,
461 .wr = {
462 .ud = {
463 .mad_hdr = &query->mad->mad_hdr,
464 .remote_qpn = 1,
465 .remote_qkey = IB_QP1_QKEY,
466 .timeout_ms = timeout_ms
467 }
468 }
469 };
470
471retry:
472 if (!idr_pre_get(&query_idr, GFP_ATOMIC))
473 return -ENOMEM;
474 spin_lock_irqsave(&idr_lock, flags);
475 ret = idr_get_new(&query_idr, query, &query->id);
476 spin_unlock_irqrestore(&idr_lock, flags);
477 if (ret == -EAGAIN)
478 goto retry;
479 if (ret)
480 return ret;
481
482 wr.wr_id = query->id;
483
484 spin_lock_irqsave(&port->ah_lock, flags);
485 kref_get(&port->sm_ah->ref);
486 query->sm_ah = port->sm_ah;
487 wr.wr.ud.ah = port->sm_ah->ah;
488 spin_unlock_irqrestore(&port->ah_lock, flags);
489
490 gather_list.addr = dma_map_single(port->agent->device->dma_device,
491 query->mad,
492 sizeof (struct ib_sa_mad),
493 DMA_TO_DEVICE);
494 gather_list.length = sizeof (struct ib_sa_mad);
495 gather_list.lkey = port->mr->lkey;
496 pci_unmap_addr_set(query, mapping, gather_list.addr);
497
498 ret = ib_post_send_mad(port->agent, &wr, &bad_wr);
499 if (ret) {
500 dma_unmap_single(port->agent->device->dma_device,
501 pci_unmap_addr(query, mapping),
502 sizeof (struct ib_sa_mad),
503 DMA_TO_DEVICE);
504 kref_put(&query->sm_ah->ref, free_sm_ah);
505 spin_lock_irqsave(&idr_lock, flags);
506 idr_remove(&query_idr, query->id);
507 spin_unlock_irqrestore(&idr_lock, flags);
508 }
509
510 return ret;
511}
512
513static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
514 int status,
515 struct ib_sa_mad *mad)
516{
517 struct ib_sa_path_query *query =
518 container_of(sa_query, struct ib_sa_path_query, sa_query);
519
520 if (mad) {
521 struct ib_sa_path_rec rec;
522
523 ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
524 mad->data, &rec);
525 query->callback(status, &rec, query->context);
526 } else
527 query->callback(status, NULL, query->context);
528}
529
530static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
531{
532 kfree(sa_query->mad);
533 kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
534}
535
536/**
537 * ib_sa_path_rec_get - Start a Path get query
538 * @device:device to send query on
539 * @port_num: port number to send query on
540 * @rec:Path Record to send in query
541 * @comp_mask:component mask to send in query
542 * @timeout_ms:time to wait for response
543 * @gfp_mask:GFP mask to use for internal allocations
544 * @callback:function called when query completes, times out or is
545 * canceled
546 * @context:opaque user context passed to callback
547 * @sa_query:query context, used to cancel query
548 *
549 * Send a Path Record Get query to the SA to look up a path. The
550 * callback function will be called when the query completes (or
551 * fails); status is 0 for a successful response, -EINTR if the query
552 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
553 * occurred sending the query. The resp parameter of the callback is
554 * only valid if status is 0.
555 *
556 * If the return value of ib_sa_path_rec_get() is negative, it is an
557 * error code. Otherwise it is a query ID that can be used to cancel
558 * the query.
559 */
560int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
561 struct ib_sa_path_rec *rec,
562 ib_sa_comp_mask comp_mask,
563 int timeout_ms, int gfp_mask,
564 void (*callback)(int status,
565 struct ib_sa_path_rec *resp,
566 void *context),
567 void *context,
568 struct ib_sa_query **sa_query)
569{
570 struct ib_sa_path_query *query;
571 struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
572 struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port];
573 struct ib_mad_agent *agent = port->agent;
574 int ret;
575
576 query = kmalloc(sizeof *query, gfp_mask);
577 if (!query)
578 return -ENOMEM;
579 query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
580 if (!query->sa_query.mad) {
581 kfree(query);
582 return -ENOMEM;
583 }
584
585 query->callback = callback;
586 query->context = context;
587
588 init_mad(query->sa_query.mad, agent);
589
590 query->sa_query.callback = ib_sa_path_rec_callback;
591 query->sa_query.release = ib_sa_path_rec_release;
592 query->sa_query.port = port;
593 query->sa_query.mad->mad_hdr.method = IB_MGMT_METHOD_GET;
594 query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
595 query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
596
597 ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
598 rec, query->sa_query.mad->data);
599
600 *sa_query = &query->sa_query;
601 ret = send_mad(&query->sa_query, timeout_ms);
602 if (ret) {
603 *sa_query = NULL;
604 kfree(query->sa_query.mad);
605 kfree(query);
606 }
607
608 return ret ? ret : query->sa_query.id;
609}
610EXPORT_SYMBOL(ib_sa_path_rec_get);
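
The query API above is asynchronous: the caller gets back a query ID and a query pointer, and the callback fires from the MAD layer when the response, a timeout or a cancellation arrives. A minimal caller sketch (not part of the patch) is shown below; the component mask bits IB_SA_PATH_REC_DGID, IB_SA_PATH_REC_SGID and IB_SA_PATH_REC_NUMB_PATH are assumed to be provided by <ib_sa.h>, and the GIDs come from the caller.

#include <linux/string.h>
#include <ib_sa.h>

static void my_path_callback(int status, struct ib_sa_path_rec *resp,
			     void *context)
{
	/* resp is only valid when status == 0 */
	if (status)
		printk(KERN_WARNING "path query failed: %d\n", status);
	else
		printk(KERN_INFO "path record resolved\n");
}

static int my_path_lookup(struct ib_device *device, u8 port_num,
			  union ib_gid *dgid, union ib_gid *sgid,
			  struct ib_sa_query **query)
{
	struct ib_sa_path_rec rec;
	int id;

	memset(&rec, 0, sizeof rec);
	rec.dgid      = *dgid;
	rec.sgid      = *sgid;
	rec.numb_path = 1;

	id = ib_sa_path_rec_get(device, port_num, &rec,
				IB_SA_PATH_REC_DGID |	/* assumed mask bits */
				IB_SA_PATH_REC_SGID |
				IB_SA_PATH_REC_NUMB_PATH,
				1000, GFP_KERNEL,
				my_path_callback, NULL, query);
	if (id < 0)
		return id;

	/* a caller that decides not to wait can cancel; the callback
	 * then completes with -EINTR: */
	ib_sa_cancel_query(id, *query);
	return 0;
}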
611
612static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
613 int status,
614 struct ib_sa_mad *mad)
615{
616 struct ib_sa_mcmember_query *query =
617 container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
618
619 if (mad) {
620 struct ib_sa_mcmember_rec rec;
621
622 ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
623 mad->data, &rec);
624 query->callback(status, &rec, query->context);
625 } else
626 query->callback(status, NULL, query->context);
627}
628
629static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
630{
631 kfree(sa_query->mad);
632 kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
633}
634
635int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
636 u8 method,
637 struct ib_sa_mcmember_rec *rec,
638 ib_sa_comp_mask comp_mask,
639 int timeout_ms, int gfp_mask,
640 void (*callback)(int status,
641 struct ib_sa_mcmember_rec *resp,
642 void *context),
643 void *context,
644 struct ib_sa_query **sa_query)
645{
646 struct ib_sa_mcmember_query *query;
647 struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
648 struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port];
649 struct ib_mad_agent *agent = port->agent;
650 int ret;
651
652 query = kmalloc(sizeof *query, gfp_mask);
653 if (!query)
654 return -ENOMEM;
655 query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
656 if (!query->sa_query.mad) {
657 kfree(query);
658 return -ENOMEM;
659 }
660
661 query->callback = callback;
662 query->context = context;
663
664 init_mad(query->sa_query.mad, agent);
665
666 query->sa_query.callback = ib_sa_mcmember_rec_callback;
667 query->sa_query.release = ib_sa_mcmember_rec_release;
668 query->sa_query.port = port;
669 query->sa_query.mad->mad_hdr.method = method;
670 query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
671 query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
672
673 ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
674 rec, query->sa_query.mad->data);
675
676 *sa_query = &query->sa_query;
677 ret = send_mad(&query->sa_query, timeout_ms);
678 if (ret) {
679 *sa_query = NULL;
680 kfree(query->sa_query.mad);
681 kfree(query);
682 }
683
684 return ret ? ret : query->sa_query.id;
685}
686EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
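
ib_sa_mcmember_rec_query() follows the same pattern but lets the caller choose the SA method, so the one entry point serves both lookups and joins. A hedged sketch (not part of the patch) of a simple MCMemberRecord get by MGID, assuming the IB_SA_MCMEMBER_REC_MGID component mask bit from <ib_sa.h>:

static void my_mcmember_callback(int status, struct ib_sa_mcmember_rec *resp,
				 void *context)
{
	if (status)
		printk(KERN_WARNING "MCMemberRecord query failed: %d\n", status);
}

static int my_mcmember_lookup(struct ib_device *device, u8 port_num,
			      union ib_gid *mgid, struct ib_sa_query **query)
{
	struct ib_sa_mcmember_rec rec;

	memset(&rec, 0, sizeof rec);
	rec.mgid = *mgid;

	/* returns the query ID (>= 0) or a negative error code */
	return ib_sa_mcmember_rec_query(device, port_num,
					IB_MGMT_METHOD_GET, &rec,
					IB_SA_MCMEMBER_REC_MGID, /* assumed */
					1000, GFP_KERNEL,
					my_mcmember_callback, NULL, query);
}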
687
688static void send_handler(struct ib_mad_agent *agent,
689 struct ib_mad_send_wc *mad_send_wc)
690{
691 struct ib_sa_query *query;
692 unsigned long flags;
693
694 spin_lock_irqsave(&idr_lock, flags);
695 query = idr_find(&query_idr, mad_send_wc->wr_id);
696 spin_unlock_irqrestore(&idr_lock, flags);
697
698 if (!query)
699 return;
700
701 switch (mad_send_wc->status) {
702 case IB_WC_SUCCESS:
703 /* No callback -- already got recv */
704 break;
705 case IB_WC_RESP_TIMEOUT_ERR:
706 query->callback(query, -ETIMEDOUT, NULL);
707 break;
708 case IB_WC_WR_FLUSH_ERR:
709 query->callback(query, -EINTR, NULL);
710 break;
711 default:
712 query->callback(query, -EIO, NULL);
713 break;
714 }
715
716 dma_unmap_single(agent->device->dma_device,
717 pci_unmap_addr(query, mapping),
718 sizeof (struct ib_sa_mad),
719 DMA_TO_DEVICE);
720 kref_put(&query->sm_ah->ref, free_sm_ah);
721
722 query->release(query);
723
724 spin_lock_irqsave(&idr_lock, flags);
725 idr_remove(&query_idr, mad_send_wc->wr_id);
726 spin_unlock_irqrestore(&idr_lock, flags);
727}
728
729static void recv_handler(struct ib_mad_agent *mad_agent,
730 struct ib_mad_recv_wc *mad_recv_wc)
731{
732 struct ib_sa_query *query;
733 unsigned long flags;
734
735 spin_lock_irqsave(&idr_lock, flags);
736 query = idr_find(&query_idr, mad_recv_wc->wc->wr_id);
737 spin_unlock_irqrestore(&idr_lock, flags);
738
739 if (query) {
740 if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
741 query->callback(query,
742 mad_recv_wc->recv_buf.mad->mad_hdr.status ?
743 -EINVAL : 0,
744 (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
745 else
746 query->callback(query, -EIO, NULL);
747 }
748
749 ib_free_recv_mad(mad_recv_wc);
750}
751
752static void ib_sa_add_one(struct ib_device *device)
753{
754 struct ib_sa_device *sa_dev;
755 int s, e, i;
756
757 if (device->node_type == IB_NODE_SWITCH)
758 s = e = 0;
759 else {
760 s = 1;
761 e = device->phys_port_cnt;
762 }
763
764 sa_dev = kmalloc(sizeof *sa_dev +
765 (e - s + 1) * sizeof (struct ib_sa_port),
766 GFP_KERNEL);
767 if (!sa_dev)
768 return;
769
770 sa_dev->start_port = s;
771 sa_dev->end_port = e;
772
773 for (i = 0; i <= e - s; ++i) {
774 sa_dev->port[i].mr = NULL;
775 sa_dev->port[i].sm_ah = NULL;
776 sa_dev->port[i].port_num = i + s;
777 spin_lock_init(&sa_dev->port[i].ah_lock);
778
779 sa_dev->port[i].agent =
780 ib_register_mad_agent(device, i + s, IB_QPT_GSI,
781 NULL, 0, send_handler,
782 recv_handler, sa_dev);
783 if (IS_ERR(sa_dev->port[i].agent))
784 goto err;
785
786 sa_dev->port[i].mr = ib_get_dma_mr(sa_dev->port[i].agent->qp->pd,
787 IB_ACCESS_LOCAL_WRITE);
788 if (IS_ERR(sa_dev->port[i].mr)) {
789 ib_unregister_mad_agent(sa_dev->port[i].agent);
790 goto err;
791 }
792
793 INIT_WORK(&sa_dev->port[i].update_task,
794 update_sm_ah, &sa_dev->port[i]);
795 }
796
797 ib_set_client_data(device, &sa_client, sa_dev);
798
799 /*
800 * We register our event handler after everything is set up,
801 * and then update our cached info after the event handler is
802 * registered to avoid any problems if a port changes state
803 * during our initialization.
804 */
805
806 INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
807 if (ib_register_event_handler(&sa_dev->event_handler))
808 goto err;
809
810 for (i = 0; i <= e - s; ++i)
811 update_sm_ah(&sa_dev->port[i]);
812
813 return;
814
815err:
816 while (--i >= 0) {
817 ib_dereg_mr(sa_dev->port[i].mr);
818 ib_unregister_mad_agent(sa_dev->port[i].agent);
819 }
820
821 kfree(sa_dev);
822
823 return;
824}
825
826static void ib_sa_remove_one(struct ib_device *device)
827{
828 struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
829 int i;
830
831 if (!sa_dev)
832 return;
833
834 ib_unregister_event_handler(&sa_dev->event_handler);
835
836 for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
837 ib_unregister_mad_agent(sa_dev->port[i].agent);
838 kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
839 }
840
841 kfree(sa_dev);
842}
843
844static int __init ib_sa_init(void)
845{
846 int ret;
847
848 spin_lock_init(&idr_lock);
849 spin_lock_init(&tid_lock);
850
851 get_random_bytes(&tid, sizeof tid);
852
853 ret = ib_register_client(&sa_client);
854 if (ret)
855 printk(KERN_ERR "Couldn't register ib_sa client\n");
856
857 return ret;
858}
859
860static void __exit ib_sa_cleanup(void)
861{
862 ib_unregister_client(&sa_client);
863}
864
865module_init(ib_sa_init);
866module_exit(ib_sa_cleanup);
diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c
new file mode 100644
index 000000000000..b4b284324a33
--- /dev/null
+++ b/drivers/infiniband/core/smi.c
@@ -0,0 +1,234 @@
1/*
2 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 * $Id: smi.c 1389 2004-12-27 22:56:47Z roland $
37 */
38
39#include <ib_smi.h>
40#include "smi.h"
41
42/*
43 * Fixup a directed route SMP for sending
44 * Return 0 if the SMP should be discarded
45 */
46int smi_handle_dr_smp_send(struct ib_smp *smp,
47 u8 node_type,
48 int port_num)
49{
50 u8 hop_ptr, hop_cnt;
51
52 hop_ptr = smp->hop_ptr;
53 hop_cnt = smp->hop_cnt;
54
55 /* See section 14.2.2.2, Vol 1 IB spec */
56 if (!ib_get_smp_direction(smp)) {
57 /* C14-9:1 */
58 if (hop_cnt && hop_ptr == 0) {
59 smp->hop_ptr++;
60 return (smp->initial_path[smp->hop_ptr] ==
61 port_num);
62 }
63
64 /* C14-9:2 */
65 if (hop_ptr && hop_ptr < hop_cnt) {
66 if (node_type != IB_NODE_SWITCH)
67 return 0;
68
69 /* smp->return_path set when received */
70 smp->hop_ptr++;
71 return (smp->initial_path[smp->hop_ptr] ==
72 port_num);
73 }
74
75 /* C14-9:3 -- We're at the end of the DR segment of path */
76 if (hop_ptr == hop_cnt) {
77 /* smp->return_path set when received */
78 smp->hop_ptr++;
79 return (node_type == IB_NODE_SWITCH ||
80 smp->dr_dlid == IB_LID_PERMISSIVE);
81 }
82
83 /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
84 /* C14-9:5 -- Fail unreasonable hop pointer */
85 return (hop_ptr == hop_cnt + 1);
86
87 } else {
88 /* C14-13:1 */
89 if (hop_cnt && hop_ptr == hop_cnt + 1) {
90 smp->hop_ptr--;
91 return (smp->return_path[smp->hop_ptr] ==
92 port_num);
93 }
94
95 /* C14-13:2 */
96 if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
97 if (node_type != IB_NODE_SWITCH)
98 return 0;
99
100 smp->hop_ptr--;
101 return (smp->return_path[smp->hop_ptr] ==
102 port_num);
103 }
104
105 /* C14-13:3 -- at the end of the DR segment of path */
106 if (hop_ptr == 1) {
107 smp->hop_ptr--;
108 /* C14-13:3 -- SMPs destined for SM shouldn't be here */
109 return (node_type == IB_NODE_SWITCH ||
110 smp->dr_slid == IB_LID_PERMISSIVE);
111 }
112
113 /* C14-13:4 -- hop_ptr = 0 -> should have gone to SM */
114 if (hop_ptr == 0)
115 return 1;
116
117 /* C14-13:5 -- Check for unreasonable hop pointer */
118 return 0;
119 }
120}
121
122/*
123 * Adjust information for a received SMP
124 * Return 0 if the SMP should be dropped
125 */
126int smi_handle_dr_smp_recv(struct ib_smp *smp,
127 u8 node_type,
128 int port_num,
129 int phys_port_cnt)
130{
131 u8 hop_ptr, hop_cnt;
132
133 hop_ptr = smp->hop_ptr;
134 hop_cnt = smp->hop_cnt;
135
136 /* See section 14.2.2.2, Vol 1 IB spec */
137 if (!ib_get_smp_direction(smp)) {
138 /* C14-9:1 -- sender should have incremented hop_ptr */
139 if (hop_cnt && hop_ptr == 0)
140 return 0;
141
142 /* C14-9:2 -- intermediate hop */
143 if (hop_ptr && hop_ptr < hop_cnt) {
144 if (node_type != IB_NODE_SWITCH)
145 return 0;
146
147 smp->return_path[hop_ptr] = port_num;
148 /* smp->hop_ptr updated when sending */
149 return (smp->initial_path[hop_ptr+1] <= phys_port_cnt);
150 }
151
152 /* C14-9:3 -- We're at the end of the DR segment of path */
153 if (hop_ptr == hop_cnt) {
154 if (hop_cnt)
155 smp->return_path[hop_ptr] = port_num;
156 /* smp->hop_ptr updated when sending */
157
158 return (node_type == IB_NODE_SWITCH ||
159 smp->dr_dlid == IB_LID_PERMISSIVE);
160 }
161
162 /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
163 /* C14-9:5 -- fail unreasonable hop pointer */
164 return (hop_ptr == hop_cnt + 1);
165
166 } else {
167
168 /* C14-13:1 */
169 if (hop_cnt && hop_ptr == hop_cnt + 1) {
170 smp->hop_ptr--;
171 return (smp->return_path[smp->hop_ptr] ==
172 port_num);
173 }
174
175 /* C14-13:2 */
176 if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
177 if (node_type != IB_NODE_SWITCH)
178 return 0;
179
180 /* smp->hop_ptr updated when sending */
181 return (smp->return_path[hop_ptr-1] <= phys_port_cnt);
182 }
183
184 /* C14-13:3 -- We're at the end of the DR segment of path */
185 if (hop_ptr == 1) {
186 if (smp->dr_slid == IB_LID_PERMISSIVE) {
187 /* giving SMP to SM - update hop_ptr */
188 smp->hop_ptr--;
189 return 1;
190 }
191 /* smp->hop_ptr updated when sending */
192 return (node_type == IB_NODE_SWITCH);
193 }
194
195 /* C14-13:4 -- hop_ptr = 0 -> give to SM */
196 /* C14-13:5 -- Check for unreasonable hop pointer */
197 return (hop_ptr == 0);
198 }
199}
200
201/*
202 * Return 1 if the received DR SMP should be forwarded to the send queue
203 * Return 0 if the SMP should be completed up the stack
204 */
205int smi_check_forward_dr_smp(struct ib_smp *smp)
206{
207 u8 hop_ptr, hop_cnt;
208
209 hop_ptr = smp->hop_ptr;
210 hop_cnt = smp->hop_cnt;
211
212 if (!ib_get_smp_direction(smp)) {
213 /* C14-9:2 -- intermediate hop */
214 if (hop_ptr && hop_ptr < hop_cnt)
215 return 1;
216
217 /* C14-9:3 -- at the end of the DR segment of path */
218 if (hop_ptr == hop_cnt)
219 return (smp->dr_dlid == IB_LID_PERMISSIVE);
220
221 /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
222 if (hop_ptr == hop_cnt + 1)
223 return 1;
224 } else {
225 /* C14-13:2 */
226 if (2 <= hop_ptr && hop_ptr <= hop_cnt)
227 return 1;
228
229 /* C14-13:3 -- at the end of the DR segment of path */
230 if (hop_ptr == 1)
231 return (smp->dr_slid != IB_LID_PERMISSIVE);
232 }
233 return 0;
234}
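
To make the hop pointer rules concrete, the following sketch (not part of the patch) exercises the send-side fixup for an outgoing directed route SMP originated at a CA with a two-hop path; the field names match <ib_smi.h> as used above, and the port number is arbitrary.

#include <ib_smi.h>
#include "smi.h"

static int example_dr_smp_send(void)
{
	struct ib_smp smp = {
		.hop_ptr = 0,		/* originator: C14-9:1 applies */
		.hop_cnt = 2,
		.dr_slid = IB_LID_PERMISSIVE,
		.dr_dlid = IB_LID_PERMISSIVE,
	};

	smp.initial_path[1] = 1;	/* first hop leaves through port 1 */

	/*
	 * smi_handle_dr_smp_send() advances hop_ptr to 1 and returns
	 * non-zero because initial_path[1] matches the sending port.
	 */
	return smi_handle_dr_smp_send(&smp, IB_NODE_CA, 1);
}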
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
new file mode 100644
index 000000000000..db25503a0736
--- /dev/null
+++ b/drivers/infiniband/core/smi.h
@@ -0,0 +1,67 @@
1/*
2 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 * $Id: smi.h 1389 2004-12-27 22:56:47Z roland $
37 */
38
39#ifndef __SMI_H_
40#define __SMI_H_
41
42int smi_handle_dr_smp_recv(struct ib_smp *smp,
43 u8 node_type,
44 int port_num,
45 int phys_port_cnt);
46extern int smi_check_forward_dr_smp(struct ib_smp *smp);
47extern int smi_handle_dr_smp_send(struct ib_smp *smp,
48 u8 node_type,
49 int port_num);
50extern int smi_check_local_dr_smp(struct ib_smp *smp,
51 struct ib_device *device,
52 int port_num);
53
54/*
55 * Return 1 if the SMP should be handled by the local SMA/SM via process_mad
56 */
57static inline int smi_check_local_smp(struct ib_mad_agent *mad_agent,
58 struct ib_smp *smp)
59{
60 /* C14-9:3 -- We're at the end of the DR segment of path */
61 /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
62 return ((mad_agent->device->process_mad &&
63 !ib_get_smp_direction(smp) &&
64 (smp->hop_ptr == smp->hop_cnt + 1)));
65}
66
67#endif /* __SMI_H_ */
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
new file mode 100644
index 000000000000..3a413f72ff6d
--- /dev/null
+++ b/drivers/infiniband/core/sysfs.c
@@ -0,0 +1,762 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: sysfs.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include "core_priv.h"
36
37#include <ib_mad.h>
38
39struct ib_port {
40 struct kobject kobj;
41 struct ib_device *ibdev;
42 struct attribute_group gid_group;
43 struct attribute **gid_attr;
44 struct attribute_group pkey_group;
45 struct attribute **pkey_attr;
46 u8 port_num;
47};
48
49struct port_attribute {
50 struct attribute attr;
51 ssize_t (*show)(struct ib_port *, struct port_attribute *, char *buf);
52 ssize_t (*store)(struct ib_port *, struct port_attribute *,
53 const char *buf, size_t count);
54};
55
56#define PORT_ATTR(_name, _mode, _show, _store) \
57struct port_attribute port_attr_##_name = __ATTR(_name, _mode, _show, _store)
58
59#define PORT_ATTR_RO(_name) \
60struct port_attribute port_attr_##_name = __ATTR_RO(_name)
61
62struct port_table_attribute {
63 struct port_attribute attr;
64 int index;
65};
66
67static ssize_t port_attr_show(struct kobject *kobj,
68 struct attribute *attr, char *buf)
69{
70 struct port_attribute *port_attr =
71 container_of(attr, struct port_attribute, attr);
72 struct ib_port *p = container_of(kobj, struct ib_port, kobj);
73
74 if (!port_attr->show)
75 return 0;
76
77 return port_attr->show(p, port_attr, buf);
78}
79
80static struct sysfs_ops port_sysfs_ops = {
81 .show = port_attr_show
82};
83
84static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
85 char *buf)
86{
87 struct ib_port_attr attr;
88 ssize_t ret;
89
90 static const char *state_name[] = {
91 [IB_PORT_NOP] = "NOP",
92 [IB_PORT_DOWN] = "DOWN",
93 [IB_PORT_INIT] = "INIT",
94 [IB_PORT_ARMED] = "ARMED",
95 [IB_PORT_ACTIVE] = "ACTIVE",
96 [IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER"
97 };
98
99 ret = ib_query_port(p->ibdev, p->port_num, &attr);
100 if (ret)
101 return ret;
102
103 return sprintf(buf, "%d: %s\n", attr.state,
104	       attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ?
105 state_name[attr.state] : "UNKNOWN");
106}
107
108static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused,
109 char *buf)
110{
111 struct ib_port_attr attr;
112 ssize_t ret;
113
114 ret = ib_query_port(p->ibdev, p->port_num, &attr);
115 if (ret)
116 return ret;
117
118 return sprintf(buf, "0x%x\n", attr.lid);
119}
120
121static ssize_t lid_mask_count_show(struct ib_port *p,
122 struct port_attribute *unused,
123 char *buf)
124{
125 struct ib_port_attr attr;
126 ssize_t ret;
127
128 ret = ib_query_port(p->ibdev, p->port_num, &attr);
129 if (ret)
130 return ret;
131
132 return sprintf(buf, "%d\n", attr.lmc);
133}
134
135static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused,
136 char *buf)
137{
138 struct ib_port_attr attr;
139 ssize_t ret;
140
141 ret = ib_query_port(p->ibdev, p->port_num, &attr);
142 if (ret)
143 return ret;
144
145 return sprintf(buf, "0x%x\n", attr.sm_lid);
146}
147
148static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused,
149 char *buf)
150{
151 struct ib_port_attr attr;
152 ssize_t ret;
153
154 ret = ib_query_port(p->ibdev, p->port_num, &attr);
155 if (ret)
156 return ret;
157
158 return sprintf(buf, "%d\n", attr.sm_sl);
159}
160
161static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused,
162 char *buf)
163{
164 struct ib_port_attr attr;
165 ssize_t ret;
166
167 ret = ib_query_port(p->ibdev, p->port_num, &attr);
168 if (ret)
169 return ret;
170
171 return sprintf(buf, "0x%08x\n", attr.port_cap_flags);
172}
173
174static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
175 char *buf)
176{
177 struct ib_port_attr attr;
178 char *speed = "";
179 int rate;
180 ssize_t ret;
181
182 ret = ib_query_port(p->ibdev, p->port_num, &attr);
183 if (ret)
184 return ret;
185
186 switch (attr.active_speed) {
187 case 2: speed = " DDR"; break;
188 case 4: speed = " QDR"; break;
189 }
190
191 rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed;
192 if (rate < 0)
193 return -EINVAL;
194
195 return sprintf(buf, "%d%s Gb/sec (%dX%s)\n",
196 rate / 10, rate % 10 ? ".5" : "",
197 ib_width_enum_to_int(attr.active_width), speed);
198}
199
200static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
201 char *buf)
202{
203 struct ib_port_attr attr;
204
205 ssize_t ret;
206
207 ret = ib_query_port(p->ibdev, p->port_num, &attr);
208 if (ret)
209 return ret;
210
211 switch (attr.phys_state) {
212 case 1: return sprintf(buf, "1: Sleep\n");
213 case 2: return sprintf(buf, "2: Polling\n");
214 case 3: return sprintf(buf, "3: Disabled\n");
215 case 4: return sprintf(buf, "4: PortConfigurationTraining\n");
216 case 5: return sprintf(buf, "5: LinkUp\n");
217 case 6: return sprintf(buf, "6: LinkErrorRecovery\n");
218 case 7: return sprintf(buf, "7: Phy Test\n");
219 default: return sprintf(buf, "%d: <unknown>\n", attr.phys_state);
220 }
221}
222
223static PORT_ATTR_RO(state);
224static PORT_ATTR_RO(lid);
225static PORT_ATTR_RO(lid_mask_count);
226static PORT_ATTR_RO(sm_lid);
227static PORT_ATTR_RO(sm_sl);
228static PORT_ATTR_RO(cap_mask);
229static PORT_ATTR_RO(rate);
230static PORT_ATTR_RO(phys_state);
231
232static struct attribute *port_default_attrs[] = {
233 &port_attr_state.attr,
234 &port_attr_lid.attr,
235 &port_attr_lid_mask_count.attr,
236 &port_attr_sm_lid.attr,
237 &port_attr_sm_sl.attr,
238 &port_attr_cap_mask.attr,
239 &port_attr_rate.attr,
240 &port_attr_phys_state.attr,
241 NULL
242};
243
244static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
245 char *buf)
246{
247 struct port_table_attribute *tab_attr =
248 container_of(attr, struct port_table_attribute, attr);
249 union ib_gid gid;
250 ssize_t ret;
251
252 ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid);
253 if (ret)
254 return ret;
255
256 return sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
257 be16_to_cpu(((u16 *) gid.raw)[0]),
258 be16_to_cpu(((u16 *) gid.raw)[1]),
259 be16_to_cpu(((u16 *) gid.raw)[2]),
260 be16_to_cpu(((u16 *) gid.raw)[3]),
261 be16_to_cpu(((u16 *) gid.raw)[4]),
262 be16_to_cpu(((u16 *) gid.raw)[5]),
263 be16_to_cpu(((u16 *) gid.raw)[6]),
264 be16_to_cpu(((u16 *) gid.raw)[7]));
265}
266
267static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
268 char *buf)
269{
270 struct port_table_attribute *tab_attr =
271 container_of(attr, struct port_table_attribute, attr);
272 u16 pkey;
273 ssize_t ret;
274
275 ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey);
276 if (ret)
277 return ret;
278
279 return sprintf(buf, "0x%04x\n", pkey);
280}
281
282#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
283struct port_table_attribute port_pma_attr_##_name = { \
284 .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
285 .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
286}
287
288static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
289 char *buf)
290{
291 struct port_table_attribute *tab_attr =
292 container_of(attr, struct port_table_attribute, attr);
293 int offset = tab_attr->index & 0xffff;
294 int width = (tab_attr->index >> 16) & 0xff;
295 struct ib_mad *in_mad = NULL;
296 struct ib_mad *out_mad = NULL;
297 ssize_t ret;
298
299 if (!p->ibdev->process_mad)
300 return sprintf(buf, "N/A (no PMA)\n");
301
302 in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
303	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
304 if (!in_mad || !out_mad) {
305 ret = -ENOMEM;
306 goto out;
307 }
308
309 memset(in_mad, 0, sizeof *in_mad);
310 in_mad->mad_hdr.base_version = 1;
311 in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
312 in_mad->mad_hdr.class_version = 1;
313 in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
314 in_mad->mad_hdr.attr_id = cpu_to_be16(0x12); /* PortCounters */
315
316 in_mad->data[41] = p->port_num; /* PortSelect field */
317
318 if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
319 p->port_num, NULL, NULL, in_mad, out_mad) &
320 (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
321 (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
322 ret = -EINVAL;
323 goto out;
324 }
325
326 switch (width) {
327 case 4:
328 ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
329 (offset % 4)) & 0xf);
330 break;
331 case 8:
332 ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
333 break;
334 case 16:
335 ret = sprintf(buf, "%u\n",
336 be16_to_cpup((u16 *)(out_mad->data + 40 + offset / 8)));
337 break;
338 case 32:
339 ret = sprintf(buf, "%u\n",
340 be32_to_cpup((u32 *)(out_mad->data + 40 + offset / 8)));
341 break;
342 default:
343 ret = 0;
344 }
345
346out:
347 kfree(in_mad);
348 kfree(out_mad);
349
350 return ret;
351}
352
353static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
354static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48);
355static PORT_PMA_ATTR(link_downed , 2, 8, 56);
356static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64);
357static PORT_PMA_ATTR(port_rcv_remote_physical_errors, 4, 16, 80);
358static PORT_PMA_ATTR(port_rcv_switch_relay_errors , 5, 16, 96);
359static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112);
360static PORT_PMA_ATTR(port_xmit_constraint_errors , 7, 8, 128);
361static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136);
362static PORT_PMA_ATTR(local_link_integrity_errors , 9, 4, 152);
363static PORT_PMA_ATTR(excessive_buffer_overrun_errors, 10, 4, 156);
364static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176);
365static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192);
366static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224);
367static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
368static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
369
370static struct attribute *pma_attrs[] = {
371 &port_pma_attr_symbol_error.attr.attr,
372 &port_pma_attr_link_error_recovery.attr.attr,
373 &port_pma_attr_link_downed.attr.attr,
374 &port_pma_attr_port_rcv_errors.attr.attr,
375 &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
376 &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
377 &port_pma_attr_port_xmit_discards.attr.attr,
378 &port_pma_attr_port_xmit_constraint_errors.attr.attr,
379 &port_pma_attr_port_rcv_constraint_errors.attr.attr,
380 &port_pma_attr_local_link_integrity_errors.attr.attr,
381 &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
382 &port_pma_attr_VL15_dropped.attr.attr,
383 &port_pma_attr_port_xmit_data.attr.attr,
384 &port_pma_attr_port_rcv_data.attr.attr,
385 &port_pma_attr_port_xmit_packets.attr.attr,
386 &port_pma_attr_port_rcv_packets.attr.attr,
387 NULL
388};
389
390static struct attribute_group pma_group = {
391 .name = "counters",
392 .attrs = pma_attrs
393};
394
395static void ib_port_release(struct kobject *kobj)
396{
397 struct ib_port *p = container_of(kobj, struct ib_port, kobj);
398 struct attribute *a;
399 int i;
400
401 for (i = 0; (a = p->gid_attr[i]); ++i) {
402 kfree(a->name);
403 kfree(a);
404 }
405
406 for (i = 0; (a = p->pkey_attr[i]); ++i) {
407 kfree(a->name);
408 kfree(a);
409 }
410
411 kfree(p->gid_attr);
412 kfree(p);
413}
414
415static struct kobj_type port_type = {
416 .release = ib_port_release,
417 .sysfs_ops = &port_sysfs_ops,
418 .default_attrs = port_default_attrs
419};
420
421static void ib_device_release(struct class_device *cdev)
422{
423 struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
424
425 kfree(dev);
426}
427
428static int ib_device_hotplug(struct class_device *cdev, char **envp,
429 int num_envp, char *buf, int size)
430{
431 struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
432 int i = 0, len = 0;
433
434 if (add_hotplug_env_var(envp, num_envp, &i, buf, size, &len,
435 "NAME=%s", dev->name))
436 return -ENOMEM;
437
438 /*
439 * It might be nice to pass the node GUID to hotplug, but
440 * right now the only way to get it is to query the device
441 * provider, and this can crash during device removal because
442	 * we will be running after driver removal has started.
443 * We could add a node_guid field to struct ib_device, or we
444 * could just let the hotplug script read the node GUID from
445 * sysfs when devices are added.
446 */
447
448 envp[i] = NULL;
449 return 0;
450}
451
452static int alloc_group(struct attribute ***attr,
453 ssize_t (*show)(struct ib_port *,
454 struct port_attribute *, char *buf),
455 int len)
456{
457 struct port_table_attribute ***tab_attr =
458 (struct port_table_attribute ***) attr;
459 int i;
460 int ret;
461
462 *tab_attr = kmalloc((1 + len) * sizeof *tab_attr, GFP_KERNEL);
463 if (!*tab_attr)
464 return -ENOMEM;
465
466 memset(*tab_attr, 0, (1 + len) * sizeof *tab_attr);
467
468 for (i = 0; i < len; ++i) {
469 (*tab_attr)[i] = kmalloc(sizeof *(*tab_attr)[i], GFP_KERNEL);
470 if (!(*tab_attr)[i]) {
471 ret = -ENOMEM;
472 goto err;
473 }
474 memset((*tab_attr)[i], 0, sizeof *(*tab_attr)[i]);
475 (*tab_attr)[i]->attr.attr.name = kmalloc(8, GFP_KERNEL);
476 if (!(*tab_attr)[i]->attr.attr.name) {
477 ret = -ENOMEM;
478 goto err;
479 }
480
481 if (snprintf((*tab_attr)[i]->attr.attr.name, 8, "%d", i) >= 8) {
482 ret = -ENOMEM;
483 goto err;
484 }
485
486 (*tab_attr)[i]->attr.attr.mode = S_IRUGO;
487 (*tab_attr)[i]->attr.attr.owner = THIS_MODULE;
488 (*tab_attr)[i]->attr.show = show;
489 (*tab_attr)[i]->index = i;
490 }
491
492 return 0;
493
494err:
495 for (i = 0; i < len; ++i) {
496 if ((*tab_attr)[i])
497 kfree((*tab_attr)[i]->attr.attr.name);
498 kfree((*tab_attr)[i]);
499 }
500
501 kfree(*tab_attr);
502
503 return ret;
504}
505
506static int add_port(struct ib_device *device, int port_num)
507{
508 struct ib_port *p;
509 struct ib_port_attr attr;
510 int i;
511 int ret;
512
513 ret = ib_query_port(device, port_num, &attr);
514 if (ret)
515 return ret;
516
517 p = kmalloc(sizeof *p, GFP_KERNEL);
518 if (!p)
519 return -ENOMEM;
520 memset(p, 0, sizeof *p);
521
522 p->ibdev = device;
523 p->port_num = port_num;
524 p->kobj.ktype = &port_type;
525
526 p->kobj.parent = kobject_get(&device->ports_parent);
527 if (!p->kobj.parent) {
528 ret = -EBUSY;
529 goto err;
530 }
531
532 ret = kobject_set_name(&p->kobj, "%d", port_num);
533 if (ret)
534 goto err_put;
535
536 ret = kobject_register(&p->kobj);
537 if (ret)
538 goto err_put;
539
540 ret = sysfs_create_group(&p->kobj, &pma_group);
541 if (ret)
542 goto err_put;
543
544 ret = alloc_group(&p->gid_attr, show_port_gid, attr.gid_tbl_len);
545 if (ret)
546 goto err_remove_pma;
547
548 p->gid_group.name = "gids";
549 p->gid_group.attrs = p->gid_attr;
550
551 ret = sysfs_create_group(&p->kobj, &p->gid_group);
552 if (ret)
553 goto err_free_gid;
554
555 ret = alloc_group(&p->pkey_attr, show_port_pkey, attr.pkey_tbl_len);
556 if (ret)
557 goto err_remove_gid;
558
559 p->pkey_group.name = "pkeys";
560 p->pkey_group.attrs = p->pkey_attr;
561
562 ret = sysfs_create_group(&p->kobj, &p->pkey_group);
563 if (ret)
564 goto err_free_pkey;
565
566 list_add_tail(&p->kobj.entry, &device->port_list);
567
568 return 0;
569
570err_free_pkey:
571 for (i = 0; i < attr.pkey_tbl_len; ++i) {
572 kfree(p->pkey_attr[i]->name);
573 kfree(p->pkey_attr[i]);
574 }
575
576 kfree(p->pkey_attr);
577
578err_remove_gid:
579 sysfs_remove_group(&p->kobj, &p->gid_group);
580
581err_free_gid:
582 for (i = 0; i < attr.gid_tbl_len; ++i) {
583 kfree(p->gid_attr[i]->name);
584 kfree(p->gid_attr[i]);
585 }
586
587 kfree(p->gid_attr);
588
589err_remove_pma:
590 sysfs_remove_group(&p->kobj, &pma_group);
591
592err_put:
593 kobject_put(&device->ports_parent);
594
595err:
596 kfree(p);
597 return ret;
598}
599
600static ssize_t show_node_type(struct class_device *cdev, char *buf)
601{
602 struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
603
604 switch (dev->node_type) {
605 case IB_NODE_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
606 case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
607 case IB_NODE_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
608 default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
609 }
610}
611
612static ssize_t show_sys_image_guid(struct class_device *cdev, char *buf)
613{
614 struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
615 struct ib_device_attr attr;
616 ssize_t ret;
617
618 ret = ib_query_device(dev, &attr);
619 if (ret)
620 return ret;
621
622 return sprintf(buf, "%04x:%04x:%04x:%04x\n",
623 be16_to_cpu(((u16 *) &attr.sys_image_guid)[0]),
624 be16_to_cpu(((u16 *) &attr.sys_image_guid)[1]),
625 be16_to_cpu(((u16 *) &attr.sys_image_guid)[2]),
626 be16_to_cpu(((u16 *) &attr.sys_image_guid)[3]));
627}
628
629static ssize_t show_node_guid(struct class_device *cdev, char *buf)
630{
631 struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
632 struct ib_device_attr attr;
633 ssize_t ret;
634
635 ret = ib_query_device(dev, &attr);
636 if (ret)
637 return ret;
638
639 return sprintf(buf, "%04x:%04x:%04x:%04x\n",
640 be16_to_cpu(((u16 *) &attr.node_guid)[0]),
641 be16_to_cpu(((u16 *) &attr.node_guid)[1]),
642 be16_to_cpu(((u16 *) &attr.node_guid)[2]),
643 be16_to_cpu(((u16 *) &attr.node_guid)[3]));
644}
645
646static CLASS_DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
647static CLASS_DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
648static CLASS_DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
649
650static struct class_device_attribute *ib_class_attributes[] = {
651 &class_device_attr_node_type,
652 &class_device_attr_sys_image_guid,
653 &class_device_attr_node_guid
654};
655
656static struct class ib_class = {
657 .name = "infiniband",
658 .release = ib_device_release,
659 .hotplug = ib_device_hotplug,
660};
661
662int ib_device_register_sysfs(struct ib_device *device)
663{
664 struct class_device *class_dev = &device->class_dev;
665 int ret;
666 int i;
667
668 class_dev->class = &ib_class;
669 class_dev->class_data = device;
670 strlcpy(class_dev->class_id, device->name, BUS_ID_SIZE);
671
672 INIT_LIST_HEAD(&device->port_list);
673
674 ret = class_device_register(class_dev);
675 if (ret)
676 goto err;
677
678 for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
679 ret = class_device_create_file(class_dev, ib_class_attributes[i]);
680 if (ret)
681 goto err_unregister;
682 }
683
684 device->ports_parent.parent = kobject_get(&class_dev->kobj);
685 if (!device->ports_parent.parent) {
686 ret = -EBUSY;
687 goto err_unregister;
688 }
689 ret = kobject_set_name(&device->ports_parent, "ports");
690 if (ret)
691 goto err_put;
692 ret = kobject_register(&device->ports_parent);
693 if (ret)
694 goto err_put;
695
696 if (device->node_type == IB_NODE_SWITCH) {
697 ret = add_port(device, 0);
698 if (ret)
699 goto err_put;
700 } else {
701 int i;
702
703 for (i = 1; i <= device->phys_port_cnt; ++i) {
704 ret = add_port(device, i);
705 if (ret)
706 goto err_put;
707 }
708 }
709
710 return 0;
711
712err_put:
713 {
714 struct kobject *p, *t;
715 struct ib_port *port;
716
717 list_for_each_entry_safe(p, t, &device->port_list, entry) {
718 list_del(&p->entry);
719 port = container_of(p, struct ib_port, kobj);
720 sysfs_remove_group(p, &pma_group);
721 sysfs_remove_group(p, &port->pkey_group);
722 sysfs_remove_group(p, &port->gid_group);
723 kobject_unregister(p);
724 }
725 }
726
727 kobject_put(&class_dev->kobj);
728
729err_unregister:
730 class_device_unregister(class_dev);
731
732err:
733 return ret;
734}
735
736void ib_device_unregister_sysfs(struct ib_device *device)
737{
738 struct kobject *p, *t;
739 struct ib_port *port;
740
741 list_for_each_entry_safe(p, t, &device->port_list, entry) {
742 list_del(&p->entry);
743 port = container_of(p, struct ib_port, kobj);
744 sysfs_remove_group(p, &pma_group);
745 sysfs_remove_group(p, &port->pkey_group);
746 sysfs_remove_group(p, &port->gid_group);
747 kobject_unregister(p);
748 }
749
750 kobject_unregister(&device->ports_parent);
751 class_device_unregister(&device->class_dev);
752}
753
754int ib_sysfs_setup(void)
755{
756 return class_register(&ib_class);
757}
758
759void ib_sysfs_cleanup(void)
760{
761 class_unregister(&ib_class);
762}
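
The net effect of this file is a per-device tree under /sys/class/infiniband/<device>/ with the node_type, sys_image_guid and node_guid attributes plus a ports/<N>/ directory holding state, lid, lid_mask_count, sm_lid, sm_sl, cap_mask, rate and phys_state, together with the counters/, gids/ and pkeys/ groups. A small userspace sketch (the device name mthca0 is only an example) that reads the port state attribute:

#include <stdio.h>

int main(void)
{
	char buf[64];
	FILE *f = fopen("/sys/class/infiniband/mthca0/ports/1/state", "r");

	if (!f) {
		perror("open port state attribute");
		return 1;
	}
	if (fgets(buf, sizeof buf, f))
		printf("port 1 state: %s", buf);	/* e.g. "4: ACTIVE" */
	fclose(f);
	return 0;
}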
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
new file mode 100644
index 000000000000..dc4eb1db5e96
--- /dev/null
+++ b/drivers/infiniband/core/ud_header.c
@@ -0,0 +1,365 @@
1/*
2 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ud_header.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <linux/errno.h>
36
37#include <ib_pack.h>
38
39#define STRUCT_FIELD(header, field) \
40 .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \
41 .struct_size_bytes = sizeof ((struct ib_unpacked_ ## header *) 0)->field, \
42 .field_name = #header ":" #field
43
44static const struct ib_field lrh_table[] = {
45 { STRUCT_FIELD(lrh, virtual_lane),
46 .offset_words = 0,
47 .offset_bits = 0,
48 .size_bits = 4 },
49 { STRUCT_FIELD(lrh, link_version),
50 .offset_words = 0,
51 .offset_bits = 4,
52 .size_bits = 4 },
53 { STRUCT_FIELD(lrh, service_level),
54 .offset_words = 0,
55 .offset_bits = 8,
56 .size_bits = 4 },
57 { RESERVED,
58 .offset_words = 0,
59 .offset_bits = 12,
60 .size_bits = 2 },
61 { STRUCT_FIELD(lrh, link_next_header),
62 .offset_words = 0,
63 .offset_bits = 14,
64 .size_bits = 2 },
65 { STRUCT_FIELD(lrh, destination_lid),
66 .offset_words = 0,
67 .offset_bits = 16,
68 .size_bits = 16 },
69 { RESERVED,
70 .offset_words = 1,
71 .offset_bits = 0,
72 .size_bits = 5 },
73 { STRUCT_FIELD(lrh, packet_length),
74 .offset_words = 1,
75 .offset_bits = 5,
76 .size_bits = 11 },
77 { STRUCT_FIELD(lrh, source_lid),
78 .offset_words = 1,
79 .offset_bits = 16,
80 .size_bits = 16 }
81};
82
83static const struct ib_field grh_table[] = {
84 { STRUCT_FIELD(grh, ip_version),
85 .offset_words = 0,
86 .offset_bits = 0,
87 .size_bits = 4 },
88 { STRUCT_FIELD(grh, traffic_class),
89 .offset_words = 0,
90 .offset_bits = 4,
91 .size_bits = 8 },
92 { STRUCT_FIELD(grh, flow_label),
93 .offset_words = 0,
94 .offset_bits = 12,
95 .size_bits = 20 },
96 { STRUCT_FIELD(grh, payload_length),
97 .offset_words = 1,
98 .offset_bits = 0,
99 .size_bits = 16 },
100 { STRUCT_FIELD(grh, next_header),
101 .offset_words = 1,
102 .offset_bits = 16,
103 .size_bits = 8 },
104 { STRUCT_FIELD(grh, hop_limit),
105 .offset_words = 1,
106 .offset_bits = 24,
107 .size_bits = 8 },
108 { STRUCT_FIELD(grh, source_gid),
109 .offset_words = 2,
110 .offset_bits = 0,
111 .size_bits = 128 },
112 { STRUCT_FIELD(grh, destination_gid),
113 .offset_words = 6,
114 .offset_bits = 0,
115 .size_bits = 128 }
116};
117
118static const struct ib_field bth_table[] = {
119 { STRUCT_FIELD(bth, opcode),
120 .offset_words = 0,
121 .offset_bits = 0,
122 .size_bits = 8 },
123 { STRUCT_FIELD(bth, solicited_event),
124 .offset_words = 0,
125 .offset_bits = 8,
126 .size_bits = 1 },
127 { STRUCT_FIELD(bth, mig_req),
128 .offset_words = 0,
129 .offset_bits = 9,
130 .size_bits = 1 },
131 { STRUCT_FIELD(bth, pad_count),
132 .offset_words = 0,
133 .offset_bits = 10,
134 .size_bits = 2 },
135 { STRUCT_FIELD(bth, transport_header_version),
136 .offset_words = 0,
137 .offset_bits = 12,
138 .size_bits = 4 },
139 { STRUCT_FIELD(bth, pkey),
140 .offset_words = 0,
141 .offset_bits = 16,
142 .size_bits = 16 },
143 { RESERVED,
144 .offset_words = 1,
145 .offset_bits = 0,
146 .size_bits = 8 },
147 { STRUCT_FIELD(bth, destination_qpn),
148 .offset_words = 1,
149 .offset_bits = 8,
150 .size_bits = 24 },
151 { STRUCT_FIELD(bth, ack_req),
152 .offset_words = 2,
153 .offset_bits = 0,
154 .size_bits = 1 },
155 { RESERVED,
156 .offset_words = 2,
157 .offset_bits = 1,
158 .size_bits = 7 },
159 { STRUCT_FIELD(bth, psn),
160 .offset_words = 2,
161 .offset_bits = 8,
162 .size_bits = 24 }
163};
164
165static const struct ib_field deth_table[] = {
166 { STRUCT_FIELD(deth, qkey),
167 .offset_words = 0,
168 .offset_bits = 0,
169 .size_bits = 32 },
170 { RESERVED,
171 .offset_words = 1,
172 .offset_bits = 0,
173 .size_bits = 8 },
174 { STRUCT_FIELD(deth, source_qpn),
175 .offset_words = 1,
176 .offset_bits = 8,
177 .size_bits = 24 }
178};
179
180/**
181 * ib_ud_header_init - Initialize UD header structure
182 * @payload_bytes:Length of packet payload
183 * @grh_present:GRH flag (if non-zero, GRH will be included)
184 * @header:Structure to initialize
185 *
186 * ib_ud_header_init() initializes the lrh.link_version, lrh.link_next_header,
187 * lrh.packet_length, grh.ip_version, grh.payload_length,
188 * grh.next_header, bth.opcode, bth.pad_count and
189 * bth.transport_header_version fields of a &struct ib_ud_header given
190 * the payload length and whether a GRH will be included.
191 */
192void ib_ud_header_init(int payload_bytes,
193 int grh_present,
194 struct ib_ud_header *header)
195{
196 int header_len;
197
198 memset(header, 0, sizeof *header);
199
200 header_len =
201 IB_LRH_BYTES +
202 IB_BTH_BYTES +
203 IB_DETH_BYTES;
204 if (grh_present) {
205 header_len += IB_GRH_BYTES;
206 }
207
208 header->lrh.link_version = 0;
209 header->lrh.link_next_header =
210 grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL;
211 header->lrh.packet_length = (IB_LRH_BYTES +
212 IB_BTH_BYTES +
213 IB_DETH_BYTES +
214 payload_bytes +
215 4 + /* ICRC */
216 3) / 4; /* round up */
217
218 header->grh_present = grh_present;
219 if (grh_present) {
220 header->lrh.packet_length += IB_GRH_BYTES / 4;
221
222 header->grh.ip_version = 6;
223 header->grh.payload_length =
224 cpu_to_be16((IB_BTH_BYTES +
225 IB_DETH_BYTES +
226 payload_bytes +
227 4 + /* ICRC */
228 3) & ~3); /* round up */
229 header->grh.next_header = 0x1b;
230 }
231
232 cpu_to_be16s(&header->lrh.packet_length);
233
234 if (header->immediate_present)
235 header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
236 else
237 header->bth.opcode = IB_OPCODE_UD_SEND_ONLY;
238 header->bth.pad_count = (4 - payload_bytes) & 3;
239 header->bth.transport_header_version = 0;
240}
241EXPORT_SYMBOL(ib_ud_header_init);
242
243/**
244 * ib_ud_header_pack - Pack UD header struct into wire format
245 * @header:UD header struct
246 * @buf:Buffer to pack into
247 *
248 * ib_ud_header_pack() packs the UD header structure @header into wire
249 * format in the buffer @buf.
250 */
251int ib_ud_header_pack(struct ib_ud_header *header,
252 void *buf)
253{
254 int len = 0;
255
256 ib_pack(lrh_table, ARRAY_SIZE(lrh_table),
257 &header->lrh, buf);
258 len += IB_LRH_BYTES;
259
260 if (header->grh_present) {
261 ib_pack(grh_table, ARRAY_SIZE(grh_table),
262 &header->grh, buf + len);
263 len += IB_GRH_BYTES;
264 }
265
266 ib_pack(bth_table, ARRAY_SIZE(bth_table),
267 &header->bth, buf + len);
268 len += IB_BTH_BYTES;
269
270 ib_pack(deth_table, ARRAY_SIZE(deth_table),
271 &header->deth, buf + len);
272 len += IB_DETH_BYTES;
273
274 if (header->immediate_present) {
275 memcpy(buf + len, &header->immediate_data, sizeof header->immediate_data);
276 len += sizeof header->immediate_data;
277 }
278
279 return len;
280}
281EXPORT_SYMBOL(ib_ud_header_pack);
282
283/**
284 * ib_ud_header_unpack - Unpack UD header struct from wire format
285 * @buf:Buffer to unpack from
286 * @header:UD header struct to fill in
287 *
288 * ib_ud_header_unpack() unpacks the UD header structure @header from
289 * wire format in the buffer @buf.
290 */
291int ib_ud_header_unpack(void *buf,
292 struct ib_ud_header *header)
293{
294 ib_unpack(lrh_table, ARRAY_SIZE(lrh_table),
295 buf, &header->lrh);
296 buf += IB_LRH_BYTES;
297
298 if (header->lrh.link_version != 0) {
299 printk(KERN_WARNING "Invalid LRH.link_version %d\n",
300 header->lrh.link_version);
301 return -EINVAL;
302 }
303
304 switch (header->lrh.link_next_header) {
305 case IB_LNH_IBA_LOCAL:
306 header->grh_present = 0;
307 break;
308
309 case IB_LNH_IBA_GLOBAL:
310 header->grh_present = 1;
311 ib_unpack(grh_table, ARRAY_SIZE(grh_table),
312 buf, &header->grh);
313 buf += IB_GRH_BYTES;
314
315 if (header->grh.ip_version != 6) {
316 printk(KERN_WARNING "Invalid GRH.ip_version %d\n",
317 header->grh.ip_version);
318 return -EINVAL;
319 }
320 if (header->grh.next_header != 0x1b) {
321 printk(KERN_WARNING "Invalid GRH.next_header 0x%02x\n",
322 header->grh.next_header);
323 return -EINVAL;
324 }
325 break;
326
327 default:
328 printk(KERN_WARNING "Invalid LRH.link_next_header %d\n",
329 header->lrh.link_next_header);
330 return -EINVAL;
331 }
332
333 ib_unpack(bth_table, ARRAY_SIZE(bth_table),
334 buf, &header->bth);
335 buf += IB_BTH_BYTES;
336
337 switch (header->bth.opcode) {
338 case IB_OPCODE_UD_SEND_ONLY:
339 header->immediate_present = 0;
340 break;
341 case IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE:
342 header->immediate_present = 1;
343 break;
344 default:
345 printk(KERN_WARNING "Invalid BTH.opcode 0x%02x\n",
346 header->bth.opcode);
347 return -EINVAL;
348 }
349
350 if (header->bth.transport_header_version != 0) {
351 printk(KERN_WARNING "Invalid BTH.transport_header_version %d\n",
352 header->bth.transport_header_version);
353 return -EINVAL;
354 }
355
356 ib_unpack(deth_table, ARRAY_SIZE(deth_table),
357 buf, &header->deth);
358 buf += IB_DETH_BYTES;
359
360 if (header->immediate_present)
361 memcpy(&header->immediate_data, buf, sizeof header->immediate_data);
362
363 return 0;
364}
365EXPORT_SYMBOL(ib_ud_header_unpack);
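To make the calling pattern of ib_ud_header_pack()/ib_ud_header_unpack() concrete, here is a minimal round-trip sketch. It is hedged: the 128-byte buffer and the assumption that the header was already fully initialized (LRH/BTH/DETH fields plus the grh_present and immediate_present flags) are illustrative choices, not requirements stated by the code above.

static int ud_header_roundtrip(struct ib_ud_header *hdr)
{
	struct ib_ud_header parsed;
	u8 buf[128];	/* assumed big enough for LRH + GRH + BTH + DETH + immediate */
	int len;

	len = ib_ud_header_pack(hdr, buf);	/* header struct -> wire format */
	if (ib_ud_header_unpack(buf, &parsed))	/* wire format -> header struct */
		return -EINVAL;

	/* parsed.lrh/bth/deth now mirror hdr; parsed.grh is valid only if grh_present */
	return len;
}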
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
new file mode 100644
index 000000000000..54b4f33b0bf9
--- /dev/null
+++ b/drivers/infiniband/core/user_mad.c
@@ -0,0 +1,840 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: user_mad.c 1389 2004-12-27 22:56:47Z roland $
33 */
34
35#include <linux/module.h>
36#include <linux/init.h>
37#include <linux/device.h>
38#include <linux/err.h>
39#include <linux/fs.h>
40#include <linux/cdev.h>
41#include <linux/pci.h>
42#include <linux/dma-mapping.h>
43#include <linux/poll.h>
44#include <linux/rwsem.h>
45#include <linux/kref.h>
46
47#include <asm/uaccess.h>
48#include <asm/semaphore.h>
49
50#include <ib_mad.h>
51#include <ib_user_mad.h>
52
53MODULE_AUTHOR("Roland Dreier");
54MODULE_DESCRIPTION("InfiniBand userspace MAD packet access");
55MODULE_LICENSE("Dual BSD/GPL");
56
57enum {
58 IB_UMAD_MAX_PORTS = 64,
59 IB_UMAD_MAX_AGENTS = 32,
60
61 IB_UMAD_MAJOR = 231,
62 IB_UMAD_MINOR_BASE = 0
63};
64
65struct ib_umad_port {
66 int devnum;
67 struct cdev dev;
68 struct class_device class_dev;
69
70 int sm_devnum;
71 struct cdev sm_dev;
72 struct class_device sm_class_dev;
73 struct semaphore sm_sem;
74
75 struct ib_device *ib_dev;
76 struct ib_umad_device *umad_dev;
77 u8 port_num;
78};
79
80struct ib_umad_device {
81 int start_port, end_port;
82 struct kref ref;
83 struct ib_umad_port port[0];
84};
85
86struct ib_umad_file {
87 struct ib_umad_port *port;
88 spinlock_t recv_lock;
89 struct list_head recv_list;
90 wait_queue_head_t recv_wait;
91 struct rw_semaphore agent_mutex;
92 struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS];
93 struct ib_mr *mr[IB_UMAD_MAX_AGENTS];
94};
95
96struct ib_umad_packet {
97 struct ib_user_mad mad;
98 struct ib_ah *ah;
99 struct list_head list;
100 DECLARE_PCI_UNMAP_ADDR(mapping)
101};
102
103static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
104static spinlock_t map_lock;
105static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2);
106
107static void ib_umad_add_one(struct ib_device *device);
108static void ib_umad_remove_one(struct ib_device *device);
109
110static int queue_packet(struct ib_umad_file *file,
111 struct ib_mad_agent *agent,
112 struct ib_umad_packet *packet)
113{
114 int ret = 1;
115
116 down_read(&file->agent_mutex);
117 for (packet->mad.id = 0;
118 packet->mad.id < IB_UMAD_MAX_AGENTS;
119 packet->mad.id++)
120 if (agent == file->agent[packet->mad.id]) {
121 spin_lock_irq(&file->recv_lock);
122 list_add_tail(&packet->list, &file->recv_list);
123 spin_unlock_irq(&file->recv_lock);
124 wake_up_interruptible(&file->recv_wait);
125 ret = 0;
126 break;
127 }
128
129 up_read(&file->agent_mutex);
130
131 return ret;
132}
133
134static void send_handler(struct ib_mad_agent *agent,
135 struct ib_mad_send_wc *send_wc)
136{
137 struct ib_umad_file *file = agent->context;
138 struct ib_umad_packet *packet =
139 (void *) (unsigned long) send_wc->wr_id;
140
141 dma_unmap_single(agent->device->dma_device,
142 pci_unmap_addr(packet, mapping),
143 sizeof packet->mad.data,
144 DMA_TO_DEVICE);
145 ib_destroy_ah(packet->ah);
146
147 if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
148 packet->mad.status = ETIMEDOUT;
149
150 if (!queue_packet(file, agent, packet))
151 return;
152 }
153
154 kfree(packet);
155}
156
157static void recv_handler(struct ib_mad_agent *agent,
158 struct ib_mad_recv_wc *mad_recv_wc)
159{
160 struct ib_umad_file *file = agent->context;
161 struct ib_umad_packet *packet;
162
163 if (mad_recv_wc->wc->status != IB_WC_SUCCESS)
164 goto out;
165
166 packet = kmalloc(sizeof *packet, GFP_KERNEL);
167 if (!packet)
168 goto out;
169
170 memset(packet, 0, sizeof *packet);
171
172 memcpy(packet->mad.data, mad_recv_wc->recv_buf.mad, sizeof packet->mad.data);
173 packet->mad.status = 0;
174 packet->mad.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
175 packet->mad.lid = cpu_to_be16(mad_recv_wc->wc->slid);
176 packet->mad.sl = mad_recv_wc->wc->sl;
177 packet->mad.path_bits = mad_recv_wc->wc->dlid_path_bits;
178 packet->mad.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
179 if (packet->mad.grh_present) {
180 /* XXX parse GRH */
181 packet->mad.gid_index = 0;
182 packet->mad.hop_limit = 0;
183 packet->mad.traffic_class = 0;
184 memset(packet->mad.gid, 0, 16);
185 packet->mad.flow_label = 0;
186 }
187
188 if (queue_packet(file, agent, packet))
189 kfree(packet);
190
191out:
192 ib_free_recv_mad(mad_recv_wc);
193}
194
195static ssize_t ib_umad_read(struct file *filp, char __user *buf,
196 size_t count, loff_t *pos)
197{
198 struct ib_umad_file *file = filp->private_data;
199 struct ib_umad_packet *packet;
200 ssize_t ret;
201
202 if (count < sizeof (struct ib_user_mad))
203 return -EINVAL;
204
205 spin_lock_irq(&file->recv_lock);
206
207 while (list_empty(&file->recv_list)) {
208 spin_unlock_irq(&file->recv_lock);
209
210 if (filp->f_flags & O_NONBLOCK)
211 return -EAGAIN;
212
213 if (wait_event_interruptible(file->recv_wait,
214 !list_empty(&file->recv_list)))
215 return -ERESTARTSYS;
216
217 spin_lock_irq(&file->recv_lock);
218 }
219
220 packet = list_entry(file->recv_list.next, struct ib_umad_packet, list);
221 list_del(&packet->list);
222
223 spin_unlock_irq(&file->recv_lock);
224
225 if (copy_to_user(buf, &packet->mad, sizeof packet->mad))
226 ret = -EFAULT;
227 else
228 ret = sizeof packet->mad;
229
230 kfree(packet);
231 return ret;
232}
233
234static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
235 size_t count, loff_t *pos)
236{
237 struct ib_umad_file *file = filp->private_data;
238 struct ib_umad_packet *packet;
239 struct ib_mad_agent *agent;
240 struct ib_ah_attr ah_attr;
241 struct ib_sge gather_list;
242 struct ib_send_wr *bad_wr, wr = {
243 .opcode = IB_WR_SEND,
244 .sg_list = &gather_list,
245 .num_sge = 1,
246 .send_flags = IB_SEND_SIGNALED,
247 };
248 u8 method;
249 u64 *tid;
250 int ret;
251
252 if (count < sizeof (struct ib_user_mad))
253 return -EINVAL;
254
255 packet = kmalloc(sizeof *packet, GFP_KERNEL);
256 if (!packet)
257 return -ENOMEM;
258
259 if (copy_from_user(&packet->mad, buf, sizeof packet->mad)) {
260 kfree(packet);
261 return -EFAULT;
262 }
263
264 if (packet->mad.id < 0 || packet->mad.id >= IB_UMAD_MAX_AGENTS) {
265 ret = -EINVAL;
266 goto err;
267 }
268
269 down_read(&file->agent_mutex);
270
271 agent = file->agent[packet->mad.id];
272 if (!agent) {
273 ret = -EINVAL;
274 goto err_up;
275 }
276
277 /*
278 * If userspace is generating a request that will generate a
279 * response, we need to make sure the high-order part of the
280 * transaction ID matches the agent being used to send the
281 * MAD.
282 */
283 method = ((struct ib_mad_hdr *) packet->mad.data)->method;
284
285 if (!(method & IB_MGMT_METHOD_RESP) &&
286 method != IB_MGMT_METHOD_TRAP_REPRESS &&
287 method != IB_MGMT_METHOD_SEND) {
288 tid = &((struct ib_mad_hdr *) packet->mad.data)->tid;
289 *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
290 (be64_to_cpup(tid) & 0xffffffff));
291 }
292
293 memset(&ah_attr, 0, sizeof ah_attr);
294 ah_attr.dlid = be16_to_cpu(packet->mad.lid);
295 ah_attr.sl = packet->mad.sl;
296 ah_attr.src_path_bits = packet->mad.path_bits;
297 ah_attr.port_num = file->port->port_num;
298 if (packet->mad.grh_present) {
299 ah_attr.ah_flags = IB_AH_GRH;
300 memcpy(ah_attr.grh.dgid.raw, packet->mad.gid, 16);
301 ah_attr.grh.flow_label = packet->mad.flow_label;
302 ah_attr.grh.hop_limit = packet->mad.hop_limit;
303 ah_attr.grh.traffic_class = packet->mad.traffic_class;
304 }
305
306 packet->ah = ib_create_ah(agent->qp->pd, &ah_attr);
307 if (IS_ERR(packet->ah)) {
308 ret = PTR_ERR(packet->ah);
309 goto err_up;
310 }
311
312 gather_list.addr = dma_map_single(agent->device->dma_device,
313 packet->mad.data,
314 sizeof packet->mad.data,
315 DMA_TO_DEVICE);
316 gather_list.length = sizeof packet->mad.data;
317 gather_list.lkey = file->mr[packet->mad.id]->lkey;
318 pci_unmap_addr_set(packet, mapping, gather_list.addr);
319
320 wr.wr.ud.mad_hdr = (struct ib_mad_hdr *) packet->mad.data;
321 wr.wr.ud.ah = packet->ah;
322 wr.wr.ud.remote_qpn = be32_to_cpu(packet->mad.qpn);
323 wr.wr.ud.remote_qkey = be32_to_cpu(packet->mad.qkey);
324 wr.wr.ud.timeout_ms = packet->mad.timeout_ms;
325
326 wr.wr_id = (unsigned long) packet;
327
328 ret = ib_post_send_mad(agent, &wr, &bad_wr);
329 if (ret) {
330 dma_unmap_single(agent->device->dma_device,
331 pci_unmap_addr(packet, mapping),
332 sizeof packet->mad.data,
333 DMA_TO_DEVICE);
334 goto err_up;
335 }
336
337 up_read(&file->agent_mutex);
338
339 return sizeof packet->mad;
340
341err_up:
342 up_read(&file->agent_mutex);
343
344err:
345 kfree(packet);
346 return ret;
347}
348
349static unsigned int ib_umad_poll(struct file *filp, struct poll_table_struct *wait)
350{
351 struct ib_umad_file *file = filp->private_data;
352
353 /* we will always be able to post a MAD send */
354 unsigned int mask = POLLOUT | POLLWRNORM;
355
356 poll_wait(filp, &file->recv_wait, wait);
357
358 if (!list_empty(&file->recv_list))
359 mask |= POLLIN | POLLRDNORM;
360
361 return mask;
362}
363
364static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg)
365{
366 struct ib_user_mad_reg_req ureq;
367 struct ib_mad_reg_req req;
368 struct ib_mad_agent *agent;
369 int agent_id;
370 int ret;
371
372 down_write(&file->agent_mutex);
373
374 if (copy_from_user(&ureq, (void __user *) arg, sizeof ureq)) {
375 ret = -EFAULT;
376 goto out;
377 }
378
379 if (ureq.qpn != 0 && ureq.qpn != 1) {
380 ret = -EINVAL;
381 goto out;
382 }
383
384 for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id)
385 if (!file->agent[agent_id])
386 goto found;
387
388 ret = -ENOMEM;
389 goto out;
390
391found:
392 req.mgmt_class = ureq.mgmt_class;
393 req.mgmt_class_version = ureq.mgmt_class_version;
394 memcpy(req.method_mask, ureq.method_mask, sizeof req.method_mask);
395 memcpy(req.oui, ureq.oui, sizeof req.oui);
396
397 agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
398 ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
399 &req, 0, send_handler, recv_handler,
400 file);
401 if (IS_ERR(agent)) {
402 ret = PTR_ERR(agent);
403 goto out;
404 }
405
406 file->agent[agent_id] = agent;
407
408 file->mr[agent_id] = ib_get_dma_mr(agent->qp->pd, IB_ACCESS_LOCAL_WRITE);
409 if (IS_ERR(file->mr[agent_id])) {
410 ret = -ENOMEM;
411 goto err;
412 }
413
414 if (put_user(agent_id,
415 (u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) {
416 ret = -EFAULT;
417 goto err_mr;
418 }
419
420 ret = 0;
421 goto out;
422
423err_mr:
424 ib_dereg_mr(file->mr[agent_id]);
425
426err:
427 file->agent[agent_id] = NULL;
428 ib_unregister_mad_agent(agent);
429
430out:
431 up_write(&file->agent_mutex);
432 return ret;
433}
434
435static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg)
436{
437 u32 id;
438 int ret = 0;
439
440 down_write(&file->agent_mutex);
441
442 if (get_user(id, (u32 __user *) arg)) {
443 ret = -EFAULT;
444 goto out;
445 }
446
447 if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !file->agent[id]) {
448 ret = -EINVAL;
449 goto out;
450 }
451
452 ib_dereg_mr(file->mr[id]);
453 ib_unregister_mad_agent(file->agent[id]);
454 file->agent[id] = NULL;
455
456out:
457 up_write(&file->agent_mutex);
458 return ret;
459}
460
461static long ib_umad_ioctl(struct file *filp,
462 unsigned int cmd, unsigned long arg)
463{
464 switch (cmd) {
465 case IB_USER_MAD_REGISTER_AGENT:
466 return ib_umad_reg_agent(filp->private_data, arg);
467 case IB_USER_MAD_UNREGISTER_AGENT:
468 return ib_umad_unreg_agent(filp->private_data, arg);
469 default:
470 return -ENOIOCTLCMD;
471 }
472}
473
474static int ib_umad_open(struct inode *inode, struct file *filp)
475{
476 struct ib_umad_port *port =
477 container_of(inode->i_cdev, struct ib_umad_port, dev);
478 struct ib_umad_file *file;
479
480 file = kmalloc(sizeof *file, GFP_KERNEL);
481 if (!file)
482 return -ENOMEM;
483
484 memset(file, 0, sizeof *file);
485
486 spin_lock_init(&file->recv_lock);
487 init_rwsem(&file->agent_mutex);
488 INIT_LIST_HEAD(&file->recv_list);
489 init_waitqueue_head(&file->recv_wait);
490
491 file->port = port;
492 filp->private_data = file;
493
494 return 0;
495}
496
497static int ib_umad_close(struct inode *inode, struct file *filp)
498{
499 struct ib_umad_file *file = filp->private_data;
500 int i;
501
502 for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i)
503 if (file->agent[i]) {
504 ib_dereg_mr(file->mr[i]);
505 ib_unregister_mad_agent(file->agent[i]);
506 }
507
508 kfree(file);
509
510 return 0;
511}
512
513static struct file_operations umad_fops = {
514 .owner = THIS_MODULE,
515 .read = ib_umad_read,
516 .write = ib_umad_write,
517 .poll = ib_umad_poll,
518 .unlocked_ioctl = ib_umad_ioctl,
519 .compat_ioctl = ib_umad_ioctl,
520 .open = ib_umad_open,
521 .release = ib_umad_close
522};
523
524static int ib_umad_sm_open(struct inode *inode, struct file *filp)
525{
526 struct ib_umad_port *port =
527 container_of(inode->i_cdev, struct ib_umad_port, sm_dev);
528 struct ib_port_modify props = {
529 .set_port_cap_mask = IB_PORT_SM
530 };
531 int ret;
532
533 if (filp->f_flags & O_NONBLOCK) {
534 if (down_trylock(&port->sm_sem))
535 return -EAGAIN;
536 } else {
537 if (down_interruptible(&port->sm_sem))
538 return -ERESTARTSYS;
539 }
540
541 ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
542 if (ret) {
543 up(&port->sm_sem);
544 return ret;
545 }
546
547 filp->private_data = port;
548
549 return 0;
550}
551
552static int ib_umad_sm_close(struct inode *inode, struct file *filp)
553{
554 struct ib_umad_port *port = filp->private_data;
555 struct ib_port_modify props = {
556 .clr_port_cap_mask = IB_PORT_SM
557 };
558 int ret;
559
560 ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
561 up(&port->sm_sem);
562
563 return ret;
564}
565
566static struct file_operations umad_sm_fops = {
567 .owner = THIS_MODULE,
568 .open = ib_umad_sm_open,
569 .release = ib_umad_sm_close
570};
571
572static struct ib_client umad_client = {
573 .name = "umad",
574 .add = ib_umad_add_one,
575 .remove = ib_umad_remove_one
576};
577
578static ssize_t show_dev(struct class_device *class_dev, char *buf)
579{
580 struct ib_umad_port *port = class_get_devdata(class_dev);
581
582 if (class_dev == &port->class_dev)
583 return print_dev_t(buf, port->dev.dev);
584 else
585 return print_dev_t(buf, port->sm_dev.dev);
586}
587static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);
588
589static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
590{
591 struct ib_umad_port *port = class_get_devdata(class_dev);
592
593 return sprintf(buf, "%s\n", port->ib_dev->name);
594}
595static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
596
597static ssize_t show_port(struct class_device *class_dev, char *buf)
598{
599 struct ib_umad_port *port = class_get_devdata(class_dev);
600
601 return sprintf(buf, "%d\n", port->port_num);
602}
603static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
604
605static void ib_umad_release_dev(struct kref *ref)
606{
607 struct ib_umad_device *dev =
608 container_of(ref, struct ib_umad_device, ref);
609
610 kfree(dev);
611}
612
613static void ib_umad_release_port(struct class_device *class_dev)
614{
615 struct ib_umad_port *port = class_get_devdata(class_dev);
616
617 if (class_dev == &port->class_dev) {
618 cdev_del(&port->dev);
619 clear_bit(port->devnum, dev_map);
620 } else {
621 cdev_del(&port->sm_dev);
622 clear_bit(port->sm_devnum, dev_map);
623 }
624
625 kref_put(&port->umad_dev->ref, ib_umad_release_dev);
626}
627
628static struct class umad_class = {
629 .name = "infiniband_mad",
630 .release = ib_umad_release_port
631};
632
633static ssize_t show_abi_version(struct class *class, char *buf)
634{
635 return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
636}
637static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
638
639static int ib_umad_init_port(struct ib_device *device, int port_num,
640 struct ib_umad_port *port)
641{
642 spin_lock(&map_lock);
643 port->devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
644 if (port->devnum >= IB_UMAD_MAX_PORTS) {
645 spin_unlock(&map_lock);
646 return -1;
647 }
648 port->sm_devnum = find_next_zero_bit(dev_map, IB_UMAD_MAX_PORTS * 2, IB_UMAD_MAX_PORTS);
649 if (port->sm_devnum >= IB_UMAD_MAX_PORTS * 2) {
650 spin_unlock(&map_lock);
651 return -1;
652 }
653 set_bit(port->devnum, dev_map);
654 set_bit(port->sm_devnum, dev_map);
655 spin_unlock(&map_lock);
656
657 port->ib_dev = device;
658 port->port_num = port_num;
659 init_MUTEX(&port->sm_sem);
660
661 cdev_init(&port->dev, &umad_fops);
662 port->dev.owner = THIS_MODULE;
663 kobject_set_name(&port->dev.kobj, "umad%d", port->devnum);
664 if (cdev_add(&port->dev, base_dev + port->devnum, 1))
665 return -1;
666
667 port->class_dev.class = &umad_class;
668 port->class_dev.dev = device->dma_device;
669
670 snprintf(port->class_dev.class_id, BUS_ID_SIZE, "umad%d", port->devnum);
671
672 if (class_device_register(&port->class_dev))
673 goto err_cdev;
674
675 class_set_devdata(&port->class_dev, port);
676 kref_get(&port->umad_dev->ref);
677
678 if (class_device_create_file(&port->class_dev, &class_device_attr_dev))
679 goto err_class;
680 if (class_device_create_file(&port->class_dev, &class_device_attr_ibdev))
681 goto err_class;
682 if (class_device_create_file(&port->class_dev, &class_device_attr_port))
683 goto err_class;
684
685 cdev_init(&port->sm_dev, &umad_sm_fops);
686 port->sm_dev.owner = THIS_MODULE;
687 kobject_set_name(&port->sm_dev.kobj, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS);
688 if (cdev_add(&port->sm_dev, base_dev + port->sm_devnum, 1))
689 return -1;
690
691 port->sm_class_dev.class = &umad_class;
692 port->sm_class_dev.dev = device->dma_device;
693
694 snprintf(port->sm_class_dev.class_id, BUS_ID_SIZE, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS);
695
696 if (class_device_register(&port->sm_class_dev))
697 goto err_sm_cdev;
698
699 class_set_devdata(&port->sm_class_dev, port);
700 kref_get(&port->umad_dev->ref);
701
702 if (class_device_create_file(&port->sm_class_dev, &class_device_attr_dev))
703 goto err_sm_class;
704 if (class_device_create_file(&port->sm_class_dev, &class_device_attr_ibdev))
705 goto err_sm_class;
706 if (class_device_create_file(&port->sm_class_dev, &class_device_attr_port))
707 goto err_sm_class;
708
709 return 0;
710
711err_sm_class:
712 class_device_unregister(&port->sm_class_dev);
713
714err_sm_cdev:
715 cdev_del(&port->sm_dev);
716
717err_class:
718 class_device_unregister(&port->class_dev);
719
720err_cdev:
721 cdev_del(&port->dev);
722 clear_bit(port->devnum, dev_map);
723
724 return -1;
725}
726
727static void ib_umad_add_one(struct ib_device *device)
728{
729 struct ib_umad_device *umad_dev;
730 int s, e, i;
731
732 if (device->node_type == IB_NODE_SWITCH)
733 s = e = 0;
734 else {
735 s = 1;
736 e = device->phys_port_cnt;
737 }
738
739 umad_dev = kmalloc(sizeof *umad_dev +
740 (e - s + 1) * sizeof (struct ib_umad_port),
741 GFP_KERNEL);
742 if (!umad_dev)
743 return;
744
745 memset(umad_dev, 0, sizeof *umad_dev +
746 (e - s + 1) * sizeof (struct ib_umad_port));
747
748 kref_init(&umad_dev->ref);
749
750 umad_dev->start_port = s;
751 umad_dev->end_port = e;
752
753 for (i = s; i <= e; ++i) {
754 umad_dev->port[i - s].umad_dev = umad_dev;
755
756 if (ib_umad_init_port(device, i, &umad_dev->port[i - s]))
757 goto err;
758 }
759
760 ib_set_client_data(device, &umad_client, umad_dev);
761
762 return;
763
764err:
765 while (--i >= s) {
766 class_device_unregister(&umad_dev->port[i - s].class_dev);
767 class_device_unregister(&umad_dev->port[i - s].sm_class_dev);
768 }
769
770 kref_put(&umad_dev->ref, ib_umad_release_dev);
771}
772
773static void ib_umad_remove_one(struct ib_device *device)
774{
775 struct ib_umad_device *umad_dev = ib_get_client_data(device, &umad_client);
776 int i;
777
778 if (!umad_dev)
779 return;
780
781 for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) {
782 class_device_unregister(&umad_dev->port[i].class_dev);
783 class_device_unregister(&umad_dev->port[i].sm_class_dev);
784 }
785
786 kref_put(&umad_dev->ref, ib_umad_release_dev);
787}
788
789static int __init ib_umad_init(void)
790{
791 int ret;
792
793 spin_lock_init(&map_lock);
794
795 ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
796 "infiniband_mad");
797 if (ret) {
798 printk(KERN_ERR "user_mad: couldn't register device number\n");
799 goto out;
800 }
801
802 ret = class_register(&umad_class);
803 if (ret) {
804 printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n");
805 goto out_chrdev;
806 }
807
808 ret = class_create_file(&umad_class, &class_attr_abi_version);
809 if (ret) {
810 printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
811 goto out_class;
812 }
813
814 ret = ib_register_client(&umad_client);
815 if (ret) {
816 printk(KERN_ERR "user_mad: couldn't register ib_umad client\n");
817 goto out_class;
818 }
819
820 return 0;
821
822out_class:
823 class_unregister(&umad_class);
824
825out_chrdev:
826 unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
827
828out:
829 return ret;
830}
831
832static void __exit ib_umad_cleanup(void)
833{
834 ib_unregister_client(&umad_client);
835 class_unregister(&umad_class);
836 unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
837}
838
839module_init(ib_umad_init);
840module_exit(ib_umad_cleanup);
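From userspace, the character device implemented above is driven by registering an agent and then exchanging struct ib_user_mad packets with read()/write(). The sketch below is hedged: the device node path, the management class value, and the minimal error handling are illustrative assumptions, and ib_user_mad.h is assumed to be on the userspace include path.

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include <ib_user_mad.h>	/* kernel header; assumed available to userspace */

int umad_example(void)
{
	struct ib_user_mad_reg_req req;
	struct ib_user_mad mad;
	int fd = open("/dev/umad0", O_RDWR);	/* node name is an assumption */

	if (fd < 0)
		return -1;

	memset(&req, 0, sizeof req);
	req.qpn = 1;			/* GSI; must be 0 or 1 per ib_umad_reg_agent() */
	req.mgmt_class = 0x03;		/* illustrative management class */
	req.mgmt_class_version = 2;
	if (ioctl(fd, IB_USER_MAD_REGISTER_AGENT, &req) < 0) {
		close(fd);
		return -1;
	}

	memset(&mad, 0, sizeof mad);
	mad.id = req.id;		/* agent id written back by the kernel */
	/* ... fill mad.lid, mad.qpn, mad.qkey and the MAD payload in mad.data ... */
	write(fd, &mad, sizeof mad);	/* posts a send via ib_umad_write() */
	read(fd, &mad, sizeof mad);	/* returns a received or timed-out MAD */

	close(fd);
	return 0;
}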
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
new file mode 100644
index 000000000000..7c08ed0cd7dd
--- /dev/null
+++ b/drivers/infiniband/core/verbs.c
@@ -0,0 +1,434 @@
1/*
2 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 * $Id: verbs.c 1349 2004-12-16 21:09:43Z roland $
37 */
38
39#include <linux/errno.h>
40#include <linux/err.h>
41
42#include <ib_verbs.h>
43
44/* Protection domains */
45
46struct ib_pd *ib_alloc_pd(struct ib_device *device)
47{
48 struct ib_pd *pd;
49
50 pd = device->alloc_pd(device);
51
52 if (!IS_ERR(pd)) {
53 pd->device = device;
54 atomic_set(&pd->usecnt, 0);
55 }
56
57 return pd;
58}
59EXPORT_SYMBOL(ib_alloc_pd);
60
61int ib_dealloc_pd(struct ib_pd *pd)
62{
63 if (atomic_read(&pd->usecnt))
64 return -EBUSY;
65
66 return pd->device->dealloc_pd(pd);
67}
68EXPORT_SYMBOL(ib_dealloc_pd);
69
70/* Address handles */
71
72struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
73{
74 struct ib_ah *ah;
75
76 ah = pd->device->create_ah(pd, ah_attr);
77
78 if (!IS_ERR(ah)) {
79 ah->device = pd->device;
80 ah->pd = pd;
81 atomic_inc(&pd->usecnt);
82 }
83
84 return ah;
85}
86EXPORT_SYMBOL(ib_create_ah);
87
88int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
89{
90 return ah->device->modify_ah ?
91 ah->device->modify_ah(ah, ah_attr) :
92 -ENOSYS;
93}
94EXPORT_SYMBOL(ib_modify_ah);
95
96int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
97{
98 return ah->device->query_ah ?
99 ah->device->query_ah(ah, ah_attr) :
100 -ENOSYS;
101}
102EXPORT_SYMBOL(ib_query_ah);
103
104int ib_destroy_ah(struct ib_ah *ah)
105{
106 struct ib_pd *pd;
107 int ret;
108
109 pd = ah->pd;
110 ret = ah->device->destroy_ah(ah);
111 if (!ret)
112 atomic_dec(&pd->usecnt);
113
114 return ret;
115}
116EXPORT_SYMBOL(ib_destroy_ah);
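/*
 * A minimal sketch of how the PD and AH wrappers above compose from a
 * consumer's point of view.  The 'device' and 'attr' arguments are assumed
 * to have been set up elsewhere; only entry points defined in this file
 * are used.
 */
static int pd_ah_example(struct ib_device *device, struct ib_ah_attr *attr)
{
	struct ib_pd *pd;
	struct ib_ah *ah;
	int ret = 0;

	pd = ib_alloc_pd(device);		/* usecnt starts at 0 */
	if (IS_ERR(pd))
		return PTR_ERR(pd);

	ah = ib_create_ah(pd, attr);		/* bumps pd->usecnt */
	if (IS_ERR(ah)) {
		ret = PTR_ERR(ah);
		goto out;
	}

	ib_destroy_ah(ah);			/* drops pd->usecnt */
out:
	ib_dealloc_pd(pd);			/* -EBUSY if anything still uses pd */
	return ret;
}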
117
118/* Queue pairs */
119
120struct ib_qp *ib_create_qp(struct ib_pd *pd,
121 struct ib_qp_init_attr *qp_init_attr)
122{
123 struct ib_qp *qp;
124
125 qp = pd->device->create_qp(pd, qp_init_attr);
126
127 if (!IS_ERR(qp)) {
128 qp->device = pd->device;
129 qp->pd = pd;
130 qp->send_cq = qp_init_attr->send_cq;
131 qp->recv_cq = qp_init_attr->recv_cq;
132 qp->srq = qp_init_attr->srq;
133 qp->event_handler = qp_init_attr->event_handler;
134 qp->qp_context = qp_init_attr->qp_context;
135 qp->qp_type = qp_init_attr->qp_type;
136 atomic_inc(&pd->usecnt);
137 atomic_inc(&qp_init_attr->send_cq->usecnt);
138 atomic_inc(&qp_init_attr->recv_cq->usecnt);
139 if (qp_init_attr->srq)
140 atomic_inc(&qp_init_attr->srq->usecnt);
141 }
142
143 return qp;
144}
145EXPORT_SYMBOL(ib_create_qp);
146
147int ib_modify_qp(struct ib_qp *qp,
148 struct ib_qp_attr *qp_attr,
149 int qp_attr_mask)
150{
151 return qp->device->modify_qp(qp, qp_attr, qp_attr_mask);
152}
153EXPORT_SYMBOL(ib_modify_qp);
154
155int ib_query_qp(struct ib_qp *qp,
156 struct ib_qp_attr *qp_attr,
157 int qp_attr_mask,
158 struct ib_qp_init_attr *qp_init_attr)
159{
160 return qp->device->query_qp ?
161 qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) :
162 -ENOSYS;
163}
164EXPORT_SYMBOL(ib_query_qp);
165
166int ib_destroy_qp(struct ib_qp *qp)
167{
168 struct ib_pd *pd;
169 struct ib_cq *scq, *rcq;
170 struct ib_srq *srq;
171 int ret;
172
173 pd = qp->pd;
174 scq = qp->send_cq;
175 rcq = qp->recv_cq;
176 srq = qp->srq;
177
178 ret = qp->device->destroy_qp(qp);
179 if (!ret) {
180 atomic_dec(&pd->usecnt);
181 atomic_dec(&scq->usecnt);
182 atomic_dec(&rcq->usecnt);
183 if (srq)
184 atomic_dec(&srq->usecnt);
185 }
186
187 return ret;
188}
189EXPORT_SYMBOL(ib_destroy_qp);
190
191/* Completion queues */
192
193struct ib_cq *ib_create_cq(struct ib_device *device,
194 ib_comp_handler comp_handler,
195 void (*event_handler)(struct ib_event *, void *),
196 void *cq_context, int cqe)
197{
198 struct ib_cq *cq;
199
200 cq = device->create_cq(device, cqe);
201
202 if (!IS_ERR(cq)) {
203 cq->device = device;
204 cq->comp_handler = comp_handler;
205 cq->event_handler = event_handler;
206 cq->cq_context = cq_context;
207 atomic_set(&cq->usecnt, 0);
208 }
209
210 return cq;
211}
212EXPORT_SYMBOL(ib_create_cq);
213
214int ib_destroy_cq(struct ib_cq *cq)
215{
216 if (atomic_read(&cq->usecnt))
217 return -EBUSY;
218
219 return cq->device->destroy_cq(cq);
220}
221EXPORT_SYMBOL(ib_destroy_cq);
222
223int ib_resize_cq(struct ib_cq *cq,
224 int cqe)
225{
226 int ret;
227
228 if (!cq->device->resize_cq)
229 return -ENOSYS;
230
231 ret = cq->device->resize_cq(cq, &cqe);
232 if (!ret)
233 cq->cqe = cqe;
234
235 return ret;
236}
237EXPORT_SYMBOL(ib_resize_cq);
238
239/* Memory regions */
240
241struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
242{
243 struct ib_mr *mr;
244
245 mr = pd->device->get_dma_mr(pd, mr_access_flags);
246
247 if (!IS_ERR(mr)) {
248 mr->device = pd->device;
249 mr->pd = pd;
250 atomic_inc(&pd->usecnt);
251 atomic_set(&mr->usecnt, 0);
252 }
253
254 return mr;
255}
256EXPORT_SYMBOL(ib_get_dma_mr);
257
258struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
259 struct ib_phys_buf *phys_buf_array,
260 int num_phys_buf,
261 int mr_access_flags,
262 u64 *iova_start)
263{
264 struct ib_mr *mr;
265
266 mr = pd->device->reg_phys_mr(pd, phys_buf_array, num_phys_buf,
267 mr_access_flags, iova_start);
268
269 if (!IS_ERR(mr)) {
270 mr->device = pd->device;
271 mr->pd = pd;
272 atomic_inc(&pd->usecnt);
273 atomic_set(&mr->usecnt, 0);
274 }
275
276 return mr;
277}
278EXPORT_SYMBOL(ib_reg_phys_mr);
279
280int ib_rereg_phys_mr(struct ib_mr *mr,
281 int mr_rereg_mask,
282 struct ib_pd *pd,
283 struct ib_phys_buf *phys_buf_array,
284 int num_phys_buf,
285 int mr_access_flags,
286 u64 *iova_start)
287{
288 struct ib_pd *old_pd;
289 int ret;
290
291 if (!mr->device->rereg_phys_mr)
292 return -ENOSYS;
293
294 if (atomic_read(&mr->usecnt))
295 return -EBUSY;
296
297 old_pd = mr->pd;
298
299 ret = mr->device->rereg_phys_mr(mr, mr_rereg_mask, pd,
300 phys_buf_array, num_phys_buf,
301 mr_access_flags, iova_start);
302
303 if (!ret && (mr_rereg_mask & IB_MR_REREG_PD)) {
304 atomic_dec(&old_pd->usecnt);
305 atomic_inc(&pd->usecnt);
306 }
307
308 return ret;
309}
310EXPORT_SYMBOL(ib_rereg_phys_mr);
311
312int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
313{
314 return mr->device->query_mr ?
315 mr->device->query_mr(mr, mr_attr) : -ENOSYS;
316}
317EXPORT_SYMBOL(ib_query_mr);
318
319int ib_dereg_mr(struct ib_mr *mr)
320{
321 struct ib_pd *pd;
322 int ret;
323
324 if (atomic_read(&mr->usecnt))
325 return -EBUSY;
326
327 pd = mr->pd;
328 ret = mr->device->dereg_mr(mr);
329 if (!ret)
330 atomic_dec(&pd->usecnt);
331
332 return ret;
333}
334EXPORT_SYMBOL(ib_dereg_mr);
335
336/* Memory windows */
337
338struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
339{
340 struct ib_mw *mw;
341
342 if (!pd->device->alloc_mw)
343 return ERR_PTR(-ENOSYS);
344
345 mw = pd->device->alloc_mw(pd);
346 if (!IS_ERR(mw)) {
347 mw->device = pd->device;
348 mw->pd = pd;
349 atomic_inc(&pd->usecnt);
350 }
351
352 return mw;
353}
354EXPORT_SYMBOL(ib_alloc_mw);
355
356int ib_dealloc_mw(struct ib_mw *mw)
357{
358 struct ib_pd *pd;
359 int ret;
360
361 pd = mw->pd;
362 ret = mw->device->dealloc_mw(mw);
363 if (!ret)
364 atomic_dec(&pd->usecnt);
365
366 return ret;
367}
368EXPORT_SYMBOL(ib_dealloc_mw);
369
370/* "Fast" memory regions */
371
372struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
373 int mr_access_flags,
374 struct ib_fmr_attr *fmr_attr)
375{
376 struct ib_fmr *fmr;
377
378 if (!pd->device->alloc_fmr)
379 return ERR_PTR(-ENOSYS);
380
381 fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr);
382 if (!IS_ERR(fmr)) {
383 fmr->device = pd->device;
384 fmr->pd = pd;
385 atomic_inc(&pd->usecnt);
386 }
387
388 return fmr;
389}
390EXPORT_SYMBOL(ib_alloc_fmr);
391
392int ib_unmap_fmr(struct list_head *fmr_list)
393{
394 struct ib_fmr *fmr;
395
396 if (list_empty(fmr_list))
397 return 0;
398
399 fmr = list_entry(fmr_list->next, struct ib_fmr, list);
400 return fmr->device->unmap_fmr(fmr_list);
401}
402EXPORT_SYMBOL(ib_unmap_fmr);
403
404int ib_dealloc_fmr(struct ib_fmr *fmr)
405{
406 struct ib_pd *pd;
407 int ret;
408
409 pd = fmr->pd;
410 ret = fmr->device->dealloc_fmr(fmr);
411 if (!ret)
412 atomic_dec(&pd->usecnt);
413
414 return ret;
415}
416EXPORT_SYMBOL(ib_dealloc_fmr);
417
418/* Multicast groups */
419
420int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
421{
422 return qp->device->attach_mcast ?
423 qp->device->attach_mcast(qp, gid, lid) :
424 -ENOSYS;
425}
426EXPORT_SYMBOL(ib_attach_mcast);
427
428int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
429{
430 return qp->device->detach_mcast ?
431 qp->device->detach_mcast(qp, gid, lid) :
432 -ENOSYS;
433}
434EXPORT_SYMBOL(ib_detach_mcast);
diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig
new file mode 100644
index 000000000000..e88be85b3d5c
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/Kconfig
@@ -0,0 +1,16 @@
1config INFINIBAND_MTHCA
2 tristate "Mellanox HCA support"
3 depends on PCI && INFINIBAND
4 ---help---
5 This is a low-level driver for Mellanox InfiniHost host
6 channel adapters (HCAs), including the MT23108 PCI-X HCA
7 ("Tavor") and the MT25208 PCI Express HCA ("Arbel").
8
9config INFINIBAND_MTHCA_DEBUG
10 bool "Verbose debugging output"
11 depends on INFINIBAND_MTHCA
12 default n
13 ---help---
14 This option causes the mthca driver to produce a bunch of debug
15 messages. Select this if you are developing the driver or
16 trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile
new file mode 100644
index 000000000000..5dcbd43073e2
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/Makefile
@@ -0,0 +1,12 @@
1EXTRA_CFLAGS += -Idrivers/infiniband/include
2
3ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
4EXTRA_CFLAGS += -DDEBUG
5endif
6
7obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o
8
9ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
10 mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \
11 mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \
12 mthca_provider.o mthca_memfree.o mthca_uar.o
diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c
new file mode 100644
index 000000000000..b1db48dd91d6
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_allocator.c
@@ -0,0 +1,179 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_allocator.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <linux/errno.h>
36#include <linux/slab.h>
37#include <linux/bitmap.h>
38
39#include "mthca_dev.h"
40
41/* Trivial bitmap-based allocator */
42u32 mthca_alloc(struct mthca_alloc *alloc)
43{
44 u32 obj;
45
46 spin_lock(&alloc->lock);
47 obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last);
48 if (obj >= alloc->max) {
49 alloc->top = (alloc->top + alloc->max) & alloc->mask;
50 obj = find_first_zero_bit(alloc->table, alloc->max);
51 }
52
53 if (obj < alloc->max) {
54 set_bit(obj, alloc->table);
55 obj |= alloc->top;
56 } else
57 obj = -1;
58
59 spin_unlock(&alloc->lock);
60
61 return obj;
62}
63
64void mthca_free(struct mthca_alloc *alloc, u32 obj)
65{
66 obj &= alloc->max - 1;
67 spin_lock(&alloc->lock);
68 clear_bit(obj, alloc->table);
69 alloc->last = min(alloc->last, obj);
70 alloc->top = (alloc->top + alloc->max) & alloc->mask;
71 spin_unlock(&alloc->lock);
72}
73
74int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
75 u32 reserved)
76{
77 int i;
78
79 /* num must be a power of 2 */
80 if (num != 1 << (ffs(num) - 1))
81 return -EINVAL;
82
83 alloc->last = 0;
84 alloc->top = 0;
85 alloc->max = num;
86 alloc->mask = mask;
87 spin_lock_init(&alloc->lock);
88 alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof (long),
89 GFP_KERNEL);
90 if (!alloc->table)
91 return -ENOMEM;
92
93 bitmap_zero(alloc->table, num);
94 for (i = 0; i < reserved; ++i)
95 set_bit(i, alloc->table);
96
97 return 0;
98}
99
100void mthca_alloc_cleanup(struct mthca_alloc *alloc)
101{
102 kfree(alloc->table);
103}
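/*
 * A usage sketch of the allocator above, mirroring how mthca_init_av_table()
 * later uses it: num must be a power of two, mask is typically num - 1, and
 * the first 'reserved' objects are never handed out.  The sizes here are
 * illustrative assumptions.
 */
static int mthca_alloc_example(void)
{
	struct mthca_alloc a;
	u32 obj;
	int err;

	err = mthca_alloc_init(&a, 256, 255, 0);	/* 256 objects, none reserved */
	if (err)
		return err;

	obj = mthca_alloc(&a);			/* returns (u32) -1 when the table is full */
	if (obj != (u32) -1)
		mthca_free(&a, obj);

	mthca_alloc_cleanup(&a);
	return 0;
}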
104
105/*
106 * Array of pointers with lazy allocation of leaf pages. Callers of
107 * _get, _set and _clear methods must use a lock or otherwise
108 * serialize access to the array.
109 */
110
111void *mthca_array_get(struct mthca_array *array, int index)
112{
113 int p = (index * sizeof (void *)) >> PAGE_SHIFT;
114
115 if (array->page_list[p].page) {
116 int i = index & (PAGE_SIZE / sizeof (void *) - 1);
117 return array->page_list[p].page[i];
118 } else
119 return NULL;
120}
121
122int mthca_array_set(struct mthca_array *array, int index, void *value)
123{
124 int p = (index * sizeof (void *)) >> PAGE_SHIFT;
125
126 /* Allocate with GFP_ATOMIC because we'll be called with locks held. */
127 if (!array->page_list[p].page)
128 array->page_list[p].page = (void **) get_zeroed_page(GFP_ATOMIC);
129
130 if (!array->page_list[p].page)
131 return -ENOMEM;
132
133 array->page_list[p].page[index & (PAGE_SIZE / sizeof (void *) - 1)] =
134 value;
135 ++array->page_list[p].used;
136
137 return 0;
138}
139
140void mthca_array_clear(struct mthca_array *array, int index)
141{
142 int p = (index * sizeof (void *)) >> PAGE_SHIFT;
143
144 if (--array->page_list[p].used == 0) {
145 free_page((unsigned long) array->page_list[p].page);
146 array->page_list[p].page = NULL;
147 }
148
149 if (array->page_list[p].used < 0)
150 pr_debug("Array %p index %d page %d with ref count %d < 0\n",
151 array, index, p, array->page_list[p].used);
152}
153
154int mthca_array_init(struct mthca_array *array, int nent)
155{
156 int npage = (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE;
157 int i;
158
159 array->page_list = kmalloc(npage * sizeof *array->page_list, GFP_KERNEL);
160 if (!array->page_list)
161 return -ENOMEM;
162
163 for (i = 0; i < npage; ++i) {
164 array->page_list[i].page = NULL;
165 array->page_list[i].used = 0;
166 }
167
168 return 0;
169}
170
171void mthca_array_cleanup(struct mthca_array *array, int nent)
172{
173 int i;
174
175 for (i = 0; i < (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
176 free_page((unsigned long) array->page_list[i].page);
177
178 kfree(array->page_list);
179}
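/*
 * A sketch of the lazy pointer array in use.  As the comment above says,
 * the caller must serialize _set/_get/_clear, so this example assumes an
 * externally supplied spinlock; the index and value are arbitrary.
 */
static int mthca_array_example(struct mthca_array *array, spinlock_t *lock,
			       int index, void *value)
{
	int err;

	spin_lock_irq(lock);
	err = mthca_array_set(array, index, value);	/* may allocate a leaf page */
	spin_unlock_irq(lock);
	if (err)
		return err;

	spin_lock_irq(lock);
	if (mthca_array_get(array, index) == value)
		mthca_array_clear(array, index);	/* frees the leaf page when empty */
	spin_unlock_irq(lock);

	return 0;
}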
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
new file mode 100644
index 000000000000..426d32778e9c
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -0,0 +1,241 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_av.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <linux/init.h>
36
37#include <ib_verbs.h>
38#include <ib_cache.h>
39
40#include "mthca_dev.h"
41
42struct mthca_av {
43 u32 port_pd;
44 u8 reserved1;
45 u8 g_slid;
46 u16 dlid;
47 u8 reserved2;
48 u8 gid_index;
49 u8 msg_sr;
50 u8 hop_limit;
51 u32 sl_tclass_flowlabel;
52 u32 dgid[4];
53};
54
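/*
 * mthca_create_ah() below chooses one of three homes for the address
 * vector: kmalloc()ed memory on ARBEL_NATIVE (mem-free) HCAs, a slot in
 * on-HCA DDR when the PD has no special QPs, the DDR is visible and a
 * slot is free, or a PCI pool buffer in host memory as the fallback
 * (MTHCA_AH_KMALLOC, MTHCA_AH_ON_HCA and MTHCA_AH_PCI_POOL respectively).
 */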
55int mthca_create_ah(struct mthca_dev *dev,
56 struct mthca_pd *pd,
57 struct ib_ah_attr *ah_attr,
58 struct mthca_ah *ah)
59{
60 u32 index = -1;
61 struct mthca_av *av = NULL;
62
63 ah->type = MTHCA_AH_PCI_POOL;
64
65 if (dev->hca_type == ARBEL_NATIVE) {
66 ah->av = kmalloc(sizeof *ah->av, GFP_KERNEL);
67 if (!ah->av)
68 return -ENOMEM;
69
70 ah->type = MTHCA_AH_KMALLOC;
71 av = ah->av;
72 } else if (!atomic_read(&pd->sqp_count) &&
73 !(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
74 index = mthca_alloc(&dev->av_table.alloc);
75
76 /* fall back to allocating in host memory */
77 if (index == -1)
78 goto on_hca_fail;
79
80 av = kmalloc(sizeof *av, GFP_KERNEL);
81 if (!av)
82 goto on_hca_fail;
83
84 ah->type = MTHCA_AH_ON_HCA;
85 ah->avdma = dev->av_table.ddr_av_base +
86 index * MTHCA_AV_SIZE;
87 }
88
89on_hca_fail:
90 if (ah->type == MTHCA_AH_PCI_POOL) {
91 ah->av = pci_pool_alloc(dev->av_table.pool,
92 SLAB_KERNEL, &ah->avdma);
93 if (!ah->av)
94 return -ENOMEM;
95
96 av = ah->av;
97 }
98
99 ah->key = pd->ntmr.ibmr.lkey;
100
101 memset(av, 0, MTHCA_AV_SIZE);
102
103 av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24));
104 av->g_slid = ah_attr->src_path_bits;
105 av->dlid = cpu_to_be16(ah_attr->dlid);
106 av->msg_sr = (3 << 4) | /* 2K message */
107 ah_attr->static_rate;
108 av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
109 if (ah_attr->ah_flags & IB_AH_GRH) {
110 av->g_slid |= 0x80;
111 av->gid_index = (ah_attr->port_num - 1) * dev->limits.gid_table_len +
112 ah_attr->grh.sgid_index;
113 av->hop_limit = ah_attr->grh.hop_limit;
114 av->sl_tclass_flowlabel |=
115 cpu_to_be32((ah_attr->grh.traffic_class << 20) |
116 ah_attr->grh.flow_label);
117 memcpy(av->dgid, ah_attr->grh.dgid.raw, 16);
118 } else {
119 /* Arbel workaround -- low byte of GID must be 2 */
120 av->dgid[3] = cpu_to_be32(2);
121 }
122
123 if (0) {
124 int j;
125
126 mthca_dbg(dev, "Created UDAV at %p/%08lx:\n",
127 av, (unsigned long) ah->avdma);
128 for (j = 0; j < 8; ++j)
129 printk(KERN_DEBUG " [%2x] %08x\n",
130 j * 4, be32_to_cpu(((u32 *) av)[j]));
131 }
132
133 if (ah->type == MTHCA_AH_ON_HCA) {
134 memcpy_toio(dev->av_table.av_map + index * MTHCA_AV_SIZE,
135 av, MTHCA_AV_SIZE);
136 kfree(av);
137 }
138
139 return 0;
140}
141
142int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah)
143{
144 switch (ah->type) {
145 case MTHCA_AH_ON_HCA:
146 mthca_free(&dev->av_table.alloc,
147 (ah->avdma - dev->av_table.ddr_av_base) /
148 MTHCA_AV_SIZE);
149 break;
150
151 case MTHCA_AH_PCI_POOL:
152 pci_pool_free(dev->av_table.pool, ah->av, ah->avdma);
153 break;
154
155 case MTHCA_AH_KMALLOC:
156 kfree(ah->av);
157 break;
158 }
159
160 return 0;
161}
162
163int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
164 struct ib_ud_header *header)
165{
166 if (ah->type == MTHCA_AH_ON_HCA)
167 return -EINVAL;
168
169 header->lrh.service_level = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
170 header->lrh.destination_lid = ah->av->dlid;
171 header->lrh.source_lid = ah->av->g_slid & 0x7f;
172 if (ah->av->g_slid & 0x80) {
173 header->grh_present = 1;
174 header->grh.traffic_class =
175 (be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff;
176 header->grh.flow_label =
177 ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff);
178 ib_get_cached_gid(&dev->ib_dev,
179 be32_to_cpu(ah->av->port_pd) >> 24,
180 ah->av->gid_index,
181 &header->grh.source_gid);
182 memcpy(header->grh.destination_gid.raw,
183 ah->av->dgid, 16);
184 } else {
185 header->grh_present = 0;
186 }
187
188 return 0;
189}
190
191int __devinit mthca_init_av_table(struct mthca_dev *dev)
192{
193 int err;
194
195 if (dev->hca_type == ARBEL_NATIVE)
196 return 0;
197
198 err = mthca_alloc_init(&dev->av_table.alloc,
199 dev->av_table.num_ddr_avs,
200 dev->av_table.num_ddr_avs - 1,
201 0);
202 if (err)
203 return err;
204
205 dev->av_table.pool = pci_pool_create("mthca_av", dev->pdev,
206 MTHCA_AV_SIZE,
207 MTHCA_AV_SIZE, 0);
208 if (!dev->av_table.pool)
209 goto out_free_alloc;
210
211 if (!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
212 dev->av_table.av_map = ioremap(pci_resource_start(dev->pdev, 4) +
213 dev->av_table.ddr_av_base -
214 dev->ddr_start,
215 dev->av_table.num_ddr_avs *
216 MTHCA_AV_SIZE);
217 if (!dev->av_table.av_map)
218 goto out_free_pool;
219 } else
220 dev->av_table.av_map = NULL;
221
222 return 0;
223
224 out_free_pool:
225 pci_pool_destroy(dev->av_table.pool);
226
227 out_free_alloc:
228 mthca_alloc_cleanup(&dev->av_table.alloc);
229 return -ENOMEM;
230}
231
232void __devexit mthca_cleanup_av_table(struct mthca_dev *dev)
233{
234 if (dev->hca_type == ARBEL_NATIVE)
235 return;
236
237 if (dev->av_table.av_map)
238 iounmap(dev->av_table.av_map);
239 pci_pool_destroy(dev->av_table.pool);
240 mthca_alloc_cleanup(&dev->av_table.alloc);
241}
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
new file mode 100644
index 000000000000..9def0981f630
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -0,0 +1,1767 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <linux/sched.h>
36#include <linux/pci.h>
37#include <linux/errno.h>
38#include <asm/io.h>
39#include <ib_mad.h>
40
41#include "mthca_dev.h"
42#include "mthca_config_reg.h"
43#include "mthca_cmd.h"
44#include "mthca_memfree.h"
45
46#define CMD_POLL_TOKEN 0xffff
47
48enum {
49 HCR_IN_PARAM_OFFSET = 0x00,
50 HCR_IN_MODIFIER_OFFSET = 0x08,
51 HCR_OUT_PARAM_OFFSET = 0x0c,
52 HCR_TOKEN_OFFSET = 0x14,
53 HCR_STATUS_OFFSET = 0x18,
54
55 HCR_OPMOD_SHIFT = 12,
56 HCA_E_BIT = 22,
57 HCR_GO_BIT = 23
58};
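/*
 * These offsets describe the HCR as a bank of seven 32-bit words at
 * dev->hcr: in_param (two words), in_modifier, out_param (two words),
 * the token, and finally a combined status/go/opcode word.
 * mthca_cmd_post() fills the first six and then writes the go bit,
 * event bit, opcode modifier and opcode into the last word to start
 * the command.
 */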
59
60enum {
61 /* initialization and general commands */
62 CMD_SYS_EN = 0x1,
63 CMD_SYS_DIS = 0x2,
64 CMD_MAP_FA = 0xfff,
65 CMD_UNMAP_FA = 0xffe,
66 CMD_RUN_FW = 0xff6,
67 CMD_MOD_STAT_CFG = 0x34,
68 CMD_QUERY_DEV_LIM = 0x3,
69 CMD_QUERY_FW = 0x4,
70 CMD_ENABLE_LAM = 0xff8,
71 CMD_DISABLE_LAM = 0xff7,
72 CMD_QUERY_DDR = 0x5,
73 CMD_QUERY_ADAPTER = 0x6,
74 CMD_INIT_HCA = 0x7,
75 CMD_CLOSE_HCA = 0x8,
76 CMD_INIT_IB = 0x9,
77 CMD_CLOSE_IB = 0xa,
78 CMD_QUERY_HCA = 0xb,
79 CMD_SET_IB = 0xc,
80 CMD_ACCESS_DDR = 0x2e,
81 CMD_MAP_ICM = 0xffa,
82 CMD_UNMAP_ICM = 0xff9,
83 CMD_MAP_ICM_AUX = 0xffc,
84 CMD_UNMAP_ICM_AUX = 0xffb,
85 CMD_SET_ICM_SIZE = 0xffd,
86
87 /* TPT commands */
88 CMD_SW2HW_MPT = 0xd,
89 CMD_QUERY_MPT = 0xe,
90 CMD_HW2SW_MPT = 0xf,
91 CMD_READ_MTT = 0x10,
92 CMD_WRITE_MTT = 0x11,
93 CMD_SYNC_TPT = 0x2f,
94
95 /* EQ commands */
96 CMD_MAP_EQ = 0x12,
97 CMD_SW2HW_EQ = 0x13,
98 CMD_HW2SW_EQ = 0x14,
99 CMD_QUERY_EQ = 0x15,
100
101 /* CQ commands */
102 CMD_SW2HW_CQ = 0x16,
103 CMD_HW2SW_CQ = 0x17,
104 CMD_QUERY_CQ = 0x18,
105 CMD_RESIZE_CQ = 0x2c,
106
107 /* SRQ commands */
108 CMD_SW2HW_SRQ = 0x35,
109 CMD_HW2SW_SRQ = 0x36,
110 CMD_QUERY_SRQ = 0x37,
111
112 /* QP/EE commands */
113 CMD_RST2INIT_QPEE = 0x19,
114 CMD_INIT2RTR_QPEE = 0x1a,
115 CMD_RTR2RTS_QPEE = 0x1b,
116 CMD_RTS2RTS_QPEE = 0x1c,
117 CMD_SQERR2RTS_QPEE = 0x1d,
118 CMD_2ERR_QPEE = 0x1e,
119 CMD_RTS2SQD_QPEE = 0x1f,
120 CMD_SQD2SQD_QPEE = 0x38,
121 CMD_SQD2RTS_QPEE = 0x20,
122 CMD_ERR2RST_QPEE = 0x21,
123 CMD_QUERY_QPEE = 0x22,
124 CMD_INIT2INIT_QPEE = 0x2d,
125 CMD_SUSPEND_QPEE = 0x32,
126 CMD_UNSUSPEND_QPEE = 0x33,
127 /* special QPs and management commands */
128 CMD_CONF_SPECIAL_QP = 0x23,
129 CMD_MAD_IFC = 0x24,
130
131 /* multicast commands */
132 CMD_READ_MGM = 0x25,
133 CMD_WRITE_MGM = 0x26,
134 CMD_MGID_HASH = 0x27,
135
136 /* miscellaneous commands */
137 CMD_DIAG_RPRT = 0x30,
138 CMD_NOP = 0x31,
139
140 /* debug commands */
141 CMD_QUERY_DEBUG_MSG = 0x2a,
142 CMD_SET_DEBUG_MSG = 0x2b,
143};
144
145/*
146 * According to Mellanox code, FW may be starved and never complete
147 * commands. So we can't use the strict timeouts described in the PRM --
148 * we just arbitrarily select 60 seconds for now.
149 */
150#if 0
151/*
152 * Round up and add 1 to make sure we get the full wait time (since we
153 * will be starting in the middle of a jiffy)
154 */
155enum {
156 CMD_TIME_CLASS_A = (HZ + 999) / 1000 + 1,
157 CMD_TIME_CLASS_B = (HZ + 99) / 100 + 1,
158 CMD_TIME_CLASS_C = (HZ + 9) / 10 + 1
159};
160#else
161enum {
162 CMD_TIME_CLASS_A = 60 * HZ,
163 CMD_TIME_CLASS_B = 60 * HZ,
164 CMD_TIME_CLASS_C = 60 * HZ
165};
166#endif
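/*
 * For reference, with HZ = 1000 the strict values above would give
 * CMD_TIME_CLASS_A = (1000 + 999) / 1000 + 1 = 2 jiffies, i.e. at least
 * one full millisecond even when the command is posted mid-jiffy; the
 * 60 * HZ values actually used trade that precision for robustness
 * against starved firmware.
 */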
167
168enum {
169 GO_BIT_TIMEOUT = HZ * 10
170};
171
172struct mthca_cmd_context {
173 struct completion done;
174 struct timer_list timer;
175 int result;
176 int next;
177 u64 out_param;
178 u16 token;
179 u8 status;
180};
181
182static inline int go_bit(struct mthca_dev *dev)
183{
184 return readl(dev->hcr + HCR_STATUS_OFFSET) &
185 swab32(1 << HCR_GO_BIT);
186}
187
188static int mthca_cmd_post(struct mthca_dev *dev,
189 u64 in_param,
190 u64 out_param,
191 u32 in_modifier,
192 u8 op_modifier,
193 u16 op,
194 u16 token,
195 int event)
196{
197 int err = 0;
198
199 if (down_interruptible(&dev->cmd.hcr_sem))
200 return -EINTR;
201
202 if (event) {
203 unsigned long end = jiffies + GO_BIT_TIMEOUT;
204
205 while (go_bit(dev) && time_before(jiffies, end)) {
206 set_current_state(TASK_RUNNING);
207 schedule();
208 }
209 }
210
211 if (go_bit(dev)) {
212 err = -EAGAIN;
213 goto out;
214 }
215
216 /*
217 * We use writel (instead of something like memcpy_toio)
218 * because writes of less than 32 bits to the HCR don't work
219 * (and some architectures such as ia64 implement memcpy_toio
220 * in terms of writeb).
221 */
222 __raw_writel(cpu_to_be32(in_param >> 32), dev->hcr + 0 * 4);
223 __raw_writel(cpu_to_be32(in_param & 0xfffffffful), dev->hcr + 1 * 4);
224 __raw_writel(cpu_to_be32(in_modifier), dev->hcr + 2 * 4);
225 __raw_writel(cpu_to_be32(out_param >> 32), dev->hcr + 3 * 4);
226 __raw_writel(cpu_to_be32(out_param & 0xfffffffful), dev->hcr + 4 * 4);
227 __raw_writel(cpu_to_be32(token << 16), dev->hcr + 5 * 4);
228
229 /* __raw_writel may not order writes. */
230 wmb();
231
232 __raw_writel(cpu_to_be32((1 << HCR_GO_BIT) |
233 (event ? (1 << HCA_E_BIT) : 0) |
234 (op_modifier << HCR_OPMOD_SHIFT) |
235 op), dev->hcr + 6 * 4);
236
237out:
238 up(&dev->cmd.hcr_sem);
239 return err;
240}
241
242static int mthca_cmd_poll(struct mthca_dev *dev,
243 u64 in_param,
244 u64 *out_param,
245 int out_is_imm,
246 u32 in_modifier,
247 u8 op_modifier,
248 u16 op,
249 unsigned long timeout,
250 u8 *status)
251{
252 int err = 0;
253 unsigned long end;
254
255 if (down_interruptible(&dev->cmd.poll_sem))
256 return -EINTR;
257
258 err = mthca_cmd_post(dev, in_param,
259 out_param ? *out_param : 0,
260 in_modifier, op_modifier,
261 op, CMD_POLL_TOKEN, 0);
262 if (err)
263 goto out;
264
265 end = timeout + jiffies;
266 while (go_bit(dev) && time_before(jiffies, end)) {
267 set_current_state(TASK_RUNNING);
268 schedule();
269 }
270
271 if (go_bit(dev)) {
272 err = -EBUSY;
273 goto out;
274 }
275
276 if (out_is_imm) {
277 memcpy_fromio(out_param, dev->hcr + HCR_OUT_PARAM_OFFSET, sizeof (u64));
278 be64_to_cpus(out_param);
279 }
280
281 *status = be32_to_cpu(__raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24;
282
283out:
284 up(&dev->cmd.poll_sem);
285 return err;
286}
287
288void mthca_cmd_event(struct mthca_dev *dev,
289 u16 token,
290 u8 status,
291 u64 out_param)
292{
293 struct mthca_cmd_context *context =
294 &dev->cmd.context[token & dev->cmd.token_mask];
295
296 /* previously timed out command completing at long last */
297 if (token != context->token)
298 return;
299
300 context->result = 0;
301 context->status = status;
302 context->out_param = out_param;
303
304 context->token += dev->cmd.token_mask + 1;
305
306 complete(&context->done);
307}
308
309static void event_timeout(unsigned long context_ptr)
310{
311 struct mthca_cmd_context *context =
312 (struct mthca_cmd_context *) context_ptr;
313
314 context->result = -EBUSY;
315 complete(&context->done);
316}
317
318static int mthca_cmd_wait(struct mthca_dev *dev,
319 u64 in_param,
320 u64 *out_param,
321 int out_is_imm,
322 u32 in_modifier,
323 u8 op_modifier,
324 u16 op,
325 unsigned long timeout,
326 u8 *status)
327{
328 int err = 0;
329 struct mthca_cmd_context *context;
330
331 if (down_interruptible(&dev->cmd.event_sem))
332 return -EINTR;
333
334 spin_lock(&dev->cmd.context_lock);
335 BUG_ON(dev->cmd.free_head < 0);
336 context = &dev->cmd.context[dev->cmd.free_head];
337 dev->cmd.free_head = context->next;
338 spin_unlock(&dev->cmd.context_lock);
339
340 init_completion(&context->done);
341
342 err = mthca_cmd_post(dev, in_param,
343 out_param ? *out_param : 0,
344 in_modifier, op_modifier,
345 op, context->token, 1);
346 if (err)
347 goto out;
348
349 context->timer.expires = jiffies + timeout;
350 add_timer(&context->timer);
351
352 wait_for_completion(&context->done);
353 del_timer_sync(&context->timer);
354
355 err = context->result;
356 if (err)
357 goto out;
358
359 *status = context->status;
360 if (*status)
361 mthca_dbg(dev, "Command %02x completed with status %02x\n",
362 op, *status);
363
364 if (out_is_imm)
365 *out_param = context->out_param;
366
367out:
368 spin_lock(&dev->cmd.context_lock);
369 context->next = dev->cmd.free_head;
370 dev->cmd.free_head = context - dev->cmd.context;
371 spin_unlock(&dev->cmd.context_lock);
372
373 up(&dev->cmd.event_sem);
374 return err;
375}
376
377/* Invoke a command with an output mailbox */
378static int mthca_cmd_box(struct mthca_dev *dev,
379 u64 in_param,
380 u64 out_param,
381 u32 in_modifier,
382 u8 op_modifier,
383 u16 op,
384 unsigned long timeout,
385 u8 *status)
386{
387 if (dev->cmd.use_events)
388 return mthca_cmd_wait(dev, in_param, &out_param, 0,
389 in_modifier, op_modifier, op,
390 timeout, status);
391 else
392 return mthca_cmd_poll(dev, in_param, &out_param, 0,
393 in_modifier, op_modifier, op,
394 timeout, status);
395}
396
397/* Invoke a command with no output parameter */
398static int mthca_cmd(struct mthca_dev *dev,
399 u64 in_param,
400 u32 in_modifier,
401 u8 op_modifier,
402 u16 op,
403 unsigned long timeout,
404 u8 *status)
405{
406 return mthca_cmd_box(dev, in_param, 0, in_modifier,
407 op_modifier, op, timeout, status);
408}
409
410/*
411 * Invoke a command with an immediate output parameter (and copy the
412 * output into the caller's out_param pointer after the command
413 * executes).
414 */
415static int mthca_cmd_imm(struct mthca_dev *dev,
416 u64 in_param,
417 u64 *out_param,
418 u32 in_modifier,
419 u8 op_modifier,
420 u16 op,
421 unsigned long timeout,
422 u8 *status)
423{
424 if (dev->cmd.use_events)
425 return mthca_cmd_wait(dev, in_param, out_param, 1,
426 in_modifier, op_modifier, op,
427 timeout, status);
428 else
429 return mthca_cmd_poll(dev, in_param, out_param, 1,
430 in_modifier, op_modifier, op,
431 timeout, status);
432}
433
434/*
435 * Switch to using events to issue FW commands (should be called after
436 * event queue to command events has been initialized).
437 */
438int mthca_cmd_use_events(struct mthca_dev *dev)
439{
440 int i;
441
442 dev->cmd.context = kmalloc(dev->cmd.max_cmds *
443 sizeof (struct mthca_cmd_context),
444 GFP_KERNEL);
445 if (!dev->cmd.context)
446 return -ENOMEM;
447
448 for (i = 0; i < dev->cmd.max_cmds; ++i) {
449 dev->cmd.context[i].token = i;
450 dev->cmd.context[i].next = i + 1;
451 init_timer(&dev->cmd.context[i].timer);
452 dev->cmd.context[i].timer.data =
453 (unsigned long) &dev->cmd.context[i];
454 dev->cmd.context[i].timer.function = event_timeout;
455 }
456
457 dev->cmd.context[dev->cmd.max_cmds - 1].next = -1;
458 dev->cmd.free_head = 0;
459
460 sema_init(&dev->cmd.event_sem, dev->cmd.max_cmds);
461 spin_lock_init(&dev->cmd.context_lock);
462
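	/*
	 * Round max_cmds up to the next power of two and subtract one to
	 * get a mask for the low bits of the command token.
	 */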
463 for (dev->cmd.token_mask = 1;
464 dev->cmd.token_mask < dev->cmd.max_cmds;
465 dev->cmd.token_mask <<= 1)
466 ; /* nothing */
467 --dev->cmd.token_mask;
468
469 dev->cmd.use_events = 1;
470 down(&dev->cmd.poll_sem);
471
472 return 0;
473}
474
475/*
476 * Switch back to polling (used when shutting down the device)
477 */
478void mthca_cmd_use_polling(struct mthca_dev *dev)
479{
480 int i;
481
482 dev->cmd.use_events = 0;
483
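	/*
	 * Acquire every event slot so that no event-driven commands are
	 * still outstanding before the context array is freed.
	 */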
484 for (i = 0; i < dev->cmd.max_cmds; ++i)
485 down(&dev->cmd.event_sem);
486
487 kfree(dev->cmd.context);
488
489 up(&dev->cmd.poll_sem);
490}
491
492int mthca_SYS_EN(struct mthca_dev *dev, u8 *status)
493{
494 u64 out;
495 int ret;
496
497 ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, HZ, status);
498
499 if (*status == MTHCA_CMD_STAT_DDR_MEM_ERR)
500 mthca_warn(dev, "SYS_EN DDR error: syn=%x, sock=%d, "
501 "sladdr=%d, SPD source=%s\n",
502 (int) (out >> 6) & 0xf, (int) (out >> 4) & 3,
503 (int) (out >> 1) & 7, (int) out & 1 ? "NVMEM" : "DIMM");
504
505 return ret;
506}
507
508int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status)
509{
510 return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, HZ, status);
511}
512
513static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
514 u64 virt, u8 *status)
515{
516 u32 *inbox;
517 dma_addr_t indma;
518 struct mthca_icm_iter iter;
519 int lg;
520 int nent = 0;
521 int i;
522 int err = 0;
523 int ts = 0, tc = 0;
524
525 inbox = pci_alloc_consistent(dev->pdev, PAGE_SIZE, &indma);
526 if (!inbox)
527 return -ENOMEM;
528
529 memset(inbox, 0, PAGE_SIZE);
530
531 for (mthca_icm_first(icm, &iter);
532 !mthca_icm_last(&iter);
533 mthca_icm_next(&iter)) {
534 /*
535 * We have to pass pages that are aligned to their
536 * size, so find the least significant 1 in the
537 * address or size and use that as our log2 size.
538 */
539 lg = ffs(mthca_icm_addr(&iter) | mthca_icm_size(&iter)) - 1;
540 if (lg < 12) {
541 mthca_warn(dev, "Got FW area not aligned to 4K (%llx/%lx).\n",
542 (unsigned long long) mthca_icm_addr(&iter),
543 mthca_icm_size(&iter));
544 err = -EINVAL;
545 goto out;
546 }
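		/*
		 * Each 16-byte mailbox entry carries an optional virtual
		 * address followed by the physical address, with the page
		 * size (log2, relative to 4K) encoded in its low bits.
		 */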
547 for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i) {
548 if (virt != -1) {
549 *((__be64 *) (inbox + nent * 4)) =
550 cpu_to_be64(virt);
551 virt += 1 << lg;
552 }
553
554 *((__be64 *) (inbox + nent * 4 + 2)) =
555 cpu_to_be64((mthca_icm_addr(&iter) +
556 (i << lg)) | (lg - 12));
557 ts += 1 << (lg - 10);
558 ++tc;
559
560 if (++nent == PAGE_SIZE / 16) {
561 err = mthca_cmd(dev, indma, nent, 0, op,
562 CMD_TIME_CLASS_B, status);
563 if (err || *status)
564 goto out;
565 nent = 0;
566 }
567 }
568 }
569
570 if (nent)
571 err = mthca_cmd(dev, indma, nent, 0, op,
572 CMD_TIME_CLASS_B, status);
573
574 switch (op) {
575 case CMD_MAP_FA:
576 mthca_dbg(dev, "Mapped %d chunks/%d KB for FW.\n", tc, ts);
577 break;
578 case CMD_MAP_ICM_AUX:
579 mthca_dbg(dev, "Mapped %d chunks/%d KB for ICM aux.\n", tc, ts);
580 break;
581 case CMD_MAP_ICM:
582 mthca_dbg(dev, "Mapped %d chunks/%d KB at %llx for ICM.\n",
583 tc, ts, (unsigned long long) virt - (ts << 10));
584 break;
585 }
586
587out:
588 pci_free_consistent(dev->pdev, PAGE_SIZE, inbox, indma);
589 return err;
590}
591
592int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status)
593{
594 return mthca_map_cmd(dev, CMD_MAP_FA, icm, -1, status);
595}
596
597int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status)
598{
599 return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_FA, CMD_TIME_CLASS_B, status);
600}
601
602int mthca_RUN_FW(struct mthca_dev *dev, u8 *status)
603{
604 return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A, status);
605}
606
607int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
608{
609 u32 *outbox;
610 dma_addr_t outdma;
611 int err = 0;
612 u8 lg;
613
614#define QUERY_FW_OUT_SIZE 0x100
615#define QUERY_FW_VER_OFFSET 0x00
616#define QUERY_FW_MAX_CMD_OFFSET 0x0f
617#define QUERY_FW_ERR_START_OFFSET 0x30
618#define QUERY_FW_ERR_SIZE_OFFSET 0x38
619
620#define QUERY_FW_START_OFFSET 0x20
621#define QUERY_FW_END_OFFSET 0x28
622
623#define QUERY_FW_SIZE_OFFSET 0x00
624#define QUERY_FW_CLR_INT_BASE_OFFSET 0x20
625#define QUERY_FW_EQ_ARM_BASE_OFFSET 0x40
626#define QUERY_FW_EQ_SET_CI_BASE_OFFSET 0x48
627
628 outbox = pci_alloc_consistent(dev->pdev, QUERY_FW_OUT_SIZE, &outdma);
629 if (!outbox) {
630 return -ENOMEM;
631 }
632
633 err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_FW,
634 CMD_TIME_CLASS_A, status);
635
636 if (err)
637 goto out;
638
639 MTHCA_GET(dev->fw_ver, outbox, QUERY_FW_VER_OFFSET);
640 /*
641 * FW subminor version is at more significant bits than minor
642 * version, so swap here.
643 */
644 dev->fw_ver = (dev->fw_ver & 0xffff00000000ull) |
645 ((dev->fw_ver & 0xffff0000ull) >> 16) |
646 ((dev->fw_ver & 0x0000ffffull) << 16);
647
648 MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET);
649 dev->cmd.max_cmds = 1 << lg;
650
651 mthca_dbg(dev, "FW version %012llx, max commands %d\n",
652 (unsigned long long) dev->fw_ver, dev->cmd.max_cmds);
653
654 if (dev->hca_type == ARBEL_NATIVE) {
655 MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET);
656 MTHCA_GET(dev->fw.arbel.clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET);
657 MTHCA_GET(dev->fw.arbel.eq_arm_base, outbox, QUERY_FW_EQ_ARM_BASE_OFFSET);
658 MTHCA_GET(dev->fw.arbel.eq_set_ci_base, outbox, QUERY_FW_EQ_SET_CI_BASE_OFFSET);
659 mthca_dbg(dev, "FW size %d KB\n", dev->fw.arbel.fw_pages << 2);
660
661 /*
662 * Arbel page size is always 4 KB; round up number of
663 * system pages needed.
664 */
665 dev->fw.arbel.fw_pages =
666 (dev->fw.arbel.fw_pages + (1 << (PAGE_SHIFT - 12)) - 1) >>
667 (PAGE_SHIFT - 12);
668
669 mthca_dbg(dev, "Clear int @ %llx, EQ arm @ %llx, EQ set CI @ %llx\n",
670 (unsigned long long) dev->fw.arbel.clr_int_base,
671 (unsigned long long) dev->fw.arbel.eq_arm_base,
672 (unsigned long long) dev->fw.arbel.eq_set_ci_base);
673 } else {
674 MTHCA_GET(dev->fw.tavor.fw_start, outbox, QUERY_FW_START_OFFSET);
675 MTHCA_GET(dev->fw.tavor.fw_end, outbox, QUERY_FW_END_OFFSET);
676
677 mthca_dbg(dev, "FW size %d KB (start %llx, end %llx)\n",
678 (int) ((dev->fw.tavor.fw_end - dev->fw.tavor.fw_start) >> 10),
679 (unsigned long long) dev->fw.tavor.fw_start,
680 (unsigned long long) dev->fw.tavor.fw_end);
681 }
682
683out:
684 pci_free_consistent(dev->pdev, QUERY_FW_OUT_SIZE, outbox, outdma);
685 return err;
686}
687
688int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
689{
690 u8 info;
691 u32 *outbox;
692 dma_addr_t outdma;
693 int err = 0;
694
695#define ENABLE_LAM_OUT_SIZE 0x100
696#define ENABLE_LAM_START_OFFSET 0x00
697#define ENABLE_LAM_END_OFFSET 0x08
698#define ENABLE_LAM_INFO_OFFSET 0x13
699
700#define ENABLE_LAM_INFO_HIDDEN_FLAG (1 << 4)
701#define ENABLE_LAM_INFO_ECC_MASK 0x3
702
703 outbox = pci_alloc_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, &outdma);
704 if (!outbox)
705 return -ENOMEM;
706
707 err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_ENABLE_LAM,
708 CMD_TIME_CLASS_C, status);
709
710 if (err)
711 goto out;
712
713 if (*status == MTHCA_CMD_STAT_LAM_NOT_PRE)
714 goto out;
715
716 MTHCA_GET(dev->ddr_start, outbox, ENABLE_LAM_START_OFFSET);
717 MTHCA_GET(dev->ddr_end, outbox, ENABLE_LAM_END_OFFSET);
718 MTHCA_GET(info, outbox, ENABLE_LAM_INFO_OFFSET);
719
720 if (!!(info & ENABLE_LAM_INFO_HIDDEN_FLAG) !=
721 !!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
722 mthca_info(dev, "FW reports that HCA-attached memory "
723 "is %s hidden; does not match PCI config\n",
724 (info & ENABLE_LAM_INFO_HIDDEN_FLAG) ?
725 "" : "not");
726 }
727 if (info & ENABLE_LAM_INFO_HIDDEN_FLAG)
728 mthca_dbg(dev, "HCA-attached memory is hidden.\n");
729
730 mthca_dbg(dev, "HCA memory size %d KB (start %llx, end %llx)\n",
731 (int) ((dev->ddr_end - dev->ddr_start) >> 10),
732 (unsigned long long) dev->ddr_start,
733 (unsigned long long) dev->ddr_end);
734
735out:
736 pci_free_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, outbox, outdma);
737 return err;
738}
739
740int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status)
741{
742 return mthca_cmd(dev, 0, 0, 0, CMD_DISABLE_LAM, CMD_TIME_CLASS_C, status);
743}
744
745int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
746{
747 u8 info;
748 u32 *outbox;
749 dma_addr_t outdma;
750 int err = 0;
751
752#define QUERY_DDR_OUT_SIZE 0x100
753#define QUERY_DDR_START_OFFSET 0x00
754#define QUERY_DDR_END_OFFSET 0x08
755#define QUERY_DDR_INFO_OFFSET 0x13
756
757#define QUERY_DDR_INFO_HIDDEN_FLAG (1 << 4)
758#define QUERY_DDR_INFO_ECC_MASK 0x3
759
760 outbox = pci_alloc_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, &outdma);
761 if (!outbox)
762 return -ENOMEM;
763
764 err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DDR,
765 CMD_TIME_CLASS_A, status);
766
767 if (err)
768 goto out;
769
770 MTHCA_GET(dev->ddr_start, outbox, QUERY_DDR_START_OFFSET);
771 MTHCA_GET(dev->ddr_end, outbox, QUERY_DDR_END_OFFSET);
772 MTHCA_GET(info, outbox, QUERY_DDR_INFO_OFFSET);
773
774 if (!!(info & QUERY_DDR_INFO_HIDDEN_FLAG) !=
775 !!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
776 mthca_info(dev, "FW reports that HCA-attached memory "
777 "is %s hidden; does not match PCI config\n",
778 (info & QUERY_DDR_INFO_HIDDEN_FLAG) ?
779 "" : "not");
780 }
781 if (info & QUERY_DDR_INFO_HIDDEN_FLAG)
782 mthca_dbg(dev, "HCA-attached memory is hidden.\n");
783
784 mthca_dbg(dev, "HCA memory size %d KB (start %llx, end %llx)\n",
785 (int) ((dev->ddr_end - dev->ddr_start) >> 10),
786 (unsigned long long) dev->ddr_start,
787 (unsigned long long) dev->ddr_end);
788
789out:
790 pci_free_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, outbox, outdma);
791 return err;
792}
793
794int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
795 struct mthca_dev_lim *dev_lim, u8 *status)
796{
797 u32 *outbox;
798 dma_addr_t outdma;
799 u8 field;
800 u16 size;
801 int err;
802
803#define QUERY_DEV_LIM_OUT_SIZE 0x100
804#define QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET 0x10
805#define QUERY_DEV_LIM_MAX_QP_SZ_OFFSET 0x11
806#define QUERY_DEV_LIM_RSVD_QP_OFFSET 0x12
807#define QUERY_DEV_LIM_MAX_QP_OFFSET 0x13
808#define QUERY_DEV_LIM_RSVD_SRQ_OFFSET 0x14
809#define QUERY_DEV_LIM_MAX_SRQ_OFFSET 0x15
810#define QUERY_DEV_LIM_RSVD_EEC_OFFSET 0x16
811#define QUERY_DEV_LIM_MAX_EEC_OFFSET 0x17
812#define QUERY_DEV_LIM_MAX_CQ_SZ_OFFSET 0x19
813#define QUERY_DEV_LIM_RSVD_CQ_OFFSET 0x1a
814#define QUERY_DEV_LIM_MAX_CQ_OFFSET 0x1b
815#define QUERY_DEV_LIM_MAX_MPT_OFFSET 0x1d
816#define QUERY_DEV_LIM_RSVD_EQ_OFFSET 0x1e
817#define QUERY_DEV_LIM_MAX_EQ_OFFSET 0x1f
818#define QUERY_DEV_LIM_RSVD_MTT_OFFSET 0x20
819#define QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET 0x21
820#define QUERY_DEV_LIM_RSVD_MRW_OFFSET 0x22
821#define QUERY_DEV_LIM_MAX_MTT_SEG_OFFSET 0x23
822#define QUERY_DEV_LIM_MAX_AV_OFFSET 0x27
823#define QUERY_DEV_LIM_MAX_REQ_QP_OFFSET 0x29
824#define QUERY_DEV_LIM_MAX_RES_QP_OFFSET 0x2b
825#define QUERY_DEV_LIM_MAX_RDMA_OFFSET 0x2f
826#define QUERY_DEV_LIM_RSZ_SRQ_OFFSET 0x33
827#define QUERY_DEV_LIM_ACK_DELAY_OFFSET 0x35
828#define QUERY_DEV_LIM_MTU_WIDTH_OFFSET 0x36
829#define QUERY_DEV_LIM_VL_PORT_OFFSET 0x37
830#define QUERY_DEV_LIM_MAX_GID_OFFSET 0x3b
831#define QUERY_DEV_LIM_MAX_PKEY_OFFSET 0x3f
832#define QUERY_DEV_LIM_FLAGS_OFFSET 0x44
833#define QUERY_DEV_LIM_RSVD_UAR_OFFSET 0x48
834#define QUERY_DEV_LIM_UAR_SZ_OFFSET 0x49
835#define QUERY_DEV_LIM_PAGE_SZ_OFFSET 0x4b
836#define QUERY_DEV_LIM_MAX_SG_OFFSET 0x51
837#define QUERY_DEV_LIM_MAX_DESC_SZ_OFFSET 0x52
838#define QUERY_DEV_LIM_MAX_SG_RQ_OFFSET 0x55
839#define QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET 0x56
840#define QUERY_DEV_LIM_MAX_QP_MCG_OFFSET 0x61
841#define QUERY_DEV_LIM_RSVD_MCG_OFFSET 0x62
842#define QUERY_DEV_LIM_MAX_MCG_OFFSET 0x63
843#define QUERY_DEV_LIM_RSVD_PD_OFFSET 0x64
844#define QUERY_DEV_LIM_MAX_PD_OFFSET 0x65
845#define QUERY_DEV_LIM_RSVD_RDD_OFFSET 0x66
846#define QUERY_DEV_LIM_MAX_RDD_OFFSET 0x67
847#define QUERY_DEV_LIM_EEC_ENTRY_SZ_OFFSET 0x80
848#define QUERY_DEV_LIM_QPC_ENTRY_SZ_OFFSET 0x82
849#define QUERY_DEV_LIM_EEEC_ENTRY_SZ_OFFSET 0x84
850#define QUERY_DEV_LIM_EQPC_ENTRY_SZ_OFFSET 0x86
851#define QUERY_DEV_LIM_EQC_ENTRY_SZ_OFFSET 0x88
852#define QUERY_DEV_LIM_CQC_ENTRY_SZ_OFFSET 0x8a
853#define QUERY_DEV_LIM_SRQ_ENTRY_SZ_OFFSET 0x8c
854#define QUERY_DEV_LIM_UAR_ENTRY_SZ_OFFSET 0x8e
855#define QUERY_DEV_LIM_MTT_ENTRY_SZ_OFFSET 0x90
856#define QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET 0x92
857#define QUERY_DEV_LIM_PBL_SZ_OFFSET 0x96
858#define QUERY_DEV_LIM_BMME_FLAGS_OFFSET 0x97
859#define QUERY_DEV_LIM_RSVD_LKEY_OFFSET 0x98
860#define QUERY_DEV_LIM_LAMR_OFFSET 0x9f
861#define QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET 0xa0
862
863 outbox = pci_alloc_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, &outdma);
864 if (!outbox)
865 return -ENOMEM;
866
867 err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DEV_LIM,
868 CMD_TIME_CLASS_A, status);
869
870 if (err)
871 goto out;
872
873 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
874 dev_lim->max_srq_sz = 1 << field;
875 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
876 dev_lim->max_qp_sz = 1 << field;
877 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET);
878 dev_lim->reserved_qps = 1 << (field & 0xf);
879 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET);
880 dev_lim->max_qps = 1 << (field & 0x1f);
881 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_SRQ_OFFSET);
882 dev_lim->reserved_srqs = 1 << (field >> 4);
883 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_OFFSET);
884 dev_lim->max_srqs = 1 << (field & 0x1f);
885 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_EEC_OFFSET);
886 dev_lim->reserved_eecs = 1 << (field & 0xf);
887 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EEC_OFFSET);
888 dev_lim->max_eecs = 1 << (field & 0x1f);
889 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_CQ_SZ_OFFSET);
890 dev_lim->max_cq_sz = 1 << field;
891 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_CQ_OFFSET);
892 dev_lim->reserved_cqs = 1 << (field & 0xf);
893 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_CQ_OFFSET);
894 dev_lim->max_cqs = 1 << (field & 0x1f);
895 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MPT_OFFSET);
896 dev_lim->max_mpts = 1 << (field & 0x3f);
897 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_EQ_OFFSET);
898 dev_lim->reserved_eqs = 1 << (field & 0xf);
899 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET);
900 dev_lim->max_eqs = 1 << (field & 0x7);
901 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET);
902 dev_lim->reserved_mtts = 1 << (field >> 4);
903 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET);
904 dev_lim->max_mrw_sz = 1 << field;
905 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET);
906 dev_lim->reserved_mrws = 1 << (field & 0xf);
907 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MTT_SEG_OFFSET);
908 dev_lim->max_mtt_seg = 1 << (field & 0x3f);
909 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_REQ_QP_OFFSET);
910 dev_lim->max_requester_per_qp = 1 << (field & 0x3f);
911 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RES_QP_OFFSET);
912 dev_lim->max_responder_per_qp = 1 << (field & 0x3f);
913 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RDMA_OFFSET);
914 dev_lim->max_rdma_global = 1 << (field & 0x3f);
915 MTHCA_GET(field, outbox, QUERY_DEV_LIM_ACK_DELAY_OFFSET);
916 dev_lim->local_ca_ack_delay = field & 0x1f;
917 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MTU_WIDTH_OFFSET);
918 dev_lim->max_mtu = field >> 4;
919 dev_lim->max_port_width = field & 0xf;
920 MTHCA_GET(field, outbox, QUERY_DEV_LIM_VL_PORT_OFFSET);
921 dev_lim->max_vl = field >> 4;
922 dev_lim->num_ports = field & 0xf;
923 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_GID_OFFSET);
924 dev_lim->max_gids = 1 << (field & 0xf);
925 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PKEY_OFFSET);
926 dev_lim->max_pkeys = 1 << (field & 0xf);
927 MTHCA_GET(dev_lim->flags, outbox, QUERY_DEV_LIM_FLAGS_OFFSET);
928 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_UAR_OFFSET);
929 dev_lim->reserved_uars = field >> 4;
930 MTHCA_GET(field, outbox, QUERY_DEV_LIM_UAR_SZ_OFFSET);
931 dev_lim->uar_size = 1 << ((field & 0x3f) + 20);
932 MTHCA_GET(field, outbox, QUERY_DEV_LIM_PAGE_SZ_OFFSET);
933 dev_lim->min_page_sz = 1 << field;
934 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_OFFSET);
935 dev_lim->max_sg = field;
936
937 MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_OFFSET);
938 dev_lim->max_desc_sz = size;
939
940 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_MCG_OFFSET);
941 dev_lim->max_qp_per_mcg = 1 << field;
942 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MCG_OFFSET);
943 dev_lim->reserved_mgms = field & 0xf;
944 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MCG_OFFSET);
945 dev_lim->max_mcgs = 1 << field;
946 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_PD_OFFSET);
947 dev_lim->reserved_pds = field >> 4;
948 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PD_OFFSET);
949 dev_lim->max_pds = 1 << (field & 0x3f);
950 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_RDD_OFFSET);
951 dev_lim->reserved_rdds = field >> 4;
952 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RDD_OFFSET);
953 dev_lim->max_rdds = 1 << (field & 0x3f);
954
955 MTHCA_GET(size, outbox, QUERY_DEV_LIM_EEC_ENTRY_SZ_OFFSET);
956 dev_lim->eec_entry_sz = size;
957 MTHCA_GET(size, outbox, QUERY_DEV_LIM_QPC_ENTRY_SZ_OFFSET);
958 dev_lim->qpc_entry_sz = size;
959 MTHCA_GET(size, outbox, QUERY_DEV_LIM_EEEC_ENTRY_SZ_OFFSET);
960 dev_lim->eeec_entry_sz = size;
961 MTHCA_GET(size, outbox, QUERY_DEV_LIM_EQPC_ENTRY_SZ_OFFSET);
962 dev_lim->eqpc_entry_sz = size;
963 MTHCA_GET(size, outbox, QUERY_DEV_LIM_EQC_ENTRY_SZ_OFFSET);
964 dev_lim->eqc_entry_sz = size;
965 MTHCA_GET(size, outbox, QUERY_DEV_LIM_CQC_ENTRY_SZ_OFFSET);
966 dev_lim->cqc_entry_sz = size;
967 MTHCA_GET(size, outbox, QUERY_DEV_LIM_SRQ_ENTRY_SZ_OFFSET);
968 dev_lim->srq_entry_sz = size;
969 MTHCA_GET(size, outbox, QUERY_DEV_LIM_UAR_ENTRY_SZ_OFFSET);
970 dev_lim->uar_scratch_entry_sz = size;
971
972 mthca_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n",
973 dev_lim->max_qps, dev_lim->reserved_qps, dev_lim->qpc_entry_sz);
974 mthca_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n",
975 dev_lim->max_cqs, dev_lim->reserved_cqs, dev_lim->cqc_entry_sz);
976 mthca_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n",
977 dev_lim->max_eqs, dev_lim->reserved_eqs, dev_lim->eqc_entry_sz);
978 mthca_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n",
979 dev_lim->reserved_mrws, dev_lim->reserved_mtts);
980 mthca_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n",
981 dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars);
982 mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n",
983 dev_lim->max_qp_per_mcg, dev_lim->reserved_mgms);
984
985 mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags);
986
987 if (dev->hca_type == ARBEL_NATIVE) {
988 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSZ_SRQ_OFFSET);
989 dev_lim->hca.arbel.resize_srq = field & 1;
990 MTHCA_GET(size, outbox, QUERY_DEV_LIM_MTT_ENTRY_SZ_OFFSET);
991 dev_lim->mtt_seg_sz = size;
992 MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET);
993 dev_lim->mpt_entry_sz = size;
994 MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET);
995 dev_lim->hca.arbel.max_pbl_sz = 1 << (field & 0x3f);
996 MTHCA_GET(dev_lim->hca.arbel.bmme_flags, outbox,
997 QUERY_DEV_LIM_BMME_FLAGS_OFFSET);
998 MTHCA_GET(dev_lim->hca.arbel.reserved_lkey, outbox,
999 QUERY_DEV_LIM_RSVD_LKEY_OFFSET);
1000 MTHCA_GET(field, outbox, QUERY_DEV_LIM_LAMR_OFFSET);
1001 dev_lim->hca.arbel.lam_required = field & 1;
1002 MTHCA_GET(dev_lim->hca.arbel.max_icm_sz, outbox,
1003 QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET);
1004
1005 if (dev_lim->hca.arbel.bmme_flags & 1)
1006 mthca_dbg(dev, "Base MM extensions: yes "
1007 "(flags %d, max PBL %d, rsvd L_Key %08x)\n",
1008 dev_lim->hca.arbel.bmme_flags,
1009 dev_lim->hca.arbel.max_pbl_sz,
1010 dev_lim->hca.arbel.reserved_lkey);
1011 else
1012 mthca_dbg(dev, "Base MM extensions: no\n");
1013
1014 mthca_dbg(dev, "Max ICM size %lld MB\n",
1015 (unsigned long long) dev_lim->hca.arbel.max_icm_sz >> 20);
1016 } else {
1017 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_AV_OFFSET);
1018 dev_lim->hca.tavor.max_avs = 1 << (field & 0x3f);
1019 dev_lim->mtt_seg_sz = MTHCA_MTT_SEG_SIZE;
1020 dev_lim->mpt_entry_sz = MTHCA_MPT_ENTRY_SIZE;
1021 }
1022
1023out:
1024 pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma);
1025 return err;
1026}
1027
1028int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
1029 struct mthca_adapter *adapter, u8 *status)
1030{
1031 u32 *outbox;
1032 dma_addr_t outdma;
1033 int err;
1034
1035#define QUERY_ADAPTER_OUT_SIZE 0x100
1036#define QUERY_ADAPTER_VENDOR_ID_OFFSET 0x00
1037#define QUERY_ADAPTER_DEVICE_ID_OFFSET 0x04
1038#define QUERY_ADAPTER_REVISION_ID_OFFSET 0x08
1039#define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10
1040
1041 outbox = pci_alloc_consistent(dev->pdev, QUERY_ADAPTER_OUT_SIZE, &outdma);
1042 if (!outbox)
1043 return -ENOMEM;
1044
1045 err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_ADAPTER,
1046 CMD_TIME_CLASS_A, status);
1047
1048 if (err)
1049 goto out;
1050
1051 MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET);
1052 MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET);
1053 MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET);
1054 MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET);
1055
1056out:
1057 pci_free_consistent(dev->pdev, QUERY_ADAPTER_OUT_SIZE, outbox, outdma);
1058 return err;
1059}
1060
1061int mthca_INIT_HCA(struct mthca_dev *dev,
1062 struct mthca_init_hca_param *param,
1063 u8 *status)
1064{
1065 u32 *inbox;
1066 dma_addr_t indma;
1067 int err;
1068
1069#define INIT_HCA_IN_SIZE 0x200
1070#define INIT_HCA_FLAGS_OFFSET 0x014
1071#define INIT_HCA_QPC_OFFSET 0x020
1072#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
1073#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17)
1074#define INIT_HCA_EEC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x20)
1075#define INIT_HCA_LOG_EEC_OFFSET (INIT_HCA_QPC_OFFSET + 0x27)
1076#define INIT_HCA_SRQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x28)
1077#define INIT_HCA_LOG_SRQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x2f)
1078#define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30)
1079#define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37)
1080#define INIT_HCA_EQPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40)
1081#define INIT_HCA_EEEC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50)
1082#define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60)
1083#define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67)
1084#define INIT_HCA_RDB_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x70)
1085#define INIT_HCA_UDAV_OFFSET 0x0b0
1086#define INIT_HCA_UDAV_LKEY_OFFSET (INIT_HCA_UDAV_OFFSET + 0x0)
1087#define INIT_HCA_UDAV_PD_OFFSET (INIT_HCA_UDAV_OFFSET + 0x4)
1088#define INIT_HCA_MCAST_OFFSET 0x0c0
1089#define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00)
1090#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12)
1091#define INIT_HCA_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16)
1092#define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b)
1093#define INIT_HCA_TPT_OFFSET 0x0f0
1094#define INIT_HCA_MPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00)
1095#define INIT_HCA_MTT_SEG_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x09)
1096#define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b)
1097#define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10)
1098#define INIT_HCA_UAR_OFFSET 0x120
1099#define INIT_HCA_UAR_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x00)
1100#define INIT_HCA_UARC_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x09)
1101#define INIT_HCA_LOG_UAR_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0a)
1102#define INIT_HCA_UAR_PAGE_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0b)
1103#define INIT_HCA_UAR_SCATCH_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x10)
1104#define INIT_HCA_UAR_CTX_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x18)
1105
1106 inbox = pci_alloc_consistent(dev->pdev, INIT_HCA_IN_SIZE, &indma);
1107 if (!inbox)
1108 return -ENOMEM;
1109
1110 memset(inbox, 0, INIT_HCA_IN_SIZE);
1111
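	/*
	 * Bit 1 of the flags word is set on big-endian hosts and cleared
	 * on little-endian ones, telling the HCA the host byte order.
	 */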
1112#if defined(__LITTLE_ENDIAN)
1113 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
1114#elif defined(__BIG_ENDIAN)
1115 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1);
1116#else
1117#error Host endianness not defined
1118#endif
1119 /* Check port for UD address vector: */
1120 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1);
1121
1122 /* We leave wqe_quota, responder_exu, etc as 0 (default) */
1123
1124 /* QPC/EEC/CQC/EQC/RDB attributes */
1125
1126 MTHCA_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET);
1127 MTHCA_PUT(inbox, param->log_num_qps, INIT_HCA_LOG_QP_OFFSET);
1128 MTHCA_PUT(inbox, param->eec_base, INIT_HCA_EEC_BASE_OFFSET);
1129 MTHCA_PUT(inbox, param->log_num_eecs, INIT_HCA_LOG_EEC_OFFSET);
1130 MTHCA_PUT(inbox, param->srqc_base, INIT_HCA_SRQC_BASE_OFFSET);
1131 MTHCA_PUT(inbox, param->log_num_srqs, INIT_HCA_LOG_SRQ_OFFSET);
1132 MTHCA_PUT(inbox, param->cqc_base, INIT_HCA_CQC_BASE_OFFSET);
1133 MTHCA_PUT(inbox, param->log_num_cqs, INIT_HCA_LOG_CQ_OFFSET);
1134 MTHCA_PUT(inbox, param->eqpc_base, INIT_HCA_EQPC_BASE_OFFSET);
1135 MTHCA_PUT(inbox, param->eeec_base, INIT_HCA_EEEC_BASE_OFFSET);
1136 MTHCA_PUT(inbox, param->eqc_base, INIT_HCA_EQC_BASE_OFFSET);
1137 MTHCA_PUT(inbox, param->log_num_eqs, INIT_HCA_LOG_EQ_OFFSET);
1138 MTHCA_PUT(inbox, param->rdb_base, INIT_HCA_RDB_BASE_OFFSET);
1139
1140 /* UD AV attributes */
1141
1142 /* multicast attributes */
1143
1144 MTHCA_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET);
1145 MTHCA_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
1146 MTHCA_PUT(inbox, param->mc_hash_sz, INIT_HCA_MC_HASH_SZ_OFFSET);
1147 MTHCA_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
1148
1149 /* TPT attributes */
1150
1151 MTHCA_PUT(inbox, param->mpt_base, INIT_HCA_MPT_BASE_OFFSET);
1152 if (dev->hca_type != ARBEL_NATIVE)
1153 MTHCA_PUT(inbox, param->mtt_seg_sz, INIT_HCA_MTT_SEG_SZ_OFFSET);
1154 MTHCA_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET);
1155 MTHCA_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET);
1156
1157 /* UAR attributes */
1158 {
1159 u8 uar_page_sz = PAGE_SHIFT - 12;
1160 MTHCA_PUT(inbox, uar_page_sz, INIT_HCA_UAR_PAGE_SZ_OFFSET);
1161 }
1162
1163 MTHCA_PUT(inbox, param->uar_scratch_base, INIT_HCA_UAR_SCATCH_BASE_OFFSET);
1164
1165 if (dev->hca_type == ARBEL_NATIVE) {
1166 MTHCA_PUT(inbox, param->log_uarc_sz, INIT_HCA_UARC_SZ_OFFSET);
1167 MTHCA_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET);
1168 MTHCA_PUT(inbox, param->uarc_base, INIT_HCA_UAR_CTX_BASE_OFFSET);
1169 }
1170
1171 err = mthca_cmd(dev, indma, 0, 0, CMD_INIT_HCA,
1172 HZ, status);
1173
1174 pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma);
1175 return err;
1176}
1177
1178int mthca_INIT_IB(struct mthca_dev *dev,
1179 struct mthca_init_ib_param *param,
1180 int port, u8 *status)
1181{
1182 u32 *inbox;
1183 dma_addr_t indma;
1184 int err;
1185 u32 flags;
1186
1187#define INIT_IB_IN_SIZE 56
1188#define INIT_IB_FLAGS_OFFSET 0x00
1189#define INIT_IB_FLAG_SIG (1 << 18)
1190#define INIT_IB_FLAG_NG (1 << 17)
1191#define INIT_IB_FLAG_G0 (1 << 16)
1192#define INIT_IB_FLAG_1X (1 << 8)
1193#define INIT_IB_FLAG_4X (1 << 9)
1194#define INIT_IB_FLAG_12X (1 << 11)
1195#define INIT_IB_VL_SHIFT 4
1196#define INIT_IB_MTU_SHIFT 12
1197#define INIT_IB_MAX_GID_OFFSET 0x06
1198#define INIT_IB_MAX_PKEY_OFFSET 0x0a
1199#define INIT_IB_GUID0_OFFSET 0x10
1200#define INIT_IB_NODE_GUID_OFFSET 0x18
1201#define INIT_IB_SI_GUID_OFFSET 0x20
1202
1203 inbox = pci_alloc_consistent(dev->pdev, INIT_IB_IN_SIZE, &indma);
1204 if (!inbox)
1205 return -ENOMEM;
1206
1207 memset(inbox, 0, INIT_IB_IN_SIZE);
1208
1209 flags = 0;
1210 flags |= param->enable_1x ? INIT_IB_FLAG_1X : 0;
1211 flags |= param->enable_4x ? INIT_IB_FLAG_4X : 0;
1212 flags |= param->set_guid0 ? INIT_IB_FLAG_G0 : 0;
1213 flags |= param->set_node_guid ? INIT_IB_FLAG_NG : 0;
1214 flags |= param->set_si_guid ? INIT_IB_FLAG_SIG : 0;
1215 flags |= param->vl_cap << INIT_IB_VL_SHIFT;
1216 flags |= param->mtu_cap << INIT_IB_MTU_SHIFT;
1217 MTHCA_PUT(inbox, flags, INIT_IB_FLAGS_OFFSET);
1218
1219 MTHCA_PUT(inbox, param->gid_cap, INIT_IB_MAX_GID_OFFSET);
1220 MTHCA_PUT(inbox, param->pkey_cap, INIT_IB_MAX_PKEY_OFFSET);
1221 MTHCA_PUT(inbox, param->guid0, INIT_IB_GUID0_OFFSET);
1222 MTHCA_PUT(inbox, param->node_guid, INIT_IB_NODE_GUID_OFFSET);
1223 MTHCA_PUT(inbox, param->si_guid, INIT_IB_SI_GUID_OFFSET);
1224
1225 err = mthca_cmd(dev, indma, port, 0, CMD_INIT_IB,
1226 CMD_TIME_CLASS_A, status);
1227
1228 pci_free_consistent(dev->pdev, INIT_IB_IN_SIZE, inbox, indma);
1229 return err;
1230}
1231
1232int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status)
1233{
1234 return mthca_cmd(dev, 0, port, 0, CMD_CLOSE_IB, HZ, status);
1235}
1236
1237int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status)
1238{
1239 return mthca_cmd(dev, 0, 0, panic, CMD_CLOSE_HCA, HZ, status);
1240}
1241
1242int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
1243 int port, u8 *status)
1244{
1245 u32 *inbox;
1246 dma_addr_t indma;
1247 int err;
1248 u32 flags = 0;
1249
1250#define SET_IB_IN_SIZE 0x40
1251#define SET_IB_FLAGS_OFFSET 0x00
1252#define SET_IB_FLAG_SIG (1 << 18)
1253#define SET_IB_FLAG_RQK (1 << 0)
1254#define SET_IB_CAP_MASK_OFFSET 0x04
1255#define SET_IB_SI_GUID_OFFSET 0x08
1256
1257 inbox = pci_alloc_consistent(dev->pdev, SET_IB_IN_SIZE, &indma);
1258 if (!inbox)
1259 return -ENOMEM;
1260
1261 memset(inbox, 0, SET_IB_IN_SIZE);
1262
1263 flags |= param->set_si_guid ? SET_IB_FLAG_SIG : 0;
1264 flags |= param->reset_qkey_viol ? SET_IB_FLAG_RQK : 0;
1265 MTHCA_PUT(inbox, flags, SET_IB_FLAGS_OFFSET);
1266
1267 MTHCA_PUT(inbox, param->cap_mask, SET_IB_CAP_MASK_OFFSET);
1268 MTHCA_PUT(inbox, param->si_guid, SET_IB_SI_GUID_OFFSET);
1269
1270 err = mthca_cmd(dev, indma, port, 0, CMD_SET_IB,
1271 CMD_TIME_CLASS_B, status);
1272
1273 pci_free_consistent(dev->pdev, SET_IB_IN_SIZE, inbox, indma);
1274 return err;
1275}
1276
1277int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status)
1278{
1279 return mthca_map_cmd(dev, CMD_MAP_ICM, icm, virt, status);
1280}
1281
1282int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status)
1283{
1284 u64 *inbox;
1285 dma_addr_t indma;
1286 int err;
1287
1288 inbox = pci_alloc_consistent(dev->pdev, 16, &indma);
1289 if (!inbox)
1290 return -ENOMEM;
1291
1292 inbox[0] = cpu_to_be64(virt);
1293 inbox[1] = cpu_to_be64(dma_addr);
1294
1295 err = mthca_cmd(dev, indma, 1, 0, CMD_MAP_ICM, CMD_TIME_CLASS_B, status);
1296
1297 pci_free_consistent(dev->pdev, 16, inbox, indma);
1298
1299 if (!err)
1300 mthca_dbg(dev, "Mapped page at %llx for ICM.\n",
1301 (unsigned long long) virt);
1302
1303 return err;
1304}
1305
1306int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status)
1307{
1308 mthca_dbg(dev, "Unmapping %d pages at %llx from ICM.\n",
1309 page_count, (unsigned long long) virt);
1310
1311 return mthca_cmd(dev, virt, page_count, 0, CMD_UNMAP_ICM, CMD_TIME_CLASS_B, status);
1312}
1313
1314int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status)
1315{
1316 return mthca_map_cmd(dev, CMD_MAP_ICM_AUX, icm, -1, status);
1317}
1318
1319int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status)
1320{
1321 return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_ICM_AUX, CMD_TIME_CLASS_B, status);
1322}
1323
1324int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
1325 u8 *status)
1326{
1327 int ret = mthca_cmd_imm(dev, icm_size, aux_pages, 0, 0, CMD_SET_ICM_SIZE,
1328 CMD_TIME_CLASS_A, status);
1329
1330 if (ret || *status)
1331 return ret;
1332
1333 /*
1334 * Arbel page size is always 4 KB; round up number of system
1335 * pages needed.
1336 */
1337 *aux_pages = (*aux_pages + (1 << (PAGE_SHIFT - 12)) - 1) >> (PAGE_SHIFT - 12);
1338
1339 return 0;
1340}
1341
1342int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry,
1343 int mpt_index, u8 *status)
1344{
1345 dma_addr_t indma;
1346 int err;
1347
1348 indma = pci_map_single(dev->pdev, mpt_entry,
1349 MTHCA_MPT_ENTRY_SIZE,
1350 PCI_DMA_TODEVICE);
1351 if (pci_dma_mapping_error(indma))
1352 return -ENOMEM;
1353
1354 err = mthca_cmd(dev, indma, mpt_index, 0, CMD_SW2HW_MPT,
1355 CMD_TIME_CLASS_B, status);
1356
1357 pci_unmap_single(dev->pdev, indma,
1358 MTHCA_MPT_ENTRY_SIZE, PCI_DMA_TODEVICE);
1359 return err;
1360}
1361
1362int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry,
1363 int mpt_index, u8 *status)
1364{
1365 dma_addr_t outdma = 0;
1366 int err;
1367
1368 if (mpt_entry) {
1369 outdma = pci_map_single(dev->pdev, mpt_entry,
1370 MTHCA_MPT_ENTRY_SIZE,
1371 PCI_DMA_FROMDEVICE);
1372 if (pci_dma_mapping_error(outdma))
1373 return -ENOMEM;
1374 }
1375
1376 err = mthca_cmd_box(dev, 0, outdma, mpt_index, !mpt_entry,
1377 CMD_HW2SW_MPT,
1378 CMD_TIME_CLASS_B, status);
1379
1380 if (mpt_entry)
1381 pci_unmap_single(dev->pdev, outdma,
1382 MTHCA_MPT_ENTRY_SIZE,
1383 PCI_DMA_FROMDEVICE);
1384 return err;
1385}
1386
1387int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry,
1388 int num_mtt, u8 *status)
1389{
1390 dma_addr_t indma;
1391 int err;
1392
1393 indma = pci_map_single(dev->pdev, mtt_entry,
1394 (num_mtt + 2) * 8,
1395 PCI_DMA_TODEVICE);
1396 if (pci_dma_mapping_error(indma))
1397 return -ENOMEM;
1398
1399 err = mthca_cmd(dev, indma, num_mtt, 0, CMD_WRITE_MTT,
1400 CMD_TIME_CLASS_B, status);
1401
1402 pci_unmap_single(dev->pdev, indma,
1403 (num_mtt + 2) * 8, PCI_DMA_TODEVICE);
1404 return err;
1405}
1406
1407int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
1408 int eq_num, u8 *status)
1409{
1410 mthca_dbg(dev, "%s mask %016llx for eqn %d\n",
1411 unmap ? "Clearing" : "Setting",
1412 (unsigned long long) event_mask, eq_num);
1413 return mthca_cmd(dev, event_mask, (unmap << 31) | eq_num,
1414 0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status);
1415}
1416
1417int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context,
1418 int eq_num, u8 *status)
1419{
1420 dma_addr_t indma;
1421 int err;
1422
1423 indma = pci_map_single(dev->pdev, eq_context,
1424 MTHCA_EQ_CONTEXT_SIZE,
1425 PCI_DMA_TODEVICE);
1426 if (pci_dma_mapping_error(indma))
1427 return -ENOMEM;
1428
1429 err = mthca_cmd(dev, indma, eq_num, 0, CMD_SW2HW_EQ,
1430 CMD_TIME_CLASS_A, status);
1431
1432 pci_unmap_single(dev->pdev, indma,
1433 MTHCA_EQ_CONTEXT_SIZE, PCI_DMA_TODEVICE);
1434 return err;
1435}
1436
1437int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context,
1438 int eq_num, u8 *status)
1439{
1440 dma_addr_t outdma = 0;
1441 int err;
1442
1443 outdma = pci_map_single(dev->pdev, eq_context,
1444 MTHCA_EQ_CONTEXT_SIZE,
1445 PCI_DMA_FROMDEVICE);
1446 if (pci_dma_mapping_error(outdma))
1447 return -ENOMEM;
1448
1449 err = mthca_cmd_box(dev, 0, outdma, eq_num, 0,
1450 CMD_HW2SW_EQ,
1451 CMD_TIME_CLASS_A, status);
1452
1453 pci_unmap_single(dev->pdev, outdma,
1454 MTHCA_EQ_CONTEXT_SIZE,
1455 PCI_DMA_FROMDEVICE);
1456 return err;
1457}
1458
1459int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context,
1460 int cq_num, u8 *status)
1461{
1462 dma_addr_t indma;
1463 int err;
1464
1465 indma = pci_map_single(dev->pdev, cq_context,
1466 MTHCA_CQ_CONTEXT_SIZE,
1467 PCI_DMA_TODEVICE);
1468 if (pci_dma_mapping_error(indma))
1469 return -ENOMEM;
1470
1471 err = mthca_cmd(dev, indma, cq_num, 0, CMD_SW2HW_CQ,
1472 CMD_TIME_CLASS_A, status);
1473
1474 pci_unmap_single(dev->pdev, indma,
1475 MTHCA_CQ_CONTEXT_SIZE, PCI_DMA_TODEVICE);
1476 return err;
1477}
1478
1479int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context,
1480 int cq_num, u8 *status)
1481{
1482 dma_addr_t outdma = 0;
1483 int err;
1484
1485 outdma = pci_map_single(dev->pdev, cq_context,
1486 MTHCA_CQ_CONTEXT_SIZE,
1487 PCI_DMA_FROMDEVICE);
1488 if (pci_dma_mapping_error(outdma))
1489 return -ENOMEM;
1490
1491 err = mthca_cmd_box(dev, 0, outdma, cq_num, 0,
1492 CMD_HW2SW_CQ,
1493 CMD_TIME_CLASS_A, status);
1494
1495 pci_unmap_single(dev->pdev, outdma,
1496 MTHCA_CQ_CONTEXT_SIZE,
1497 PCI_DMA_FROMDEVICE);
1498 return err;
1499}
1500
1501int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
1502 int is_ee, void *qp_context, u32 optmask,
1503 u8 *status)
1504{
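	/* Table mapping each QP/EE state transition to the corresponding firmware opcode. */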
1505 static const u16 op[] = {
1506 [MTHCA_TRANS_RST2INIT] = CMD_RST2INIT_QPEE,
1507 [MTHCA_TRANS_INIT2INIT] = CMD_INIT2INIT_QPEE,
1508 [MTHCA_TRANS_INIT2RTR] = CMD_INIT2RTR_QPEE,
1509 [MTHCA_TRANS_RTR2RTS] = CMD_RTR2RTS_QPEE,
1510 [MTHCA_TRANS_RTS2RTS] = CMD_RTS2RTS_QPEE,
1511 [MTHCA_TRANS_SQERR2RTS] = CMD_SQERR2RTS_QPEE,
1512 [MTHCA_TRANS_ANY2ERR] = CMD_2ERR_QPEE,
1513 [MTHCA_TRANS_RTS2SQD] = CMD_RTS2SQD_QPEE,
1514 [MTHCA_TRANS_SQD2SQD] = CMD_SQD2SQD_QPEE,
1515 [MTHCA_TRANS_SQD2RTS] = CMD_SQD2RTS_QPEE,
1516 [MTHCA_TRANS_ANY2RST] = CMD_ERR2RST_QPEE
1517 };
1518 u8 op_mod = 0;
1519
1520 dma_addr_t indma;
1521 int err;
1522
1523 if (trans < 0 || trans >= ARRAY_SIZE(op))
1524 return -EINVAL;
1525
1526 if (trans == MTHCA_TRANS_ANY2RST) {
1527 indma = 0;
1528 op_mod = 3; /* don't write outbox, any->reset */
1529
1530 /* For debugging */
1531 qp_context = pci_alloc_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE,
1532 &indma);
1533 op_mod = 2; /* write outbox, any->reset */
1534 } else {
1535 indma = pci_map_single(dev->pdev, qp_context,
1536 MTHCA_QP_CONTEXT_SIZE,
1537 PCI_DMA_TODEVICE);
1538 if (pci_dma_mapping_error(indma))
1539 return -ENOMEM;
1540
1541 if (0) {
1542 int i;
1543 mthca_dbg(dev, "Dumping QP context:\n");
1544 printk(" opt param mask: %08x\n", be32_to_cpup(qp_context));
1545 for (i = 0; i < 0x100 / 4; ++i) {
1546 if (i % 8 == 0)
1547 printk(" [%02x] ", i * 4);
1548 printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2]));
1549 if ((i + 1) % 8 == 0)
1550 printk("\n");
1551 }
1552 }
1553 }
1554
1555 if (trans == MTHCA_TRANS_ANY2RST) {
1556 err = mthca_cmd_box(dev, 0, indma, (!!is_ee << 24) | num,
1557 op_mod, op[trans], CMD_TIME_CLASS_C, status);
1558
1559 if (0) {
1560 int i;
1561 mthca_dbg(dev, "Dumping QP context:\n");
1562 printk(" %08x\n", be32_to_cpup(qp_context));
1563 for (i = 0; i < 0x100 / 4; ++i) {
1564 if (i % 8 == 0)
1565 printk("[%02x] ", i * 4);
1566 printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2]));
1567 if ((i + 1) % 8 == 0)
1568 printk("\n");
1569 }
1570 }
1571
1572 } else
1573 err = mthca_cmd(dev, indma, (!!is_ee << 24) | num,
1574 op_mod, op[trans], CMD_TIME_CLASS_C, status);
1575
1576 if (trans != MTHCA_TRANS_ANY2RST)
1577 pci_unmap_single(dev->pdev, indma,
1578 MTHCA_QP_CONTEXT_SIZE, PCI_DMA_TODEVICE);
1579 else
1580 pci_free_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE,
1581 qp_context, indma);
1582 return err;
1583}
1584
1585int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
1586 void *qp_context, u8 *status)
1587{
1588 dma_addr_t outdma = 0;
1589 int err;
1590
1591 outdma = pci_map_single(dev->pdev, qp_context,
1592 MTHCA_QP_CONTEXT_SIZE,
1593 PCI_DMA_FROMDEVICE);
1594 if (pci_dma_mapping_error(outdma))
1595 return -ENOMEM;
1596
1597 err = mthca_cmd_box(dev, 0, outdma, (!!is_ee << 24) | num, 0,
1598 CMD_QUERY_QPEE,
1599 CMD_TIME_CLASS_A, status);
1600
1601 pci_unmap_single(dev->pdev, outdma,
1602 MTHCA_QP_CONTEXT_SIZE,
1603 PCI_DMA_FROMDEVICE);
1604 return err;
1605}
1606
1607int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
1608 u8 *status)
1609{
1610 u8 op_mod;
1611
1612 switch (type) {
1613 case IB_QPT_SMI:
1614 op_mod = 0;
1615 break;
1616 case IB_QPT_GSI:
1617 op_mod = 1;
1618 break;
1619 case IB_QPT_RAW_IPV6:
1620 op_mod = 2;
1621 break;
1622 case IB_QPT_RAW_ETY:
1623 op_mod = 3;
1624 break;
1625 default:
1626 return -EINVAL;
1627 }
1628
1629 return mthca_cmd(dev, 0, qpn, op_mod, CMD_CONF_SPECIAL_QP,
1630 CMD_TIME_CLASS_B, status);
1631}
1632
1633int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
1634 int port, struct ib_wc* in_wc, struct ib_grh* in_grh,
1635 void *in_mad, void *response_mad, u8 *status)
1636{
1637 void *box;
1638 dma_addr_t dma;
1639 int err;
1640 u32 in_modifier = port;
1641 u8 op_modifier = 0;
1642
1643#define MAD_IFC_BOX_SIZE 0x400
1644#define MAD_IFC_MY_QPN_OFFSET 0x100
1645#define MAD_IFC_RQPN_OFFSET 0x104
1646#define MAD_IFC_SL_OFFSET 0x108
1647#define MAD_IFC_G_PATH_OFFSET 0x109
1648#define MAD_IFC_RLID_OFFSET 0x10a
1649#define MAD_IFC_PKEY_OFFSET 0x10e
1650#define MAD_IFC_GRH_OFFSET 0x140
1651
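	/*
	 * Mailbox layout: the incoming MAD sits at offset 0, the optional
	 * work-completion info used for key-violation traps at 0x100, and
	 * the response MAD is returned at offset 0x200.
	 */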
1652 box = pci_alloc_consistent(dev->pdev, MAD_IFC_BOX_SIZE, &dma);
1653 if (!box)
1654 return -ENOMEM;
1655
1656 memcpy(box, in_mad, 256);
1657
1658 /*
1659 * Key check traps can't be generated unless we have in_wc to
1660 * tell us where to send the trap.
1661 */
1662 if (ignore_mkey || !in_wc)
1663 op_modifier |= 0x1;
1664 if (ignore_bkey || !in_wc)
1665 op_modifier |= 0x2;
1666
1667 if (in_wc) {
1668 u8 val;
1669
1670 memset(box + 256, 0, 256);
1671
1672 MTHCA_PUT(box, in_wc->qp_num, MAD_IFC_MY_QPN_OFFSET);
1673 MTHCA_PUT(box, in_wc->src_qp, MAD_IFC_RQPN_OFFSET);
1674
1675 val = in_wc->sl << 4;
1676 MTHCA_PUT(box, val, MAD_IFC_SL_OFFSET);
1677
1678 val = in_wc->dlid_path_bits |
1679 (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
1680 MTHCA_PUT(box, val, MAD_IFC_G_PATH_OFFSET);
1681
1682 MTHCA_PUT(box, in_wc->slid, MAD_IFC_RLID_OFFSET);
1683 MTHCA_PUT(box, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET);
1684
1685 if (in_grh)
1686 memcpy((u8 *) box + MAD_IFC_GRH_OFFSET, in_grh, 40);
1687
1688 op_modifier |= 0x10;
1689
1690 in_modifier |= in_wc->slid << 16;
1691 }
1692
1693 err = mthca_cmd_box(dev, dma, dma + 512, in_modifier, op_modifier,
1694 CMD_MAD_IFC, CMD_TIME_CLASS_C, status);
1695
1696 if (!err && !*status)
1697 memcpy(response_mad, box + 512, 256);
1698
1699 pci_free_consistent(dev->pdev, MAD_IFC_BOX_SIZE, box, dma);
1700 return err;
1701}
1702
1703int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm,
1704 u8 *status)
1705{
1706 dma_addr_t outdma = 0;
1707 int err;
1708
1709 outdma = pci_map_single(dev->pdev, mgm,
1710 MTHCA_MGM_ENTRY_SIZE,
1711 PCI_DMA_FROMDEVICE);
1712 if (pci_dma_mapping_error(outdma))
1713 return -ENOMEM;
1714
1715 err = mthca_cmd_box(dev, 0, outdma, index, 0,
1716 CMD_READ_MGM,
1717 CMD_TIME_CLASS_A, status);
1718
1719 pci_unmap_single(dev->pdev, outdma,
1720 MTHCA_MGM_ENTRY_SIZE,
1721 PCI_DMA_FROMDEVICE);
1722 return err;
1723}
1724
1725int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm,
1726 u8 *status)
1727{
1728 dma_addr_t indma;
1729 int err;
1730
1731 indma = pci_map_single(dev->pdev, mgm,
1732 MTHCA_MGM_ENTRY_SIZE,
1733 PCI_DMA_TODEVICE);
1734 if (pci_dma_mapping_error(indma))
1735 return -ENOMEM;
1736
1737 err = mthca_cmd(dev, indma, index, 0, CMD_WRITE_MGM,
1738 CMD_TIME_CLASS_A, status);
1739
1740 pci_unmap_single(dev->pdev, indma,
1741 MTHCA_MGM_ENTRY_SIZE, PCI_DMA_TODEVICE);
1742 return err;
1743}
1744
1745int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash,
1746 u8 *status)
1747{
1748 dma_addr_t indma;
1749 u64 imm;
1750 int err;
1751
1752 indma = pci_map_single(dev->pdev, gid, 16, PCI_DMA_TODEVICE);
1753 if (pci_dma_mapping_error(indma))
1754 return -ENOMEM;
1755
1756 err = mthca_cmd_imm(dev, indma, &imm, 0, 0, CMD_MGID_HASH,
1757 CMD_TIME_CLASS_A, status);
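	/* The hash is returned in the command's immediate output; keep the low 16 bits. */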
1758 *hash = imm;
1759
1760 pci_unmap_single(dev->pdev, indma, 16, PCI_DMA_TODEVICE);
1761 return err;
1762}
1763
1764int mthca_NOP(struct mthca_dev *dev, u8 *status)
1765{
1766 return mthca_cmd(dev, 0, 0x1f, 0, CMD_NOP, msecs_to_jiffies(100), status);
1767}
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
new file mode 100644
index 000000000000..a8bc6aa36ff1
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -0,0 +1,310 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_cmd.h 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#ifndef MTHCA_CMD_H
36#define MTHCA_CMD_H
37
38#include <ib_verbs.h>
39
40#define MTHCA_CMD_MAILBOX_ALIGN 16UL
41#define MTHCA_CMD_MAILBOX_EXTRA (MTHCA_CMD_MAILBOX_ALIGN - 1)
42
43enum {
44 /* command completed successfully: */
45 MTHCA_CMD_STAT_OK = 0x00,
46 /* Internal error (such as a bus error) occurred while processing command: */
47 MTHCA_CMD_STAT_INTERNAL_ERR = 0x01,
48 /* Operation/command not supported or opcode modifier not supported: */
49 MTHCA_CMD_STAT_BAD_OP = 0x02,
50 /* Parameter not supported or parameter out of range: */
51 MTHCA_CMD_STAT_BAD_PARAM = 0x03,
52 /* System not enabled or bad system state: */
53 MTHCA_CMD_STAT_BAD_SYS_STATE = 0x04,
54 /* Attempt to access reserved or unallocated resource: */
55 MTHCA_CMD_STAT_BAD_RESOURCE = 0x05,
56 /* Requested resource is currently executing a command, or is otherwise busy: */
57 MTHCA_CMD_STAT_RESOURCE_BUSY = 0x06,
58 /* memory error: */
59 MTHCA_CMD_STAT_DDR_MEM_ERR = 0x07,
60 /* Required capability exceeds device limits: */
61 MTHCA_CMD_STAT_EXCEED_LIM = 0x08,
62 /* Resource is not in the appropriate state or ownership: */
63 MTHCA_CMD_STAT_BAD_RES_STATE = 0x09,
64 /* Index out of range: */
65 MTHCA_CMD_STAT_BAD_INDEX = 0x0a,
66 /* FW image corrupted: */
67 MTHCA_CMD_STAT_BAD_NVMEM = 0x0b,
68 /* Attempt to modify a QP/EE which is not in the presumed state: */
69 MTHCA_CMD_STAT_BAD_QPEE_STATE = 0x10,
70 /* Bad segment parameters (Address/Size): */
71 MTHCA_CMD_STAT_BAD_SEG_PARAM = 0x20,
72 /* Memory Region has Memory Windows bound to it: */
73 MTHCA_CMD_STAT_REG_BOUND = 0x21,
74 /* HCA local attached memory not present: */
75 MTHCA_CMD_STAT_LAM_NOT_PRE = 0x22,
76 /* Bad management packet (silently discarded): */
77 MTHCA_CMD_STAT_BAD_PKT = 0x30,
78 /* More outstanding CQEs in CQ than new CQ size: */
79 MTHCA_CMD_STAT_BAD_SIZE = 0x40
80};
81
82enum {
83 MTHCA_TRANS_INVALID = 0,
84 MTHCA_TRANS_RST2INIT,
85 MTHCA_TRANS_INIT2INIT,
86 MTHCA_TRANS_INIT2RTR,
87 MTHCA_TRANS_RTR2RTS,
88 MTHCA_TRANS_RTS2RTS,
89 MTHCA_TRANS_SQERR2RTS,
90 MTHCA_TRANS_ANY2ERR,
91 MTHCA_TRANS_RTS2SQD,
92 MTHCA_TRANS_SQD2SQD,
93 MTHCA_TRANS_SQD2RTS,
94 MTHCA_TRANS_ANY2RST,
95};
96
97enum {
98 DEV_LIM_FLAG_RC = 1 << 0,
99 DEV_LIM_FLAG_UC = 1 << 1,
100 DEV_LIM_FLAG_UD = 1 << 2,
101 DEV_LIM_FLAG_RD = 1 << 3,
102 DEV_LIM_FLAG_RAW_IPV6 = 1 << 4,
103 DEV_LIM_FLAG_RAW_ETHER = 1 << 5,
104 DEV_LIM_FLAG_SRQ = 1 << 6,
105 DEV_LIM_FLAG_BAD_PKEY_CNTR = 1 << 8,
106 DEV_LIM_FLAG_BAD_QKEY_CNTR = 1 << 9,
107 DEV_LIM_FLAG_MW = 1 << 16,
108 DEV_LIM_FLAG_AUTO_PATH_MIG = 1 << 17,
109 DEV_LIM_FLAG_ATOMIC = 1 << 18,
110 DEV_LIM_FLAG_RAW_MULTI = 1 << 19,
111 DEV_LIM_FLAG_UD_AV_PORT_ENFORCE = 1 << 20,
112 DEV_LIM_FLAG_UD_MULTI = 1 << 21,
113};
114
115struct mthca_dev_lim {
116 int max_srq_sz;
117 int max_qp_sz;
118 int reserved_qps;
119 int max_qps;
120 int reserved_srqs;
121 int max_srqs;
122 int reserved_eecs;
123 int max_eecs;
124 int max_cq_sz;
125 int reserved_cqs;
126 int max_cqs;
127 int max_mpts;
128 int reserved_eqs;
129 int max_eqs;
130 int reserved_mtts;
131 int max_mrw_sz;
132 int reserved_mrws;
133 int max_mtt_seg;
134 int max_requester_per_qp;
135 int max_responder_per_qp;
136 int max_rdma_global;
137 int local_ca_ack_delay;
138 int max_mtu;
139 int max_port_width;
140 int max_vl;
141 int num_ports;
142 int max_gids;
143 int max_pkeys;
144 u32 flags;
145 int reserved_uars;
146 int uar_size;
147 int min_page_sz;
148 int max_sg;
149 int max_desc_sz;
150 int max_qp_per_mcg;
151 int reserved_mgms;
152 int max_mcgs;
153 int reserved_pds;
154 int max_pds;
155 int reserved_rdds;
156 int max_rdds;
157 int eec_entry_sz;
158 int qpc_entry_sz;
159 int eeec_entry_sz;
160 int eqpc_entry_sz;
161 int eqc_entry_sz;
162 int cqc_entry_sz;
163 int srq_entry_sz;
164 int uar_scratch_entry_sz;
165 int mtt_seg_sz;
166 int mpt_entry_sz;
167 union {
168 struct {
169 int max_avs;
170 } tavor;
171 struct {
172 int resize_srq;
173 int max_pbl_sz;
174 u8 bmme_flags;
175 u32 reserved_lkey;
176 int lam_required;
177 u64 max_icm_sz;
178 } arbel;
179 } hca;
180};
181
182struct mthca_adapter {
183 u32 vendor_id;
184 u32 device_id;
185 u32 revision_id;
186 u8 inta_pin;
187};
188
189struct mthca_init_hca_param {
190 u64 qpc_base;
191 u64 eec_base;
192 u64 srqc_base;
193 u64 cqc_base;
194 u64 eqpc_base;
195 u64 eeec_base;
196 u64 eqc_base;
197 u64 rdb_base;
198 u64 mc_base;
199 u64 mpt_base;
200 u64 mtt_base;
201 u64 uar_scratch_base;
202 u64 uarc_base;
203 u16 log_mc_entry_sz;
204 u16 mc_hash_sz;
205 u8 log_num_qps;
206 u8 log_num_eecs;
207 u8 log_num_srqs;
208 u8 log_num_cqs;
209 u8 log_num_eqs;
210 u8 log_mc_table_sz;
211 u8 mtt_seg_sz;
212 u8 log_mpt_sz;
213 u8 log_uar_sz;
214 u8 log_uarc_sz;
215};
216
217struct mthca_init_ib_param {
218 int enable_1x;
219 int enable_4x;
220 int vl_cap;
221 int mtu_cap;
222 u16 gid_cap;
223 u16 pkey_cap;
224 int set_guid0;
225 u64 guid0;
226 int set_node_guid;
227 u64 node_guid;
228 int set_si_guid;
229 u64 si_guid;
230};
231
232struct mthca_set_ib_param {
233 int set_si_guid;
234 int reset_qkey_viol;
235 u64 si_guid;
236 u32 cap_mask;
237};
238
239int mthca_cmd_use_events(struct mthca_dev *dev);
240void mthca_cmd_use_polling(struct mthca_dev *dev);
241void mthca_cmd_event(struct mthca_dev *dev, u16 token,
242 u8 status, u64 out_param);
243
244int mthca_SYS_EN(struct mthca_dev *dev, u8 *status);
245int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status);
246int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
247int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status);
248int mthca_RUN_FW(struct mthca_dev *dev, u8 *status);
249int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status);
250int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status);
251int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status);
252int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status);
253int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
254 struct mthca_dev_lim *dev_lim, u8 *status);
255int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
256 struct mthca_adapter *adapter, u8 *status);
257int mthca_INIT_HCA(struct mthca_dev *dev,
258 struct mthca_init_hca_param *param,
259 u8 *status);
260int mthca_INIT_IB(struct mthca_dev *dev,
261 struct mthca_init_ib_param *param,
262 int port, u8 *status);
263int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status);
264int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status);
265int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
266 int port, u8 *status);
267int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status);
268int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status);
269int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status);
270int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
271int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status);
272int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
273 u8 *status);
274int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry,
275 int mpt_index, u8 *status);
276int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry,
277 int mpt_index, u8 *status);
278int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry,
279 int num_mtt, u8 *status);
280int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
281 int eq_num, u8 *status);
282int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context,
283 int eq_num, u8 *status);
284int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context,
285 int eq_num, u8 *status);
286int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context,
287 int cq_num, u8 *status);
288int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context,
289 int cq_num, u8 *status);
290int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
291 int is_ee, void *qp_context, u32 optmask,
292 u8 *status);
293int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
294 void *qp_context, u8 *status);
295int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
296 u8 *status);
297int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
298 int port, struct ib_wc* in_wc, struct ib_grh* in_grh,
299 void *in_mad, void *response_mad, u8 *status);
300int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm,
301 u8 *status);
302int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm,
303 u8 *status);
304int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash,
305 u8 *status);
306int mthca_NOP(struct mthca_dev *dev, u8 *status);
307
308#define MAILBOX_ALIGN(x) ((void *) ALIGN((unsigned long) (x), MTHCA_CMD_MAILBOX_ALIGN))
309
310#endif /* MTHCA_CMD_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h
new file mode 100644
index 000000000000..b4bfbbfe2c3d
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h
@@ -0,0 +1,51 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_config_reg.h 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#ifndef MTHCA_CONFIG_REG_H
36#define MTHCA_CONFIG_REG_H
37
38#include <asm/page.h>
39
40#define MTHCA_HCR_BASE 0x80680
41#define MTHCA_HCR_SIZE 0x0001c
42#define MTHCA_ECR_BASE 0x80700
43#define MTHCA_ECR_SIZE 0x00008
44#define MTHCA_ECR_CLR_BASE 0x80708
45#define MTHCA_ECR_CLR_SIZE 0x00008
46#define MTHCA_MAP_ECR_SIZE (MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE)
47#define MTHCA_CLR_INT_BASE 0xf00d8
48#define MTHCA_CLR_INT_SIZE 0x00008
49#define MTHCA_EQ_SET_CI_SIZE (8 * 32)
50
51#endif /* MTHCA_CONFIG_REG_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
new file mode 100644
index 000000000000..5dead2df7eb0
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -0,0 +1,918 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_cq.c 1369 2004-12-20 16:17:07Z roland $
33 */
34
35#include <linux/init.h>
36#include <linux/hardirq.h>
37
38#include <ib_pack.h>
39
40#include "mthca_dev.h"
41#include "mthca_cmd.h"
42#include "mthca_memfree.h"
43
44enum {
45 MTHCA_MAX_DIRECT_CQ_SIZE = 4 * PAGE_SIZE
46};
47
48enum {
49 MTHCA_CQ_ENTRY_SIZE = 0x20
50};
51
52/*
53 * Must be packed because start is 64 bits but only aligned to 32 bits.
54 */
55struct mthca_cq_context {
56 u32 flags;
57 u64 start;
58 u32 logsize_usrpage;
59 u32 error_eqn; /* Tavor only */
60 u32 comp_eqn;
61 u32 pd;
62 u32 lkey;
63 u32 last_notified_index;
64 u32 solicit_producer_index;
65 u32 consumer_index;
66 u32 producer_index;
67 u32 cqn;
68 u32 ci_db; /* Arbel only */
69 u32 state_db; /* Arbel only */
70 u32 reserved;
71} __attribute__((packed));
72
73#define MTHCA_CQ_STATUS_OK ( 0 << 28)
74#define MTHCA_CQ_STATUS_OVERFLOW ( 9 << 28)
75#define MTHCA_CQ_STATUS_WRITE_FAIL (10 << 28)
76#define MTHCA_CQ_FLAG_TR ( 1 << 18)
77#define MTHCA_CQ_FLAG_OI ( 1 << 17)
78#define MTHCA_CQ_STATE_DISARMED ( 0 << 8)
79#define MTHCA_CQ_STATE_ARMED ( 1 << 8)
80#define MTHCA_CQ_STATE_ARMED_SOL ( 4 << 8)
81#define MTHCA_EQ_STATE_FIRED (10 << 8)
82
83enum {
84 MTHCA_ERROR_CQE_OPCODE_MASK = 0xfe
85};
86
87enum {
88 SYNDROME_LOCAL_LENGTH_ERR = 0x01,
89 SYNDROME_LOCAL_QP_OP_ERR = 0x02,
90 SYNDROME_LOCAL_EEC_OP_ERR = 0x03,
91 SYNDROME_LOCAL_PROT_ERR = 0x04,
92 SYNDROME_WR_FLUSH_ERR = 0x05,
93 SYNDROME_MW_BIND_ERR = 0x06,
94 SYNDROME_BAD_RESP_ERR = 0x10,
95 SYNDROME_LOCAL_ACCESS_ERR = 0x11,
96 SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12,
97 SYNDROME_REMOTE_ACCESS_ERR = 0x13,
98 SYNDROME_REMOTE_OP_ERR = 0x14,
99 SYNDROME_RETRY_EXC_ERR = 0x15,
100 SYNDROME_RNR_RETRY_EXC_ERR = 0x16,
101 SYNDROME_LOCAL_RDD_VIOL_ERR = 0x20,
102 SYNDROME_REMOTE_INVAL_RD_REQ_ERR = 0x21,
103 SYNDROME_REMOTE_ABORTED_ERR = 0x22,
104 SYNDROME_INVAL_EECN_ERR = 0x23,
105 SYNDROME_INVAL_EEC_STATE_ERR = 0x24
106};
107
108struct mthca_cqe {
109 u32 my_qpn;
110 u32 my_ee;
111 u32 rqpn;
112 u16 sl_g_mlpath;
113 u16 rlid;
114 u32 imm_etype_pkey_eec;
115 u32 byte_cnt;
116 u32 wqe;
117 u8 opcode;
118 u8 is_send;
119 u8 reserved;
120 u8 owner;
121};
122
123struct mthca_err_cqe {
124 u32 my_qpn;
125 u32 reserved1[3];
126 u8 syndrome;
127 u8 reserved2;
128 u16 db_cnt;
129 u32 reserved3;
130 u32 wqe;
131 u8 opcode;
132 u8 reserved4[2];
133 u8 owner;
134};
135
136#define MTHCA_CQ_ENTRY_OWNER_SW (0 << 7)
137#define MTHCA_CQ_ENTRY_OWNER_HW (1 << 7)
138
139#define MTHCA_TAVOR_CQ_DB_INC_CI (1 << 24)
140#define MTHCA_TAVOR_CQ_DB_REQ_NOT (2 << 24)
141#define MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL (3 << 24)
142#define MTHCA_TAVOR_CQ_DB_SET_CI (4 << 24)
143#define MTHCA_TAVOR_CQ_DB_REQ_NOT_MULT (5 << 24)
144
145#define MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL (1 << 24)
146#define MTHCA_ARBEL_CQ_DB_REQ_NOT (2 << 24)
147#define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24)
148
149static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
150{
151 if (cq->is_direct)
152 return cq->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE);
153 else
154 return cq->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf
155 + (entry * MTHCA_CQ_ENTRY_SIZE) % PAGE_SIZE;
156}
157
158static inline struct mthca_cqe *cqe_sw(struct mthca_cq *cq, int i)
159{
160 struct mthca_cqe *cqe = get_cqe(cq, i);
161 return MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner ? NULL : cqe;
162}
163
164static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq)
165{
166 return cqe_sw(cq, cq->cons_index & cq->ibcq.cqe);
167}
168
169static inline void set_cqe_hw(struct mthca_cqe *cqe)
170{
171 cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW;
172}
173
174/*
175 * incr is ignored in native Arbel (mem-free) mode, so cq->cons_index
176 * should be correct before calling update_cons_index().
177 */
178static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq,
179 int incr)
180{
181 u32 doorbell[2];
182
183 if (dev->hca_type == ARBEL_NATIVE) {
184 *cq->set_ci_db = cpu_to_be32(cq->cons_index);
185 wmb();
186 } else {
187 doorbell[0] = cpu_to_be32(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn);
188 doorbell[1] = cpu_to_be32(incr - 1);
189
190 mthca_write64(doorbell,
191 dev->kar + MTHCA_CQ_DOORBELL,
192 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
193 }
194}
195
196void mthca_cq_event(struct mthca_dev *dev, u32 cqn)
197{
198 struct mthca_cq *cq;
199
200 cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1));
201
202 if (!cq) {
203 mthca_warn(dev, "Completion event for bogus CQ %08x\n", cqn);
204 return;
205 }
206
207 ++cq->arm_sn;
208
209 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
210}
211
212void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn)
213{
214 struct mthca_cq *cq;
215 struct mthca_cqe *cqe;
216 int prod_index;
217 int nfreed = 0;
218
219 spin_lock_irq(&dev->cq_table.lock);
220 cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1));
221 if (cq)
222 atomic_inc(&cq->refcount);
223 spin_unlock_irq(&dev->cq_table.lock);
224
225 if (!cq)
226 return;
227
228 spin_lock_irq(&cq->lock);
229
230 /*
231 * First we need to find the current producer index, so we
232 * know where to start cleaning from. It doesn't matter if HW
233 * adds new entries after this loop -- the QP we're worried
234 * about is already in RESET, so the new entries won't come
235 * from our QP and therefore don't need to be checked.
236 */
237 for (prod_index = cq->cons_index;
238 cqe_sw(cq, prod_index & cq->ibcq.cqe);
239 ++prod_index)
240 if (prod_index == cq->cons_index + cq->ibcq.cqe)
241 break;
242
243 if (0)
244 mthca_dbg(dev, "Cleaning QPN %06x from CQN %06x; ci %d, pi %d\n",
245 qpn, cqn, cq->cons_index, prod_index);
246
247 /*
248 * Now sweep backwards through the CQ, removing CQ entries
249 * that match our QP by copying older entries on top of them.
250 */
251 while (prod_index > cq->cons_index) {
252 cqe = get_cqe(cq, (prod_index - 1) & cq->ibcq.cqe);
253 if (cqe->my_qpn == cpu_to_be32(qpn))
254 ++nfreed;
255 else if (nfreed)
256 memcpy(get_cqe(cq, (prod_index - 1 + nfreed) &
257 cq->ibcq.cqe),
258 cqe,
259 MTHCA_CQ_ENTRY_SIZE);
260 --prod_index;
261 }
262
263 if (nfreed) {
264 wmb();
265 cq->cons_index += nfreed;
266 update_cons_index(dev, cq, nfreed);
267 }
268
269 spin_unlock_irq(&cq->lock);
270 if (atomic_dec_and_test(&cq->refcount))
271 wake_up(&cq->wait);
272}
273
274static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
275 struct mthca_qp *qp, int wqe_index, int is_send,
276 struct mthca_err_cqe *cqe,
277 struct ib_wc *entry, int *free_cqe)
278{
279 int err;
280 int dbd;
281 u32 new_wqe;
282
283 if (1 && cqe->syndrome != SYNDROME_WR_FLUSH_ERR) {
284 int j;
285
286 mthca_dbg(dev, "%x/%d: error CQE -> QPN %06x, WQE @ %08x\n",
287 cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn),
288 be32_to_cpu(cqe->wqe));
289
290 for (j = 0; j < 8; ++j)
291 printk(KERN_DEBUG " [%2x] %08x\n",
292 j * 4, be32_to_cpu(((u32 *) cqe)[j]));
293 }
294
295 /*
296 * For completions in error, only work request ID, status (and
297 * freed resource count for RD) have to be set.
298 */
299 switch (cqe->syndrome) {
300 case SYNDROME_LOCAL_LENGTH_ERR:
301 entry->status = IB_WC_LOC_LEN_ERR;
302 break;
303 case SYNDROME_LOCAL_QP_OP_ERR:
304 entry->status = IB_WC_LOC_QP_OP_ERR;
305 break;
306 case SYNDROME_LOCAL_EEC_OP_ERR:
307 entry->status = IB_WC_LOC_EEC_OP_ERR;
308 break;
309 case SYNDROME_LOCAL_PROT_ERR:
310 entry->status = IB_WC_LOC_PROT_ERR;
311 break;
312 case SYNDROME_WR_FLUSH_ERR:
313 entry->status = IB_WC_WR_FLUSH_ERR;
314 break;
315 case SYNDROME_MW_BIND_ERR:
316 entry->status = IB_WC_MW_BIND_ERR;
317 break;
318 case SYNDROME_BAD_RESP_ERR:
319 entry->status = IB_WC_BAD_RESP_ERR;
320 break;
321 case SYNDROME_LOCAL_ACCESS_ERR:
322 entry->status = IB_WC_LOC_ACCESS_ERR;
323 break;
324 case SYNDROME_REMOTE_INVAL_REQ_ERR:
325 entry->status = IB_WC_REM_INV_REQ_ERR;
326 break;
327 case SYNDROME_REMOTE_ACCESS_ERR:
328 entry->status = IB_WC_REM_ACCESS_ERR;
329 break;
330 case SYNDROME_REMOTE_OP_ERR:
331 entry->status = IB_WC_REM_OP_ERR;
332 break;
333 case SYNDROME_RETRY_EXC_ERR:
334 entry->status = IB_WC_RETRY_EXC_ERR;
335 break;
336 case SYNDROME_RNR_RETRY_EXC_ERR:
337 entry->status = IB_WC_RNR_RETRY_EXC_ERR;
338 break;
339 case SYNDROME_LOCAL_RDD_VIOL_ERR:
340 entry->status = IB_WC_LOC_RDD_VIOL_ERR;
341 break;
342 case SYNDROME_REMOTE_INVAL_RD_REQ_ERR:
343 entry->status = IB_WC_REM_INV_RD_REQ_ERR;
344 break;
345 case SYNDROME_REMOTE_ABORTED_ERR:
346 entry->status = IB_WC_REM_ABORT_ERR;
347 break;
348 case SYNDROME_INVAL_EECN_ERR:
349 entry->status = IB_WC_INV_EECN_ERR;
350 break;
351 case SYNDROME_INVAL_EEC_STATE_ERR:
352 entry->status = IB_WC_INV_EEC_STATE_ERR;
353 break;
354 default:
355 entry->status = IB_WC_GENERAL_ERR;
356 break;
357 }
358
359 err = mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe);
360 if (err)
361 return err;
362
363 /*
364 * If we're at the end of the WQE chain, or we've used up our
365 * doorbell count, free the CQE. Otherwise just update it for
366 * the next poll operation.
367 */
368 if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd))
369 return 0;
370
371 cqe->db_cnt = cpu_to_be16(be16_to_cpu(cqe->db_cnt) - dbd);
372 cqe->wqe = new_wqe;
373 cqe->syndrome = SYNDROME_WR_FLUSH_ERR;
374
375 *free_cqe = 0;
376
377 return 0;
378}
379
380static void dump_cqe(struct mthca_cqe *cqe)
381{
382 int j;
383
384 for (j = 0; j < 8; ++j)
385 printk(KERN_DEBUG " [%2x] %08x\n",
386 j * 4, be32_to_cpu(((u32 *) cqe)[j]));
387}
388
389static inline int mthca_poll_one(struct mthca_dev *dev,
390 struct mthca_cq *cq,
391 struct mthca_qp **cur_qp,
392 int *freed,
393 struct ib_wc *entry)
394{
395 struct mthca_wq *wq;
396 struct mthca_cqe *cqe;
397 int wqe_index;
398 int is_error;
399 int is_send;
400 int free_cqe = 1;
401 int err = 0;
402
403 cqe = next_cqe_sw(cq);
404 if (!cqe)
405 return -EAGAIN;
406
407 /*
408 * Make sure we read CQ entry contents after we've checked the
409 * ownership bit.
410 */
411 rmb();
412
413 if (0) {
414 mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n",
415 cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn),
416 be32_to_cpu(cqe->wqe));
417
418 dump_cqe(cqe);
419 }
420
421 is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
422 MTHCA_ERROR_CQE_OPCODE_MASK;
423 is_send = is_error ? cqe->opcode & 0x01 : cqe->is_send & 0x80;
424
425 if (!*cur_qp || be32_to_cpu(cqe->my_qpn) != (*cur_qp)->qpn) {
426 /*
427 * We do not have to take the QP table lock here,
428 * because CQs will be locked while QPs are removed
429 * from the table.
430 */
431 *cur_qp = mthca_array_get(&dev->qp_table.qp,
432 be32_to_cpu(cqe->my_qpn) &
433 (dev->limits.num_qps - 1));
434 if (!*cur_qp) {
435 mthca_warn(dev, "CQ entry for unknown QP %06x\n",
436 be32_to_cpu(cqe->my_qpn) & 0xffffff);
437 err = -EINVAL;
438 goto out;
439 }
440 }
441
442 entry->qp_num = (*cur_qp)->qpn;
443
444 if (is_send) {
445 wq = &(*cur_qp)->sq;
446 wqe_index = ((be32_to_cpu(cqe->wqe) - (*cur_qp)->send_wqe_offset)
447 >> wq->wqe_shift);
448 entry->wr_id = (*cur_qp)->wrid[wqe_index +
449 (*cur_qp)->rq.max];
450 } else {
451 wq = &(*cur_qp)->rq;
452 wqe_index = be32_to_cpu(cqe->wqe) >> wq->wqe_shift;
453 entry->wr_id = (*cur_qp)->wrid[wqe_index];
454 }
455
456 if (wq->last_comp < wqe_index)
457 wq->tail += wqe_index - wq->last_comp;
458 else
459 wq->tail += wqe_index + wq->max - wq->last_comp;
460
461 wq->last_comp = wqe_index;
462
463 if (0)
464 mthca_dbg(dev, "%s completion for QP %06x, index %d (nr %d)\n",
465 is_send ? "Send" : "Receive",
466 (*cur_qp)->qpn, wqe_index, wq->max);
467
468 if (is_error) {
469 err = handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send,
470 (struct mthca_err_cqe *) cqe,
471 entry, &free_cqe);
472 goto out;
473 }
474
475 if (is_send) {
476 entry->opcode = IB_WC_SEND; /* XXX */
477 } else {
478 entry->byte_len = be32_to_cpu(cqe->byte_cnt);
479 switch (cqe->opcode & 0x1f) {
480 case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE:
481 case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE:
482 entry->wc_flags = IB_WC_WITH_IMM;
483 entry->imm_data = cqe->imm_etype_pkey_eec;
484 entry->opcode = IB_WC_RECV;
485 break;
486 case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
487 case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
488 entry->wc_flags = IB_WC_WITH_IMM;
489 entry->imm_data = cqe->imm_etype_pkey_eec;
490 entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
491 break;
492 default:
493 entry->wc_flags = 0;
494 entry->opcode = IB_WC_RECV;
495 break;
496 }
497 entry->slid = be16_to_cpu(cqe->rlid);
498 entry->sl = be16_to_cpu(cqe->sl_g_mlpath) >> 12;
499 entry->src_qp = be32_to_cpu(cqe->rqpn) & 0xffffff;
500 entry->dlid_path_bits = be16_to_cpu(cqe->sl_g_mlpath) & 0x7f;
501 entry->pkey_index = be32_to_cpu(cqe->imm_etype_pkey_eec) >> 16;
502 entry->wc_flags |= be16_to_cpu(cqe->sl_g_mlpath) & 0x80 ?
503 IB_WC_GRH : 0;
504 }
505
506 entry->status = IB_WC_SUCCESS;
507
508 out:
509 if (likely(free_cqe)) {
510 set_cqe_hw(cqe);
511 ++(*freed);
512 ++cq->cons_index;
513 }
514
515 return err;
516}
517
518int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
519 struct ib_wc *entry)
520{
521 struct mthca_dev *dev = to_mdev(ibcq->device);
522 struct mthca_cq *cq = to_mcq(ibcq);
523 struct mthca_qp *qp = NULL;
524 unsigned long flags;
525 int err = 0;
526 int freed = 0;
527 int npolled;
528
529 spin_lock_irqsave(&cq->lock, flags);
530
531 for (npolled = 0; npolled < num_entries; ++npolled) {
532 err = mthca_poll_one(dev, cq, &qp,
533 &freed, entry + npolled);
534 if (err)
535 break;
536 }
537
538 if (freed) {
539 wmb();
540 update_cons_index(dev, cq, freed);
541 }
542
543 spin_unlock_irqrestore(&cq->lock, flags);
544
545 return err == 0 || err == -EAGAIN ? npolled : err;
546}
547
548int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify)
549{
550 u32 doorbell[2];
551
552 doorbell[0] = cpu_to_be32((notify == IB_CQ_SOLICITED ?
553 MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
554 MTHCA_TAVOR_CQ_DB_REQ_NOT) |
555 to_mcq(cq)->cqn);
556 doorbell[1] = 0xffffffff;
557
558 mthca_write64(doorbell,
559 to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
560 MTHCA_GET_DOORBELL_LOCK(&to_mdev(cq->device)->doorbell_lock));
561
562 return 0;
563}
564
565int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
566{
567 struct mthca_cq *cq = to_mcq(ibcq);
568 u32 doorbell[2];
569 u32 sn;
570 u32 ci;
571
572 sn = cq->arm_sn & 3;
573 ci = cpu_to_be32(cq->cons_index);
574
575 doorbell[0] = ci;
576 doorbell[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
577 (notify == IB_CQ_SOLICITED ? 1 : 2));
578
579 mthca_write_db_rec(doorbell, cq->arm_db);
580
581 /*
582 * Make sure that the doorbell record in host memory is
583 * written before ringing the doorbell via PCI MMIO.
584 */
585 wmb();
586
587 doorbell[0] = cpu_to_be32((sn << 28) |
588 (notify == IB_CQ_SOLICITED ?
589 MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
590 MTHCA_ARBEL_CQ_DB_REQ_NOT) |
591 cq->cqn);
592 doorbell[1] = ci;
593
594 mthca_write64(doorbell,
595 to_mdev(ibcq->device)->kar + MTHCA_CQ_DOORBELL,
596 MTHCA_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->doorbell_lock));
597
598 return 0;
599}
600
601static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq)
602{
603 int i;
604 int size;
605
606 if (cq->is_direct)
607 pci_free_consistent(dev->pdev,
608 (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE,
609 cq->queue.direct.buf,
610 pci_unmap_addr(&cq->queue.direct,
611 mapping));
612 else {
613 size = (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE;
614 for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
615 if (cq->queue.page_list[i].buf)
616 pci_free_consistent(dev->pdev, PAGE_SIZE,
617 cq->queue.page_list[i].buf,
618 pci_unmap_addr(&cq->queue.page_list[i],
619 mapping));
620
621 kfree(cq->queue.page_list);
622 }
623}
624
625static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size,
626 struct mthca_cq *cq)
627{
628 int err = -ENOMEM;
629 int npages, shift;
630 u64 *dma_list = NULL;
631 dma_addr_t t;
632 int i;
633
634 if (size <= MTHCA_MAX_DIRECT_CQ_SIZE) {
635 cq->is_direct = 1;
636 npages = 1;
637 shift = get_order(size) + PAGE_SHIFT;
638
639 cq->queue.direct.buf = pci_alloc_consistent(dev->pdev,
640 size, &t);
641 if (!cq->queue.direct.buf)
642 return -ENOMEM;
643
644 pci_unmap_addr_set(&cq->queue.direct, mapping, t);
645
646 memset(cq->queue.direct.buf, 0, size);
647
648 while (t & ((1 << shift) - 1)) {
649 --shift;
650 npages *= 2;
651 }
652
653 dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
654 if (!dma_list)
655 goto err_free;
656
657 for (i = 0; i < npages; ++i)
658 dma_list[i] = t + i * (1 << shift);
659 } else {
660 cq->is_direct = 0;
661 npages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
662 shift = PAGE_SHIFT;
663
664 dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
665 if (!dma_list)
666 return -ENOMEM;
667
668 cq->queue.page_list = kmalloc(npages * sizeof *cq->queue.page_list,
669 GFP_KERNEL);
670 if (!cq->queue.page_list)
671 goto err_out;
672
673 for (i = 0; i < npages; ++i)
674 cq->queue.page_list[i].buf = NULL;
675
676 for (i = 0; i < npages; ++i) {
677 cq->queue.page_list[i].buf =
678 pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t);
679 if (!cq->queue.page_list[i].buf)
680 goto err_free;
681
682 dma_list[i] = t;
683 pci_unmap_addr_set(&cq->queue.page_list[i], mapping, t);
684
685 memset(cq->queue.page_list[i].buf, 0, PAGE_SIZE);
686 }
687 }
688
689 err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
690 dma_list, shift, npages,
691 0, size,
692 MTHCA_MPT_FLAG_LOCAL_WRITE |
693 MTHCA_MPT_FLAG_LOCAL_READ,
694 &cq->mr);
695 if (err)
696 goto err_free;
697
698 kfree(dma_list);
699
700 return 0;
701
702err_free:
703 mthca_free_cq_buf(dev, cq);
704
705err_out:
706 kfree(dma_list);
707
708 return err;
709}
710
711int mthca_init_cq(struct mthca_dev *dev, int nent,
712 struct mthca_cq *cq)
713{
714 int size = nent * MTHCA_CQ_ENTRY_SIZE;
715 void *mailbox = NULL;
716 struct mthca_cq_context *cq_context;
717 int err = -ENOMEM;
718 u8 status;
719 int i;
720
721 might_sleep();
722
723 cq->ibcq.cqe = nent - 1;
724
725 cq->cqn = mthca_alloc(&dev->cq_table.alloc);
726 if (cq->cqn == -1)
727 return -ENOMEM;
728
729 if (dev->hca_type == ARBEL_NATIVE) {
730 cq->arm_sn = 1;
731
732 err = mthca_table_get(dev, dev->cq_table.table, cq->cqn);
733 if (err)
734 goto err_out;
735
736 err = -ENOMEM;
737
738 cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI,
739 cq->cqn, &cq->set_ci_db);
740 if (cq->set_ci_db_index < 0)
741 goto err_out_icm;
742
743 cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
744 cq->cqn, &cq->arm_db);
745 if (cq->arm_db_index < 0)
746 goto err_out_ci;
747 }
748
749 mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA,
750 GFP_KERNEL);
751 if (!mailbox)
752 goto err_out_mailbox;
753
754 cq_context = MAILBOX_ALIGN(mailbox);
755
756 err = mthca_alloc_cq_buf(dev, size, cq);
757 if (err)
758 goto err_out_mailbox;
759
760 for (i = 0; i < nent; ++i)
761 set_cqe_hw(get_cqe(cq, i));
762
763 spin_lock_init(&cq->lock);
764 atomic_set(&cq->refcount, 1);
765 init_waitqueue_head(&cq->wait);
766
767 memset(cq_context, 0, sizeof *cq_context);
768 cq_context->flags = cpu_to_be32(MTHCA_CQ_STATUS_OK |
769 MTHCA_CQ_STATE_DISARMED |
770 MTHCA_CQ_FLAG_TR);
771 cq_context->start = cpu_to_be64(0);
772 cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 |
773 dev->driver_uar.index);
774 cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
775 cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
776 cq_context->pd = cpu_to_be32(dev->driver_pd.pd_num);
777 cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey);
778 cq_context->cqn = cpu_to_be32(cq->cqn);
779
780 if (dev->hca_type == ARBEL_NATIVE) {
781 cq_context->ci_db = cpu_to_be32(cq->set_ci_db_index);
782 cq_context->state_db = cpu_to_be32(cq->arm_db_index);
783 }
784
785 err = mthca_SW2HW_CQ(dev, cq_context, cq->cqn, &status);
786 if (err) {
787 mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err);
788 goto err_out_free_mr;
789 }
790
791 if (status) {
792 mthca_warn(dev, "SW2HW_CQ returned status 0x%02x\n",
793 status);
794 err = -EINVAL;
795 goto err_out_free_mr;
796 }
797
798 spin_lock_irq(&dev->cq_table.lock);
799 if (mthca_array_set(&dev->cq_table.cq,
800 cq->cqn & (dev->limits.num_cqs - 1),
801 cq)) {
802 spin_unlock_irq(&dev->cq_table.lock);
803 goto err_out_free_mr;
804 }
805 spin_unlock_irq(&dev->cq_table.lock);
806
807 cq->cons_index = 0;
808
809 kfree(mailbox);
810
811 return 0;
812
813err_out_free_mr:
814 mthca_free_mr(dev, &cq->mr);
815 mthca_free_cq_buf(dev, cq);
816
817err_out_mailbox:
818 kfree(mailbox);
819
820 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
821
822err_out_ci:
823 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
824
825err_out_icm:
826 mthca_table_put(dev, dev->cq_table.table, cq->cqn);
827
828err_out:
829 mthca_free(&dev->cq_table.alloc, cq->cqn);
830
831 return err;
832}
833
834void mthca_free_cq(struct mthca_dev *dev,
835 struct mthca_cq *cq)
836{
837 void *mailbox;
838 int err;
839 u8 status;
840
841 might_sleep();
842
843 mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA,
844 GFP_KERNEL);
845 if (!mailbox) {
846 mthca_warn(dev, "No memory for mailbox to free CQ.\n");
847 return;
848 }
849
850 err = mthca_HW2SW_CQ(dev, MAILBOX_ALIGN(mailbox), cq->cqn, &status);
851 if (err)
852 mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err);
853 else if (status)
854 mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n",
855 status);
856
857 if (0) {
858 u32 *ctx = MAILBOX_ALIGN(mailbox);
859 int j;
860
861 printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n",
862 cq->cqn, cq->cons_index, !!next_cqe_sw(cq));
863 for (j = 0; j < 16; ++j)
864 printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j]));
865 }
866
867 spin_lock_irq(&dev->cq_table.lock);
868 mthca_array_clear(&dev->cq_table.cq,
869 cq->cqn & (dev->limits.num_cqs - 1));
870 spin_unlock_irq(&dev->cq_table.lock);
871
872 if (dev->mthca_flags & MTHCA_FLAG_MSI_X)
873 synchronize_irq(dev->eq_table.eq[MTHCA_EQ_COMP].msi_x_vector);
874 else
875 synchronize_irq(dev->pdev->irq);
876
877 atomic_dec(&cq->refcount);
878 wait_event(cq->wait, !atomic_read(&cq->refcount));
879
880 mthca_free_mr(dev, &cq->mr);
881 mthca_free_cq_buf(dev, cq);
882
883 if (dev->hca_type == ARBEL_NATIVE) {
884 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
885 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
886 mthca_table_put(dev, dev->cq_table.table, cq->cqn);
887 }
888
889 mthca_free(&dev->cq_table.alloc, cq->cqn);
890 kfree(mailbox);
891}
892
893int __devinit mthca_init_cq_table(struct mthca_dev *dev)
894{
895 int err;
896
897 spin_lock_init(&dev->cq_table.lock);
898
899 err = mthca_alloc_init(&dev->cq_table.alloc,
900 dev->limits.num_cqs,
901 (1 << 24) - 1,
902 dev->limits.reserved_cqs);
903 if (err)
904 return err;
905
906 err = mthca_array_init(&dev->cq_table.cq,
907 dev->limits.num_cqs);
908 if (err)
909 mthca_alloc_cleanup(&dev->cq_table.alloc);
910
911 return err;
912}
913
914void __devexit mthca_cleanup_cq_table(struct mthca_dev *dev)
915{
916 mthca_array_cleanup(&dev->cq_table.cq, dev->limits.num_cqs);
917 mthca_alloc_cleanup(&dev->cq_table.alloc);
918}
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
new file mode 100644
index 000000000000..56b2bfb5adb1
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -0,0 +1,437 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_dev.h 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#ifndef MTHCA_DEV_H
36#define MTHCA_DEV_H
37
38#include <linux/spinlock.h>
39#include <linux/kernel.h>
40#include <linux/pci.h>
41#include <linux/dma-mapping.h>
42#include <asm/semaphore.h>
43
44#include "mthca_provider.h"
45#include "mthca_doorbell.h"
46
47#define DRV_NAME "ib_mthca"
48#define PFX DRV_NAME ": "
49#define DRV_VERSION "0.06-pre"
50#define DRV_RELDATE "November 8, 2004"
51
52/* Types of supported HCA */
53enum {
54 TAVOR, /* MT23108 */
55 ARBEL_COMPAT, /* MT25208 in Tavor compat mode */
56 ARBEL_NATIVE /* MT25208 with extended features */
57};
58
59enum {
60 MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
61 MTHCA_FLAG_SRQ = 1 << 2,
62 MTHCA_FLAG_MSI = 1 << 3,
63 MTHCA_FLAG_MSI_X = 1 << 4,
64 MTHCA_FLAG_NO_LAM = 1 << 5
65};
66
67enum {
68 MTHCA_MAX_PORTS = 2
69};
70
71enum {
72 MTHCA_EQ_CONTEXT_SIZE = 0x40,
73 MTHCA_CQ_CONTEXT_SIZE = 0x40,
74 MTHCA_QP_CONTEXT_SIZE = 0x200,
75 MTHCA_RDB_ENTRY_SIZE = 0x20,
76 MTHCA_AV_SIZE = 0x20,
77 MTHCA_MGM_ENTRY_SIZE = 0x40,
78
79 /* Arbel FW gives us these, but we need them for Tavor */
80 MTHCA_MPT_ENTRY_SIZE = 0x40,
81 MTHCA_MTT_SEG_SIZE = 0x40,
82};
83
84enum {
85 MTHCA_EQ_CMD,
86 MTHCA_EQ_ASYNC,
87 MTHCA_EQ_COMP,
88 MTHCA_NUM_EQ
89};
90
91struct mthca_cmd {
92 int use_events;
93 struct semaphore hcr_sem;
94 struct semaphore poll_sem;
95 struct semaphore event_sem;
96 int max_cmds;
97 spinlock_t context_lock;
98 int free_head;
99 struct mthca_cmd_context *context;
100 u16 token_mask;
101};
102
103struct mthca_limits {
104 int num_ports;
105 int vl_cap;
106 int mtu_cap;
107 int gid_table_len;
108 int pkey_table_len;
109 int local_ca_ack_delay;
110 int num_uars;
111 int max_sg;
112 int num_qps;
113 int reserved_qps;
114 int num_srqs;
115 int reserved_srqs;
116 int num_eecs;
117 int reserved_eecs;
118 int num_cqs;
119 int reserved_cqs;
120 int num_eqs;
121 int reserved_eqs;
122 int num_mpts;
123 int num_mtt_segs;
124 int mtt_seg_size;
125 int reserved_mtts;
126 int reserved_mrws;
127 int reserved_uars;
128 int num_mgms;
129 int num_amgms;
130 int reserved_mcgs;
131 int num_pds;
132 int reserved_pds;
133};
134
135struct mthca_alloc {
136 u32 last;
137 u32 top;
138 u32 max;
139 u32 mask;
140 spinlock_t lock;
141 unsigned long *table;
142};
143
144struct mthca_array {
145 struct {
146 void **page;
147 int used;
148 } *page_list;
149};
150
151struct mthca_uar_table {
152 struct mthca_alloc alloc;
153 u64 uarc_base;
154 int uarc_size;
155};
156
157struct mthca_pd_table {
158 struct mthca_alloc alloc;
159};
160
161struct mthca_mr_table {
162 struct mthca_alloc mpt_alloc;
163 int max_mtt_order;
164 unsigned long **mtt_buddy;
165 u64 mtt_base;
166 struct mthca_icm_table *mtt_table;
167 struct mthca_icm_table *mpt_table;
168};
169
170struct mthca_eq_table {
171 struct mthca_alloc alloc;
172 void __iomem *clr_int;
173 u32 clr_mask;
174 u32 arm_mask;
175 struct mthca_eq eq[MTHCA_NUM_EQ];
176 u64 icm_virt;
177 struct page *icm_page;
178 dma_addr_t icm_dma;
179 int have_irq;
180 u8 inta_pin;
181};
182
183struct mthca_cq_table {
184 struct mthca_alloc alloc;
185 spinlock_t lock;
186 struct mthca_array cq;
187 struct mthca_icm_table *table;
188};
189
190struct mthca_qp_table {
191 struct mthca_alloc alloc;
192 u32 rdb_base;
193 int rdb_shift;
194 int sqp_start;
195 spinlock_t lock;
196 struct mthca_array qp;
197 struct mthca_icm_table *qp_table;
198 struct mthca_icm_table *eqp_table;
199};
200
201struct mthca_av_table {
202 struct pci_pool *pool;
203 int num_ddr_avs;
204 u64 ddr_av_base;
205 void __iomem *av_map;
206 struct mthca_alloc alloc;
207};
208
209struct mthca_mcg_table {
210 struct semaphore sem;
211 struct mthca_alloc alloc;
212 struct mthca_icm_table *table;
213};
214
215struct mthca_dev {
216 struct ib_device ib_dev;
217 struct pci_dev *pdev;
218
219 int hca_type;
220 unsigned long mthca_flags;
221 unsigned long device_cap_flags;
222
223 u32 rev_id;
224
225 /* firmware info */
226 u64 fw_ver;
227 union {
228 struct {
229 u64 fw_start;
230 u64 fw_end;
231 } tavor;
232 struct {
233 u64 clr_int_base;
234 u64 eq_arm_base;
235 u64 eq_set_ci_base;
236 struct mthca_icm *fw_icm;
237 struct mthca_icm *aux_icm;
238 u16 fw_pages;
239 } arbel;
240 } fw;
241
242 u64 ddr_start;
243 u64 ddr_end;
244
245 MTHCA_DECLARE_DOORBELL_LOCK(doorbell_lock)
246 struct semaphore cap_mask_mutex;
247
248 void __iomem *hcr;
249 void __iomem *kar;
250 void __iomem *clr_base;
251 union {
252 struct {
253 void __iomem *ecr_base;
254 } tavor;
255 struct {
256 void __iomem *eq_arm;
257 void __iomem *eq_set_ci_base;
258 } arbel;
259 } eq_regs;
260
261 struct mthca_cmd cmd;
262 struct mthca_limits limits;
263
264 struct mthca_uar_table uar_table;
265 struct mthca_pd_table pd_table;
266 struct mthca_mr_table mr_table;
267 struct mthca_eq_table eq_table;
268 struct mthca_cq_table cq_table;
269 struct mthca_qp_table qp_table;
270 struct mthca_av_table av_table;
271 struct mthca_mcg_table mcg_table;
272
273 struct mthca_uar driver_uar;
274 struct mthca_db_table *db_tab;
275 struct mthca_pd driver_pd;
276 struct mthca_mr driver_mr;
277
278 struct ib_mad_agent *send_agent[MTHCA_MAX_PORTS][2];
279 struct ib_ah *sm_ah[MTHCA_MAX_PORTS];
280 spinlock_t sm_lock;
281};
282
283#define mthca_dbg(mdev, format, arg...) \
284 dev_dbg(&mdev->pdev->dev, format, ## arg)
285#define mthca_err(mdev, format, arg...) \
286 dev_err(&mdev->pdev->dev, format, ## arg)
287#define mthca_info(mdev, format, arg...) \
288 dev_info(&mdev->pdev->dev, format, ## arg)
289#define mthca_warn(mdev, format, arg...) \
290 dev_warn(&mdev->pdev->dev, format, ## arg)
291
292extern void __buggy_use_of_MTHCA_GET(void);
293extern void __buggy_use_of_MTHCA_PUT(void);
294
295#define MTHCA_GET(dest, source, offset) \
296 do { \
297 void *__p = (char *) (source) + (offset); \
298 switch (sizeof (dest)) { \
299 case 1: (dest) = *(u8 *) __p; break; \
300 case 2: (dest) = be16_to_cpup(__p); break; \
301 case 4: (dest) = be32_to_cpup(__p); break; \
302 case 8: (dest) = be64_to_cpup(__p); break; \
303 default: __buggy_use_of_MTHCA_GET(); \
304 } \
305 } while (0)
306
307#define MTHCA_PUT(dest, source, offset) \
308 do { \
309 __typeof__(source) *__p = \
310 (__typeof__(source) *) ((char *) (dest) + (offset)); \
311 switch (sizeof(source)) { \
312 case 1: *__p = (source); break; \
313 case 2: *__p = cpu_to_be16(source); break; \
314 case 4: *__p = cpu_to_be32(source); break; \
315 case 8: *__p = cpu_to_be64(source); break; \
316 default: __buggy_use_of_MTHCA_PUT(); \
317 } \
318 } while (0)
319
320int mthca_reset(struct mthca_dev *mdev);
321
322u32 mthca_alloc(struct mthca_alloc *alloc);
323void mthca_free(struct mthca_alloc *alloc, u32 obj);
324int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
325 u32 reserved);
326void mthca_alloc_cleanup(struct mthca_alloc *alloc);
327void *mthca_array_get(struct mthca_array *array, int index);
328int mthca_array_set(struct mthca_array *array, int index, void *value);
329void mthca_array_clear(struct mthca_array *array, int index);
330int mthca_array_init(struct mthca_array *array, int nent);
331void mthca_array_cleanup(struct mthca_array *array, int nent);
332
333int mthca_init_uar_table(struct mthca_dev *dev);
334int mthca_init_pd_table(struct mthca_dev *dev);
335int mthca_init_mr_table(struct mthca_dev *dev);
336int mthca_init_eq_table(struct mthca_dev *dev);
337int mthca_init_cq_table(struct mthca_dev *dev);
338int mthca_init_qp_table(struct mthca_dev *dev);
339int mthca_init_av_table(struct mthca_dev *dev);
340int mthca_init_mcg_table(struct mthca_dev *dev);
341
342void mthca_cleanup_uar_table(struct mthca_dev *dev);
343void mthca_cleanup_pd_table(struct mthca_dev *dev);
344void mthca_cleanup_mr_table(struct mthca_dev *dev);
345void mthca_cleanup_eq_table(struct mthca_dev *dev);
346void mthca_cleanup_cq_table(struct mthca_dev *dev);
347void mthca_cleanup_qp_table(struct mthca_dev *dev);
348void mthca_cleanup_av_table(struct mthca_dev *dev);
349void mthca_cleanup_mcg_table(struct mthca_dev *dev);
350
351int mthca_register_device(struct mthca_dev *dev);
352void mthca_unregister_device(struct mthca_dev *dev);
353
354int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
355void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
356
357int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd);
358void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
359
360int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
361 u32 access, struct mthca_mr *mr);
362int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
363 u64 *buffer_list, int buffer_size_shift,
364 int list_len, u64 iova, u64 total_size,
365 u32 access, struct mthca_mr *mr);
366void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr);
367
368int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt);
369void mthca_unmap_eq_icm(struct mthca_dev *dev);
370
371int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
372 struct ib_wc *entry);
373int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
374int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
375int mthca_init_cq(struct mthca_dev *dev, int nent,
376 struct mthca_cq *cq);
377void mthca_free_cq(struct mthca_dev *dev,
378 struct mthca_cq *cq);
379void mthca_cq_event(struct mthca_dev *dev, u32 cqn);
380void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn);
381
382void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
383 enum ib_event_type event_type);
384int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
385int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
386 struct ib_send_wr **bad_wr);
387int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
388 struct ib_recv_wr **bad_wr);
389int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
390 struct ib_send_wr **bad_wr);
391int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
392 struct ib_recv_wr **bad_wr);
393int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
394 int index, int *dbd, u32 *new_wqe);
395int mthca_alloc_qp(struct mthca_dev *dev,
396 struct mthca_pd *pd,
397 struct mthca_cq *send_cq,
398 struct mthca_cq *recv_cq,
399 enum ib_qp_type type,
400 enum ib_sig_type send_policy,
401 struct mthca_qp *qp);
402int mthca_alloc_sqp(struct mthca_dev *dev,
403 struct mthca_pd *pd,
404 struct mthca_cq *send_cq,
405 struct mthca_cq *recv_cq,
406 enum ib_sig_type send_policy,
407 int qpn,
408 int port,
409 struct mthca_sqp *sqp);
410void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp);
411int mthca_create_ah(struct mthca_dev *dev,
412 struct mthca_pd *pd,
413 struct ib_ah_attr *ah_attr,
414 struct mthca_ah *ah);
415int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah);
416int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
417 struct ib_ud_header *header);
418
419int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
420int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
421
422int mthca_process_mad(struct ib_device *ibdev,
423 int mad_flags,
424 u8 port_num,
425 struct ib_wc *in_wc,
426 struct ib_grh *in_grh,
427 struct ib_mad *in_mad,
428 struct ib_mad *out_mad);
429int mthca_create_agents(struct mthca_dev *dev);
430void mthca_free_agents(struct mthca_dev *dev);
431
432static inline struct mthca_dev *to_mdev(struct ib_device *ibdev)
433{
434 return container_of(ibdev, struct mthca_dev, ib_dev);
435}
436
437#endif /* MTHCA_DEV_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h
new file mode 100644
index 000000000000..78b183cab54c
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h
@@ -0,0 +1,95 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <linux/types.h>
36
37#define MTHCA_RD_DOORBELL 0x00
38#define MTHCA_SEND_DOORBELL 0x10
39#define MTHCA_RECEIVE_DOORBELL 0x18
40#define MTHCA_CQ_DOORBELL 0x20
41#define MTHCA_EQ_DOORBELL 0x28
42
43#if BITS_PER_LONG == 64
44/*
45 * Assume that we can just write a 64-bit doorbell atomically. s390
46 * actually doesn't have writeq() but S/390 systems don't even have
47 * PCI so we won't worry about it.
48 */
49
50#define MTHCA_DECLARE_DOORBELL_LOCK(name)
51#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0)
52#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL)
53
54static inline void mthca_write64(u32 val[2], void __iomem *dest,
55 spinlock_t *doorbell_lock)
56{
57 __raw_writeq(*(u64 *) val, dest);
58}
59
60static inline void mthca_write_db_rec(u32 val[2], u32 *db)
61{
62 *(u64 *) db = *(u64 *) val;
63}
64
65#else
66
67/*
68 * Just fall back to a spinlock to protect the doorbell if
69 * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit
70 * MMIO writes.
71 */
72
73#define MTHCA_DECLARE_DOORBELL_LOCK(name) spinlock_t name;
74#define MTHCA_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr)
75#define MTHCA_GET_DOORBELL_LOCK(ptr) (ptr)
76
77static inline void mthca_write64(u32 val[2], void __iomem *dest,
78 spinlock_t *doorbell_lock)
79{
80 unsigned long flags;
81
82 spin_lock_irqsave(doorbell_lock, flags);
83 __raw_writel(val[0], dest);
84 __raw_writel(val[1], dest + 4);
85 spin_unlock_irqrestore(doorbell_lock, flags);
86}
87
88static inline void mthca_write_db_rec(u32 val[2], u32 *db)
89{
90 db[0] = val[0];
91 wmb();
92 db[1] = val[1];
93}
94
95#endif
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
new file mode 100644
index 000000000000..623daab5c92b
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -0,0 +1,964 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_eq.c 1382 2004-12-24 02:21:02Z roland $
33 */
34
35#include <linux/init.h>
36#include <linux/errno.h>
37#include <linux/interrupt.h>
38#include <linux/pci.h>
39
40#include "mthca_dev.h"
41#include "mthca_cmd.h"
42#include "mthca_config_reg.h"
43
44enum {
45 MTHCA_NUM_ASYNC_EQE = 0x80,
46 MTHCA_NUM_CMD_EQE = 0x80,
47 MTHCA_EQ_ENTRY_SIZE = 0x20
48};
49
50/*
51 * Must be packed because start is 64 bits but only aligned to 32 bits.
52 */
53struct mthca_eq_context {
54 u32 flags;
55 u64 start;
56 u32 logsize_usrpage;
57 u32 tavor_pd; /* reserved for Arbel */
58 u8 reserved1[3];
59 u8 intr;
60 u32 arbel_pd; /* lost_count for Tavor */
61 u32 lkey;
62 u32 reserved2[2];
63 u32 consumer_index;
64 u32 producer_index;
65 u32 reserved3[4];
66} __attribute__((packed));
67
68#define MTHCA_EQ_STATUS_OK ( 0 << 28)
69#define MTHCA_EQ_STATUS_OVERFLOW ( 9 << 28)
70#define MTHCA_EQ_STATUS_WRITE_FAIL (10 << 28)
71#define MTHCA_EQ_OWNER_SW ( 0 << 24)
72#define MTHCA_EQ_OWNER_HW ( 1 << 24)
73#define MTHCA_EQ_FLAG_TR ( 1 << 18)
74#define MTHCA_EQ_FLAG_OI ( 1 << 17)
75#define MTHCA_EQ_STATE_ARMED ( 1 << 8)
76#define MTHCA_EQ_STATE_FIRED ( 2 << 8)
77#define MTHCA_EQ_STATE_ALWAYS_ARMED ( 3 << 8)
78#define MTHCA_EQ_STATE_ARBEL ( 8 << 8)
79
80enum {
81 MTHCA_EVENT_TYPE_COMP = 0x00,
82 MTHCA_EVENT_TYPE_PATH_MIG = 0x01,
83 MTHCA_EVENT_TYPE_COMM_EST = 0x02,
84 MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03,
85 MTHCA_EVENT_TYPE_SRQ_LAST_WQE = 0x13,
86 MTHCA_EVENT_TYPE_CQ_ERROR = 0x04,
87 MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05,
88 MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06,
89 MTHCA_EVENT_TYPE_PATH_MIG_FAILED = 0x07,
90 MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10,
91 MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11,
92 MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12,
93 MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08,
94 MTHCA_EVENT_TYPE_PORT_CHANGE = 0x09,
95 MTHCA_EVENT_TYPE_EQ_OVERFLOW = 0x0f,
96 MTHCA_EVENT_TYPE_ECC_DETECT = 0x0e,
97 MTHCA_EVENT_TYPE_CMD = 0x0a
98};
99
100#define MTHCA_ASYNC_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_PATH_MIG) | \
101 (1ULL << MTHCA_EVENT_TYPE_COMM_EST) | \
102 (1ULL << MTHCA_EVENT_TYPE_SQ_DRAINED) | \
103 (1ULL << MTHCA_EVENT_TYPE_CQ_ERROR) | \
104 (1ULL << MTHCA_EVENT_TYPE_WQ_CATAS_ERROR) | \
105 (1ULL << MTHCA_EVENT_TYPE_EEC_CATAS_ERROR) | \
106 (1ULL << MTHCA_EVENT_TYPE_PATH_MIG_FAILED) | \
107 (1ULL << MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
108 (1ULL << MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR) | \
109 (1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \
110 (1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \
111 (1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
112#define MTHCA_SRQ_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \
113				(1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE))
114#define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD)
115
116#define MTHCA_EQ_DB_INC_CI (1 << 24)
117#define MTHCA_EQ_DB_REQ_NOT (2 << 24)
118#define MTHCA_EQ_DB_DISARM_CQ (3 << 24)
119#define MTHCA_EQ_DB_SET_CI (4 << 24)
120#define MTHCA_EQ_DB_ALWAYS_ARM (5 << 24)
121
122struct mthca_eqe {
123 u8 reserved1;
124 u8 type;
125 u8 reserved2;
126 u8 subtype;
127 union {
128 u32 raw[6];
129 struct {
130 u32 cqn;
131 } __attribute__((packed)) comp;
132 struct {
133 u16 reserved1;
134 u16 token;
135 u32 reserved2;
136 u8 reserved3[3];
137 u8 status;
138 u64 out_param;
139 } __attribute__((packed)) cmd;
140 struct {
141 u32 qpn;
142 } __attribute__((packed)) qp;
143 struct {
144 u32 cqn;
145 u32 reserved1;
146 u8 reserved2[3];
147 u8 syndrome;
148 } __attribute__((packed)) cq_err;
149 struct {
150 u32 reserved1[2];
151 u32 port;
152 } __attribute__((packed)) port_change;
153 } event;
154 u8 reserved3[3];
155 u8 owner;
156} __attribute__((packed));
157
158#define MTHCA_EQ_ENTRY_OWNER_SW (0 << 7)
159#define MTHCA_EQ_ENTRY_OWNER_HW (1 << 7)
160
161static inline u64 async_mask(struct mthca_dev *dev)
162{
163 return dev->mthca_flags & MTHCA_FLAG_SRQ ?
164 MTHCA_ASYNC_EVENT_MASK | MTHCA_SRQ_EVENT_MASK :
165 MTHCA_ASYNC_EVENT_MASK;
166}
167
168static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
169{
170 u32 doorbell[2];
171
172 doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn);
173 doorbell[1] = cpu_to_be32(ci & (eq->nent - 1));
174
175 /*
176 * This barrier makes sure that all updates to ownership bits
177 * done by set_eqe_hw() hit memory before the consumer index
178 * is updated. set_eq_ci() allows the HCA to possibly write
179 * more EQ entries, and we want to avoid the exceedingly
180 * unlikely possibility of the HCA writing an entry and then
181 * having set_eqe_hw() overwrite the owner field.
182 */
183 wmb();
184 mthca_write64(doorbell,
185 dev->kar + MTHCA_EQ_DOORBELL,
186 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
187}
188
189static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
190{
191 /* See comment in tavor_set_eq_ci() above. */
192 wmb();
193 __raw_writel(cpu_to_be32(ci), dev->eq_regs.arbel.eq_set_ci_base +
194 eq->eqn * 8);
195 /* We still want ordering, just not swabbing, so add a barrier */
196 mb();
197}
198
199static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
200{
201 if (dev->hca_type == ARBEL_NATIVE)
202 arbel_set_eq_ci(dev, eq, ci);
203 else
204 tavor_set_eq_ci(dev, eq, ci);
205}
206
207static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn)
208{
209 u32 doorbell[2];
210
211 doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn);
212 doorbell[1] = 0;
213
214 mthca_write64(doorbell,
215 dev->kar + MTHCA_EQ_DOORBELL,
216 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
217}
218
219static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask)
220{
221 writel(eqn_mask, dev->eq_regs.arbel.eq_arm);
222}
223
224static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
225{
226 if (dev->hca_type != ARBEL_NATIVE) {
227 u32 doorbell[2];
228
229 doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn);
230 doorbell[1] = cpu_to_be32(cqn);
231
232 mthca_write64(doorbell,
233 dev->kar + MTHCA_EQ_DOORBELL,
234 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
235 }
236}
237
238static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry)
239{
240 unsigned long off = (entry & (eq->nent - 1)) * MTHCA_EQ_ENTRY_SIZE;
241 return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
242}
243
244static inline struct mthca_eqe* next_eqe_sw(struct mthca_eq *eq)
245{
246 struct mthca_eqe* eqe;
247 eqe = get_eqe(eq, eq->cons_index);
248 return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? NULL : eqe;
249}
250
251static inline void set_eqe_hw(struct mthca_eqe *eqe)
252{
253 eqe->owner = MTHCA_EQ_ENTRY_OWNER_HW;
254}
255
256static void port_change(struct mthca_dev *dev, int port, int active)
257{
258 struct ib_event record;
259
260 mthca_dbg(dev, "Port change to %s for port %d\n",
261 active ? "active" : "down", port);
262
263 record.device = &dev->ib_dev;
264 record.event = active ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
265 record.element.port_num = port;
266
267 ib_dispatch_event(&record);
268}
269
270static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
271{
272 struct mthca_eqe *eqe;
273 int disarm_cqn;
274 int eqes_found = 0;
275
276 while ((eqe = next_eqe_sw(eq))) {
277 int set_ci = 0;
278
279 /*
280 * Make sure we read EQ entry contents after we've
281 * checked the ownership bit.
282 */
283 rmb();
284
285 switch (eqe->type) {
286 case MTHCA_EVENT_TYPE_COMP:
287 disarm_cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff;
288 disarm_cq(dev, eq->eqn, disarm_cqn);
289 mthca_cq_event(dev, disarm_cqn);
290 break;
291
292 case MTHCA_EVENT_TYPE_PATH_MIG:
293 mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
294 IB_EVENT_PATH_MIG);
295 break;
296
297 case MTHCA_EVENT_TYPE_COMM_EST:
298 mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
299 IB_EVENT_COMM_EST);
300 break;
301
302 case MTHCA_EVENT_TYPE_SQ_DRAINED:
303 mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
304 IB_EVENT_SQ_DRAINED);
305 break;
306
307 case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR:
308 mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
309 IB_EVENT_QP_FATAL);
310 break;
311
312 case MTHCA_EVENT_TYPE_PATH_MIG_FAILED:
313 mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
314 IB_EVENT_PATH_MIG_ERR);
315 break;
316
317 case MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
318 mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
319 IB_EVENT_QP_REQ_ERR);
320 break;
321
322 case MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR:
323 mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
324 IB_EVENT_QP_ACCESS_ERR);
325 break;
326
327 case MTHCA_EVENT_TYPE_CMD:
328 mthca_cmd_event(dev,
329 be16_to_cpu(eqe->event.cmd.token),
330 eqe->event.cmd.status,
331 be64_to_cpu(eqe->event.cmd.out_param));
332 /*
333 * cmd_event() may add more commands.
334 * The card will think the queue has overflowed if
335 * we don't tell it we've been processing events.
336 */
337 set_ci = 1;
338 break;
339
340 case MTHCA_EVENT_TYPE_PORT_CHANGE:
341 port_change(dev,
342 (be32_to_cpu(eqe->event.port_change.port) >> 28) & 3,
343 eqe->subtype == 0x4);
344 break;
345
346 case MTHCA_EVENT_TYPE_CQ_ERROR:
347 mthca_warn(dev, "CQ %s on CQN %08x\n",
348 eqe->event.cq_err.syndrome == 1 ?
349 "overrun" : "access violation",
350 be32_to_cpu(eqe->event.cq_err.cqn));
351 break;
352
353 case MTHCA_EVENT_TYPE_EQ_OVERFLOW:
354 mthca_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
355 break;
356
357 case MTHCA_EVENT_TYPE_EEC_CATAS_ERROR:
358 case MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR:
359 case MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR:
360 case MTHCA_EVENT_TYPE_ECC_DETECT:
361 default:
362 mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n",
363 eqe->type, eqe->subtype, eq->eqn);
364 break;
365		}
366
367 set_eqe_hw(eqe);
368 ++eq->cons_index;
369 eqes_found = 1;
370
371 if (unlikely(set_ci)) {
372 /*
373 * Conditional on hca_type is OK here because
374 * this is a rare case, not the fast path.
375 */
376 set_eq_ci(dev, eq, eq->cons_index);
377 set_ci = 0;
378 }
379 }
380
381 /*
382 * Rely on caller to set consumer index so that we don't have
383 * to test hca_type in our interrupt handling fast path.
384 */
385 return eqes_found;
386}
387
388static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
389{
390 struct mthca_dev *dev = dev_ptr;
391 u32 ecr;
392 int i;
393
394 if (dev->eq_table.clr_mask)
395 writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
396
397 ecr = readl(dev->eq_regs.tavor.ecr_base + 4);
398 if (ecr) {
399 writel(ecr, dev->eq_regs.tavor.ecr_base +
400 MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
401
402 for (i = 0; i < MTHCA_NUM_EQ; ++i)
403 if (ecr & dev->eq_table.eq[i].eqn_mask &&
404 mthca_eq_int(dev, &dev->eq_table.eq[i])) {
405 tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
406 dev->eq_table.eq[i].cons_index);
407 tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
408 }
409 }
410
411 return IRQ_RETVAL(ecr);
412}
413
414static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr,
415 struct pt_regs *regs)
416{
417 struct mthca_eq *eq = eq_ptr;
418 struct mthca_dev *dev = eq->dev;
419
420 mthca_eq_int(dev, eq);
421 tavor_set_eq_ci(dev, eq, eq->cons_index);
422 tavor_eq_req_not(dev, eq->eqn);
423
424 /* MSI-X vectors always belong to us */
425 return IRQ_HANDLED;
426}
427
428static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
429{
430 struct mthca_dev *dev = dev_ptr;
431 int work = 0;
432 int i;
433
434 if (dev->eq_table.clr_mask)
435 writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
436
437 for (i = 0; i < MTHCA_NUM_EQ; ++i)
438 if (mthca_eq_int(dev, &dev->eq_table.eq[i])) {
439 work = 1;
440 arbel_set_eq_ci(dev, &dev->eq_table.eq[i],
441 dev->eq_table.eq[i].cons_index);
442 }
443
444 arbel_eq_req_not(dev, dev->eq_table.arm_mask);
445
446 return IRQ_RETVAL(work);
447}
448
449static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr,
450 struct pt_regs *regs)
451{
452 struct mthca_eq *eq = eq_ptr;
453 struct mthca_dev *dev = eq->dev;
454
455 mthca_eq_int(dev, eq);
456 arbel_set_eq_ci(dev, eq, eq->cons_index);
457 arbel_eq_req_not(dev, eq->eqn_mask);
458
459 /* MSI-X vectors always belong to us */
460 return IRQ_HANDLED;
461}
462
463static int __devinit mthca_create_eq(struct mthca_dev *dev,
464 int nent,
465 u8 intr,
466 struct mthca_eq *eq)
467{
468 int npages = (nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
469 PAGE_SIZE;
470 u64 *dma_list = NULL;
471 dma_addr_t t;
472 void *mailbox = NULL;
473 struct mthca_eq_context *eq_context;
474 int err = -ENOMEM;
475 int i;
476 u8 status;
477
478	/* Round the EQ size up to a power of 2. */
479 for (i = 1; i < nent; i <<= 1)
480 ; /* nothing */
481 nent = i;
482
483 eq->dev = dev;
484
485 eq->page_list = kmalloc(npages * sizeof *eq->page_list,
486 GFP_KERNEL);
487 if (!eq->page_list)
488 goto err_out;
489
490 for (i = 0; i < npages; ++i)
491 eq->page_list[i].buf = NULL;
492
493 dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
494 if (!dma_list)
495 goto err_out_free;
496
497 mailbox = kmalloc(sizeof *eq_context + MTHCA_CMD_MAILBOX_EXTRA,
498 GFP_KERNEL);
499 if (!mailbox)
500 goto err_out_free;
501 eq_context = MAILBOX_ALIGN(mailbox);
502
503 for (i = 0; i < npages; ++i) {
504 eq->page_list[i].buf = pci_alloc_consistent(dev->pdev,
505 PAGE_SIZE, &t);
506 if (!eq->page_list[i].buf)
507 goto err_out_free;
508
509 dma_list[i] = t;
510 pci_unmap_addr_set(&eq->page_list[i], mapping, t);
511
512 memset(eq->page_list[i].buf, 0, PAGE_SIZE);
513 }
514
515 for (i = 0; i < nent; ++i)
516 set_eqe_hw(get_eqe(eq, i));
517
518 eq->eqn = mthca_alloc(&dev->eq_table.alloc);
519 if (eq->eqn == -1)
520 goto err_out_free;
521
522 err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
523 dma_list, PAGE_SHIFT, npages,
524 0, npages * PAGE_SIZE,
525 MTHCA_MPT_FLAG_LOCAL_WRITE |
526 MTHCA_MPT_FLAG_LOCAL_READ,
527 &eq->mr);
528 if (err)
529 goto err_out_free_eq;
530
531 eq->nent = nent;
532
533 memset(eq_context, 0, sizeof *eq_context);
534 eq_context->flags = cpu_to_be32(MTHCA_EQ_STATUS_OK |
535 MTHCA_EQ_OWNER_HW |
536 MTHCA_EQ_STATE_ARMED |
537 MTHCA_EQ_FLAG_TR);
538 if (dev->hca_type == ARBEL_NATIVE)
539 eq_context->flags |= cpu_to_be32(MTHCA_EQ_STATE_ARBEL);
540
541 eq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
542 if (dev->hca_type == ARBEL_NATIVE) {
543 eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num);
544 } else {
545 eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
546 eq_context->tavor_pd = cpu_to_be32(dev->driver_pd.pd_num);
547 }
548 eq_context->intr = intr;
549 eq_context->lkey = cpu_to_be32(eq->mr.ibmr.lkey);
550
551 err = mthca_SW2HW_EQ(dev, eq_context, eq->eqn, &status);
552 if (err) {
553 mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err);
554 goto err_out_free_mr;
555 }
556 if (status) {
557 mthca_warn(dev, "SW2HW_EQ returned status 0x%02x\n",
558 status);
559 err = -EINVAL;
560 goto err_out_free_mr;
561 }
562
563 kfree(dma_list);
564 kfree(mailbox);
565
566 eq->eqn_mask = swab32(1 << eq->eqn);
567 eq->cons_index = 0;
568
569 dev->eq_table.arm_mask |= eq->eqn_mask;
570
571 mthca_dbg(dev, "Allocated EQ %d with %d entries\n",
572 eq->eqn, nent);
573
574 return err;
575
576 err_out_free_mr:
577 mthca_free_mr(dev, &eq->mr);
578
579 err_out_free_eq:
580 mthca_free(&dev->eq_table.alloc, eq->eqn);
581
582 err_out_free:
583 for (i = 0; i < npages; ++i)
584 if (eq->page_list[i].buf)
585 pci_free_consistent(dev->pdev, PAGE_SIZE,
586 eq->page_list[i].buf,
587 pci_unmap_addr(&eq->page_list[i],
588 mapping));
589
590 kfree(eq->page_list);
591 kfree(dma_list);
592 kfree(mailbox);
593
594 err_out:
595 return err;
596}
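
Two details of mthca_create_eq() are easy to miss: the open-coded loop near the top rounds nent up to the next power of two, and (ffs(nent) - 1) << 24 then stores log2 of that size in the top byte of logsize_usrpage for the EQ context. A standalone illustration of the arithmetic, with a made-up request size:

#include <stdio.h>
#include <strings.h>		/* ffs() */

static int round_up_pow2(int nent)
{
	int i;

	for (i = 1; i < nent; i <<= 1)
		; /* nothing */
	return i;
}

int main(void)
{
	int nent = round_up_pow2(1000);		/* made-up request size */

	printf("nent = %d, log2 = %d\n", nent, ffs(nent) - 1);	/* 1024, 10 */
	return 0;
}
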
597
598static void mthca_free_eq(struct mthca_dev *dev,
599 struct mthca_eq *eq)
600{
601 void *mailbox = NULL;
602 int err;
603 u8 status;
604 int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
605 PAGE_SIZE;
606 int i;
607
608 mailbox = kmalloc(sizeof (struct mthca_eq_context) + MTHCA_CMD_MAILBOX_EXTRA,
609 GFP_KERNEL);
610 if (!mailbox)
611 return;
612
613 err = mthca_HW2SW_EQ(dev, MAILBOX_ALIGN(mailbox),
614 eq->eqn, &status);
615 if (err)
616 mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err);
617 if (status)
618 mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n",
619 status);
620
621 dev->eq_table.arm_mask &= ~eq->eqn_mask;
622
623 if (0) {
624 mthca_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn);
625 for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) {
626 if (i % 4 == 0)
627 printk("[%02x] ", i * 4);
628 printk(" %08x", be32_to_cpup(MAILBOX_ALIGN(mailbox) + i * 4));
629 if ((i + 1) % 4 == 0)
630 printk("\n");
631 }
632 }
633
634 mthca_free_mr(dev, &eq->mr);
635 for (i = 0; i < npages; ++i)
636 pci_free_consistent(dev->pdev, PAGE_SIZE,
637 eq->page_list[i].buf,
638 pci_unmap_addr(&eq->page_list[i], mapping));
639
640 kfree(eq->page_list);
641 kfree(mailbox);
642}
643
644static void mthca_free_irqs(struct mthca_dev *dev)
645{
646 int i;
647
648 if (dev->eq_table.have_irq)
649 free_irq(dev->pdev->irq, dev);
650 for (i = 0; i < MTHCA_NUM_EQ; ++i)
651 if (dev->eq_table.eq[i].have_irq)
652 free_irq(dev->eq_table.eq[i].msi_x_vector,
653 dev->eq_table.eq + i);
654}
655
656static int __devinit mthca_map_reg(struct mthca_dev *dev,
657 unsigned long offset, unsigned long size,
658 void __iomem **map)
659{
660 unsigned long base = pci_resource_start(dev->pdev, 0);
661
662 if (!request_mem_region(base + offset, size, DRV_NAME))
663 return -EBUSY;
664
665 *map = ioremap(base + offset, size);
666 if (!*map) {
667 release_mem_region(base + offset, size);
668 return -ENOMEM;
669 }
670
671 return 0;
672}
673
674static void mthca_unmap_reg(struct mthca_dev *dev, unsigned long offset,
675 unsigned long size, void __iomem *map)
676{
677 unsigned long base = pci_resource_start(dev->pdev, 0);
678
679 release_mem_region(base + offset, size);
680 iounmap(map);
681}
682
683static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
684{
685 unsigned long mthca_base;
686
687 mthca_base = pci_resource_start(dev->pdev, 0);
688
689 if (dev->hca_type == ARBEL_NATIVE) {
690 /*
691 * We assume that the EQ arm and EQ set CI registers
692 * fall within the first BAR. We can't trust the
693 * values firmware gives us, since those addresses are
694 * valid on the HCA's side of the PCI bus but not
695 * necessarily the host side.
696 */
697 if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
698 dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
699 &dev->clr_base)) {
700 mthca_err(dev, "Couldn't map interrupt clear register, "
701 "aborting.\n");
702 return -ENOMEM;
703 }
704
705 /*
706 * Add 4 because we limit ourselves to EQs 0 ... 31,
707 * so we only need the low word of the register.
708 */
709 if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
710 dev->fw.arbel.eq_arm_base) + 4, 4,
711 &dev->eq_regs.arbel.eq_arm)) {
712			mthca_err(dev, "Couldn't map EQ arm register, "
713 "aborting.\n");
714 mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
715 dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
716 dev->clr_base);
717 return -ENOMEM;
718 }
719
720 if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
721 dev->fw.arbel.eq_set_ci_base,
722 MTHCA_EQ_SET_CI_SIZE,
723 &dev->eq_regs.arbel.eq_set_ci_base)) {
724			mthca_err(dev, "Couldn't map EQ CI register, "
725 "aborting.\n");
726 mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
727 dev->fw.arbel.eq_arm_base) + 4, 4,
728 dev->eq_regs.arbel.eq_arm);
729 mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
730 dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
731 dev->clr_base);
732 return -ENOMEM;
733 }
734 } else {
735 if (mthca_map_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
736 &dev->clr_base)) {
737 mthca_err(dev, "Couldn't map interrupt clear register, "
738 "aborting.\n");
739 return -ENOMEM;
740 }
741
742 if (mthca_map_reg(dev, MTHCA_ECR_BASE,
743 MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
744 &dev->eq_regs.tavor.ecr_base)) {
745 mthca_err(dev, "Couldn't map ecr register, "
746 "aborting.\n");
747 mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
748 dev->clr_base);
749 return -ENOMEM;
750 }
751 }
752
753 return 0;
754
755}
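
The recurring (pci_resource_len(dev->pdev, 0) - 1) & ... expression in the Arbel branch above is an offset mask: BAR 0 is a power-of-two-sized region, so ANDing the firmware-reported address with (length - 1) keeps only the offset within the BAR and discards the bus-address bits the host cannot trust, as the first comment in the function explains. A standalone sketch of the arithmetic, with a made-up firmware value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t bar_len = 1 << 20;		/* 1 MB BAR 0, as checked at probe time */
	uint64_t fw_addr = 0xf2000800d0ULL;	/* made-up clr_int_base from firmware */
	uint64_t offset  = (bar_len - 1) & fw_addr;

	printf("register offset within BAR 0: 0x%llx\n",
	       (unsigned long long) offset);	/* 0x800d0 */
	return 0;
}
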
756
757static void __devexit mthca_unmap_eq_regs(struct mthca_dev *dev)
758{
759 if (dev->hca_type == ARBEL_NATIVE) {
760 mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
761 dev->fw.arbel.eq_set_ci_base,
762 MTHCA_EQ_SET_CI_SIZE,
763 dev->eq_regs.arbel.eq_set_ci_base);
764 mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
765 dev->fw.arbel.eq_arm_base) + 4, 4,
766 dev->eq_regs.arbel.eq_arm);
767 mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
768 dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
769 dev->clr_base);
770 } else {
771 mthca_unmap_reg(dev, MTHCA_ECR_BASE,
772 MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
773 dev->eq_regs.tavor.ecr_base);
774 mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
775 dev->clr_base);
776 }
777}
778
779int __devinit mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
780{
781 int ret;
782 u8 status;
783
784 /*
785 * We assume that mapping one page is enough for the whole EQ
786 * context table. This is fine with all current HCAs, because
787 * we only use 32 EQs and each EQ uses 32 bytes of context
788 * memory, or 1 KB total.
789 */
790 dev->eq_table.icm_virt = icm_virt;
791 dev->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
792 if (!dev->eq_table.icm_page)
793 return -ENOMEM;
794 dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0,
795 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
796 if (pci_dma_mapping_error(dev->eq_table.icm_dma)) {
797 __free_page(dev->eq_table.icm_page);
798 return -ENOMEM;
799 }
800
801 ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt, &status);
802 if (!ret && status)
803 ret = -EINVAL;
804 if (ret) {
805 pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
806 PCI_DMA_BIDIRECTIONAL);
807 __free_page(dev->eq_table.icm_page);
808 }
809
810 return ret;
811}
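
The single-page assumption in the comment above is easy to verify: the driver only uses EQs 0 through 31 and each EQ uses 32 bytes of context, so the whole EQ context table is 1 KB, well under any page size the driver runs with. As a quick check (constants restated here, not pulled from the headers):

#include <assert.h>

int main(void)
{
	int eqs_used        = 32;	/* EQs 0 ... 31, per the comment above */
	int eq_context_size = 32;	/* bytes of context per EQ */
	int min_page_size   = 4096;

	assert(eqs_used * eq_context_size == 1024);	/* 1 KB total */
	assert(eqs_used * eq_context_size <= min_page_size);
	return 0;
}
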
812
813void __devexit mthca_unmap_eq_icm(struct mthca_dev *dev)
814{
815 u8 status;
816
817 mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, PAGE_SIZE / 4096, &status);
818 pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
819 PCI_DMA_BIDIRECTIONAL);
820 __free_page(dev->eq_table.icm_page);
821}
822
823int __devinit mthca_init_eq_table(struct mthca_dev *dev)
824{
825 int err;
826 u8 status;
827 u8 intr;
828 int i;
829
830 err = mthca_alloc_init(&dev->eq_table.alloc,
831 dev->limits.num_eqs,
832 dev->limits.num_eqs - 1,
833 dev->limits.reserved_eqs);
834 if (err)
835 return err;
836
837 err = mthca_map_eq_regs(dev);
838 if (err)
839 goto err_out_free;
840
841 if (dev->mthca_flags & MTHCA_FLAG_MSI ||
842 dev->mthca_flags & MTHCA_FLAG_MSI_X) {
843 dev->eq_table.clr_mask = 0;
844 } else {
845 dev->eq_table.clr_mask =
846 swab32(1 << (dev->eq_table.inta_pin & 31));
847 dev->eq_table.clr_int = dev->clr_base +
848 (dev->eq_table.inta_pin < 31 ? 4 : 0);
849 }
850
851 dev->eq_table.arm_mask = 0;
852
853 intr = (dev->mthca_flags & MTHCA_FLAG_MSI) ?
854 128 : dev->eq_table.inta_pin;
855
856 err = mthca_create_eq(dev, dev->limits.num_cqs,
857 (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr,
858 &dev->eq_table.eq[MTHCA_EQ_COMP]);
859 if (err)
860 goto err_out_unmap;
861
862 err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE,
863 (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr,
864 &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
865 if (err)
866 goto err_out_comp;
867
868 err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE,
869 (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 130 : intr,
870 &dev->eq_table.eq[MTHCA_EQ_CMD]);
871 if (err)
872 goto err_out_async;
873
874 if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
875 static const char *eq_name[] = {
876 [MTHCA_EQ_COMP] = DRV_NAME " (comp)",
877 [MTHCA_EQ_ASYNC] = DRV_NAME " (async)",
878 [MTHCA_EQ_CMD] = DRV_NAME " (cmd)"
879 };
880
881 for (i = 0; i < MTHCA_NUM_EQ; ++i) {
882 err = request_irq(dev->eq_table.eq[i].msi_x_vector,
883 dev->hca_type == ARBEL_NATIVE ?
884 mthca_arbel_msi_x_interrupt :
885 mthca_tavor_msi_x_interrupt,
886 0, eq_name[i], dev->eq_table.eq + i);
887 if (err)
888 goto err_out_cmd;
889 dev->eq_table.eq[i].have_irq = 1;
890 }
891 } else {
892 err = request_irq(dev->pdev->irq,
893 dev->hca_type == ARBEL_NATIVE ?
894 mthca_arbel_interrupt :
895 mthca_tavor_interrupt,
896 SA_SHIRQ, DRV_NAME, dev);
897 if (err)
898 goto err_out_cmd;
899 dev->eq_table.have_irq = 1;
900 }
901
902 err = mthca_MAP_EQ(dev, async_mask(dev),
903 0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
904 if (err)
905 mthca_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
906 dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err);
907 if (status)
908 mthca_warn(dev, "MAP_EQ for async EQ %d returned status 0x%02x\n",
909 dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status);
910
911 err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
912 0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
913 if (err)
914 mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n",
915 dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err);
916 if (status)
917 mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n",
918 dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status);
919
920 for (i = 0; i < MTHCA_EQ_CMD; ++i)
921 if (dev->hca_type == ARBEL_NATIVE)
922 arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
923 else
924 tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
925
926 return 0;
927
928err_out_cmd:
929 mthca_free_irqs(dev);
930 mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_CMD]);
931
932err_out_async:
933 mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
934
935err_out_comp:
936 mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_COMP]);
937
938err_out_unmap:
939 mthca_unmap_eq_regs(dev);
940
941err_out_free:
942 mthca_alloc_cleanup(&dev->eq_table.alloc);
943 return err;
944}
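
Both clr_mask here and the per-EQ eqn_mask set up in mthca_create_eq() are precomputed with swab32(1 << n): the masks are byte-swapped once at init time so the handlers can compare directly against a raw readl() of the big-endian ECR register and write the value back with writel() without any conversion in the hot path. A standalone illustration, using a local stand-in for swab32():

#include <stdint.h>
#include <stdio.h>

static uint32_t bswap32(uint32_t x)	/* stand-in for the kernel's swab32() */
{
	return (x >> 24) | ((x >> 8) & 0x0000ff00) |
	       ((x << 8) & 0x00ff0000) | (x << 24);
}

int main(void)
{
	int eqn = 2;
	uint32_t eqn_mask = bswap32(1 << eqn);

	printf("eqn_mask = 0x%08x\n", eqn_mask);	/* 0x04000000 */
	return 0;
}
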
945
946void __devexit mthca_cleanup_eq_table(struct mthca_dev *dev)
947{
948 u8 status;
949 int i;
950
951 mthca_free_irqs(dev);
952
953 mthca_MAP_EQ(dev, async_mask(dev),
954 1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
955 mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
956 1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
957
958 for (i = 0; i < MTHCA_NUM_EQ; ++i)
959 mthca_free_eq(dev, &dev->eq_table.eq[i]);
960
961 mthca_unmap_eq_regs(dev);
962
963 mthca_alloc_cleanup(&dev->eq_table.alloc);
964}
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
new file mode 100644
index 000000000000..7df223642015
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -0,0 +1,323 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_mad.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <ib_verbs.h>
36#include <ib_mad.h>
37#include <ib_smi.h>
38
39#include "mthca_dev.h"
40#include "mthca_cmd.h"
41
42enum {
43 MTHCA_VENDOR_CLASS1 = 0x9,
44 MTHCA_VENDOR_CLASS2 = 0xa
45};
46
47struct mthca_trap_mad {
48 struct ib_mad *mad;
49 DECLARE_PCI_UNMAP_ADDR(mapping)
50};
51
52static void update_sm_ah(struct mthca_dev *dev,
53 u8 port_num, u16 lid, u8 sl)
54{
55 struct ib_ah *new_ah;
56 struct ib_ah_attr ah_attr;
57 unsigned long flags;
58
59 if (!dev->send_agent[port_num - 1][0])
60 return;
61
62 memset(&ah_attr, 0, sizeof ah_attr);
63 ah_attr.dlid = lid;
64 ah_attr.sl = sl;
65 ah_attr.port_num = port_num;
66
67 new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
68 &ah_attr);
69 if (IS_ERR(new_ah))
70 return;
71
72 spin_lock_irqsave(&dev->sm_lock, flags);
73 if (dev->sm_ah[port_num - 1])
74 ib_destroy_ah(dev->sm_ah[port_num - 1]);
75 dev->sm_ah[port_num - 1] = new_ah;
76 spin_unlock_irqrestore(&dev->sm_lock, flags);
77}
78
79/*
80 * Snoop SM MADs for port info and P_Key table sets, so we can
81 * synthesize LID change and P_Key change events.
82 */
83static void smp_snoop(struct ib_device *ibdev,
84 u8 port_num,
85 struct ib_mad *mad)
86{
87 struct ib_event event;
88
89 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
90 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
91 mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
92 if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
93 update_sm_ah(to_mdev(ibdev), port_num,
94 be16_to_cpup((__be16 *) (mad->data + 58)),
95 (*(u8 *) (mad->data + 76)) & 0xf);
96
97 event.device = ibdev;
98 event.event = IB_EVENT_LID_CHANGE;
99 event.element.port_num = port_num;
100 ib_dispatch_event(&event);
101 }
102
103 if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
104 event.device = ibdev;
105 event.event = IB_EVENT_PKEY_CHANGE;
106 event.element.port_num = port_num;
107 ib_dispatch_event(&event);
108 }
109 }
110}
111
112static void forward_trap(struct mthca_dev *dev,
113 u8 port_num,
114 struct ib_mad *mad)
115{
116 int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
117 struct mthca_trap_mad *tmad;
118 struct ib_sge gather_list;
119 struct ib_send_wr *bad_wr, wr = {
120 .opcode = IB_WR_SEND,
121 .sg_list = &gather_list,
122 .num_sge = 1,
123 .send_flags = IB_SEND_SIGNALED,
124 .wr = {
125 .ud = {
126 .remote_qpn = qpn,
127 .remote_qkey = qpn ? IB_QP1_QKEY : 0,
128 .timeout_ms = 0
129 }
130 }
131 };
132 struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
133 int ret;
134 unsigned long flags;
135
136 if (agent) {
137 tmad = kmalloc(sizeof *tmad, GFP_KERNEL);
138 if (!tmad)
139 return;
140
141 tmad->mad = kmalloc(sizeof *tmad->mad, GFP_KERNEL);
142 if (!tmad->mad) {
143 kfree(tmad);
144 return;
145 }
146
147 memcpy(tmad->mad, mad, sizeof *mad);
148
149 wr.wr.ud.mad_hdr = &tmad->mad->mad_hdr;
150 wr.wr_id = (unsigned long) tmad;
151
152 gather_list.addr = dma_map_single(agent->device->dma_device,
153 tmad->mad,
154 sizeof *tmad->mad,
155 DMA_TO_DEVICE);
156 gather_list.length = sizeof *tmad->mad;
157 gather_list.lkey = to_mpd(agent->qp->pd)->ntmr.ibmr.lkey;
158 pci_unmap_addr_set(tmad, mapping, gather_list.addr);
159
160 /*
161 * We rely here on the fact that MLX QPs don't use the
162 * address handle after the send is posted (this is
163 * wrong following the IB spec strictly, but we know
164 * it's OK for our devices).
165 */
166 spin_lock_irqsave(&dev->sm_lock, flags);
167 wr.wr.ud.ah = dev->sm_ah[port_num - 1];
168 if (wr.wr.ud.ah)
169 ret = ib_post_send_mad(agent, &wr, &bad_wr);
170 else
171 ret = -EINVAL;
172 spin_unlock_irqrestore(&dev->sm_lock, flags);
173
174 if (ret) {
175 dma_unmap_single(agent->device->dma_device,
176 pci_unmap_addr(tmad, mapping),
177 sizeof *tmad->mad,
178 DMA_TO_DEVICE);
179 kfree(tmad->mad);
180 kfree(tmad);
181 }
182 }
183}
184
185int mthca_process_mad(struct ib_device *ibdev,
186 int mad_flags,
187 u8 port_num,
188 struct ib_wc *in_wc,
189 struct ib_grh *in_grh,
190 struct ib_mad *in_mad,
191 struct ib_mad *out_mad)
192{
193 int err;
194 u8 status;
195 u16 slid = in_wc ? in_wc->slid : IB_LID_PERMISSIVE;
196
197 /* Forward locally generated traps to the SM */
198 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP &&
199 slid == 0) {
200 forward_trap(to_mdev(ibdev), port_num, in_mad);
201 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
202 }
203
204 /*
205 * Only handle SM gets, sets and trap represses for SM class
206 *
207 * Only handle PMA and Mellanox vendor-specific class gets and
208 * sets for other classes.
209 */
210 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
211 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
212 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
213 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
214 in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
215 return IB_MAD_RESULT_SUCCESS;
216
217 /*
218 * Don't process SMInfo queries or vendor-specific
219 * MADs -- the SMA can't handle them.
220 */
221 if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
222 ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
223 IB_SMP_ATTR_VENDOR_MASK))
224 return IB_MAD_RESULT_SUCCESS;
225 } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
226 in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 ||
227 in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) {
228 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
229 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
230 return IB_MAD_RESULT_SUCCESS;
231 } else
232 return IB_MAD_RESULT_SUCCESS;
233
234 err = mthca_MAD_IFC(to_mdev(ibdev),
235 mad_flags & IB_MAD_IGNORE_MKEY,
236 mad_flags & IB_MAD_IGNORE_BKEY,
237 port_num, in_wc, in_grh, in_mad, out_mad,
238 &status);
239 if (err) {
240 mthca_err(to_mdev(ibdev), "MAD_IFC failed\n");
241 return IB_MAD_RESULT_FAILURE;
242 }
243 if (status == MTHCA_CMD_STAT_BAD_PKT)
244 return IB_MAD_RESULT_SUCCESS;
245 if (status) {
246 mthca_err(to_mdev(ibdev), "MAD_IFC returned status %02x\n",
247 status);
248 return IB_MAD_RESULT_FAILURE;
249 }
250
251 if (!out_mad->mad_hdr.status)
252 smp_snoop(ibdev, port_num, in_mad);
253
254 /* set return bit in status of directed route responses */
255 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
256 out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
257
258 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
259 /* no response for trap repress */
260 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
261
262 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
263}
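
The return values of mthca_process_mad() combine as a small bit set, using the IB_MAD_RESULT_* flags from the ib_* headers included above: FAILURE aborts processing, SUCCESS alone means the MAD was looked at but there is nothing to send, SUCCESS | REPLY means out_mad now holds a response, and SUCCESS | CONSUMED tells the MAD layer not to process the packet any further. A stripped-down skeleton showing only that control flow (the real decisions are the class, method and MAD_IFC checks above):

static int process_mad_outline(int fw_error, int consumed_locally, int want_reply)
{
	if (fw_error)
		return IB_MAD_RESULT_FAILURE;

	if (consumed_locally)		/* forwarded trap, trap repress */
		return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;

	if (!want_reply)		/* class or method we don't handle */
		return IB_MAD_RESULT_SUCCESS;

	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
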
264
265static void send_handler(struct ib_mad_agent *agent,
266 struct ib_mad_send_wc *mad_send_wc)
267{
268 struct mthca_trap_mad *tmad =
269 (void *) (unsigned long) mad_send_wc->wr_id;
270
271 dma_unmap_single(agent->device->dma_device,
272 pci_unmap_addr(tmad, mapping),
273 sizeof *tmad->mad,
274 DMA_TO_DEVICE);
275 kfree(tmad->mad);
276 kfree(tmad);
277}
278
279int mthca_create_agents(struct mthca_dev *dev)
280{
281 struct ib_mad_agent *agent;
282 int p, q;
283
284 spin_lock_init(&dev->sm_lock);
285
286 for (p = 0; p < dev->limits.num_ports; ++p)
287 for (q = 0; q <= 1; ++q) {
288 agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
289 q ? IB_QPT_GSI : IB_QPT_SMI,
290 NULL, 0, send_handler,
291 NULL, NULL);
292 if (IS_ERR(agent))
293 goto err;
294 dev->send_agent[p][q] = agent;
295 }
296
297 return 0;
298
299err:
300 for (p = 0; p < dev->limits.num_ports; ++p)
301 for (q = 0; q <= 1; ++q)
302 if (dev->send_agent[p][q])
303 ib_unregister_mad_agent(dev->send_agent[p][q]);
304
305 return PTR_ERR(agent);
306}
307
308void mthca_free_agents(struct mthca_dev *dev)
309{
310 struct ib_mad_agent *agent;
311 int p, q;
312
313 for (p = 0; p < dev->limits.num_ports; ++p) {
314 for (q = 0; q <= 1; ++q) {
315 agent = dev->send_agent[p][q];
316 dev->send_agent[p][q] = NULL;
317 ib_unregister_mad_agent(agent);
318 }
319
320 if (dev->sm_ah[p])
321 ib_destroy_ah(dev->sm_ah[p]);
322 }
323}
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
new file mode 100644
index 000000000000..9e782bc1c38d
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -0,0 +1,1123 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_main.c 1396 2004-12-28 04:10:27Z roland $
33 */
34
35#include <linux/config.h>
36#include <linux/version.h>
37#include <linux/module.h>
38#include <linux/init.h>
39#include <linux/errno.h>
40#include <linux/pci.h>
41#include <linux/interrupt.h>
42
43#include "mthca_dev.h"
44#include "mthca_config_reg.h"
45#include "mthca_cmd.h"
46#include "mthca_profile.h"
47#include "mthca_memfree.h"
48
49MODULE_AUTHOR("Roland Dreier");
50MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver");
51MODULE_LICENSE("Dual BSD/GPL");
52MODULE_VERSION(DRV_VERSION);
53
54#ifdef CONFIG_PCI_MSI
55
56static int msi_x = 0;
57module_param(msi_x, int, 0444);
58MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
59
60static int msi = 0;
61module_param(msi, int, 0444);
62MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero");
63
64#else /* CONFIG_PCI_MSI */
65
66#define msi_x (0)
67#define msi (0)
68
69#endif /* CONFIG_PCI_MSI */
70
71static const char mthca_version[] __devinitdata =
72 "ib_mthca: Mellanox InfiniBand HCA driver v"
73 DRV_VERSION " (" DRV_RELDATE ")\n";
74
75static struct mthca_profile default_profile = {
76 .num_qp = 1 << 16,
77 .rdb_per_qp = 4,
78 .num_cq = 1 << 16,
79 .num_mcg = 1 << 13,
80 .num_mpt = 1 << 17,
81 .num_mtt = 1 << 20,
82 .num_udav = 1 << 15, /* Tavor only */
83 .uarc_size = 1 << 18, /* Arbel only */
84};
85
86static int __devinit mthca_tune_pci(struct mthca_dev *mdev)
87{
88 int cap;
89 u16 val;
90
91 /* First try to max out Read Byte Count */
92 cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
93 if (cap) {
94 if (pci_read_config_word(mdev->pdev, cap + PCI_X_CMD, &val)) {
95 mthca_err(mdev, "Couldn't read PCI-X command register, "
96 "aborting.\n");
97 return -ENODEV;
98 }
99 val = (val & ~PCI_X_CMD_MAX_READ) | (3 << 2);
100 if (pci_write_config_word(mdev->pdev, cap + PCI_X_CMD, val)) {
101 mthca_err(mdev, "Couldn't write PCI-X command register, "
102 "aborting.\n");
103 return -ENODEV;
104 }
105 } else if (mdev->hca_type == TAVOR)
106 mthca_info(mdev, "No PCI-X capability, not setting RBC.\n");
107
108 cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP);
109 if (cap) {
110 if (pci_read_config_word(mdev->pdev, cap + PCI_EXP_DEVCTL, &val)) {
111 mthca_err(mdev, "Couldn't read PCI Express device control "
112 "register, aborting.\n");
113 return -ENODEV;
114 }
115 val = (val & ~PCI_EXP_DEVCTL_READRQ) | (5 << 12);
116 if (pci_write_config_word(mdev->pdev, cap + PCI_EXP_DEVCTL, val)) {
117 mthca_err(mdev, "Couldn't write PCI Express device control "
118 "register, aborting.\n");
119 return -ENODEV;
120 }
121 } else if (mdev->hca_type == ARBEL_NATIVE ||
122 mdev->hca_type == ARBEL_COMPAT)
123 mthca_info(mdev, "No PCI Express capability, "
124 "not setting Max Read Request Size.\n");
125
126 return 0;
127}
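
The two magic constants in mthca_tune_pci() are field encodings, not byte counts: writing 3 into the PCI-X maximum memory read byte count field (the 3 << 2 above) selects 512 << 3 = 4096-byte reads, and writing 5 into the PCI Express max read request size field (the 5 << 12) selects 128 << 5 = 4096 bytes, so both paths ask the HCA for the largest standard read size. A two-line check of the arithmetic:

#include <stdio.h>

int main(void)
{
	printf("PCI-X max read byte count: %d bytes\n", 512 << 3);	/* 4096 */
	printf("PCIe max read request:     %d bytes\n", 128 << 5);	/* 4096 */
	return 0;
}
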
128
129static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
130{
131 int err;
132 u8 status;
133
134 err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status);
135 if (err) {
136 mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
137 return err;
138 }
139 if (status) {
140 mthca_err(mdev, "QUERY_DEV_LIM returned status 0x%02x, "
141 "aborting.\n", status);
142 return -EINVAL;
143 }
144 if (dev_lim->min_page_sz > PAGE_SIZE) {
145 mthca_err(mdev, "HCA minimum page size of %d bigger than "
146 "kernel PAGE_SIZE of %ld, aborting.\n",
147 dev_lim->min_page_sz, PAGE_SIZE);
148 return -ENODEV;
149 }
150 if (dev_lim->num_ports > MTHCA_MAX_PORTS) {
151 mthca_err(mdev, "HCA has %d ports, but we only support %d, "
152 "aborting.\n",
153 dev_lim->num_ports, MTHCA_MAX_PORTS);
154 return -ENODEV;
155 }
156
157 mdev->limits.num_ports = dev_lim->num_ports;
158 mdev->limits.vl_cap = dev_lim->max_vl;
159 mdev->limits.mtu_cap = dev_lim->max_mtu;
160 mdev->limits.gid_table_len = dev_lim->max_gids;
161 mdev->limits.pkey_table_len = dev_lim->max_pkeys;
162 mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay;
163 mdev->limits.max_sg = dev_lim->max_sg;
164 mdev->limits.reserved_qps = dev_lim->reserved_qps;
165 mdev->limits.reserved_srqs = dev_lim->reserved_srqs;
166 mdev->limits.reserved_eecs = dev_lim->reserved_eecs;
167 mdev->limits.reserved_cqs = dev_lim->reserved_cqs;
168 mdev->limits.reserved_eqs = dev_lim->reserved_eqs;
169 mdev->limits.reserved_mtts = dev_lim->reserved_mtts;
170 mdev->limits.reserved_mrws = dev_lim->reserved_mrws;
171 mdev->limits.reserved_uars = dev_lim->reserved_uars;
172 mdev->limits.reserved_pds = dev_lim->reserved_pds;
173
174 /* IB_DEVICE_RESIZE_MAX_WR not supported by driver.
175 May be doable since hardware supports it for SRQ.
176
177 IB_DEVICE_N_NOTIFY_CQ is supported by hardware but not by driver.
178
179 IB_DEVICE_SRQ_RESIZE is supported by hardware but SRQ is not
180 supported by driver. */
181 mdev->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
182 IB_DEVICE_PORT_ACTIVE_EVENT |
183 IB_DEVICE_SYS_IMAGE_GUID |
184 IB_DEVICE_RC_RNR_NAK_GEN;
185
186 if (dev_lim->flags & DEV_LIM_FLAG_BAD_PKEY_CNTR)
187 mdev->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
188
189 if (dev_lim->flags & DEV_LIM_FLAG_BAD_QKEY_CNTR)
190 mdev->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
191
192 if (dev_lim->flags & DEV_LIM_FLAG_RAW_MULTI)
193 mdev->device_cap_flags |= IB_DEVICE_RAW_MULTI;
194
195 if (dev_lim->flags & DEV_LIM_FLAG_AUTO_PATH_MIG)
196 mdev->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
197
198 if (dev_lim->flags & DEV_LIM_FLAG_UD_AV_PORT_ENFORCE)
199 mdev->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
200
201 if (dev_lim->flags & DEV_LIM_FLAG_SRQ)
202 mdev->mthca_flags |= MTHCA_FLAG_SRQ;
203
204 return 0;
205}
206
207static int __devinit mthca_init_tavor(struct mthca_dev *mdev)
208{
209 u8 status;
210 int err;
211 struct mthca_dev_lim dev_lim;
212 struct mthca_profile profile;
213 struct mthca_init_hca_param init_hca;
214 struct mthca_adapter adapter;
215
216 err = mthca_SYS_EN(mdev, &status);
217 if (err) {
218 mthca_err(mdev, "SYS_EN command failed, aborting.\n");
219 return err;
220 }
221 if (status) {
222 mthca_err(mdev, "SYS_EN returned status 0x%02x, "
223 "aborting.\n", status);
224 return -EINVAL;
225 }
226
227 err = mthca_QUERY_FW(mdev, &status);
228 if (err) {
229 mthca_err(mdev, "QUERY_FW command failed, aborting.\n");
230 goto err_disable;
231 }
232 if (status) {
233 mthca_err(mdev, "QUERY_FW returned status 0x%02x, "
234 "aborting.\n", status);
235 err = -EINVAL;
236 goto err_disable;
237 }
238 err = mthca_QUERY_DDR(mdev, &status);
239 if (err) {
240 mthca_err(mdev, "QUERY_DDR command failed, aborting.\n");
241 goto err_disable;
242 }
243 if (status) {
244 mthca_err(mdev, "QUERY_DDR returned status 0x%02x, "
245 "aborting.\n", status);
246 err = -EINVAL;
247 goto err_disable;
248 }
249
250 err = mthca_dev_lim(mdev, &dev_lim);
251
252 profile = default_profile;
253 profile.num_uar = dev_lim.uar_size / PAGE_SIZE;
254 profile.uarc_size = 0;
255
256 err = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca);
257 if (err < 0)
258 goto err_disable;
259
260 err = mthca_INIT_HCA(mdev, &init_hca, &status);
261 if (err) {
262 mthca_err(mdev, "INIT_HCA command failed, aborting.\n");
263 goto err_disable;
264 }
265 if (status) {
266 mthca_err(mdev, "INIT_HCA returned status 0x%02x, "
267 "aborting.\n", status);
268 err = -EINVAL;
269 goto err_disable;
270 }
271
272 err = mthca_QUERY_ADAPTER(mdev, &adapter, &status);
273 if (err) {
274 mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n");
275 goto err_close;
276 }
277 if (status) {
278 mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, "
279 "aborting.\n", status);
280 err = -EINVAL;
281 goto err_close;
282 }
283
284 mdev->eq_table.inta_pin = adapter.inta_pin;
285 mdev->rev_id = adapter.revision_id;
286
287 return 0;
288
289err_close:
290 mthca_CLOSE_HCA(mdev, 0, &status);
291
292err_disable:
293 mthca_SYS_DIS(mdev, &status);
294
295 return err;
296}
297
298static int __devinit mthca_load_fw(struct mthca_dev *mdev)
299{
300 u8 status;
301 int err;
302
303 /* FIXME: use HCA-attached memory for FW if present */
304
305 mdev->fw.arbel.fw_icm =
306 mthca_alloc_icm(mdev, mdev->fw.arbel.fw_pages,
307 GFP_HIGHUSER | __GFP_NOWARN);
308 if (!mdev->fw.arbel.fw_icm) {
309 mthca_err(mdev, "Couldn't allocate FW area, aborting.\n");
310 return -ENOMEM;
311 }
312
313 err = mthca_MAP_FA(mdev, mdev->fw.arbel.fw_icm, &status);
314 if (err) {
315 mthca_err(mdev, "MAP_FA command failed, aborting.\n");
316 goto err_free;
317 }
318 if (status) {
319 mthca_err(mdev, "MAP_FA returned status 0x%02x, aborting.\n", status);
320 err = -EINVAL;
321 goto err_free;
322 }
323 err = mthca_RUN_FW(mdev, &status);
324 if (err) {
325 mthca_err(mdev, "RUN_FW command failed, aborting.\n");
326 goto err_unmap_fa;
327 }
328 if (status) {
329 mthca_err(mdev, "RUN_FW returned status 0x%02x, aborting.\n", status);
330 err = -EINVAL;
331 goto err_unmap_fa;
332 }
333
334 return 0;
335
336err_unmap_fa:
337 mthca_UNMAP_FA(mdev, &status);
338
339err_free:
340 mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
341 return err;
342}
343
344static int __devinit mthca_init_icm(struct mthca_dev *mdev,
345 struct mthca_dev_lim *dev_lim,
346 struct mthca_init_hca_param *init_hca,
347 u64 icm_size)
348{
349 u64 aux_pages;
350 u8 status;
351 int err;
352
353 err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages, &status);
354 if (err) {
355 mthca_err(mdev, "SET_ICM_SIZE command failed, aborting.\n");
356 return err;
357 }
358 if (status) {
359 mthca_err(mdev, "SET_ICM_SIZE returned status 0x%02x, "
360 "aborting.\n", status);
361 return -EINVAL;
362 }
363
364 mthca_dbg(mdev, "%lld KB of HCA context requires %lld KB aux memory.\n",
365 (unsigned long long) icm_size >> 10,
366 (unsigned long long) aux_pages << 2);
367
368 mdev->fw.arbel.aux_icm = mthca_alloc_icm(mdev, aux_pages,
369 GFP_HIGHUSER | __GFP_NOWARN);
370 if (!mdev->fw.arbel.aux_icm) {
371 mthca_err(mdev, "Couldn't allocate aux memory, aborting.\n");
372 return -ENOMEM;
373 }
374
375 err = mthca_MAP_ICM_AUX(mdev, mdev->fw.arbel.aux_icm, &status);
376 if (err) {
377 mthca_err(mdev, "MAP_ICM_AUX command failed, aborting.\n");
378 goto err_free_aux;
379 }
380 if (status) {
381 mthca_err(mdev, "MAP_ICM_AUX returned status 0x%02x, aborting.\n", status);
382 err = -EINVAL;
383 goto err_free_aux;
384 }
385
386 err = mthca_map_eq_icm(mdev, init_hca->eqc_base);
387 if (err) {
388 mthca_err(mdev, "Failed to map EQ context memory, aborting.\n");
389 goto err_unmap_aux;
390 }
391
392 mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
393 init_hca->mtt_seg_sz,
394 mdev->limits.num_mtt_segs,
395 mdev->limits.reserved_mtts, 1);
396 if (!mdev->mr_table.mtt_table) {
397 mthca_err(mdev, "Failed to map MTT context memory, aborting.\n");
398 err = -ENOMEM;
399 goto err_unmap_eq;
400 }
401
402 mdev->mr_table.mpt_table = mthca_alloc_icm_table(mdev, init_hca->mpt_base,
403 dev_lim->mpt_entry_sz,
404 mdev->limits.num_mpts,
405 mdev->limits.reserved_mrws, 1);
406 if (!mdev->mr_table.mpt_table) {
407 mthca_err(mdev, "Failed to map MPT context memory, aborting.\n");
408 err = -ENOMEM;
409 goto err_unmap_mtt;
410 }
411
412 mdev->qp_table.qp_table = mthca_alloc_icm_table(mdev, init_hca->qpc_base,
413 dev_lim->qpc_entry_sz,
414 mdev->limits.num_qps,
415 mdev->limits.reserved_qps, 0);
416 if (!mdev->qp_table.qp_table) {
417 mthca_err(mdev, "Failed to map QP context memory, aborting.\n");
418 err = -ENOMEM;
419 goto err_unmap_mpt;
420 }
421
422 mdev->qp_table.eqp_table = mthca_alloc_icm_table(mdev, init_hca->eqpc_base,
423 dev_lim->eqpc_entry_sz,
424 mdev->limits.num_qps,
425 mdev->limits.reserved_qps, 0);
426 if (!mdev->qp_table.eqp_table) {
427 mthca_err(mdev, "Failed to map EQP context memory, aborting.\n");
428 err = -ENOMEM;
429 goto err_unmap_qp;
430 }
431
432 mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base,
433 dev_lim->cqc_entry_sz,
434 mdev->limits.num_cqs,
435 mdev->limits.reserved_cqs, 0);
436 if (!mdev->cq_table.table) {
437 mthca_err(mdev, "Failed to map CQ context memory, aborting.\n");
438 err = -ENOMEM;
439 goto err_unmap_eqp;
440 }
441
442 /*
443 * It's not strictly required, but for simplicity just map the
444 * whole multicast group table now. The table isn't very big
445 * and it's a lot easier than trying to track ref counts.
446 */
447 mdev->mcg_table.table = mthca_alloc_icm_table(mdev, init_hca->mc_base,
448 MTHCA_MGM_ENTRY_SIZE,
449 mdev->limits.num_mgms +
450 mdev->limits.num_amgms,
451 mdev->limits.num_mgms +
452 mdev->limits.num_amgms,
453 0);
454 if (!mdev->mcg_table.table) {
455 mthca_err(mdev, "Failed to map MCG context memory, aborting.\n");
456 err = -ENOMEM;
457 goto err_unmap_cq;
458 }
459
460 return 0;
461
462err_unmap_cq:
463 mthca_free_icm_table(mdev, mdev->cq_table.table);
464
465err_unmap_eqp:
466 mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
467
468err_unmap_qp:
469 mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
470
471err_unmap_mpt:
472 mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
473
474err_unmap_mtt:
475 mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
476
477err_unmap_eq:
478 mthca_unmap_eq_icm(mdev);
479
480err_unmap_aux:
481 mthca_UNMAP_ICM_AUX(mdev, &status);
482
483err_free_aux:
484 mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
485
486 return err;
487}
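
The unit juggling in the mthca_dbg() call inside mthca_init_icm() is worth spelling out: icm_size is in bytes (hence >> 10 for KB), while aux_pages counts 4 KB pages, which is what makes << 2 a pages-to-KB conversion. A worked example with made-up values:

#include <stdio.h>

int main(void)
{
	unsigned long long icm_size  = 256ULL << 20;	/* 256 MB of HCA context (made up) */
	unsigned long long aux_pages = 1024;		/* 4 KB pages of aux ICM (made up) */

	printf("%llu KB of HCA context requires %llu KB aux memory.\n",
	       icm_size >> 10, aux_pages << 2);		/* 262144 KB ... 4096 KB */
	return 0;
}
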
488
489static int __devinit mthca_init_arbel(struct mthca_dev *mdev)
490{
491 struct mthca_dev_lim dev_lim;
492 struct mthca_profile profile;
493 struct mthca_init_hca_param init_hca;
494 struct mthca_adapter adapter;
495 u64 icm_size;
496 u8 status;
497 int err;
498
499 err = mthca_QUERY_FW(mdev, &status);
500 if (err) {
501 mthca_err(mdev, "QUERY_FW command failed, aborting.\n");
502 return err;
503 }
504 if (status) {
505 mthca_err(mdev, "QUERY_FW returned status 0x%02x, "
506 "aborting.\n", status);
507 return -EINVAL;
508 }
509
510 err = mthca_ENABLE_LAM(mdev, &status);
511 if (err) {
512 mthca_err(mdev, "ENABLE_LAM command failed, aborting.\n");
513 return err;
514 }
515 if (status == MTHCA_CMD_STAT_LAM_NOT_PRE) {
516 mthca_dbg(mdev, "No HCA-attached memory (running in MemFree mode)\n");
517 mdev->mthca_flags |= MTHCA_FLAG_NO_LAM;
518 } else if (status) {
519 mthca_err(mdev, "ENABLE_LAM returned status 0x%02x, "
520 "aborting.\n", status);
521 return -EINVAL;
522 }
523
524 err = mthca_load_fw(mdev);
525 if (err) {
526 mthca_err(mdev, "Failed to start FW, aborting.\n");
527 goto err_disable;
528 }
529
530 err = mthca_dev_lim(mdev, &dev_lim);
531 if (err) {
532 mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
533 goto err_stop_fw;
534 }
535
536 profile = default_profile;
537 profile.num_uar = dev_lim.uar_size / PAGE_SIZE;
538 profile.num_udav = 0;
539
540 icm_size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca);
541 if ((int) icm_size < 0) {
542 err = icm_size;
543 goto err_stop_fw;
544 }
545
546 err = mthca_init_icm(mdev, &dev_lim, &init_hca, icm_size);
547 if (err)
548 goto err_stop_fw;
549
550 err = mthca_INIT_HCA(mdev, &init_hca, &status);
551 if (err) {
552 mthca_err(mdev, "INIT_HCA command failed, aborting.\n");
553 goto err_free_icm;
554 }
555 if (status) {
556 mthca_err(mdev, "INIT_HCA returned status 0x%02x, "
557 "aborting.\n", status);
558 err = -EINVAL;
559 goto err_free_icm;
560 }
561
562 err = mthca_QUERY_ADAPTER(mdev, &adapter, &status);
563 if (err) {
564 mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n");
565 goto err_free_icm;
566 }
567 if (status) {
568 mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, "
569 "aborting.\n", status);
570 err = -EINVAL;
571 goto err_free_icm;
572 }
573
574 mdev->eq_table.inta_pin = adapter.inta_pin;
575 mdev->rev_id = adapter.revision_id;
576
577 return 0;
578
579err_free_icm:
580 mthca_free_icm_table(mdev, mdev->cq_table.table);
581 mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
582 mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
583 mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
584 mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
585 mthca_unmap_eq_icm(mdev);
586
587 mthca_UNMAP_ICM_AUX(mdev, &status);
588 mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
589
590err_stop_fw:
591 mthca_UNMAP_FA(mdev, &status);
592 mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
593
594err_disable:
595 if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
596 mthca_DISABLE_LAM(mdev, &status);
597
598 return err;
599}
600
601static int __devinit mthca_init_hca(struct mthca_dev *mdev)
602{
603 if (mdev->hca_type == ARBEL_NATIVE)
604 return mthca_init_arbel(mdev);
605 else
606 return mthca_init_tavor(mdev);
607}
608
609static int __devinit mthca_setup_hca(struct mthca_dev *dev)
610{
611 int err;
612 u8 status;
613
614 MTHCA_INIT_DOORBELL_LOCK(&dev->doorbell_lock);
615
616 err = mthca_init_uar_table(dev);
617 if (err) {
618 mthca_err(dev, "Failed to initialize "
619 "user access region table, aborting.\n");
620 return err;
621 }
622
623 err = mthca_uar_alloc(dev, &dev->driver_uar);
624 if (err) {
625 mthca_err(dev, "Failed to allocate driver access region, "
626 "aborting.\n");
627 goto err_uar_table_free;
628 }
629
630 dev->kar = ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
631 if (!dev->kar) {
632 mthca_err(dev, "Couldn't map kernel access region, "
633 "aborting.\n");
634 err = -ENOMEM;
635 goto err_uar_free;
636 }
637
638 err = mthca_init_pd_table(dev);
639 if (err) {
640 mthca_err(dev, "Failed to initialize "
641 "protection domain table, aborting.\n");
642 goto err_kar_unmap;
643 }
644
645 err = mthca_init_mr_table(dev);
646 if (err) {
647 mthca_err(dev, "Failed to initialize "
648 "memory region table, aborting.\n");
649 goto err_pd_table_free;
650 }
651
652 err = mthca_pd_alloc(dev, &dev->driver_pd);
653 if (err) {
654 mthca_err(dev, "Failed to create driver PD, "
655 "aborting.\n");
656 goto err_mr_table_free;
657 }
658
659 err = mthca_init_eq_table(dev);
660 if (err) {
661 mthca_err(dev, "Failed to initialize "
662 "event queue table, aborting.\n");
663 goto err_pd_free;
664 }
665
666 err = mthca_cmd_use_events(dev);
667 if (err) {
668 mthca_err(dev, "Failed to switch to event-driven "
669 "firmware commands, aborting.\n");
670 goto err_eq_table_free;
671 }
672
673 err = mthca_NOP(dev, &status);
674 if (err || status) {
675 mthca_err(dev, "NOP command failed to generate interrupt, aborting.\n");
676 if (dev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X))
677 mthca_err(dev, "Try again with MSI/MSI-X disabled.\n");
678 else
679 mthca_err(dev, "BIOS or ACPI interrupt routing problem?\n");
680
681 goto err_cmd_poll;
682 }
683
684 mthca_dbg(dev, "NOP command IRQ test passed\n");
685
686 err = mthca_init_cq_table(dev);
687 if (err) {
688 mthca_err(dev, "Failed to initialize "
689 "completion queue table, aborting.\n");
690 goto err_cmd_poll;
691 }
692
693 err = mthca_init_qp_table(dev);
694 if (err) {
695 mthca_err(dev, "Failed to initialize "
696 "queue pair table, aborting.\n");
697 goto err_cq_table_free;
698 }
699
700 err = mthca_init_av_table(dev);
701 if (err) {
702 mthca_err(dev, "Failed to initialize "
703 "address vector table, aborting.\n");
704 goto err_qp_table_free;
705 }
706
707 err = mthca_init_mcg_table(dev);
708 if (err) {
709 mthca_err(dev, "Failed to initialize "
710 "multicast group table, aborting.\n");
711 goto err_av_table_free;
712 }
713
714 return 0;
715
716err_av_table_free:
717 mthca_cleanup_av_table(dev);
718
719err_qp_table_free:
720 mthca_cleanup_qp_table(dev);
721
722err_cq_table_free:
723 mthca_cleanup_cq_table(dev);
724
725err_cmd_poll:
726 mthca_cmd_use_polling(dev);
727
728err_eq_table_free:
729 mthca_cleanup_eq_table(dev);
730
731err_pd_free:
732 mthca_pd_free(dev, &dev->driver_pd);
733
734err_mr_table_free:
735 mthca_cleanup_mr_table(dev);
736
737err_pd_table_free:
738 mthca_cleanup_pd_table(dev);
739
740err_kar_unmap:
741 iounmap(dev->kar);
742
743err_uar_free:
744 mthca_uar_free(dev, &dev->driver_uar);
745
746err_uar_table_free:
747 mthca_cleanup_uar_table(dev);
748 return err;
749}
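
mthca_setup_hca(), like mthca_init_one() further down, uses the standard kernel goto-ladder for error handling: each acquisition has a matching label, and a failure jumps to the label that unwinds everything acquired so far, in reverse order. Reduced to two resources, the shape is (user-space sketch, -1 standing in for a real errno):

#include <stdlib.h>

int setup_two(void **a, void **b)
{
	int err = -1;			/* stands in for -ENOMEM */

	*a = malloc(16);
	if (!*a)
		goto err_out;

	*b = malloc(16);
	if (!*b)
		goto err_free_a;

	return 0;			/* both resources held */

err_free_a:
	free(*a);
err_out:
	return err;
}
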
750
751static int __devinit mthca_request_regions(struct pci_dev *pdev,
752 int ddr_hidden)
753{
754 int err;
755
756 /*
757 * We can't just use pci_request_regions() because the MSI-X
758 * table is right in the middle of the first BAR. If we did
759 * pci_request_region and grab all of the first BAR, then
760 * setting up MSI-X would fail, since the PCI core wants to do
761 * request_mem_region on the MSI-X vector table.
762 *
763 * So just request what we need right now, and request any
764 * other regions we need when setting up EQs.
765 */
766 if (!request_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
767 MTHCA_HCR_SIZE, DRV_NAME))
768 return -EBUSY;
769
770 err = pci_request_region(pdev, 2, DRV_NAME);
771 if (err)
772 goto err_bar2_failed;
773
774 if (!ddr_hidden) {
775 err = pci_request_region(pdev, 4, DRV_NAME);
776 if (err)
777 goto err_bar4_failed;
778 }
779
780 return 0;
781
782err_bar4_failed:
783 pci_release_region(pdev, 2);
784
785err_bar2_failed:
786 release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
787 MTHCA_HCR_SIZE);
788
789 return err;
790}
791
792static void mthca_release_regions(struct pci_dev *pdev,
793 int ddr_hidden)
794{
795 if (!ddr_hidden)
796 pci_release_region(pdev, 4);
797
798 pci_release_region(pdev, 2);
799
800 release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
801 MTHCA_HCR_SIZE);
802}
803
804static int __devinit mthca_enable_msi_x(struct mthca_dev *mdev)
805{
806 struct msix_entry entries[3];
807 int err;
808
809 entries[0].entry = 0;
810 entries[1].entry = 1;
811 entries[2].entry = 2;
812
813 err = pci_enable_msix(mdev->pdev, entries, ARRAY_SIZE(entries));
814 if (err) {
815 if (err > 0)
816 mthca_info(mdev, "Only %d MSI-X vectors available, "
817 "not using MSI-X\n", err);
818 return err;
819 }
820
821 mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector;
822 mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector;
823 mdev->eq_table.eq[MTHCA_EQ_CMD ].msi_x_vector = entries[2].vector;
824
825 return 0;
826}
827
828static void mthca_close_hca(struct mthca_dev *mdev)
829{
830 u8 status;
831
832 mthca_CLOSE_HCA(mdev, 0, &status);
833
834 if (mdev->hca_type == ARBEL_NATIVE) {
835 mthca_free_icm_table(mdev, mdev->cq_table.table);
836 mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
837 mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
838 mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
839 mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
840 mthca_unmap_eq_icm(mdev);
841
842 mthca_UNMAP_ICM_AUX(mdev, &status);
843 mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
844
845 mthca_UNMAP_FA(mdev, &status);
846 mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
847
848 if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
849 mthca_DISABLE_LAM(mdev, &status);
850 } else
851 mthca_SYS_DIS(mdev, &status);
852}
853
854static int __devinit mthca_init_one(struct pci_dev *pdev,
855 const struct pci_device_id *id)
856{
857 static int mthca_version_printed = 0;
858 static int mthca_memfree_warned = 0;
859 int ddr_hidden = 0;
860 int err;
861 struct mthca_dev *mdev;
862
863 if (!mthca_version_printed) {
864 printk(KERN_INFO "%s", mthca_version);
865 ++mthca_version_printed;
866 }
867
868 printk(KERN_INFO PFX "Initializing %s (%s)\n",
869 pci_pretty_name(pdev), pci_name(pdev));
870
871 err = pci_enable_device(pdev);
872 if (err) {
873 dev_err(&pdev->dev, "Cannot enable PCI device, "
874 "aborting.\n");
875 return err;
876 }
877
878 /*
879 * Check for BARs. We expect 0: 1MB, 2: 8MB, 4: DDR (may not
880 * be present)
881 */
882 if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
883 pci_resource_len(pdev, 0) != 1 << 20) {
 884		dev_err(&pdev->dev, "Missing DCS, aborting.\n");
885 err = -ENODEV;
886 goto err_disable_pdev;
887 }
888 if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM) ||
889 pci_resource_len(pdev, 2) != 1 << 23) {
 890		dev_err(&pdev->dev, "Missing UAR, aborting.\n");
891 err = -ENODEV;
892 goto err_disable_pdev;
893 }
894 if (!(pci_resource_flags(pdev, 4) & IORESOURCE_MEM))
895 ddr_hidden = 1;
896
897 err = mthca_request_regions(pdev, ddr_hidden);
898 if (err) {
899 dev_err(&pdev->dev, "Cannot obtain PCI resources, "
900 "aborting.\n");
901 goto err_disable_pdev;
902 }
903
904 pci_set_master(pdev);
905
906 err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
907 if (err) {
908 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
909 err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
910 if (err) {
911 dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
912 goto err_free_res;
913 }
914 }
915 err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
916 if (err) {
917 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
918 "consistent PCI DMA mask.\n");
919 err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
920 if (err) {
921 dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
922 "aborting.\n");
923 goto err_free_res;
924 }
925 }
926
927 mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev);
928 if (!mdev) {
929 dev_err(&pdev->dev, "Device struct alloc failed, "
930 "aborting.\n");
931 err = -ENOMEM;
932 goto err_free_res;
933 }
934
935 mdev->pdev = pdev;
936 mdev->hca_type = id->driver_data;
937
938 if (mdev->hca_type == ARBEL_NATIVE && !mthca_memfree_warned++)
939 mthca_warn(mdev, "Warning: native MT25208 mode support is incomplete. "
940 "Your HCA may not work properly.\n");
941
942 if (ddr_hidden)
943 mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
944
945 /*
946 * Now reset the HCA before we touch the PCI capabilities or
947 * attempt a firmware command, since a boot ROM may have left
948 * the HCA in an undefined state.
949 */
950 err = mthca_reset(mdev);
951 if (err) {
952 mthca_err(mdev, "Failed to reset HCA, aborting.\n");
953 goto err_free_dev;
954 }
955
956 if (msi_x && !mthca_enable_msi_x(mdev))
957 mdev->mthca_flags |= MTHCA_FLAG_MSI_X;
958 if (msi && !(mdev->mthca_flags & MTHCA_FLAG_MSI_X) &&
959 !pci_enable_msi(pdev))
960 mdev->mthca_flags |= MTHCA_FLAG_MSI;
961
962 sema_init(&mdev->cmd.hcr_sem, 1);
963 sema_init(&mdev->cmd.poll_sem, 1);
964 mdev->cmd.use_events = 0;
965
966 mdev->hcr = ioremap(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, MTHCA_HCR_SIZE);
967 if (!mdev->hcr) {
968 mthca_err(mdev, "Couldn't map command register, "
969 "aborting.\n");
970 err = -ENOMEM;
971 goto err_free_dev;
972 }
973
974 err = mthca_tune_pci(mdev);
975 if (err)
976 goto err_iounmap;
977
978 err = mthca_init_hca(mdev);
979 if (err)
980 goto err_iounmap;
981
982 err = mthca_setup_hca(mdev);
983 if (err)
984 goto err_close;
985
986 err = mthca_register_device(mdev);
987 if (err)
988 goto err_cleanup;
989
990 err = mthca_create_agents(mdev);
991 if (err)
992 goto err_unregister;
993
994 pci_set_drvdata(pdev, mdev);
995
996 return 0;
997
998err_unregister:
999 mthca_unregister_device(mdev);
1000
1001err_cleanup:
1002 mthca_cleanup_mcg_table(mdev);
1003 mthca_cleanup_av_table(mdev);
1004 mthca_cleanup_qp_table(mdev);
1005 mthca_cleanup_cq_table(mdev);
1006 mthca_cmd_use_polling(mdev);
1007 mthca_cleanup_eq_table(mdev);
1008
1009 mthca_pd_free(mdev, &mdev->driver_pd);
1010
1011 mthca_cleanup_mr_table(mdev);
1012 mthca_cleanup_pd_table(mdev);
1013 mthca_cleanup_uar_table(mdev);
1014
1015err_close:
1016 mthca_close_hca(mdev);
1017
1018err_iounmap:
1019 iounmap(mdev->hcr);
1020
1021err_free_dev:
1022 if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
1023 pci_disable_msix(pdev);
1024 if (mdev->mthca_flags & MTHCA_FLAG_MSI)
1025 pci_disable_msi(pdev);
1026
1027 ib_dealloc_device(&mdev->ib_dev);
1028
1029err_free_res:
1030 mthca_release_regions(pdev, ddr_hidden);
1031
1032err_disable_pdev:
1033 pci_disable_device(pdev);
1034 pci_set_drvdata(pdev, NULL);
1035 return err;
1036}
1037
1038static void __devexit mthca_remove_one(struct pci_dev *pdev)
1039{
1040 struct mthca_dev *mdev = pci_get_drvdata(pdev);
1041 u8 status;
1042 int p;
1043
1044 if (mdev) {
1045 mthca_free_agents(mdev);
1046 mthca_unregister_device(mdev);
1047
1048 for (p = 1; p <= mdev->limits.num_ports; ++p)
1049 mthca_CLOSE_IB(mdev, p, &status);
1050
1051 mthca_cleanup_mcg_table(mdev);
1052 mthca_cleanup_av_table(mdev);
1053 mthca_cleanup_qp_table(mdev);
1054 mthca_cleanup_cq_table(mdev);
1055 mthca_cmd_use_polling(mdev);
1056 mthca_cleanup_eq_table(mdev);
1057
1058 mthca_pd_free(mdev, &mdev->driver_pd);
1059
1060 mthca_cleanup_mr_table(mdev);
1061 mthca_cleanup_pd_table(mdev);
1062
1063 iounmap(mdev->kar);
1064 mthca_uar_free(mdev, &mdev->driver_uar);
1065 mthca_cleanup_uar_table(mdev);
1066
1067 mthca_close_hca(mdev);
1068
1069 iounmap(mdev->hcr);
1070
1071 if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
1072 pci_disable_msix(pdev);
1073 if (mdev->mthca_flags & MTHCA_FLAG_MSI)
1074 pci_disable_msi(pdev);
1075
1076 ib_dealloc_device(&mdev->ib_dev);
1077 mthca_release_regions(pdev, mdev->mthca_flags &
1078 MTHCA_FLAG_DDR_HIDDEN);
1079 pci_disable_device(pdev);
1080 pci_set_drvdata(pdev, NULL);
1081 }
1082}
1083
1084static struct pci_device_id mthca_pci_table[] = {
1085 { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR),
1086 .driver_data = TAVOR },
1087 { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_TAVOR),
1088 .driver_data = TAVOR },
1089 { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT),
1090 .driver_data = ARBEL_COMPAT },
1091 { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT),
1092 .driver_data = ARBEL_COMPAT },
1093 { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL),
1094 .driver_data = ARBEL_NATIVE },
1095 { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL),
1096 .driver_data = ARBEL_NATIVE },
1097 { 0, }
1098};
1099
1100MODULE_DEVICE_TABLE(pci, mthca_pci_table);
1101
1102static struct pci_driver mthca_driver = {
1103 .name = "ib_mthca",
1104 .id_table = mthca_pci_table,
1105 .probe = mthca_init_one,
1106 .remove = __devexit_p(mthca_remove_one)
1107};
1108
1109static int __init mthca_init(void)
1110{
1111 int ret;
1112
1113 ret = pci_register_driver(&mthca_driver);
1114 return ret < 0 ? ret : 0;
1115}
1116
1117static void __exit mthca_cleanup(void)
1118{
1119 pci_unregister_driver(&mthca_driver);
1120}
1121
1122module_init(mthca_init);
1123module_exit(mthca_cleanup);
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
new file mode 100644
index 000000000000..70a6553a588e
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -0,0 +1,376 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_mcg.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <linux/init.h>
36
37#include "mthca_dev.h"
38#include "mthca_cmd.h"
39
40enum {
41 MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
42};
43
44struct mthca_mgm {
45 u32 next_gid_index;
46 u32 reserved[3];
47 u8 gid[16];
48 u32 qp[MTHCA_QP_PER_MGM];
49};
50
51static const u8 zero_gid[16]; /* automatically initialized to 0 */
52
53/*
 54 * Caller must hold the MCG table semaphore. gid and mgm parameters must
 55 * be properly aligned for the command interface.
 56 *
 57 * Returns 0 unless a firmware command error occurs.
 58 *
 59 * If GID is found in MGM or MGM is empty, *index = *hash, *prev = -1
 60 * and *mgm holds MGM entry.
 61 *
 62 * If GID is found in AMGM, *index = index in AMGM, *prev = index of
 63 * previous entry in hash chain and *mgm holds AMGM entry.
 64 *
 65 * If no AMGM exists for the given gid, *index = -1, *prev = index of last
66 * entry in hash chain and *mgm holds end of hash chain.
67 */
68static int find_mgm(struct mthca_dev *dev,
69 u8 *gid, struct mthca_mgm *mgm,
70 u16 *hash, int *prev, int *index)
71{
72 void *mailbox;
73 u8 *mgid;
74 int err;
75 u8 status;
76
77 mailbox = kmalloc(16 + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
78 if (!mailbox)
79 return -ENOMEM;
80 mgid = MAILBOX_ALIGN(mailbox);
81
82 memcpy(mgid, gid, 16);
83
84 err = mthca_MGID_HASH(dev, mgid, hash, &status);
85 if (err)
86 goto out;
87 if (status) {
88 mthca_err(dev, "MGID_HASH returned status %02x\n", status);
89 err = -EINVAL;
90 goto out;
91 }
92
93 if (0)
94 mthca_dbg(dev, "Hash for %04x:%04x:%04x:%04x:"
95 "%04x:%04x:%04x:%04x is %04x\n",
96 be16_to_cpu(((u16 *) gid)[0]), be16_to_cpu(((u16 *) gid)[1]),
97 be16_to_cpu(((u16 *) gid)[2]), be16_to_cpu(((u16 *) gid)[3]),
98 be16_to_cpu(((u16 *) gid)[4]), be16_to_cpu(((u16 *) gid)[5]),
99 be16_to_cpu(((u16 *) gid)[6]), be16_to_cpu(((u16 *) gid)[7]),
100 *hash);
101
102 *index = *hash;
103 *prev = -1;
104
105 do {
106 err = mthca_READ_MGM(dev, *index, mgm, &status);
107 if (err)
108 goto out;
109 if (status) {
110 mthca_err(dev, "READ_MGM returned status %02x\n", status);
111			err = -EINVAL; goto out;
112 }
113
114 if (!memcmp(mgm->gid, zero_gid, 16)) {
115 if (*index != *hash) {
116 mthca_err(dev, "Found zero MGID in AMGM.\n");
117 err = -EINVAL;
118 }
119 goto out;
120 }
121
122 if (!memcmp(mgm->gid, gid, 16))
123 goto out;
124
125 *prev = *index;
126 *index = be32_to_cpu(mgm->next_gid_index) >> 5;
127 } while (*index);
128
129 *index = -1;
130
131 out:
132 kfree(mailbox);
133 return err;
134}
135
136int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
137{
138 struct mthca_dev *dev = to_mdev(ibqp->device);
139 void *mailbox;
140 struct mthca_mgm *mgm;
141 u16 hash;
142 int index, prev;
143 int link = 0;
144 int i;
145 int err;
146 u8 status;
147
148 mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
149 if (!mailbox)
150 return -ENOMEM;
151 mgm = MAILBOX_ALIGN(mailbox);
152
153 if (down_interruptible(&dev->mcg_table.sem))
154		{ kfree(mailbox); return -EINTR; }
155
156 err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index);
157 if (err)
158 goto out;
159
160 if (index != -1) {
161 if (!memcmp(mgm->gid, zero_gid, 16))
162 memcpy(mgm->gid, gid->raw, 16);
163 } else {
164 link = 1;
165
166 index = mthca_alloc(&dev->mcg_table.alloc);
167 if (index == -1) {
168 mthca_err(dev, "No AMGM entries left\n");
169 err = -ENOMEM;
170 goto out;
171 }
172
173 err = mthca_READ_MGM(dev, index, mgm, &status);
174 if (err)
175 goto out;
176 if (status) {
177 mthca_err(dev, "READ_MGM returned status %02x\n", status);
178 err = -EINVAL;
179 goto out;
180 }
181
182 memcpy(mgm->gid, gid->raw, 16);
183 mgm->next_gid_index = 0;
184 }
185
186 for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
187 if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) {
188 mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31));
189 break;
190 }
191
192 if (i == MTHCA_QP_PER_MGM) {
193 mthca_err(dev, "MGM at index %x is full.\n", index);
194 err = -ENOMEM;
195 goto out;
196 }
197
198 err = mthca_WRITE_MGM(dev, index, mgm, &status);
199 if (err)
200 goto out;
201 if (status) {
202 mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
203 err = -EINVAL;
204 }
205
206 if (!link)
207 goto out;
208
209 err = mthca_READ_MGM(dev, prev, mgm, &status);
210 if (err)
211 goto out;
212 if (status) {
213 mthca_err(dev, "READ_MGM returned status %02x\n", status);
214 err = -EINVAL;
215 goto out;
216 }
217
218 mgm->next_gid_index = cpu_to_be32(index << 5);
219
220 err = mthca_WRITE_MGM(dev, prev, mgm, &status);
221 if (err)
222 goto out;
223 if (status) {
224 mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
225 err = -EINVAL;
226 }
227
228 out:
229 up(&dev->mcg_table.sem);
230 kfree(mailbox);
231 return err;
232}
233
234int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
235{
236 struct mthca_dev *dev = to_mdev(ibqp->device);
237 void *mailbox;
238 struct mthca_mgm *mgm;
239 u16 hash;
240 int prev, index;
241 int i, loc;
242 int err;
243 u8 status;
244
245 mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
246 if (!mailbox)
247 return -ENOMEM;
248 mgm = MAILBOX_ALIGN(mailbox);
249
250 if (down_interruptible(&dev->mcg_table.sem))
251		{ kfree(mailbox); return -EINTR; }
252
253 err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index);
254 if (err)
255 goto out;
256
257 if (index == -1) {
258 mthca_err(dev, "MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
259 "not found\n",
260 be16_to_cpu(((u16 *) gid->raw)[0]),
261 be16_to_cpu(((u16 *) gid->raw)[1]),
262 be16_to_cpu(((u16 *) gid->raw)[2]),
263 be16_to_cpu(((u16 *) gid->raw)[3]),
264 be16_to_cpu(((u16 *) gid->raw)[4]),
265 be16_to_cpu(((u16 *) gid->raw)[5]),
266 be16_to_cpu(((u16 *) gid->raw)[6]),
267 be16_to_cpu(((u16 *) gid->raw)[7]));
268 err = -EINVAL;
269 goto out;
270 }
271
272 for (loc = -1, i = 0; i < MTHCA_QP_PER_MGM; ++i) {
273 if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31)))
274 loc = i;
275 if (!(mgm->qp[i] & cpu_to_be32(1 << 31)))
276 break;
277 }
278
279 if (loc == -1) {
280 mthca_err(dev, "QP %06x not found in MGM\n", ibqp->qp_num);
281 err = -EINVAL;
282 goto out;
283 }
284
285 mgm->qp[loc] = mgm->qp[i - 1];
286 mgm->qp[i - 1] = 0;
287
288 err = mthca_WRITE_MGM(dev, index, mgm, &status);
289 if (err)
290 goto out;
291 if (status) {
292 mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
293 err = -EINVAL;
294 goto out;
295 }
296
297 if (i != 1)
298 goto out;
299
300 goto out;
301
302 if (prev == -1) {
303 /* Remove entry from MGM */
304 if (be32_to_cpu(mgm->next_gid_index) >> 5) {
305 err = mthca_READ_MGM(dev,
306 be32_to_cpu(mgm->next_gid_index) >> 5,
307 mgm, &status);
308 if (err)
309 goto out;
310 if (status) {
311 mthca_err(dev, "READ_MGM returned status %02x\n",
312 status);
313 err = -EINVAL;
314 goto out;
315 }
316 } else
317 memset(mgm->gid, 0, 16);
318
319 err = mthca_WRITE_MGM(dev, index, mgm, &status);
320 if (err)
321 goto out;
322 if (status) {
323 mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
324 err = -EINVAL;
325 goto out;
326 }
327 } else {
328 /* Remove entry from AMGM */
329 index = be32_to_cpu(mgm->next_gid_index) >> 5;
330 err = mthca_READ_MGM(dev, prev, mgm, &status);
331 if (err)
332 goto out;
333 if (status) {
334 mthca_err(dev, "READ_MGM returned status %02x\n", status);
335 err = -EINVAL;
336 goto out;
337 }
338
339 mgm->next_gid_index = cpu_to_be32(index << 5);
340
341 err = mthca_WRITE_MGM(dev, prev, mgm, &status);
342 if (err)
343 goto out;
344 if (status) {
345 mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
346 err = -EINVAL;
347 goto out;
348 }
349 }
350
351 out:
352 up(&dev->mcg_table.sem);
353 kfree(mailbox);
354 return err;
355}
356
357int __devinit mthca_init_mcg_table(struct mthca_dev *dev)
358{
359 int err;
360
361 err = mthca_alloc_init(&dev->mcg_table.alloc,
362 dev->limits.num_amgms,
363 dev->limits.num_amgms - 1,
364 0);
365 if (err)
366 return err;
367
368 init_MUTEX(&dev->mcg_table.sem);
369
370 return 0;
371}
372
373void __devexit mthca_cleanup_mcg_table(struct mthca_dev *dev)
374{
375 mthca_alloc_cleanup(&dev->mcg_table.alloc);
376}
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
new file mode 100644
index 000000000000..7730b5960616
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -0,0 +1,465 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id$
33 */
34
35#include "mthca_memfree.h"
36#include "mthca_dev.h"
37#include "mthca_cmd.h"
38
39/*
 40 * We allocate in chunks as big as we can, up to a maximum of
 41 * 256 KB per chunk.
42 */
43enum {
44 MTHCA_ICM_ALLOC_SIZE = 1 << 18,
45 MTHCA_TABLE_CHUNK_SIZE = 1 << 18
46};
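/*
 * 1 << 18 bytes is 256 KB; with 4 KB pages that is an order-6 allocation,
 * so mthca_alloc_icm() below starts out requesting 64-page blocks and
 * drops to smaller orders whenever alloc_pages() fails.
 */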
47
48void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
49{
50 struct mthca_icm_chunk *chunk, *tmp;
51 int i;
52
53 if (!icm)
54 return;
55
56 list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
57 if (chunk->nsg > 0)
58 pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
59 PCI_DMA_BIDIRECTIONAL);
60
61 for (i = 0; i < chunk->npages; ++i)
62 __free_pages(chunk->mem[i].page,
63 get_order(chunk->mem[i].length));
64
65 kfree(chunk);
66 }
67
68 kfree(icm);
69}
70
71struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
72 unsigned int gfp_mask)
73{
74 struct mthca_icm *icm;
75 struct mthca_icm_chunk *chunk = NULL;
76 int cur_order;
77
78 icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
79 if (!icm)
80 return icm;
81
82 icm->refcount = 0;
83 INIT_LIST_HEAD(&icm->chunk_list);
84
85 cur_order = get_order(MTHCA_ICM_ALLOC_SIZE);
86
87 while (npages > 0) {
88 if (!chunk) {
89 chunk = kmalloc(sizeof *chunk,
90 gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
91 if (!chunk)
92 goto fail;
93
94 chunk->npages = 0;
95 chunk->nsg = 0;
96 list_add_tail(&chunk->list, &icm->chunk_list);
97 }
98
99 while (1 << cur_order > npages)
100 --cur_order;
101
102 chunk->mem[chunk->npages].page = alloc_pages(gfp_mask, cur_order);
103 if (chunk->mem[chunk->npages].page) {
104 chunk->mem[chunk->npages].length = PAGE_SIZE << cur_order;
105 chunk->mem[chunk->npages].offset = 0;
106
107 if (++chunk->npages == MTHCA_ICM_CHUNK_LEN) {
108 chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
109 chunk->npages,
110 PCI_DMA_BIDIRECTIONAL);
111
112 if (chunk->nsg <= 0)
113 goto fail;
114
115 chunk = NULL;
116 }
117
118 npages -= 1 << cur_order;
119 } else {
120 --cur_order;
121 if (cur_order < 0)
122 goto fail;
123 }
124 }
125
126 if (chunk) {
127 chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
128 chunk->npages,
129 PCI_DMA_BIDIRECTIONAL);
130
131 if (chunk->nsg <= 0)
132 goto fail;
133 }
134
135 return icm;
136
137fail:
138 mthca_free_icm(dev, icm);
139 return NULL;
140}
141
142int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
143{
144 int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
145 int ret = 0;
146 u8 status;
147
148 down(&table->mutex);
149
150 if (table->icm[i]) {
151 ++table->icm[i]->refcount;
152 goto out;
153 }
154
155 table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
156 (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
157 __GFP_NOWARN);
158 if (!table->icm[i]) {
159 ret = -ENOMEM;
160 goto out;
161 }
162
163 if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
164 &status) || status) {
165 mthca_free_icm(dev, table->icm[i]);
166 table->icm[i] = NULL;
167 ret = -ENOMEM;
168 goto out;
169 }
170
171 ++table->icm[i]->refcount;
172
173out:
174 up(&table->mutex);
175 return ret;
176}
177
178void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
179{
180 int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
181 u8 status;
182
183 down(&table->mutex);
184
185 if (--table->icm[i]->refcount == 0) {
186 mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
187 MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
188 mthca_free_icm(dev, table->icm[i]);
189 table->icm[i] = NULL;
190 }
191
192 up(&table->mutex);
193}
194
195struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
196 u64 virt, int obj_size,
197 int nobj, int reserved,
198 int use_lowmem)
199{
200 struct mthca_icm_table *table;
201 int num_icm;
202 int i;
203 u8 status;
204
205 num_icm = obj_size * nobj / MTHCA_TABLE_CHUNK_SIZE;
206
207 table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL);
208 if (!table)
209 return NULL;
210
211 table->virt = virt;
212 table->num_icm = num_icm;
213 table->num_obj = nobj;
214 table->obj_size = obj_size;
215 table->lowmem = use_lowmem;
216 init_MUTEX(&table->mutex);
217
218 for (i = 0; i < num_icm; ++i)
219 table->icm[i] = NULL;
220
221 for (i = 0; i * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
222 table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
223 (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
224 __GFP_NOWARN);
225 if (!table->icm[i])
226 goto err;
227 if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE,
228 &status) || status) {
229 mthca_free_icm(dev, table->icm[i]);
230 table->icm[i] = NULL;
231 goto err;
232 }
233
234 /*
235 * Add a reference to this ICM chunk so that it never
236 * gets freed (since it contains reserved firmware objects).
237 */
238 ++table->icm[i]->refcount;
239 }
240
241 return table;
242
243err:
244 for (i = 0; i < num_icm; ++i)
245 if (table->icm[i]) {
246 mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,
247 MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
248 mthca_free_icm(dev, table->icm[i]);
249 }
250
251 kfree(table);
252
253 return NULL;
254}
255
256void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
257{
258 int i;
259 u8 status;
260
261 for (i = 0; i < table->num_icm; ++i)
262 if (table->icm[i]) {
263 mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
264 MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
265 mthca_free_icm(dev, table->icm[i]);
266 }
267
268 kfree(table);
269}
270
271static u64 mthca_uarc_virt(struct mthca_dev *dev, int page)
272{
273 return dev->uar_table.uarc_base +
274 dev->driver_uar.index * dev->uar_table.uarc_size +
275 page * 4096;
276}
277
278int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db)
279{
280 int group;
281 int start, end, dir;
282 int i, j;
283 struct mthca_db_page *page;
284 int ret = 0;
285 u8 status;
286
287 down(&dev->db_tab->mutex);
288
289 switch (type) {
290 case MTHCA_DB_TYPE_CQ_ARM:
291 case MTHCA_DB_TYPE_SQ:
292 group = 0;
293 start = 0;
294 end = dev->db_tab->max_group1;
295 dir = 1;
296 break;
297
298 case MTHCA_DB_TYPE_CQ_SET_CI:
299 case MTHCA_DB_TYPE_RQ:
300 case MTHCA_DB_TYPE_SRQ:
301 group = 1;
302 start = dev->db_tab->npages - 1;
303 end = dev->db_tab->min_group2;
304 dir = -1;
305 break;
306
307 default:
308		ret = -EINVAL; goto out;
309 }
310
311 for (i = start; i != end; i += dir)
312 if (dev->db_tab->page[i].db_rec &&
313 !bitmap_full(dev->db_tab->page[i].used,
314 MTHCA_DB_REC_PER_PAGE)) {
315 page = dev->db_tab->page + i;
316 goto found;
317 }
318
319 if (dev->db_tab->max_group1 >= dev->db_tab->min_group2 - 1) {
320 ret = -ENOMEM;
321 goto out;
322 }
323
324 page = dev->db_tab->page + end;
325 page->db_rec = dma_alloc_coherent(&dev->pdev->dev, 4096,
326 &page->mapping, GFP_KERNEL);
327 if (!page->db_rec) {
328 ret = -ENOMEM;
329 goto out;
330 }
331 memset(page->db_rec, 0, 4096);
332
333 ret = mthca_MAP_ICM_page(dev, page->mapping, mthca_uarc_virt(dev, i), &status);
334 if (!ret && status)
335 ret = -EINVAL;
336 if (ret) {
337 dma_free_coherent(&dev->pdev->dev, 4096,
338 page->db_rec, page->mapping);
339 goto out;
340 }
341
342 bitmap_zero(page->used, MTHCA_DB_REC_PER_PAGE);
343 if (group == 0)
344 ++dev->db_tab->max_group1;
345 else
346 --dev->db_tab->min_group2;
347
348found:
349 j = find_first_zero_bit(page->used, MTHCA_DB_REC_PER_PAGE);
350 set_bit(j, page->used);
351
352 if (group == 1)
353 j = MTHCA_DB_REC_PER_PAGE - 1 - j;
354
355 ret = i * MTHCA_DB_REC_PER_PAGE + j;
356
357 page->db_rec[j] = cpu_to_be64((qn << 8) | (type << 5));
358
359 *db = (u32 *) &page->db_rec[j];
360
361out:
362 up(&dev->db_tab->mutex);
363
364 return ret;
365}
366
367void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
368{
369 int i, j;
370 struct mthca_db_page *page;
371 u8 status;
372
373 i = db_index / MTHCA_DB_REC_PER_PAGE;
374 j = db_index % MTHCA_DB_REC_PER_PAGE;
375
376 page = dev->db_tab->page + i;
377
378 down(&dev->db_tab->mutex);
379
380 page->db_rec[j] = 0;
381 if (i >= dev->db_tab->min_group2)
382 j = MTHCA_DB_REC_PER_PAGE - 1 - j;
383 clear_bit(j, page->used);
384
385 if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
386 i >= dev->db_tab->max_group1 - 1) {
387 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status);
388
389 dma_free_coherent(&dev->pdev->dev, 4096,
390 page->db_rec, page->mapping);
391 page->db_rec = NULL;
392
393 if (i == dev->db_tab->max_group1) {
394 --dev->db_tab->max_group1;
395 /* XXX may be able to unmap more pages now */
396 }
397 if (i == dev->db_tab->min_group2)
398 ++dev->db_tab->min_group2;
399 }
400
401 up(&dev->db_tab->mutex);
402}
403
404int mthca_init_db_tab(struct mthca_dev *dev)
405{
406 int i;
407
408 if (dev->hca_type != ARBEL_NATIVE)
409 return 0;
410
411 dev->db_tab = kmalloc(sizeof *dev->db_tab, GFP_KERNEL);
412 if (!dev->db_tab)
413 return -ENOMEM;
414
415 init_MUTEX(&dev->db_tab->mutex);
416
417 dev->db_tab->npages = dev->uar_table.uarc_size / PAGE_SIZE;
418 dev->db_tab->max_group1 = 0;
419 dev->db_tab->min_group2 = dev->db_tab->npages - 1;
420
421 dev->db_tab->page = kmalloc(dev->db_tab->npages *
422 sizeof *dev->db_tab->page,
423 GFP_KERNEL);
424 if (!dev->db_tab->page) {
425 kfree(dev->db_tab);
426 return -ENOMEM;
427 }
428
429 for (i = 0; i < dev->db_tab->npages; ++i)
430 dev->db_tab->page[i].db_rec = NULL;
431
432 return 0;
433}
434
435void mthca_cleanup_db_tab(struct mthca_dev *dev)
436{
437 int i;
438 u8 status;
439
440 if (dev->hca_type != ARBEL_NATIVE)
441 return;
442
443 /*
444 * Because we don't always free our UARC pages when they
445 * become empty to make mthca_free_db() simpler we need to
446 * make a sweep through the doorbell pages and free any
447 * leftover pages now.
448 */
449 for (i = 0; i < dev->db_tab->npages; ++i) {
450 if (!dev->db_tab->page[i].db_rec)
451 continue;
452
453 if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
454 mthca_warn(dev, "Kernel UARC page %d not empty\n", i);
455
456 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status);
457
458 dma_free_coherent(&dev->pdev->dev, 4096,
459 dev->db_tab->page[i].db_rec,
460 dev->db_tab->page[i].mapping);
461 }
462
463 kfree(dev->db_tab->page);
464 kfree(dev->db_tab);
465}
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
new file mode 100644
index 000000000000..a8fa97e140f5
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -0,0 +1,161 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id$
33 */
34
35#ifndef MTHCA_MEMFREE_H
36#define MTHCA_MEMFREE_H
37
38#include <linux/list.h>
39#include <linux/pci.h>
40
41#include <asm/semaphore.h>
42
43#define MTHCA_ICM_CHUNK_LEN \
44 ((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \
45 (sizeof (struct scatterlist)))
46
47struct mthca_icm_chunk {
48 struct list_head list;
49 int npages;
50 int nsg;
51 struct scatterlist mem[MTHCA_ICM_CHUNK_LEN];
52};
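/*
 * MTHCA_ICM_CHUNK_LEN above is chosen so that the list_head and the two
 * int header fields plus the scatterlist array keep
 * sizeof(struct mthca_icm_chunk) at roughly 256 bytes (ignoring padding).
 */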
53
54struct mthca_icm {
55 struct list_head chunk_list;
56 int refcount;
57};
58
59struct mthca_icm_table {
60 u64 virt;
61 int num_icm;
62 int num_obj;
63 int obj_size;
64 int lowmem;
65 struct semaphore mutex;
66 struct mthca_icm *icm[0];
67};
68
69struct mthca_icm_iter {
70 struct mthca_icm *icm;
71 struct mthca_icm_chunk *chunk;
72 int page_idx;
73};
74
75struct mthca_dev;
76
77struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
78 unsigned int gfp_mask);
79void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm);
80
81struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
82 u64 virt, int obj_size,
83 int nobj, int reserved,
84 int use_lowmem);
85void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table);
86int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
87void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
88
89static inline void mthca_icm_first(struct mthca_icm *icm,
90 struct mthca_icm_iter *iter)
91{
92 iter->icm = icm;
93 iter->chunk = list_empty(&icm->chunk_list) ?
94 NULL : list_entry(icm->chunk_list.next,
95 struct mthca_icm_chunk, list);
96 iter->page_idx = 0;
97}
98
99static inline int mthca_icm_last(struct mthca_icm_iter *iter)
100{
101 return !iter->chunk;
102}
103
104static inline void mthca_icm_next(struct mthca_icm_iter *iter)
105{
106 if (++iter->page_idx >= iter->chunk->nsg) {
107 if (iter->chunk->list.next == &iter->icm->chunk_list) {
108 iter->chunk = NULL;
109 return;
110 }
111
112 iter->chunk = list_entry(iter->chunk->list.next,
113 struct mthca_icm_chunk, list);
114 iter->page_idx = 0;
115 }
116}
117
118static inline dma_addr_t mthca_icm_addr(struct mthca_icm_iter *iter)
119{
120 return sg_dma_address(&iter->chunk->mem[iter->page_idx]);
121}
122
123static inline unsigned long mthca_icm_size(struct mthca_icm_iter *iter)
124{
125 return sg_dma_len(&iter->chunk->mem[iter->page_idx]);
126}
127
128enum {
129 MTHCA_DB_REC_PER_PAGE = 4096 / 8
130};
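/*
 * Doorbell records are 8 bytes each, so one 4096-byte UARC page holds 512
 * of them.  mthca_alloc_db() in mthca_memfree.c hands out group 1 records
 * (CQ arm, SQ) from index 0 upwards within a page and group 2 records
 * (CQ set_ci, RQ, SRQ) from index 511 downwards, mirroring the way whole
 * pages are taken from the two ends of the table.
 */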
131
132struct mthca_db_page {
133 DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE);
134 u64 *db_rec;
135 dma_addr_t mapping;
136};
137
138struct mthca_db_table {
139 int npages;
140 int max_group1;
141 int min_group2;
142 struct mthca_db_page *page;
143 struct semaphore mutex;
144};
145
146enum {
147 MTHCA_DB_TYPE_INVALID = 0x0,
148 MTHCA_DB_TYPE_CQ_SET_CI = 0x1,
149 MTHCA_DB_TYPE_CQ_ARM = 0x2,
150 MTHCA_DB_TYPE_SQ = 0x3,
151 MTHCA_DB_TYPE_RQ = 0x4,
152 MTHCA_DB_TYPE_SRQ = 0x5,
153 MTHCA_DB_TYPE_GROUP_SEP = 0x7
154};
155
156int mthca_init_db_tab(struct mthca_dev *dev);
157void mthca_cleanup_db_tab(struct mthca_dev *dev);
158int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db);
159void mthca_free_db(struct mthca_dev *dev, int type, int db_index);
160
161#endif /* MTHCA_MEMFREE_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
new file mode 100644
index 000000000000..80a0cd97881b
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -0,0 +1,416 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_mr.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <linux/slab.h>
36#include <linux/init.h>
37#include <linux/errno.h>
38
39#include "mthca_dev.h"
40#include "mthca_cmd.h"
41
42/*
43 * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
44 */
45struct mthca_mpt_entry {
46 u32 flags;
47 u32 page_size;
48 u32 key;
49 u32 pd;
50 u64 start;
51 u64 length;
52 u32 lkey;
53 u32 window_count;
54 u32 window_count_limit;
55 u64 mtt_seg;
56 u32 mtt_sz; /* Arbel only */
57 u32 reserved[2];
58} __attribute__((packed));
59
60#define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28)
61#define MTHCA_MPT_FLAG_MIO (1 << 17)
62#define MTHCA_MPT_FLAG_BIND_ENABLE (1 << 15)
63#define MTHCA_MPT_FLAG_PHYSICAL (1 << 9)
64#define MTHCA_MPT_FLAG_REGION (1 << 8)
65
66#define MTHCA_MTT_FLAG_PRESENT 1
67
68/*
69 * Buddy allocator for MTT segments (currently not very efficient
70 * since it doesn't keep a free list and just searches linearly
71 * through the bitmaps)
72 */
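/*
 * For example, allocating an order-0 segment when only the order-2 block
 * at segment 0 is free: the order-2 bit is cleared and the split leaves
 * the order-1 buddy (segments 2-3) and the order-0 buddy (segment 1)
 * marked free, while segment 0 is returned.  Freeing segment 0 later
 * finds both buddies free again and merges everything back into a single
 * free order-2 block.
 */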
73
74static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order)
75{
76 int o;
77 int m;
78 u32 seg;
79
80 spin_lock(&dev->mr_table.mpt_alloc.lock);
81
82 for (o = order; o <= dev->mr_table.max_mtt_order; ++o) {
83 m = 1 << (dev->mr_table.max_mtt_order - o);
84 seg = find_first_bit(dev->mr_table.mtt_buddy[o], m);
85 if (seg < m)
86 goto found;
87 }
88
89 spin_unlock(&dev->mr_table.mpt_alloc.lock);
90 return -1;
91
92 found:
93 clear_bit(seg, dev->mr_table.mtt_buddy[o]);
94
95 while (o > order) {
96 --o;
97 seg <<= 1;
98 set_bit(seg ^ 1, dev->mr_table.mtt_buddy[o]);
99 }
100
101 spin_unlock(&dev->mr_table.mpt_alloc.lock);
102
103 seg <<= order;
104
105 return seg;
106}
107
108static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order)
109{
110 seg >>= order;
111
112 spin_lock(&dev->mr_table.mpt_alloc.lock);
113
114 while (test_bit(seg ^ 1, dev->mr_table.mtt_buddy[order])) {
115 clear_bit(seg ^ 1, dev->mr_table.mtt_buddy[order]);
116 seg >>= 1;
117 ++order;
118 }
119
120 set_bit(seg, dev->mr_table.mtt_buddy[order]);
121
122 spin_unlock(&dev->mr_table.mpt_alloc.lock);
123}
124
125static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind)
126{
127 if (dev->hca_type == ARBEL_NATIVE)
128 return (ind >> 24) | (ind << 8);
129 else
130 return ind;
131}
132
133static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
134{
135 if (dev->hca_type == ARBEL_NATIVE)
136 return (key << 24) | (key >> 8);
137 else
138 return key;
139}
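/*
 * On mem-free (Arbel-native) HCAs these helpers are inverse 32-bit
 * rotations by 8 bits: the lkey/rkey stored in mr->ibmr.lkey is the
 * rotated MPT index, and key_to_hw_index() undoes the rotation before the
 * index is masked with (num_mpts - 1) to address the MPT table (see
 * mthca_free_mr() below).  On other HCA types the key and the index are
 * identical.
 */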
140
141int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
142 u32 access, struct mthca_mr *mr)
143{
144 void *mailbox;
145 struct mthca_mpt_entry *mpt_entry;
146 u32 key;
147 int err;
148 u8 status;
149
150 might_sleep();
151
152 mr->order = -1;
153 key = mthca_alloc(&dev->mr_table.mpt_alloc);
154 if (key == -1)
155 return -ENOMEM;
156 mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
157
158 mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
159 GFP_KERNEL);
160 if (!mailbox) {
161 mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
162 return -ENOMEM;
163 }
164 mpt_entry = MAILBOX_ALIGN(mailbox);
165
166 mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
167 MTHCA_MPT_FLAG_MIO |
168 MTHCA_MPT_FLAG_PHYSICAL |
169 MTHCA_MPT_FLAG_REGION |
170 access);
171 mpt_entry->page_size = 0;
172 mpt_entry->key = cpu_to_be32(key);
173 mpt_entry->pd = cpu_to_be32(pd);
174 mpt_entry->start = 0;
175 mpt_entry->length = ~0ULL;
176
177 memset(&mpt_entry->lkey, 0,
178 sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
179
180 err = mthca_SW2HW_MPT(dev, mpt_entry,
181 key & (dev->limits.num_mpts - 1),
182 &status);
183 if (err)
184 mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
185 else if (status) {
186 mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
187 status);
188 err = -EINVAL;
189 }
190
191 kfree(mailbox);
192 return err;
193}
194
195int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
196 u64 *buffer_list, int buffer_size_shift,
197 int list_len, u64 iova, u64 total_size,
198 u32 access, struct mthca_mr *mr)
199{
200 void *mailbox;
201 u64 *mtt_entry;
202 struct mthca_mpt_entry *mpt_entry;
203 u32 key;
204 int err = -ENOMEM;
205 u8 status;
206 int i;
207
208 might_sleep();
209 WARN_ON(buffer_size_shift >= 32);
210
211 key = mthca_alloc(&dev->mr_table.mpt_alloc);
212 if (key == -1)
213 return -ENOMEM;
214 mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
215
216 for (i = dev->limits.mtt_seg_size / 8, mr->order = 0;
217 i < list_len;
218 i <<= 1, ++mr->order)
219 ; /* nothing */
220
221 mr->first_seg = mthca_alloc_mtt(dev, mr->order);
222 if (mr->first_seg == -1)
223 goto err_out_mpt_free;
224
225 /*
226 * If list_len is odd, we add one more dummy entry for
227 * firmware efficiency.
228 */
229 mailbox = kmalloc(max(sizeof *mpt_entry,
230 (size_t) 8 * (list_len + (list_len & 1) + 2)) +
231 MTHCA_CMD_MAILBOX_EXTRA,
232 GFP_KERNEL);
233 if (!mailbox)
234 goto err_out_free_mtt;
235
236 mtt_entry = MAILBOX_ALIGN(mailbox);
237
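	/*
	 * Layout of the mailbox handed to mthca_WRITE_MTT() below: the
	 * first quadword is the address of the target MTT segment inside
	 * the MTT table, the second is reserved (zero), and the remaining
	 * entries are the buffer addresses with MTHCA_MTT_FLAG_PRESENT
	 * set; a zero entry pads odd-length lists.
	 */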
238 mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
239 mr->first_seg * dev->limits.mtt_seg_size);
240 mtt_entry[1] = 0;
241 for (i = 0; i < list_len; ++i)
242 mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
243 MTHCA_MTT_FLAG_PRESENT);
244 if (list_len & 1) {
245 mtt_entry[i + 2] = 0;
246 ++list_len;
247 }
248
249 if (0) {
250 mthca_dbg(dev, "Dumping MPT entry\n");
251 for (i = 0; i < list_len + 2; ++i)
252 printk(KERN_ERR "[%2d] %016llx\n",
253 i, (unsigned long long) be64_to_cpu(mtt_entry[i]));
254 }
255
256 err = mthca_WRITE_MTT(dev, mtt_entry, list_len, &status);
257 if (err) {
258 mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
259 goto err_out_mailbox_free;
260 }
261 if (status) {
262 mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
263 status);
264 err = -EINVAL;
265 goto err_out_mailbox_free;
266 }
267
268 mpt_entry = MAILBOX_ALIGN(mailbox);
269
270 mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
271 MTHCA_MPT_FLAG_MIO |
272 MTHCA_MPT_FLAG_REGION |
273 access);
274
275 mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
276 mpt_entry->key = cpu_to_be32(key);
277 mpt_entry->pd = cpu_to_be32(pd);
278 mpt_entry->start = cpu_to_be64(iova);
279 mpt_entry->length = cpu_to_be64(total_size);
280 memset(&mpt_entry->lkey, 0,
281 sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
282 mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base +
283 mr->first_seg * dev->limits.mtt_seg_size);
284
285 if (0) {
286 mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
287 for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
288 if (i % 4 == 0)
289 printk("[%02x] ", i * 4);
290 printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
291 if ((i + 1) % 4 == 0)
292 printk("\n");
293 }
294 }
295
296 err = mthca_SW2HW_MPT(dev, mpt_entry,
297 key & (dev->limits.num_mpts - 1),
298 &status);
299 if (err)
300 mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
301 else if (status) {
302 mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
303 status);
304 err = -EINVAL;
305 }
306
307 kfree(mailbox);
308 return err;
309
310 err_out_mailbox_free:
311 kfree(mailbox);
312
313 err_out_free_mtt:
314 mthca_free_mtt(dev, mr->first_seg, mr->order);
315
316 err_out_mpt_free:
317 mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
318 return err;
319}
320
321void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
322{
323 int err;
324 u8 status;
325
326 might_sleep();
327
328 err = mthca_HW2SW_MPT(dev, NULL,
329 key_to_hw_index(dev, mr->ibmr.lkey) &
330 (dev->limits.num_mpts - 1),
331 &status);
332 if (err)
333 mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err);
334 else if (status)
335 mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
336 status);
337
338 if (mr->order >= 0)
339 mthca_free_mtt(dev, mr->first_seg, mr->order);
340
341 mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, mr->ibmr.lkey));
342}
343
344int __devinit mthca_init_mr_table(struct mthca_dev *dev)
345{
346 int err;
347 int i, s;
348
349 err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
350 dev->limits.num_mpts,
351 ~0, dev->limits.reserved_mrws);
352 if (err)
353 return err;
354
355 err = -ENOMEM;
356
357 for (i = 1, dev->mr_table.max_mtt_order = 0;
358 i < dev->limits.num_mtt_segs;
359 i <<= 1, ++dev->mr_table.max_mtt_order)
360 ; /* nothing */
361
362 dev->mr_table.mtt_buddy = kmalloc((dev->mr_table.max_mtt_order + 1) *
363 sizeof (long *),
364 GFP_KERNEL);
365 if (!dev->mr_table.mtt_buddy)
366 goto err_out;
367
368 for (i = 0; i <= dev->mr_table.max_mtt_order; ++i)
369 dev->mr_table.mtt_buddy[i] = NULL;
370
371 for (i = 0; i <= dev->mr_table.max_mtt_order; ++i) {
372 s = BITS_TO_LONGS(1 << (dev->mr_table.max_mtt_order - i));
373 dev->mr_table.mtt_buddy[i] = kmalloc(s * sizeof (long),
374 GFP_KERNEL);
375 if (!dev->mr_table.mtt_buddy[i])
376 goto err_out_free;
377 bitmap_zero(dev->mr_table.mtt_buddy[i],
378 1 << (dev->mr_table.max_mtt_order - i));
379 }
380
381 set_bit(0, dev->mr_table.mtt_buddy[dev->mr_table.max_mtt_order]);
382
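	/*
	 * Reserve the firmware-owned MTT segments by permanently taking
	 * one block of order i, where 1 << i is the smallest power of two
	 * that is at least limits.reserved_mtts; the segment number
	 * returned by mthca_alloc_mtt() is deliberately ignored since this
	 * block is never freed.
	 */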
383 for (i = 0; i < dev->mr_table.max_mtt_order; ++i)
384 if (1 << i >= dev->limits.reserved_mtts)
385 break;
386
387 if (i == dev->mr_table.max_mtt_order) {
388 mthca_err(dev, "MTT table of order %d is "
389 "too small.\n", i);
390 goto err_out_free;
391 }
392
393 (void) mthca_alloc_mtt(dev, i);
394
395 return 0;
396
397 err_out_free:
398 for (i = 0; i <= dev->mr_table.max_mtt_order; ++i)
399 kfree(dev->mr_table.mtt_buddy[i]);
400
401 err_out:
402 mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
403
404 return err;
405}
406
407void __devexit mthca_cleanup_mr_table(struct mthca_dev *dev)
408{
409 int i;
410
411 /* XXX check if any MRs are still allocated? */
412 for (i = 0; i <= dev->mr_table.max_mtt_order; ++i)
413 kfree(dev->mr_table.mtt_buddy[i]);
414 kfree(dev->mr_table.mtt_buddy);
415 mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
416}
diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c
new file mode 100644
index 000000000000..ea66847e4ea3
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_pd.c
@@ -0,0 +1,80 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_pd.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <linux/init.h>
36#include <linux/errno.h>
37
38#include "mthca_dev.h"
39
40int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd)
41{
42 int err;
43
44 might_sleep();
45
46 atomic_set(&pd->sqp_count, 0);
47 pd->pd_num = mthca_alloc(&dev->pd_table.alloc);
48 if (pd->pd_num == -1)
49 return -ENOMEM;
50
51 err = mthca_mr_alloc_notrans(dev, pd->pd_num,
52 MTHCA_MPT_FLAG_LOCAL_READ |
53 MTHCA_MPT_FLAG_LOCAL_WRITE,
54 &pd->ntmr);
55 if (err)
56 mthca_free(&dev->pd_table.alloc, pd->pd_num);
57
58 return err;
59}
60
61void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd)
62{
63 might_sleep();
64 mthca_free_mr(dev, &pd->ntmr);
65 mthca_free(&dev->pd_table.alloc, pd->pd_num);
66}
67
68int __devinit mthca_init_pd_table(struct mthca_dev *dev)
69{
70 return mthca_alloc_init(&dev->pd_table.alloc,
71 dev->limits.num_pds,
72 (1 << 24) - 1,
73 dev->limits.reserved_pds);
74}
75
76void __devexit mthca_cleanup_pd_table(struct mthca_dev *dev)
77{
78 /* XXX check if any PDs are still allocated? */
79 mthca_alloc_cleanup(&dev->pd_table.alloc);
80}
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
new file mode 100644
index 000000000000..7881a8a919ca
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -0,0 +1,266 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_profile.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <linux/module.h>
36#include <linux/moduleparam.h>
37
38#include "mthca_profile.h"
39
40enum {
41 MTHCA_RES_QP,
42 MTHCA_RES_EEC,
43 MTHCA_RES_SRQ,
44 MTHCA_RES_CQ,
45 MTHCA_RES_EQP,
46 MTHCA_RES_EEEC,
47 MTHCA_RES_EQ,
48 MTHCA_RES_RDB,
49 MTHCA_RES_MCG,
50 MTHCA_RES_MPT,
51 MTHCA_RES_MTT,
52 MTHCA_RES_UAR,
53 MTHCA_RES_UDAV,
54 MTHCA_RES_UARC,
55 MTHCA_RES_NUM
56};
57
58enum {
59 MTHCA_NUM_EQS = 32,
60 MTHCA_NUM_PDS = 1 << 15
61};
62
63u64 mthca_make_profile(struct mthca_dev *dev,
64 struct mthca_profile *request,
65 struct mthca_dev_lim *dev_lim,
66 struct mthca_init_hca_param *init_hca)
67{
68 struct mthca_resource {
69 u64 size;
70 u64 start;
71 int type;
72 int num;
73 int log_num;
74 };
75
76 u64 mem_base, mem_avail;
77 u64 total_size = 0;
78 struct mthca_resource *profile;
79 struct mthca_resource tmp;
80 int i, j;
81
82 profile = kmalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL);
83 if (!profile)
84 return -ENOMEM;
85
86 memset(profile, 0, MTHCA_RES_NUM * sizeof *profile);
87
88 profile[MTHCA_RES_QP].size = dev_lim->qpc_entry_sz;
89 profile[MTHCA_RES_EEC].size = dev_lim->eec_entry_sz;
90 profile[MTHCA_RES_SRQ].size = dev_lim->srq_entry_sz;
91 profile[MTHCA_RES_CQ].size = dev_lim->cqc_entry_sz;
92 profile[MTHCA_RES_EQP].size = dev_lim->eqpc_entry_sz;
93 profile[MTHCA_RES_EEEC].size = dev_lim->eeec_entry_sz;
94 profile[MTHCA_RES_EQ].size = dev_lim->eqc_entry_sz;
95 profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE;
96 profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE;
97 profile[MTHCA_RES_MPT].size = dev_lim->mpt_entry_sz;
98 profile[MTHCA_RES_MTT].size = dev_lim->mtt_seg_sz;
99 profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz;
100 profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE;
101 profile[MTHCA_RES_UARC].size = request->uarc_size;
102
103 profile[MTHCA_RES_QP].num = request->num_qp;
104 profile[MTHCA_RES_EQP].num = request->num_qp;
105 profile[MTHCA_RES_RDB].num = request->num_qp * request->rdb_per_qp;
106 profile[MTHCA_RES_CQ].num = request->num_cq;
107 profile[MTHCA_RES_EQ].num = MTHCA_NUM_EQS;
108 profile[MTHCA_RES_MCG].num = request->num_mcg;
109 profile[MTHCA_RES_MPT].num = request->num_mpt;
110 profile[MTHCA_RES_MTT].num = request->num_mtt;
111 profile[MTHCA_RES_UAR].num = request->num_uar;
112 profile[MTHCA_RES_UARC].num = request->num_uar;
113 profile[MTHCA_RES_UDAV].num = request->num_udav;
114
115 for (i = 0; i < MTHCA_RES_NUM; ++i) {
116 profile[i].type = i;
117 profile[i].log_num = max(ffs(profile[i].num) - 1, 0);
118 profile[i].size *= profile[i].num;
119 if (dev->hca_type == ARBEL_NATIVE)
120 profile[i].size = max(profile[i].size, (u64) PAGE_SIZE);
121 }
122
123 if (dev->hca_type == ARBEL_NATIVE) {
124 mem_base = 0;
125 mem_avail = dev_lim->hca.arbel.max_icm_sz;
126 } else {
127 mem_base = dev->ddr_start;
128 mem_avail = dev->fw.tavor.fw_start - dev->ddr_start;
129 }
130
131 /*
132 * Sort the resources in decreasing order of size. Since they
133 * all have sizes that are powers of 2, we'll be able to keep
134 * resources aligned to their size and pack them without gaps
135 * using the sorted order.
136 */
137 for (i = MTHCA_RES_NUM; i > 0; --i)
138 for (j = 1; j < i; ++j) {
139 if (profile[j].size > profile[j - 1].size) {
140 tmp = profile[j];
141 profile[j] = profile[j - 1];
142 profile[j - 1] = tmp;
143 }
144 }
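	/*
	 * Example of why this works: starting from a base aligned to the
	 * largest region, sizes of 16 MB, 16 MB, 4 MB and 1 MB land at
	 * offsets 0, 16 MB, 32 MB and 36 MB; each offset is a multiple of
	 * that region's own power-of-two size, so no alignment padding is
	 * ever needed.
	 */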
145
146 for (i = 0; i < MTHCA_RES_NUM; ++i) {
147 if (profile[i].size) {
148 profile[i].start = mem_base + total_size;
149 total_size += profile[i].size;
150 }
151 if (total_size > mem_avail) {
152 mthca_err(dev, "Profile requires 0x%llx bytes; "
153			  "won't fit in 0x%llx bytes of context memory.\n",
154 (unsigned long long) total_size,
155 (unsigned long long) mem_avail);
156 kfree(profile);
157 return -ENOMEM;
158 }
159
160 if (profile[i].size)
161 mthca_dbg(dev, "profile[%2d]--%2d/%2d @ 0x%16llx "
162 "(size 0x%8llx)\n",
163 i, profile[i].type, profile[i].log_num,
164 (unsigned long long) profile[i].start,
165 (unsigned long long) profile[i].size);
166 }
167
168 if (dev->hca_type == ARBEL_NATIVE)
169 mthca_dbg(dev, "HCA context memory: reserving %d KB\n",
170 (int) (total_size >> 10));
171 else
172 mthca_dbg(dev, "HCA memory: allocated %d KB/%d KB (%d KB free)\n",
173 (int) (total_size >> 10), (int) (mem_avail >> 10),
174 (int) ((mem_avail - total_size) >> 10));
175
176 for (i = 0; i < MTHCA_RES_NUM; ++i) {
177 switch (profile[i].type) {
178 case MTHCA_RES_QP:
179 dev->limits.num_qps = profile[i].num;
180 init_hca->qpc_base = profile[i].start;
181 init_hca->log_num_qps = profile[i].log_num;
182 break;
183 case MTHCA_RES_EEC:
184 dev->limits.num_eecs = profile[i].num;
185 init_hca->eec_base = profile[i].start;
186 init_hca->log_num_eecs = profile[i].log_num;
187 break;
188 case MTHCA_RES_SRQ:
189 dev->limits.num_srqs = profile[i].num;
190 init_hca->srqc_base = profile[i].start;
191 init_hca->log_num_srqs = profile[i].log_num;
192 break;
193 case MTHCA_RES_CQ:
194 dev->limits.num_cqs = profile[i].num;
195 init_hca->cqc_base = profile[i].start;
196 init_hca->log_num_cqs = profile[i].log_num;
197 break;
198 case MTHCA_RES_EQP:
199 init_hca->eqpc_base = profile[i].start;
200 break;
201 case MTHCA_RES_EEEC:
202 init_hca->eeec_base = profile[i].start;
203 break;
204 case MTHCA_RES_EQ:
205 dev->limits.num_eqs = profile[i].num;
206 init_hca->eqc_base = profile[i].start;
207 init_hca->log_num_eqs = profile[i].log_num;
208 break;
209 case MTHCA_RES_RDB:
210 for (dev->qp_table.rdb_shift = 0;
211 profile[MTHCA_RES_QP].num << dev->qp_table.rdb_shift <
212 profile[i].num;
213 ++dev->qp_table.rdb_shift)
214 ; /* nothing */
215 dev->qp_table.rdb_base = (u32) profile[i].start;
216 init_hca->rdb_base = profile[i].start;
217 break;
218 case MTHCA_RES_MCG:
219 dev->limits.num_mgms = profile[i].num >> 1;
220 dev->limits.num_amgms = profile[i].num >> 1;
221 init_hca->mc_base = profile[i].start;
222 init_hca->log_mc_entry_sz = ffs(MTHCA_MGM_ENTRY_SIZE) - 1;
223 init_hca->log_mc_table_sz = profile[i].log_num;
224 init_hca->mc_hash_sz = 1 << (profile[i].log_num - 1);
225 break;
226 case MTHCA_RES_MPT:
227 dev->limits.num_mpts = profile[i].num;
228 init_hca->mpt_base = profile[i].start;
229 init_hca->log_mpt_sz = profile[i].log_num;
230 break;
231 case MTHCA_RES_MTT:
232 dev->limits.num_mtt_segs = profile[i].num;
233 dev->limits.mtt_seg_size = dev_lim->mtt_seg_sz;
234 dev->mr_table.mtt_base = profile[i].start;
235 init_hca->mtt_base = profile[i].start;
236 init_hca->mtt_seg_sz = ffs(dev_lim->mtt_seg_sz) - 7;
237 break;
238 case MTHCA_RES_UAR:
239 dev->limits.num_uars = profile[i].num;
240 init_hca->uar_scratch_base = profile[i].start;
241 break;
242 case MTHCA_RES_UDAV:
243 dev->av_table.ddr_av_base = profile[i].start;
244 dev->av_table.num_ddr_avs = profile[i].num;
245 break;
246 case MTHCA_RES_UARC:
247 dev->uar_table.uarc_size = request->uarc_size;
248 dev->uar_table.uarc_base = profile[i].start;
249 init_hca->uarc_base = profile[i].start;
250 init_hca->log_uarc_sz = ffs(request->uarc_size) - 13;
251 init_hca->log_uar_sz = ffs(request->num_uar) - 1;
252 break;
253 default:
254 break;
255 }
256 }
257
258 /*
259 * PDs don't take any HCA memory, but we assign them as part
260 * of the HCA profile anyway.
261 */
262 dev->limits.num_pds = MTHCA_NUM_PDS;
263
264 kfree(profile);
265 return total_size;
266}
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h
new file mode 100644
index 000000000000..daaf7999486c
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_profile.h
@@ -0,0 +1,58 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_profile.h 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#ifndef MTHCA_PROFILE_H
36#define MTHCA_PROFILE_H
37
38#include "mthca_dev.h"
39#include "mthca_cmd.h"
40
41struct mthca_profile {
42 int num_qp;
43 int rdb_per_qp;
44 int num_cq;
45 int num_mcg;
46 int num_mpt;
47 int num_mtt;
48 int num_udav;
49 int num_uar;
50 int uarc_size;
51};
52
53u64 mthca_make_profile(struct mthca_dev *mdev,
54 struct mthca_profile *request,
55 struct mthca_dev_lim *dev_lim,
56 struct mthca_init_hca_param *init_hca);
57
58#endif /* MTHCA_PROFILE_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
new file mode 100644
index 000000000000..bbf74cf43343
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -0,0 +1,660 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_provider.c 1397 2004-12-28 05:09:00Z roland $
33 */
34
35#include <ib_smi.h>
36
37#include "mthca_dev.h"
38#include "mthca_cmd.h"
39
40static int mthca_query_device(struct ib_device *ibdev,
41 struct ib_device_attr *props)
42{
43 struct ib_smp *in_mad = NULL;
44 struct ib_smp *out_mad = NULL;
45 int err = -ENOMEM;
 46	struct mthca_dev *mdev = to_mdev(ibdev);
47
48 u8 status;
49
50 in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
51 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
52 if (!in_mad || !out_mad)
53 goto out;
54
55 props->fw_ver = mdev->fw_ver;
56
57 memset(in_mad, 0, sizeof *in_mad);
58 in_mad->base_version = 1;
59 in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
60 in_mad->class_version = 1;
61 in_mad->method = IB_MGMT_METHOD_GET;
62 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
63
64 err = mthca_MAD_IFC(mdev, 1, 1,
65 1, NULL, NULL, in_mad, out_mad,
66 &status);
67 if (err)
68 goto out;
69 if (status) {
70 err = -EINVAL;
71 goto out;
72 }
73
74 props->device_cap_flags = mdev->device_cap_flags;
75 props->vendor_id = be32_to_cpup((u32 *) (out_mad->data + 36)) &
76 0xffffff;
77 props->vendor_part_id = be16_to_cpup((u16 *) (out_mad->data + 30));
78 props->hw_ver = be16_to_cpup((u16 *) (out_mad->data + 32));
79 memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
80 memcpy(&props->node_guid, out_mad->data + 12, 8);
81
82 err = 0;
83 out:
84 kfree(in_mad);
85 kfree(out_mad);
86 return err;
87}
88
89static int mthca_query_port(struct ib_device *ibdev,
90 u8 port, struct ib_port_attr *props)
91{
92 struct ib_smp *in_mad = NULL;
93 struct ib_smp *out_mad = NULL;
94 int err = -ENOMEM;
95 u8 status;
96
97 in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
98 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
99 if (!in_mad || !out_mad)
100 goto out;
101
102 memset(in_mad, 0, sizeof *in_mad);
103 in_mad->base_version = 1;
104 in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
105 in_mad->class_version = 1;
106 in_mad->method = IB_MGMT_METHOD_GET;
107 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
108 in_mad->attr_mod = cpu_to_be32(port);
109
110 err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
111 port, NULL, NULL, in_mad, out_mad,
112 &status);
113 if (err)
114 goto out;
115 if (status) {
116 err = -EINVAL;
117 goto out;
118 }
119
120 props->lid = be16_to_cpup((u16 *) (out_mad->data + 16));
121 props->lmc = out_mad->data[34] & 0x7;
122 props->sm_lid = be16_to_cpup((u16 *) (out_mad->data + 18));
123 props->sm_sl = out_mad->data[36] & 0xf;
124 props->state = out_mad->data[32] & 0xf;
125 props->phys_state = out_mad->data[33] >> 4;
126 props->port_cap_flags = be32_to_cpup((u32 *) (out_mad->data + 20));
127 props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len;
128 props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len;
129 props->qkey_viol_cntr = be16_to_cpup((u16 *) (out_mad->data + 48));
130 props->active_width = out_mad->data[31] & 0xf;
131 props->active_speed = out_mad->data[35] >> 4;
132
133 out:
134 kfree(in_mad);
135 kfree(out_mad);
136 return err;
137}
138
139static int mthca_modify_port(struct ib_device *ibdev,
140 u8 port, int port_modify_mask,
141 struct ib_port_modify *props)
142{
143 struct mthca_set_ib_param set_ib;
144 struct ib_port_attr attr;
145 int err;
146 u8 status;
147
148 if (down_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
149 return -ERESTARTSYS;
150
151 err = mthca_query_port(ibdev, port, &attr);
152 if (err)
153 goto out;
154
155 set_ib.set_si_guid = 0;
156 set_ib.reset_qkey_viol = !!(port_modify_mask & IB_PORT_RESET_QKEY_CNTR);
157
158 set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
159 ~props->clr_port_cap_mask;
160
161 err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port, &status);
162 if (err)
163 goto out;
164 if (status) {
165 err = -EINVAL;
166 goto out;
167 }
168
169out:
170 up(&to_mdev(ibdev)->cap_mask_mutex);
171 return err;
172}
173
174static int mthca_query_pkey(struct ib_device *ibdev,
175 u8 port, u16 index, u16 *pkey)
176{
177 struct ib_smp *in_mad = NULL;
178 struct ib_smp *out_mad = NULL;
179 int err = -ENOMEM;
180 u8 status;
181
182 in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
183 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
184 if (!in_mad || !out_mad)
185 goto out;
186
187 memset(in_mad, 0, sizeof *in_mad);
188 in_mad->base_version = 1;
189 in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
190 in_mad->class_version = 1;
191 in_mad->method = IB_MGMT_METHOD_GET;
192 in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
193 in_mad->attr_mod = cpu_to_be32(index / 32);
194
195 err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
196 port, NULL, NULL, in_mad, out_mad,
197 &status);
198 if (err)
199 goto out;
200 if (status) {
201 err = -EINVAL;
202 goto out;
203 }
204
205 *pkey = be16_to_cpu(((u16 *) out_mad->data)[index % 32]);
206
207 out:
208 kfree(in_mad);
209 kfree(out_mad);
210 return err;
211}
212
213static int mthca_query_gid(struct ib_device *ibdev, u8 port,
214 int index, union ib_gid *gid)
215{
216 struct ib_smp *in_mad = NULL;
217 struct ib_smp *out_mad = NULL;
218 int err = -ENOMEM;
219 u8 status;
220
221 in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
222 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
223 if (!in_mad || !out_mad)
224 goto out;
225
226 memset(in_mad, 0, sizeof *in_mad);
227 in_mad->base_version = 1;
228 in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
229 in_mad->class_version = 1;
230 in_mad->method = IB_MGMT_METHOD_GET;
231 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
232 in_mad->attr_mod = cpu_to_be32(port);
233
234 err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
235 port, NULL, NULL, in_mad, out_mad,
236 &status);
237 if (err)
238 goto out;
239 if (status) {
240 err = -EINVAL;
241 goto out;
242 }
243
244 memcpy(gid->raw, out_mad->data + 8, 8);
245
246 memset(in_mad, 0, sizeof *in_mad);
247 in_mad->base_version = 1;
248 in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
249 in_mad->class_version = 1;
250 in_mad->method = IB_MGMT_METHOD_GET;
251 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
252 in_mad->attr_mod = cpu_to_be32(index / 8);
253
254 err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
255 port, NULL, NULL, in_mad, out_mad,
256 &status);
257 if (err)
258 goto out;
259 if (status) {
260 err = -EINVAL;
261 goto out;
262 }
263
264 memcpy(gid->raw + 8, out_mad->data + (index % 8) * 16, 8);
265
266 out:
267 kfree(in_mad);
268 kfree(out_mad);
269 return err;
270}
271
272static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev)
273{
274 struct mthca_pd *pd;
275 int err;
276
277 pd = kmalloc(sizeof *pd, GFP_KERNEL);
278 if (!pd)
279 return ERR_PTR(-ENOMEM);
280
281 err = mthca_pd_alloc(to_mdev(ibdev), pd);
282 if (err) {
283 kfree(pd);
284 return ERR_PTR(err);
285 }
286
287 return &pd->ibpd;
288}
289
290static int mthca_dealloc_pd(struct ib_pd *pd)
291{
292 mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
293 kfree(pd);
294
295 return 0;
296}
297
298static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
299 struct ib_ah_attr *ah_attr)
300{
301 int err;
302 struct mthca_ah *ah;
303
304 ah = kmalloc(sizeof *ah, GFP_KERNEL);
305 if (!ah)
306 return ERR_PTR(-ENOMEM);
307
308 err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah);
309 if (err) {
310 kfree(ah);
311 return ERR_PTR(err);
312 }
313
314 return &ah->ibah;
315}
316
317static int mthca_ah_destroy(struct ib_ah *ah)
318{
319 mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
320 kfree(ah);
321
322 return 0;
323}
324
325static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
326 struct ib_qp_init_attr *init_attr)
327{
328 struct mthca_qp *qp;
329 int err;
330
331 switch (init_attr->qp_type) {
332 case IB_QPT_RC:
333 case IB_QPT_UC:
334 case IB_QPT_UD:
335 {
336 qp = kmalloc(sizeof *qp, GFP_KERNEL);
337 if (!qp)
338 return ERR_PTR(-ENOMEM);
339
340 qp->sq.max = init_attr->cap.max_send_wr;
341 qp->rq.max = init_attr->cap.max_recv_wr;
342 qp->sq.max_gs = init_attr->cap.max_send_sge;
343 qp->rq.max_gs = init_attr->cap.max_recv_sge;
344
345 err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
346 to_mcq(init_attr->send_cq),
347 to_mcq(init_attr->recv_cq),
348 init_attr->qp_type, init_attr->sq_sig_type,
349 qp);
350 qp->ibqp.qp_num = qp->qpn;
351 break;
352 }
353 case IB_QPT_SMI:
354 case IB_QPT_GSI:
355 {
356 qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
357 if (!qp)
358 return ERR_PTR(-ENOMEM);
359
360 qp->sq.max = init_attr->cap.max_send_wr;
361 qp->rq.max = init_attr->cap.max_recv_wr;
362 qp->sq.max_gs = init_attr->cap.max_send_sge;
363 qp->rq.max_gs = init_attr->cap.max_recv_sge;
364
365 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
366
367 err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
368 to_mcq(init_attr->send_cq),
369 to_mcq(init_attr->recv_cq),
370 init_attr->sq_sig_type,
371 qp->ibqp.qp_num, init_attr->port_num,
372 to_msqp(qp));
373 break;
374 }
375 default:
376 /* Don't support raw QPs */
377 return ERR_PTR(-ENOSYS);
378 }
379
380 if (err) {
381 kfree(qp);
382 return ERR_PTR(err);
383 }
384
385 init_attr->cap.max_inline_data = 0;
386
387 return &qp->ibqp;
388}
389
390static int mthca_destroy_qp(struct ib_qp *qp)
391{
392 mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
393 kfree(qp);
394 return 0;
395}
396
397static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries)
398{
399 struct mthca_cq *cq;
400 int nent;
401 int err;
402
403 cq = kmalloc(sizeof *cq, GFP_KERNEL);
404 if (!cq)
405 return ERR_PTR(-ENOMEM);
406
407 for (nent = 1; nent <= entries; nent <<= 1)
408 ; /* nothing */
409
410 err = mthca_init_cq(to_mdev(ibdev), nent, cq);
411 if (err) {
412 kfree(cq);
413		return ERR_PTR(err);
414 }
415
416 return &cq->ibcq;
417}
418
419static int mthca_destroy_cq(struct ib_cq *cq)
420{
421 mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
422 kfree(cq);
423
424 return 0;
425}
426
427static inline u32 convert_access(int acc)
428{
429 return (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC : 0) |
430 (acc & IB_ACCESS_REMOTE_WRITE ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) |
431 (acc & IB_ACCESS_REMOTE_READ ? MTHCA_MPT_FLAG_REMOTE_READ : 0) |
432 (acc & IB_ACCESS_LOCAL_WRITE ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0) |
433 MTHCA_MPT_FLAG_LOCAL_READ;
434}
435
436static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
437{
438 struct mthca_mr *mr;
439 int err;
440
441 mr = kmalloc(sizeof *mr, GFP_KERNEL);
442 if (!mr)
443 return ERR_PTR(-ENOMEM);
444
445 err = mthca_mr_alloc_notrans(to_mdev(pd->device),
446 to_mpd(pd)->pd_num,
447 convert_access(acc), mr);
448
449 if (err) {
450 kfree(mr);
451 return ERR_PTR(err);
452 }
453
454 return &mr->ibmr;
455}
456
457static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
458 struct ib_phys_buf *buffer_list,
459 int num_phys_buf,
460 int acc,
461 u64 *iova_start)
462{
463 struct mthca_mr *mr;
464 u64 *page_list;
465 u64 total_size;
466 u64 mask;
467 int shift;
468 int npages;
469 int err;
470 int i, j, n;
471
472 /* First check that we have enough alignment */
473 if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK))
474 return ERR_PTR(-EINVAL);
475
476 if (num_phys_buf > 1 &&
477 ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK))
478 return ERR_PTR(-EINVAL);
479
480 mask = 0;
481 total_size = 0;
482 for (i = 0; i < num_phys_buf; ++i) {
483 if (buffer_list[i].addr & ~PAGE_MASK)
484 return ERR_PTR(-EINVAL);
485 if (i != 0 && i != num_phys_buf - 1 &&
486 (buffer_list[i].size & ~PAGE_MASK))
487 return ERR_PTR(-EINVAL);
488
489 total_size += buffer_list[i].size;
490 if (i > 0)
491 mask |= buffer_list[i].addr;
492 }
493
494 /* Find largest page shift we can use to cover buffers */
495 for (shift = PAGE_SHIFT; shift < 31; ++shift)
496 if (num_phys_buf > 1) {
497 if ((1ULL << shift) & mask)
498 break;
499 } else {
500 if (1ULL << shift >=
501 buffer_list[0].size +
502 (buffer_list[0].addr & ((1ULL << shift) - 1)))
503 break;
504 }
505
506 buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
507 buffer_list[0].addr &= ~0ull << shift;
508
509 mr = kmalloc(sizeof *mr, GFP_KERNEL);
510 if (!mr)
511 return ERR_PTR(-ENOMEM);
512
513 npages = 0;
514 for (i = 0; i < num_phys_buf; ++i)
515 npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
516
517 if (!npages)
518 return &mr->ibmr;
519
520 page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
521 if (!page_list) {
522 kfree(mr);
523 return ERR_PTR(-ENOMEM);
524 }
525
526 n = 0;
527 for (i = 0; i < num_phys_buf; ++i)
528 for (j = 0;
529 j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
530 ++j)
531 page_list[n++] = buffer_list[i].addr + ((u64) j << shift);
532
533 mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
534 "in PD %x; shift %d, npages %d.\n",
535 (unsigned long long) buffer_list[0].addr,
536 (unsigned long long) *iova_start,
537 to_mpd(pd)->pd_num,
538 shift, npages);
539
540 err = mthca_mr_alloc_phys(to_mdev(pd->device),
541 to_mpd(pd)->pd_num,
542 page_list, shift, npages,
543 *iova_start, total_size,
544 convert_access(acc), mr);
545
546	if (err) {
547		kfree(page_list);
548		kfree(mr);
549		return ERR_PTR(err);
550	}
551 kfree(page_list);
552 return &mr->ibmr;
553}
554
555static int mthca_dereg_mr(struct ib_mr *mr)
556{
557 mthca_free_mr(to_mdev(mr->device), to_mmr(mr));
558 kfree(mr);
559 return 0;
560}
561
562static ssize_t show_rev(struct class_device *cdev, char *buf)
563{
564 struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
565 return sprintf(buf, "%x\n", dev->rev_id);
566}
567
568static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
569{
570 struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
571 return sprintf(buf, "%x.%x.%x\n", (int) (dev->fw_ver >> 32),
572 (int) (dev->fw_ver >> 16) & 0xffff,
573 (int) dev->fw_ver & 0xffff);
574}
575
576static ssize_t show_hca(struct class_device *cdev, char *buf)
577{
578 struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
579 switch (dev->hca_type) {
580 case TAVOR: return sprintf(buf, "MT23108\n");
581 case ARBEL_COMPAT: return sprintf(buf, "MT25208 (MT23108 compat mode)\n");
582 case ARBEL_NATIVE: return sprintf(buf, "MT25208\n");
583 default: return sprintf(buf, "unknown\n");
584 }
585}
586
587static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
588static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
589static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
590
591static struct class_device_attribute *mthca_class_attributes[] = {
592 &class_device_attr_hw_rev,
593 &class_device_attr_fw_ver,
594 &class_device_attr_hca_type
595};
596
597int mthca_register_device(struct mthca_dev *dev)
598{
599 int ret;
600 int i;
601
602 strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
603 dev->ib_dev.node_type = IB_NODE_CA;
604 dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
605 dev->ib_dev.dma_device = &dev->pdev->dev;
606 dev->ib_dev.class_dev.dev = &dev->pdev->dev;
607 dev->ib_dev.query_device = mthca_query_device;
608 dev->ib_dev.query_port = mthca_query_port;
609 dev->ib_dev.modify_port = mthca_modify_port;
610 dev->ib_dev.query_pkey = mthca_query_pkey;
611 dev->ib_dev.query_gid = mthca_query_gid;
612 dev->ib_dev.alloc_pd = mthca_alloc_pd;
613 dev->ib_dev.dealloc_pd = mthca_dealloc_pd;
614 dev->ib_dev.create_ah = mthca_ah_create;
615 dev->ib_dev.destroy_ah = mthca_ah_destroy;
616 dev->ib_dev.create_qp = mthca_create_qp;
617 dev->ib_dev.modify_qp = mthca_modify_qp;
618 dev->ib_dev.destroy_qp = mthca_destroy_qp;
619 dev->ib_dev.create_cq = mthca_create_cq;
620 dev->ib_dev.destroy_cq = mthca_destroy_cq;
621 dev->ib_dev.poll_cq = mthca_poll_cq;
622 dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
623 dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
624 dev->ib_dev.dereg_mr = mthca_dereg_mr;
625 dev->ib_dev.attach_mcast = mthca_multicast_attach;
626 dev->ib_dev.detach_mcast = mthca_multicast_detach;
627 dev->ib_dev.process_mad = mthca_process_mad;
628
629 if (dev->hca_type == ARBEL_NATIVE) {
630 dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
631 dev->ib_dev.post_send = mthca_arbel_post_send;
632 dev->ib_dev.post_recv = mthca_arbel_post_receive;
633 } else {
634 dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq;
635 dev->ib_dev.post_send = mthca_tavor_post_send;
636 dev->ib_dev.post_recv = mthca_tavor_post_receive;
637 }
638
639 init_MUTEX(&dev->cap_mask_mutex);
640
641 ret = ib_register_device(&dev->ib_dev);
642 if (ret)
643 return ret;
644
645 for (i = 0; i < ARRAY_SIZE(mthca_class_attributes); ++i) {
646 ret = class_device_create_file(&dev->ib_dev.class_dev,
647 mthca_class_attributes[i]);
648 if (ret) {
649 ib_unregister_device(&dev->ib_dev);
650 return ret;
651 }
652 }
653
654 return 0;
655}
656
657void mthca_unregister_device(struct mthca_dev *dev)
658{
659 ib_unregister_device(&dev->ib_dev);
660}
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
new file mode 100644
index 000000000000..0598f3905d9a
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -0,0 +1,251 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_provider.h 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#ifndef MTHCA_PROVIDER_H
36#define MTHCA_PROVIDER_H
37
38#include <ib_verbs.h>
39#include <ib_pack.h>
40
41#define MTHCA_MPT_FLAG_ATOMIC (1 << 14)
42#define MTHCA_MPT_FLAG_REMOTE_WRITE (1 << 13)
43#define MTHCA_MPT_FLAG_REMOTE_READ (1 << 12)
44#define MTHCA_MPT_FLAG_LOCAL_WRITE (1 << 11)
45#define MTHCA_MPT_FLAG_LOCAL_READ (1 << 10)
46
47struct mthca_buf_list {
48 void *buf;
49 DECLARE_PCI_UNMAP_ADDR(mapping)
50};
51
52struct mthca_uar {
53 unsigned long pfn;
54 int index;
55};
56
57struct mthca_mr {
58 struct ib_mr ibmr;
59 int order;
60 u32 first_seg;
61};
62
63struct mthca_pd {
64 struct ib_pd ibpd;
65 u32 pd_num;
66 atomic_t sqp_count;
67 struct mthca_mr ntmr;
68};
69
70struct mthca_eq {
71 struct mthca_dev *dev;
72 int eqn;
73 u32 eqn_mask;
74 u32 cons_index;
75 u16 msi_x_vector;
76 u16 msi_x_entry;
77 int have_irq;
78 int nent;
79 struct mthca_buf_list *page_list;
80 struct mthca_mr mr;
81};
82
83struct mthca_av;
84
85enum mthca_ah_type {
86 MTHCA_AH_ON_HCA,
87 MTHCA_AH_PCI_POOL,
88 MTHCA_AH_KMALLOC
89};
90
91struct mthca_ah {
92 struct ib_ah ibah;
93 enum mthca_ah_type type;
94 u32 key;
95 struct mthca_av *av;
96 dma_addr_t avdma;
97};
98
99/*
100 * Quick description of our CQ/QP locking scheme:
101 *
102 * We have one global lock that protects dev->cq/qp_table. Each
103 * struct mthca_cq/qp also has its own lock. An individual qp lock
104 * may be taken inside of an individual cq lock. Both cqs attached to
105 * a qp may be locked, with the send cq locked first. No other
106 * nesting should be done.
107 *
108 * Each struct mthca_cq/qp also has an atomic_t ref count. The
109 * pointer from the cq/qp_table to the struct counts as one reference.
110 * This reference also is good for access through the consumer API, so
111 * modifying the CQ/QP etc doesn't need to take another reference.
112 * Access because of a completion being polled does need a reference.
113 *
114 * Finally, each struct mthca_cq/qp has a wait_queue_head_t for the
115 * destroy function to sleep on.
116 *
117 * This means that access from the consumer API requires nothing but
118 * taking the struct's lock.
119 *
120 * Access because of a completion event should go as follows (see the sketch just after this comment):
121 * - lock cq/qp_table and look up struct
122 * - increment ref count in struct
123 * - drop cq/qp_table lock
124 * - lock struct, do your thing, and unlock struct
125 * - decrement ref count; if zero, wake up waiters
126 *
127 * To destroy a CQ/QP, we can do the following:
128 * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
129 * - decrement ref count
130 * - wait_event until ref count is zero
131 *
132 * It is the consumer's responsibility to make sure that no QP
133 * operations (WQE posting or state modification) are pending when the
134 * QP is destroyed. Also, the consumer must make sure that calls to
135 * qp_modify are serialized.
136 *
137 * Possible optimizations (wait for profile data to see if/where we
138 * have locks bouncing between CPUs):
139 * - split cq/qp table lock into n separate (cache-aligned) locks,
140 * indexed (say) by the page in the table
141 * - split QP struct lock into three (one for common info, one for the
142 * send queue and one for the receive queue)
143 */
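/*
 * [Editorial sketch -- not part of the original file.]  Concretely,
 * the completion/async event access pattern described above looks
 * like the following for a QP; mthca_qp_event() in mthca_qp.c, later
 * in this patch, is the real implementation, and the CQ case is
 * analogous.  This is an illustration only, not an additional API:
 *
 *	spin_lock(&dev->qp_table.lock);
 *	qp = mthca_array_get(&dev->qp_table.qp,
 *			     qpn & (dev->limits.num_qps - 1));
 *	if (qp)
 *		atomic_inc(&qp->refcount);
 *	spin_unlock(&dev->qp_table.lock);
 *
 *	if (!qp)
 *		return;				(bogus QPN -- nothing to do)
 *
 *	... take the qp's own lock, do the work, drop the lock ...
 *
 *	if (atomic_dec_and_test(&qp->refcount))
 *		wake_up(&qp->wait);		(unblocks a sleeping destroy)
 */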
144
145struct mthca_cq {
146 struct ib_cq ibcq;
147 spinlock_t lock;
148 atomic_t refcount;
149 int cqn;
150 u32 cons_index;
151 int is_direct;
152
153 /* Next fields are Arbel only */
154 int set_ci_db_index;
155 u32 *set_ci_db;
156 int arm_db_index;
157 u32 *arm_db;
158 int arm_sn;
159
160 union {
161 struct mthca_buf_list direct;
162 struct mthca_buf_list *page_list;
163 } queue;
164 struct mthca_mr mr;
165 wait_queue_head_t wait;
166};
167
168struct mthca_wq {
169 spinlock_t lock;
170 int max;
171 unsigned next_ind;
172 unsigned last_comp;
173 unsigned head;
174 unsigned tail;
175 void *last;
176 int max_gs;
177 int wqe_shift;
178
179 int db_index; /* Arbel only */
180 u32 *db;
181};
182
183struct mthca_qp {
184 struct ib_qp ibqp;
185 atomic_t refcount;
186 u32 qpn;
187 int is_direct;
188 u8 transport;
189 u8 state;
190 u8 atomic_rd_en;
191 u8 resp_depth;
192
193 struct mthca_mr mr;
194
195 struct mthca_wq rq;
196 struct mthca_wq sq;
197 enum ib_sig_type sq_policy;
198 int send_wqe_offset;
199
200 u64 *wrid;
201 union {
202 struct mthca_buf_list direct;
203 struct mthca_buf_list *page_list;
204 } queue;
205
206 wait_queue_head_t wait;
207};
208
209struct mthca_sqp {
210 struct mthca_qp qp;
211 int port;
212 int pkey_index;
213 u32 qkey;
214 u32 send_psn;
215 struct ib_ud_header ud_header;
216 int header_buf_size;
217 void *header_buf;
218 dma_addr_t header_dma;
219};
220
221static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr)
222{
223 return container_of(ibmr, struct mthca_mr, ibmr);
224}
225
226static inline struct mthca_pd *to_mpd(struct ib_pd *ibpd)
227{
228 return container_of(ibpd, struct mthca_pd, ibpd);
229}
230
231static inline struct mthca_ah *to_mah(struct ib_ah *ibah)
232{
233 return container_of(ibah, struct mthca_ah, ibah);
234}
235
236static inline struct mthca_cq *to_mcq(struct ib_cq *ibcq)
237{
238 return container_of(ibcq, struct mthca_cq, ibcq);
239}
240
241static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp)
242{
243 return container_of(ibqp, struct mthca_qp, ibqp);
244}
245
246static inline struct mthca_sqp *to_msqp(struct mthca_qp *qp)
247{
248 return container_of(qp, struct mthca_sqp, qp);
249}
250
251#endif /* MTHCA_PROVIDER_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
new file mode 100644
index 000000000000..7e4bbbd31f07
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -0,0 +1,2056 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_qp.c 1355 2004-12-17 15:23:43Z roland $
33 */
34
35#include <linux/init.h>
36
37#include <ib_verbs.h>
38#include <ib_cache.h>
39#include <ib_pack.h>
40
41#include "mthca_dev.h"
42#include "mthca_cmd.h"
43#include "mthca_memfree.h"
44
45enum {
46 MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
47 MTHCA_ACK_REQ_FREQ = 10,
48 MTHCA_FLIGHT_LIMIT = 9,
49 MTHCA_UD_HEADER_SIZE = 72 /* largest UD header possible */
50};
51
52enum {
53 MTHCA_QP_STATE_RST = 0,
54 MTHCA_QP_STATE_INIT = 1,
55 MTHCA_QP_STATE_RTR = 2,
56 MTHCA_QP_STATE_RTS = 3,
57 MTHCA_QP_STATE_SQE = 4,
58 MTHCA_QP_STATE_SQD = 5,
59 MTHCA_QP_STATE_ERR = 6,
60 MTHCA_QP_STATE_DRAINING = 7
61};
62
63enum {
64 MTHCA_QP_ST_RC = 0x0,
65 MTHCA_QP_ST_UC = 0x1,
66 MTHCA_QP_ST_RD = 0x2,
67 MTHCA_QP_ST_UD = 0x3,
68 MTHCA_QP_ST_MLX = 0x7
69};
70
71enum {
72 MTHCA_QP_PM_MIGRATED = 0x3,
73 MTHCA_QP_PM_ARMED = 0x0,
74 MTHCA_QP_PM_REARM = 0x1
75};
76
77enum {
78 /* qp_context flags */
79 MTHCA_QP_BIT_DE = 1 << 8,
80 /* params1 */
81 MTHCA_QP_BIT_SRE = 1 << 15,
82 MTHCA_QP_BIT_SWE = 1 << 14,
83 MTHCA_QP_BIT_SAE = 1 << 13,
84 MTHCA_QP_BIT_SIC = 1 << 4,
85 MTHCA_QP_BIT_SSC = 1 << 3,
86 /* params2 */
87 MTHCA_QP_BIT_RRE = 1 << 15,
88 MTHCA_QP_BIT_RWE = 1 << 14,
89 MTHCA_QP_BIT_RAE = 1 << 13,
90 MTHCA_QP_BIT_RIC = 1 << 4,
91 MTHCA_QP_BIT_RSC = 1 << 3
92};
93
94struct mthca_qp_path {
95 u32 port_pkey;
96 u8 rnr_retry;
97 u8 g_mylmc;
98 u16 rlid;
99 u8 ackto;
100 u8 mgid_index;
101 u8 static_rate;
102 u8 hop_limit;
103 u32 sl_tclass_flowlabel;
104 u8 rgid[16];
105} __attribute__((packed));
106
107struct mthca_qp_context {
108 u32 flags;
109 u32 tavor_sched_queue; /* Reserved on Arbel */
110 u8 mtu_msgmax;
111 u8 rq_size_stride; /* Reserved on Tavor */
112 u8 sq_size_stride; /* Reserved on Tavor */
113 u8 rlkey_arbel_sched_queue; /* Reserved on Tavor */
114 u32 usr_page;
115 u32 local_qpn;
116 u32 remote_qpn;
117 u32 reserved1[2];
118 struct mthca_qp_path pri_path;
119 struct mthca_qp_path alt_path;
120 u32 rdd;
121 u32 pd;
122 u32 wqe_base;
123 u32 wqe_lkey;
124 u32 params1;
125 u32 reserved2;
126 u32 next_send_psn;
127 u32 cqn_snd;
128 u32 snd_wqe_base_l; /* Next send WQE on Tavor */
129 u32 snd_db_index; /* (debugging only entries) */
130 u32 last_acked_psn;
131 u32 ssn;
132 u32 params2;
133 u32 rnr_nextrecvpsn;
134 u32 ra_buff_indx;
135 u32 cqn_rcv;
136 u32 rcv_wqe_base_l; /* Next recv WQE on Tavor */
137 u32 rcv_db_index; /* (debugging only entries) */
138 u32 qkey;
139 u32 srqn;
140 u32 rmsn;
141 u16 rq_wqe_counter; /* reserved on Tavor */
142 u16 sq_wqe_counter; /* reserved on Tavor */
143 u32 reserved3[18];
144} __attribute__((packed));
145
146struct mthca_qp_param {
147 u32 opt_param_mask;
148 u32 reserved1;
149 struct mthca_qp_context context;
150 u32 reserved2[62];
151} __attribute__((packed));
152
153enum {
154 MTHCA_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0,
155 MTHCA_QP_OPTPAR_RRE = 1 << 1,
156 MTHCA_QP_OPTPAR_RAE = 1 << 2,
157 MTHCA_QP_OPTPAR_RWE = 1 << 3,
158 MTHCA_QP_OPTPAR_PKEY_INDEX = 1 << 4,
159 MTHCA_QP_OPTPAR_Q_KEY = 1 << 5,
160 MTHCA_QP_OPTPAR_RNR_TIMEOUT = 1 << 6,
161 MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7,
162 MTHCA_QP_OPTPAR_SRA_MAX = 1 << 8,
163 MTHCA_QP_OPTPAR_RRA_MAX = 1 << 9,
164 MTHCA_QP_OPTPAR_PM_STATE = 1 << 10,
165 MTHCA_QP_OPTPAR_PORT_NUM = 1 << 11,
166 MTHCA_QP_OPTPAR_RETRY_COUNT = 1 << 12,
167 MTHCA_QP_OPTPAR_ALT_RNR_RETRY = 1 << 13,
168 MTHCA_QP_OPTPAR_ACK_TIMEOUT = 1 << 14,
169 MTHCA_QP_OPTPAR_RNR_RETRY = 1 << 15,
170 MTHCA_QP_OPTPAR_SCHED_QUEUE = 1 << 16
171};
172
173enum {
174 MTHCA_OPCODE_NOP = 0x00,
175 MTHCA_OPCODE_RDMA_WRITE = 0x08,
176 MTHCA_OPCODE_RDMA_WRITE_IMM = 0x09,
177 MTHCA_OPCODE_SEND = 0x0a,
178 MTHCA_OPCODE_SEND_IMM = 0x0b,
179 MTHCA_OPCODE_RDMA_READ = 0x10,
180 MTHCA_OPCODE_ATOMIC_CS = 0x11,
181 MTHCA_OPCODE_ATOMIC_FA = 0x12,
182 MTHCA_OPCODE_BIND_MW = 0x18,
183 MTHCA_OPCODE_INVALID = 0xff
184};
185
186enum {
187 MTHCA_NEXT_DBD = 1 << 7,
188 MTHCA_NEXT_FENCE = 1 << 6,
189 MTHCA_NEXT_CQ_UPDATE = 1 << 3,
190 MTHCA_NEXT_EVENT_GEN = 1 << 2,
191 MTHCA_NEXT_SOLICIT = 1 << 1,
192
193 MTHCA_MLX_VL15 = 1 << 17,
194 MTHCA_MLX_SLR = 1 << 16
195};
196
197struct mthca_next_seg {
198 u32 nda_op; /* [31:6] next WQE [4:0] next opcode */
199 u32 ee_nds; /* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */
200 u32 flags; /* [3] CQ [2] Event [1] Solicit */
201 u32 imm; /* immediate data */
202};
203
204struct mthca_tavor_ud_seg {
205 u32 reserved1;
206 u32 lkey;
207 u64 av_addr;
208 u32 reserved2[4];
209 u32 dqpn;
210 u32 qkey;
211 u32 reserved3[2];
212};
213
214struct mthca_arbel_ud_seg {
215 u32 av[8];
216 u32 dqpn;
217 u32 qkey;
218 u32 reserved[2];
219};
220
221struct mthca_bind_seg {
222 u32 flags; /* [31] Atomic [30] rem write [29] rem read */
223 u32 reserved;
224 u32 new_rkey;
225 u32 lkey;
226 u64 addr;
227 u64 length;
228};
229
230struct mthca_raddr_seg {
231 u64 raddr;
232 u32 rkey;
233 u32 reserved;
234};
235
236struct mthca_atomic_seg {
237 u64 swap_add;
238 u64 compare;
239};
240
241struct mthca_data_seg {
242 u32 byte_count;
243 u32 lkey;
244 u64 addr;
245};
246
247struct mthca_mlx_seg {
248 u32 nda_op;
249 u32 nds;
250 u32 flags; /* [17] VL15 [16] SLR [14:12] static rate
251 [11:8] SL [3] C [2] E */
252 u16 rlid;
253 u16 vcrc;
254};
255
256static const u8 mthca_opcode[] = {
257 [IB_WR_SEND] = MTHCA_OPCODE_SEND,
258 [IB_WR_SEND_WITH_IMM] = MTHCA_OPCODE_SEND_IMM,
259 [IB_WR_RDMA_WRITE] = MTHCA_OPCODE_RDMA_WRITE,
260 [IB_WR_RDMA_WRITE_WITH_IMM] = MTHCA_OPCODE_RDMA_WRITE_IMM,
261 [IB_WR_RDMA_READ] = MTHCA_OPCODE_RDMA_READ,
262 [IB_WR_ATOMIC_CMP_AND_SWP] = MTHCA_OPCODE_ATOMIC_CS,
263 [IB_WR_ATOMIC_FETCH_AND_ADD] = MTHCA_OPCODE_ATOMIC_FA,
264};
265
266static int is_sqp(struct mthca_dev *dev, struct mthca_qp *qp)
267{
268 return qp->qpn >= dev->qp_table.sqp_start &&
269 qp->qpn <= dev->qp_table.sqp_start + 3;
270}
271
272static int is_qp0(struct mthca_dev *dev, struct mthca_qp *qp)
273{
274 return qp->qpn >= dev->qp_table.sqp_start &&
275 qp->qpn <= dev->qp_table.sqp_start + 1;
276}
277
278static void *get_recv_wqe(struct mthca_qp *qp, int n)
279{
280 if (qp->is_direct)
281 return qp->queue.direct.buf + (n << qp->rq.wqe_shift);
282 else
283 return qp->queue.page_list[(n << qp->rq.wqe_shift) >> PAGE_SHIFT].buf +
284 ((n << qp->rq.wqe_shift) & (PAGE_SIZE - 1));
285}
286
287static void *get_send_wqe(struct mthca_qp *qp, int n)
288{
289 if (qp->is_direct)
290 return qp->queue.direct.buf + qp->send_wqe_offset +
291 (n << qp->sq.wqe_shift);
292 else
293 return qp->queue.page_list[(qp->send_wqe_offset +
294 (n << qp->sq.wqe_shift)) >>
295 PAGE_SHIFT].buf +
296 ((qp->send_wqe_offset + (n << qp->sq.wqe_shift)) &
297 (PAGE_SIZE - 1));
298}
299
300void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
301 enum ib_event_type event_type)
302{
303 struct mthca_qp *qp;
304 struct ib_event event;
305
306 spin_lock(&dev->qp_table.lock);
307 qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1));
308 if (qp)
309 atomic_inc(&qp->refcount);
310 spin_unlock(&dev->qp_table.lock);
311
312 if (!qp) {
313 mthca_warn(dev, "Async event for bogus QP %08x\n", qpn);
314 return;
315 }
316
317 event.device = &dev->ib_dev;
318 event.event = event_type;
319 event.element.qp = &qp->ibqp;
320 if (qp->ibqp.event_handler)
321 qp->ibqp.event_handler(&event, qp->ibqp.qp_context);
322
323 if (atomic_dec_and_test(&qp->refcount))
324 wake_up(&qp->wait);
325}
326
327static int to_mthca_state(enum ib_qp_state ib_state)
328{
329 switch (ib_state) {
330 case IB_QPS_RESET: return MTHCA_QP_STATE_RST;
331 case IB_QPS_INIT: return MTHCA_QP_STATE_INIT;
332 case IB_QPS_RTR: return MTHCA_QP_STATE_RTR;
333 case IB_QPS_RTS: return MTHCA_QP_STATE_RTS;
334 case IB_QPS_SQD: return MTHCA_QP_STATE_SQD;
335 case IB_QPS_SQE: return MTHCA_QP_STATE_SQE;
336 case IB_QPS_ERR: return MTHCA_QP_STATE_ERR;
337 default: return -1;
338 }
339}
340
341enum { RC, UC, UD, RD, RDEE, MLX, NUM_TRANS };
342
343static int to_mthca_st(int transport)
344{
345 switch (transport) {
346 case RC: return MTHCA_QP_ST_RC;
347 case UC: return MTHCA_QP_ST_UC;
348 case UD: return MTHCA_QP_ST_UD;
349 case RD: return MTHCA_QP_ST_RD;
350 case MLX: return MTHCA_QP_ST_MLX;
351 default: return -1;
352 }
353}
354
355static const struct {
356 int trans;
357 u32 req_param[NUM_TRANS];
358 u32 opt_param[NUM_TRANS];
359} state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
360 [IB_QPS_RESET] = {
361 [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
362 [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
363 [IB_QPS_INIT] = {
364 .trans = MTHCA_TRANS_RST2INIT,
365 .req_param = {
366 [UD] = (IB_QP_PKEY_INDEX |
367 IB_QP_PORT |
368 IB_QP_QKEY),
369 [RC] = (IB_QP_PKEY_INDEX |
370 IB_QP_PORT |
371 IB_QP_ACCESS_FLAGS),
372 [MLX] = (IB_QP_PKEY_INDEX |
373 IB_QP_QKEY),
374 },
375 /* bug-for-bug compatibility with VAPI: */
376 .opt_param = {
377 [MLX] = IB_QP_PORT
378 }
379 },
380 },
381 [IB_QPS_INIT] = {
382 [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
383 [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
384 [IB_QPS_INIT] = {
385 .trans = MTHCA_TRANS_INIT2INIT,
386 .opt_param = {
387 [UD] = (IB_QP_PKEY_INDEX |
388 IB_QP_PORT |
389 IB_QP_QKEY),
390 [RC] = (IB_QP_PKEY_INDEX |
391 IB_QP_PORT |
392 IB_QP_ACCESS_FLAGS),
393 [MLX] = (IB_QP_PKEY_INDEX |
394 IB_QP_QKEY),
395 }
396 },
397 [IB_QPS_RTR] = {
398 .trans = MTHCA_TRANS_INIT2RTR,
399 .req_param = {
400 [RC] = (IB_QP_AV |
401 IB_QP_PATH_MTU |
402 IB_QP_DEST_QPN |
403 IB_QP_RQ_PSN |
404 IB_QP_MAX_DEST_RD_ATOMIC |
405 IB_QP_MIN_RNR_TIMER),
406 },
407 .opt_param = {
408 [UD] = (IB_QP_PKEY_INDEX |
409 IB_QP_QKEY),
410 [RC] = (IB_QP_ALT_PATH |
411 IB_QP_ACCESS_FLAGS |
412 IB_QP_PKEY_INDEX),
413 [MLX] = (IB_QP_PKEY_INDEX |
414 IB_QP_QKEY),
415 }
416 }
417 },
418 [IB_QPS_RTR] = {
419 [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
420 [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
421 [IB_QPS_RTS] = {
422 .trans = MTHCA_TRANS_RTR2RTS,
423 .req_param = {
424 [UD] = IB_QP_SQ_PSN,
425 [RC] = (IB_QP_TIMEOUT |
426 IB_QP_RETRY_CNT |
427 IB_QP_RNR_RETRY |
428 IB_QP_SQ_PSN |
429 IB_QP_MAX_QP_RD_ATOMIC),
430 [MLX] = IB_QP_SQ_PSN,
431 },
432 .opt_param = {
433 [UD] = (IB_QP_CUR_STATE |
434 IB_QP_QKEY),
435 [RC] = (IB_QP_CUR_STATE |
436 IB_QP_ALT_PATH |
437 IB_QP_ACCESS_FLAGS |
438 IB_QP_PKEY_INDEX |
439 IB_QP_MIN_RNR_TIMER |
440 IB_QP_PATH_MIG_STATE),
441 [MLX] = (IB_QP_CUR_STATE |
442 IB_QP_QKEY),
443 }
444 }
445 },
446 [IB_QPS_RTS] = {
447 [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
448 [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
449 [IB_QPS_RTS] = {
450 .trans = MTHCA_TRANS_RTS2RTS,
451 .opt_param = {
452 [UD] = (IB_QP_CUR_STATE |
453 IB_QP_QKEY),
454 [RC] = (IB_QP_ACCESS_FLAGS |
455 IB_QP_ALT_PATH |
456 IB_QP_PATH_MIG_STATE |
457 IB_QP_MIN_RNR_TIMER),
458 [MLX] = (IB_QP_CUR_STATE |
459 IB_QP_QKEY),
460 }
461 },
462 [IB_QPS_SQD] = {
463 .trans = MTHCA_TRANS_RTS2SQD,
464 },
465 },
466 [IB_QPS_SQD] = {
467 [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
468 [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
469 [IB_QPS_RTS] = {
470 .trans = MTHCA_TRANS_SQD2RTS,
471 .opt_param = {
472 [UD] = (IB_QP_CUR_STATE |
473 IB_QP_QKEY),
474 [RC] = (IB_QP_CUR_STATE |
475 IB_QP_ALT_PATH |
476 IB_QP_ACCESS_FLAGS |
477 IB_QP_MIN_RNR_TIMER |
478 IB_QP_PATH_MIG_STATE),
479 [MLX] = (IB_QP_CUR_STATE |
480 IB_QP_QKEY),
481 }
482 },
483 [IB_QPS_SQD] = {
484 .trans = MTHCA_TRANS_SQD2SQD,
485 .opt_param = {
486 [UD] = (IB_QP_PKEY_INDEX |
487 IB_QP_QKEY),
488 [RC] = (IB_QP_AV |
489 IB_QP_TIMEOUT |
490 IB_QP_RETRY_CNT |
491 IB_QP_RNR_RETRY |
492 IB_QP_MAX_QP_RD_ATOMIC |
493 IB_QP_MAX_DEST_RD_ATOMIC |
494 IB_QP_CUR_STATE |
495 IB_QP_ALT_PATH |
496 IB_QP_ACCESS_FLAGS |
497 IB_QP_PKEY_INDEX |
498 IB_QP_MIN_RNR_TIMER |
499 IB_QP_PATH_MIG_STATE),
500 [MLX] = (IB_QP_PKEY_INDEX |
501 IB_QP_QKEY),
502 }
503 }
504 },
505 [IB_QPS_SQE] = {
506 [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
507 [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
508 [IB_QPS_RTS] = {
509 .trans = MTHCA_TRANS_SQERR2RTS,
510 .opt_param = {
511 [UD] = (IB_QP_CUR_STATE |
512 IB_QP_QKEY),
513 [RC] = (IB_QP_CUR_STATE |
514 IB_QP_MIN_RNR_TIMER),
515 [MLX] = (IB_QP_CUR_STATE |
516 IB_QP_QKEY),
517 }
518 }
519 },
520 [IB_QPS_ERR] = {
521 [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
522 [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }
523 }
524};
525
526static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr,
527 int attr_mask)
528{
529 if (attr_mask & IB_QP_PKEY_INDEX)
530 sqp->pkey_index = attr->pkey_index;
531 if (attr_mask & IB_QP_QKEY)
532 sqp->qkey = attr->qkey;
533 if (attr_mask & IB_QP_SQ_PSN)
534 sqp->send_psn = attr->sq_psn;
535}
536
537static void init_port(struct mthca_dev *dev, int port)
538{
539 int err;
540 u8 status;
541 struct mthca_init_ib_param param;
542
543 memset(&param, 0, sizeof param);
544
545 param.enable_1x = 1;
546 param.enable_4x = 1;
547 param.vl_cap = dev->limits.vl_cap;
548 param.mtu_cap = dev->limits.mtu_cap;
549 param.gid_cap = dev->limits.gid_table_len;
550 param.pkey_cap = dev->limits.pkey_table_len;
551
552 err = mthca_INIT_IB(dev, &param, port, &status);
553 if (err)
554 mthca_warn(dev, "INIT_IB failed, return code %d.\n", err);
555 if (status)
556 mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
557}
558
559int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
560{
561 struct mthca_dev *dev = to_mdev(ibqp->device);
562 struct mthca_qp *qp = to_mqp(ibqp);
563 enum ib_qp_state cur_state, new_state;
564 void *mailbox = NULL;
565 struct mthca_qp_param *qp_param;
566 struct mthca_qp_context *qp_context;
567 u32 req_param, opt_param;
568 u8 status;
569 int err;
570
571 if (attr_mask & IB_QP_CUR_STATE) {
572 if (attr->cur_qp_state != IB_QPS_RTR &&
573 attr->cur_qp_state != IB_QPS_RTS &&
574 attr->cur_qp_state != IB_QPS_SQD &&
575 attr->cur_qp_state != IB_QPS_SQE)
576 return -EINVAL;
577 else
578 cur_state = attr->cur_qp_state;
579 } else {
580 spin_lock_irq(&qp->sq.lock);
581 spin_lock(&qp->rq.lock);
582 cur_state = qp->state;
583 spin_unlock(&qp->rq.lock);
584 spin_unlock_irq(&qp->sq.lock);
585 }
586
587 if (attr_mask & IB_QP_STATE) {
588 if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR)
589 return -EINVAL;
590 new_state = attr->qp_state;
591 } else
592 new_state = cur_state;
593
594 if (state_table[cur_state][new_state].trans == MTHCA_TRANS_INVALID) {
595 mthca_dbg(dev, "Illegal QP transition "
596 "%d->%d\n", cur_state, new_state);
597 return -EINVAL;
598 }
599
600 req_param = state_table[cur_state][new_state].req_param[qp->transport];
601 opt_param = state_table[cur_state][new_state].opt_param[qp->transport];
602
603 if ((req_param & attr_mask) != req_param) {
604 mthca_dbg(dev, "QP transition "
605 "%d->%d missing req attr 0x%08x\n",
606 cur_state, new_state,
607 req_param & ~attr_mask);
608 return -EINVAL;
609 }
610
611 if (attr_mask & ~(req_param | opt_param | IB_QP_STATE)) {
612 mthca_dbg(dev, "QP transition (transport %d) "
613 "%d->%d has extra attr 0x%08x\n",
614 qp->transport,
615 cur_state, new_state,
616 attr_mask & ~(req_param | opt_param |
617 IB_QP_STATE));
618 return -EINVAL;
619 }
620
621 mailbox = kmalloc(sizeof (*qp_param) + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
622 if (!mailbox)
623 return -ENOMEM;
624 qp_param = MAILBOX_ALIGN(mailbox);
625 qp_context = &qp_param->context;
626 memset(qp_param, 0, sizeof *qp_param);
627
628 qp_context->flags = cpu_to_be32((to_mthca_state(new_state) << 28) |
629 (to_mthca_st(qp->transport) << 16));
630 qp_context->flags |= cpu_to_be32(MTHCA_QP_BIT_DE);
631 if (!(attr_mask & IB_QP_PATH_MIG_STATE))
632 qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
633 else {
634 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PM_STATE);
635 switch (attr->path_mig_state) {
636 case IB_MIG_MIGRATED:
637 qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
638 break;
639 case IB_MIG_REARM:
640 qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_REARM << 11);
641 break;
642 case IB_MIG_ARMED:
643 qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_ARMED << 11);
644 break;
645 }
646 }
647
648 /* leave tavor_sched_queue as 0 */
649
650 if (qp->transport == MLX || qp->transport == UD)
651 qp_context->mtu_msgmax = (IB_MTU_2048 << 5) | 11;
652 else if (attr_mask & IB_QP_PATH_MTU)
653 qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31;
654
655 if (dev->hca_type == ARBEL_NATIVE) {
656 qp_context->rq_size_stride =
657 ((ffs(qp->rq.max) - 1) << 3) | (qp->rq.wqe_shift - 4);
658 qp_context->sq_size_stride =
659 ((ffs(qp->sq.max) - 1) << 3) | (qp->sq.wqe_shift - 4);
660 }
661
662 /* leave arbel_sched_queue as 0 */
663
664 qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
665 qp_context->local_qpn = cpu_to_be32(qp->qpn);
666 if (attr_mask & IB_QP_DEST_QPN) {
667 qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
668 }
669
670 if (qp->transport == MLX)
671 qp_context->pri_path.port_pkey |=
672 cpu_to_be32(to_msqp(qp)->port << 24);
673 else {
674 if (attr_mask & IB_QP_PORT) {
675 qp_context->pri_path.port_pkey |=
676 cpu_to_be32(attr->port_num << 24);
677 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PORT_NUM);
678 }
679 }
680
681 if (attr_mask & IB_QP_PKEY_INDEX) {
682 qp_context->pri_path.port_pkey |=
683 cpu_to_be32(attr->pkey_index);
684 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PKEY_INDEX);
685 }
686
687 if (attr_mask & IB_QP_RNR_RETRY) {
688 qp_context->pri_path.rnr_retry = attr->rnr_retry << 5;
689 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY);
690 }
691
692 if (attr_mask & IB_QP_AV) {
693 qp_context->pri_path.g_mylmc = attr->ah_attr.src_path_bits & 0x7f;
694 qp_context->pri_path.rlid = cpu_to_be16(attr->ah_attr.dlid);
695 qp_context->pri_path.static_rate = (!!attr->ah_attr.static_rate) << 3;
696 if (attr->ah_attr.ah_flags & IB_AH_GRH) {
697 qp_context->pri_path.g_mylmc |= 1 << 7;
698 qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index;
699 qp_context->pri_path.hop_limit = attr->ah_attr.grh.hop_limit;
700 qp_context->pri_path.sl_tclass_flowlabel =
701 cpu_to_be32((attr->ah_attr.sl << 28) |
702 (attr->ah_attr.grh.traffic_class << 20) |
703 (attr->ah_attr.grh.flow_label));
704 memcpy(qp_context->pri_path.rgid,
705 attr->ah_attr.grh.dgid.raw, 16);
706 } else {
707 qp_context->pri_path.sl_tclass_flowlabel =
708 cpu_to_be32(attr->ah_attr.sl << 28);
709 }
710 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
711 }
712
713 if (attr_mask & IB_QP_TIMEOUT) {
714 qp_context->pri_path.ackto = attr->timeout;
715 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);
716 }
717
718 /* XXX alt_path */
719
720 /* leave rdd as 0 */
721 qp_context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pd_num);
722 /* leave wqe_base as 0 (we always create an MR based at 0 for WQs) */
723 qp_context->wqe_lkey = cpu_to_be32(qp->mr.ibmr.lkey);
724 qp_context->params1 = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) |
725 (MTHCA_FLIGHT_LIMIT << 24) |
726 MTHCA_QP_BIT_SRE |
727 MTHCA_QP_BIT_SWE |
728 MTHCA_QP_BIT_SAE);
729 if (qp->sq_policy == IB_SIGNAL_ALL_WR)
730 qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC);
731 if (attr_mask & IB_QP_RETRY_CNT) {
732 qp_context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
733 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT);
734 }
735
736 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
737 qp_context->params1 |= cpu_to_be32(min(attr->max_dest_rd_atomic ?
738 ffs(attr->max_dest_rd_atomic) - 1 : 0,
739 7) << 21);
740 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
741 }
742
743 if (attr_mask & IB_QP_SQ_PSN)
744 qp_context->next_send_psn = cpu_to_be32(attr->sq_psn);
745 qp_context->cqn_snd = cpu_to_be32(to_mcq(ibqp->send_cq)->cqn);
746
747 if (dev->hca_type == ARBEL_NATIVE) {
748 qp_context->snd_wqe_base_l = cpu_to_be32(qp->send_wqe_offset);
749 qp_context->snd_db_index = cpu_to_be32(qp->sq.db_index);
750 }
751
752 if (attr_mask & IB_QP_ACCESS_FLAGS) {
753 /*
754 * Only enable RDMA/atomics if we have responder
755 * resources set to a non-zero value.
756 */
757 if (qp->resp_depth) {
758 qp_context->params2 |=
759 cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE ?
760 MTHCA_QP_BIT_RWE : 0);
761 qp_context->params2 |=
762 cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_READ ?
763 MTHCA_QP_BIT_RRE : 0);
764 qp_context->params2 |=
765 cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC ?
766 MTHCA_QP_BIT_RAE : 0);
767 }
768
769 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
770 MTHCA_QP_OPTPAR_RRE |
771 MTHCA_QP_OPTPAR_RAE);
772
773 qp->atomic_rd_en = attr->qp_access_flags;
774 }
775
776 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
777 u8 rra_max;
778
779 if (qp->resp_depth && !attr->max_rd_atomic) {
780 /*
781 * Lowering our responder resources to zero.
782 * Turn off RDMA/atomics as responder.
783 * (RWE/RRE/RAE in params2 already zero)
784 */
785 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
786 MTHCA_QP_OPTPAR_RRE |
787 MTHCA_QP_OPTPAR_RAE);
788 }
789
790 if (!qp->resp_depth && attr->max_rd_atomic) {
791 /*
792 * Increasing our responder resources from
793 * zero. Turn on RDMA/atomics as appropriate.
794 */
795 qp_context->params2 |=
796 cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_WRITE ?
797 MTHCA_QP_BIT_RWE : 0);
798 qp_context->params2 |=
799 cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_READ ?
800 MTHCA_QP_BIT_RRE : 0);
801 qp_context->params2 |=
802 cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_ATOMIC ?
803 MTHCA_QP_BIT_RAE : 0);
804
805 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
806 MTHCA_QP_OPTPAR_RRE |
807 MTHCA_QP_OPTPAR_RAE);
808 }
809
810 for (rra_max = 0;
811 1 << rra_max < attr->max_rd_atomic &&
812 rra_max < dev->qp_table.rdb_shift;
813 ++rra_max)
814 ; /* nothing */
815
816 qp_context->params2 |= cpu_to_be32(rra_max << 21);
817 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);
818
819 qp->resp_depth = attr->max_rd_atomic;
820 }
821
822 qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);
823
824 if (attr_mask & IB_QP_MIN_RNR_TIMER) {
825 qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
826 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT);
827 }
828 if (attr_mask & IB_QP_RQ_PSN)
829 qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
830
831 qp_context->ra_buff_indx =
832 cpu_to_be32(dev->qp_table.rdb_base +
833 ((qp->qpn & (dev->limits.num_qps - 1)) * MTHCA_RDB_ENTRY_SIZE <<
834 dev->qp_table.rdb_shift));
835
836 qp_context->cqn_rcv = cpu_to_be32(to_mcq(ibqp->recv_cq)->cqn);
837
838 if (dev->hca_type == ARBEL_NATIVE)
839 qp_context->rcv_db_index = cpu_to_be32(qp->rq.db_index);
840
841 if (attr_mask & IB_QP_QKEY) {
842 qp_context->qkey = cpu_to_be32(attr->qkey);
843 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY);
844 }
845
846 err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans,
847 qp->qpn, 0, qp_param, 0, &status);
848 if (status) {
849 mthca_warn(dev, "modify QP %d returned status %02x.\n",
850 state_table[cur_state][new_state].trans, status);
851 err = -EINVAL;
852 }
853
854 if (!err)
855 qp->state = new_state;
856
857 kfree(mailbox);
858
859 if (is_sqp(dev, qp))
860 store_attrs(to_msqp(qp), attr, attr_mask);
861
862 /*
863 * If we are moving QP0 to RTR, bring the IB link up; if we
864 * are moving QP0 to RESET or ERROR, bring the link back down.
865 */
866 if (is_qp0(dev, qp)) {
867 if (cur_state != IB_QPS_RTR &&
868 new_state == IB_QPS_RTR)
869 init_port(dev, to_msqp(qp)->port);
870
871 if (cur_state != IB_QPS_RESET &&
872 cur_state != IB_QPS_ERR &&
873 (new_state == IB_QPS_RESET ||
874 new_state == IB_QPS_ERR))
875 mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status);
876 }
877
878 return err;
879}
880
881/*
882 * Allocate and register buffer for WQEs. qp->rq.max, sq.max,
883 * rq.max_gs and sq.max_gs must all be assigned.
884 * mthca_alloc_wqe_buf will calculate rq.wqe_shift and
885 * sq.wqe_shift (as well as send_wqe_offset, is_direct, and
886 * queue).  A short usage sketch follows this comment.
887 */
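/*
 * [Editorial sketch -- not part of the original file.]  A caller is
 * expected to fill in the queue sizes before the allocation path
 * reaches this function; in this driver that happens in
 * mthca_create_qp() (mthca_provider.c, earlier in this patch), and
 * mthca_alloc_qp_common() below then calls mthca_alloc_wqe_buf():
 *
 *	qp->sq.max    = init_attr->cap.max_send_wr;
 *	qp->rq.max    = init_attr->cap.max_recv_wr;
 *	qp->sq.max_gs = init_attr->cap.max_send_sge;
 *	qp->rq.max_gs = init_attr->cap.max_recv_sge;
 *	...
 *	err = mthca_alloc_wqe_buf(dev, pd, qp);
 */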
888static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
889 struct mthca_pd *pd,
890 struct mthca_qp *qp)
891{
892 int size;
893 int i;
894 int npages, shift;
895 dma_addr_t t;
896 u64 *dma_list = NULL;
897 int err = -ENOMEM;
898
899 size = sizeof (struct mthca_next_seg) +
900 qp->rq.max_gs * sizeof (struct mthca_data_seg);
901
902 for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
903 qp->rq.wqe_shift++)
904 ; /* nothing */
905
906 size = sizeof (struct mthca_next_seg) +
907 qp->sq.max_gs * sizeof (struct mthca_data_seg);
908 switch (qp->transport) {
909 case MLX:
910 size += 2 * sizeof (struct mthca_data_seg);
911 break;
912 case UD:
913 if (dev->hca_type == ARBEL_NATIVE)
914 size += sizeof (struct mthca_arbel_ud_seg);
915 else
916 size += sizeof (struct mthca_tavor_ud_seg);
917 break;
918 default:
919 /* bind seg is as big as atomic + raddr segs */
920 size += sizeof (struct mthca_bind_seg);
921 }
922
923 for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
924 qp->sq.wqe_shift++)
925 ; /* nothing */
926
927 qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
928 1 << qp->sq.wqe_shift);
929 size = PAGE_ALIGN(qp->send_wqe_offset +
930 (qp->sq.max << qp->sq.wqe_shift));
931
932 qp->wrid = kmalloc((qp->rq.max + qp->sq.max) * sizeof (u64),
933 GFP_KERNEL);
934 if (!qp->wrid)
935 goto err_out;
936
937 if (size <= MTHCA_MAX_DIRECT_QP_SIZE) {
938 qp->is_direct = 1;
939 npages = 1;
940 shift = get_order(size) + PAGE_SHIFT;
941
942 if (0)
943 mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n",
944 size, shift);
945
946 qp->queue.direct.buf = pci_alloc_consistent(dev->pdev, size, &t);
947 if (!qp->queue.direct.buf)
948 goto err_out;
949
950 pci_unmap_addr_set(&qp->queue.direct, mapping, t);
951
952 memset(qp->queue.direct.buf, 0, size);
953
954 while (t & ((1 << shift) - 1)) {
955 --shift;
956 npages *= 2;
957 }
958
959 dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
960 if (!dma_list)
961 goto err_out_free;
962
963 for (i = 0; i < npages; ++i)
964 dma_list[i] = t + i * (1 << shift);
965 } else {
966 qp->is_direct = 0;
967 npages = size / PAGE_SIZE;
968 shift = PAGE_SHIFT;
969
970 if (0)
971 mthca_dbg(dev, "Creating indirect QP with %d pages\n", npages);
972
973 dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
974 if (!dma_list)
975 goto err_out;
976
977 qp->queue.page_list = kmalloc(npages *
978 sizeof *qp->queue.page_list,
979 GFP_KERNEL);
980 if (!qp->queue.page_list)
981 goto err_out;
982
983 for (i = 0; i < npages; ++i) {
984 qp->queue.page_list[i].buf =
985 pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t);
986 if (!qp->queue.page_list[i].buf)
987 goto err_out_free;
988
989 memset(qp->queue.page_list[i].buf, 0, PAGE_SIZE);
990
991 pci_unmap_addr_set(&qp->queue.page_list[i], mapping, t);
992 dma_list[i] = t;
993 }
994 }
995
996 err = mthca_mr_alloc_phys(dev, pd->pd_num, dma_list, shift,
997 npages, 0, size,
998 MTHCA_MPT_FLAG_LOCAL_READ,
999 &qp->mr);
1000 if (err)
1001 goto err_out_free;
1002
1003 kfree(dma_list);
1004 return 0;
1005
1006 err_out_free:
1007 if (qp->is_direct) {
1008 pci_free_consistent(dev->pdev, size,
1009 qp->queue.direct.buf,
1010 pci_unmap_addr(&qp->queue.direct, mapping));
1011 } else
1012 for (i = 0; i < npages; ++i) {
1013 if (qp->queue.page_list[i].buf)
1014 pci_free_consistent(dev->pdev, PAGE_SIZE,
1015 qp->queue.page_list[i].buf,
1016 pci_unmap_addr(&qp->queue.page_list[i],
1017 mapping));
1018
1019 }
1020
1021 err_out:
1022 kfree(qp->wrid);
1023 kfree(dma_list);
1024 return err;
1025}
1026
1027static int mthca_alloc_memfree(struct mthca_dev *dev,
1028 struct mthca_qp *qp)
1029{
1030 int ret = 0;
1031
1032 if (dev->hca_type == ARBEL_NATIVE) {
1033 ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
1034 if (ret)
1035 return ret;
1036
1037 ret = mthca_table_get(dev, dev->qp_table.eqp_table, qp->qpn);
1038 if (ret)
1039 goto err_qpc;
1040
1041 qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
1042 qp->qpn, &qp->rq.db);
1043 if (qp->rq.db_index < 0) {
1044 ret = -ENOMEM;
1045 goto err_eqpc;
1046 }
1047
1048 qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
1049 qp->qpn, &qp->sq.db);
1050 if (qp->sq.db_index < 0) {
1051 ret = -ENOMEM;
1052 goto err_rq_db;
1053 }
1054 }
1055
1056 return 0;
1057
1058err_rq_db:
1059 mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
1060
1061err_eqpc:
1062 mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
1063
1064err_qpc:
1065 mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
1066
1067 return ret;
1068}
1069
1070static void mthca_free_memfree(struct mthca_dev *dev,
1071 struct mthca_qp *qp)
1072{
1073 if (dev->hca_type == ARBEL_NATIVE) {
1074 mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
1075 mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
1076 mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
1077 mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
1078 }
1079}
1080
1081static void mthca_wq_init(struct mthca_wq *wq)
1082{
1083 spin_lock_init(&wq->lock);
1084 wq->next_ind = 0;
1085 wq->last_comp = wq->max - 1;
1086 wq->head = 0;
1087 wq->tail = 0;
1088 wq->last = NULL;
1089}
1090
1091static int mthca_alloc_qp_common(struct mthca_dev *dev,
1092 struct mthca_pd *pd,
1093 struct mthca_cq *send_cq,
1094 struct mthca_cq *recv_cq,
1095 enum ib_sig_type send_policy,
1096 struct mthca_qp *qp)
1097{
1098 struct mthca_next_seg *wqe;
1099 int ret;
1100 int i;
1101
1102 atomic_set(&qp->refcount, 1);
1103 qp->state = IB_QPS_RESET;
1104 qp->atomic_rd_en = 0;
1105 qp->resp_depth = 0;
1106 qp->sq_policy = send_policy;
1107 mthca_wq_init(&qp->sq);
1108 mthca_wq_init(&qp->rq);
1109
1110 ret = mthca_alloc_memfree(dev, qp);
1111 if (ret)
1112 return ret;
1113
1114 ret = mthca_alloc_wqe_buf(dev, pd, qp);
1115 if (ret) {
1116 mthca_free_memfree(dev, qp);
1117 return ret;
1118 }
1119
1120 if (dev->hca_type == ARBEL_NATIVE) {
1121 for (i = 0; i < qp->rq.max; ++i) {
1122 wqe = get_recv_wqe(qp, i);
1123 wqe->nda_op = cpu_to_be32(((i + 1) & (qp->rq.max - 1)) <<
1124 qp->rq.wqe_shift);
1125 wqe->ee_nds = cpu_to_be32(1 << (qp->rq.wqe_shift - 4));
1126 }
1127
1128 for (i = 0; i < qp->sq.max; ++i) {
1129 wqe = get_send_wqe(qp, i);
1130 wqe->nda_op = cpu_to_be32((((i + 1) & (qp->sq.max - 1)) <<
1131 qp->sq.wqe_shift) +
1132 qp->send_wqe_offset);
1133 }
1134 }
1135
1136 return 0;
1137}
1138
1139static void mthca_align_qp_size(struct mthca_dev *dev, struct mthca_qp *qp)
1140{
1141 int i;
1142
1143 if (dev->hca_type != ARBEL_NATIVE)
1144 return;
1145
1146 for (i = 0; 1 << i < qp->rq.max; ++i)
1147 ; /* nothing */
1148
1149 qp->rq.max = 1 << i;
1150
1151 for (i = 0; 1 << i < qp->sq.max; ++i)
1152 ; /* nothing */
1153
1154 qp->sq.max = 1 << i;
1155}
1156
1157int mthca_alloc_qp(struct mthca_dev *dev,
1158 struct mthca_pd *pd,
1159 struct mthca_cq *send_cq,
1160 struct mthca_cq *recv_cq,
1161 enum ib_qp_type type,
1162 enum ib_sig_type send_policy,
1163 struct mthca_qp *qp)
1164{
1165 int err;
1166
1167 mthca_align_qp_size(dev, qp);
1168
1169 switch (type) {
1170 case IB_QPT_RC: qp->transport = RC; break;
1171 case IB_QPT_UC: qp->transport = UC; break;
1172 case IB_QPT_UD: qp->transport = UD; break;
1173 default: return -EINVAL;
1174 }
1175
1176 qp->qpn = mthca_alloc(&dev->qp_table.alloc);
1177 if (qp->qpn == -1)
1178 return -ENOMEM;
1179
1180 err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
1181 send_policy, qp);
1182 if (err) {
1183 mthca_free(&dev->qp_table.alloc, qp->qpn);
1184 return err;
1185 }
1186
1187 spin_lock_irq(&dev->qp_table.lock);
1188 mthca_array_set(&dev->qp_table.qp,
1189 qp->qpn & (dev->limits.num_qps - 1), qp);
1190 spin_unlock_irq(&dev->qp_table.lock);
1191
1192 return 0;
1193}
1194
1195int mthca_alloc_sqp(struct mthca_dev *dev,
1196 struct mthca_pd *pd,
1197 struct mthca_cq *send_cq,
1198 struct mthca_cq *recv_cq,
1199 enum ib_sig_type send_policy,
1200 int qpn,
1201 int port,
1202 struct mthca_sqp *sqp)
1203{
1204 int err = 0;
1205 u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
1206
1207 mthca_align_qp_size(dev, &sqp->qp);
1208
1209 sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
1210 sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
1211 &sqp->header_dma, GFP_KERNEL);
1212 if (!sqp->header_buf)
1213 return -ENOMEM;
1214
1215 spin_lock_irq(&dev->qp_table.lock);
1216 if (mthca_array_get(&dev->qp_table.qp, mqpn))
1217 err = -EBUSY;
1218 else
1219 mthca_array_set(&dev->qp_table.qp, mqpn, sqp);
1220 spin_unlock_irq(&dev->qp_table.lock);
1221
1222 if (err)
1223 goto err_out;
1224
1225 sqp->port = port;
1226 sqp->qp.qpn = mqpn;
1227 sqp->qp.transport = MLX;
1228
1229 err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
1230 send_policy, &sqp->qp);
1231 if (err)
1232 goto err_out_free;
1233
1234 atomic_inc(&pd->sqp_count);
1235
1236 return 0;
1237
1238 err_out_free:
1239 /*
1240 * Lock CQs here, so that CQ polling code can do QP lookup
1241 * without taking a lock.
1242 */
1243 spin_lock_irq(&send_cq->lock);
1244 if (send_cq != recv_cq)
1245 spin_lock(&recv_cq->lock);
1246
1247 spin_lock(&dev->qp_table.lock);
1248 mthca_array_clear(&dev->qp_table.qp, mqpn);
1249 spin_unlock(&dev->qp_table.lock);
1250
1251 if (send_cq != recv_cq)
1252 spin_unlock(&recv_cq->lock);
1253 spin_unlock_irq(&send_cq->lock);
1254
1255 err_out:
1256 dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
1257 sqp->header_buf, sqp->header_dma);
1258
1259 return err;
1260}
1261
1262void mthca_free_qp(struct mthca_dev *dev,
1263 struct mthca_qp *qp)
1264{
1265 u8 status;
1266 int size;
1267 int i;
1268 struct mthca_cq *send_cq;
1269 struct mthca_cq *recv_cq;
1270
1271 send_cq = to_mcq(qp->ibqp.send_cq);
1272 recv_cq = to_mcq(qp->ibqp.recv_cq);
1273
1274 /*
1275 * Lock CQs here, so that CQ polling code can do QP lookup
1276 * without taking a lock.
1277 */
1278 spin_lock_irq(&send_cq->lock);
1279 if (send_cq != recv_cq)
1280 spin_lock(&recv_cq->lock);
1281
1282 spin_lock(&dev->qp_table.lock);
1283 mthca_array_clear(&dev->qp_table.qp,
1284 qp->qpn & (dev->limits.num_qps - 1));
1285 spin_unlock(&dev->qp_table.lock);
1286
1287 if (send_cq != recv_cq)
1288 spin_unlock(&recv_cq->lock);
1289 spin_unlock_irq(&send_cq->lock);
1290
1291 atomic_dec(&qp->refcount);
1292 wait_event(qp->wait, !atomic_read(&qp->refcount));
1293
1294 if (qp->state != IB_QPS_RESET)
1295 mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status);
1296
1297 mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn);
1298 if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
1299 mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn);
1300
1301 mthca_free_mr(dev, &qp->mr);
1302
1303 size = PAGE_ALIGN(qp->send_wqe_offset +
1304 (qp->sq.max << qp->sq.wqe_shift));
1305
1306 if (qp->is_direct) {
1307 pci_free_consistent(dev->pdev, size,
1308 qp->queue.direct.buf,
1309 pci_unmap_addr(&qp->queue.direct, mapping));
1310 } else {
1311 for (i = 0; i < size / PAGE_SIZE; ++i) {
1312 pci_free_consistent(dev->pdev, PAGE_SIZE,
1313 qp->queue.page_list[i].buf,
1314 pci_unmap_addr(&qp->queue.page_list[i],
1315 mapping));
1316 }
1317 }
1318
1319 kfree(qp->wrid);
1320
1321 mthca_free_memfree(dev, qp);
1322
1323 if (is_sqp(dev, qp)) {
1324 atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
1325 dma_free_coherent(&dev->pdev->dev,
1326 to_msqp(qp)->header_buf_size,
1327 to_msqp(qp)->header_buf,
1328 to_msqp(qp)->header_dma);
1329 } else
1330 mthca_free(&dev->qp_table.alloc, qp->qpn);
1331}
1332
1333/* Create UD header for an MLX send and build a data segment for it */
1334static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
1335 int ind, struct ib_send_wr *wr,
1336 struct mthca_mlx_seg *mlx,
1337 struct mthca_data_seg *data)
1338{
1339 int header_size;
1340 int err;
1341
1342 ib_ud_header_init(256, /* assume a MAD */
1343 sqp->ud_header.grh_present,
1344 &sqp->ud_header);
1345
1346 err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header);
1347 if (err)
1348 return err;
1349 mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
1350 mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
1351 (sqp->ud_header.lrh.destination_lid == 0xffff ?
1352 MTHCA_MLX_SLR : 0) |
1353 (sqp->ud_header.lrh.service_level << 8));
1354 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1355 mlx->vcrc = 0;
1356
1357 switch (wr->opcode) {
1358 case IB_WR_SEND:
1359 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
1360 sqp->ud_header.immediate_present = 0;
1361 break;
1362 case IB_WR_SEND_WITH_IMM:
1363 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
1364 sqp->ud_header.immediate_present = 1;
1365 sqp->ud_header.immediate_data = wr->imm_data;
1366 break;
1367 default:
1368 return -EINVAL;
1369 }
1370
1371 sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
1372 if (sqp->ud_header.lrh.destination_lid == 0xffff)
1373 sqp->ud_header.lrh.source_lid = 0xffff;
1374 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
1375 if (!sqp->qp.ibqp.qp_num)
1376 ib_get_cached_pkey(&dev->ib_dev, sqp->port,
1377 sqp->pkey_index,
1378 &sqp->ud_header.bth.pkey);
1379 else
1380 ib_get_cached_pkey(&dev->ib_dev, sqp->port,
1381 wr->wr.ud.pkey_index,
1382 &sqp->ud_header.bth.pkey);
1383 cpu_to_be16s(&sqp->ud_header.bth.pkey);
1384 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
1385 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
1386 sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
1387 sqp->qkey : wr->wr.ud.remote_qkey);
1388 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
1389
1390 header_size = ib_ud_header_pack(&sqp->ud_header,
1391 sqp->header_buf +
1392 ind * MTHCA_UD_HEADER_SIZE);
1393
1394 data->byte_count = cpu_to_be32(header_size);
1395 data->lkey = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey);
1396 data->addr = cpu_to_be64(sqp->header_dma +
1397 ind * MTHCA_UD_HEADER_SIZE);
1398
1399 return 0;
1400}
1401
1402static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
1403 struct ib_cq *ib_cq)
1404{
1405 unsigned cur;
1406 struct mthca_cq *cq;
1407
1408 cur = wq->head - wq->tail;
1409 if (likely(cur + nreq < wq->max))
1410 return 0;
1411
1412 cq = to_mcq(ib_cq);
1413 spin_lock(&cq->lock);
1414 cur = wq->head - wq->tail;
1415 spin_unlock(&cq->lock);
1416
1417 return cur + nreq >= wq->max;
1418}
1419
1420int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1421 struct ib_send_wr **bad_wr)
1422{
1423 struct mthca_dev *dev = to_mdev(ibqp->device);
1424 struct mthca_qp *qp = to_mqp(ibqp);
1425 void *wqe;
1426 void *prev_wqe;
1427 unsigned long flags;
1428 int err = 0;
1429 int nreq;
1430 int i;
1431 int size;
1432 int size0 = 0;
1433 u32 f0 = 0;
1434 int ind;
1435 u8 op0 = 0;
1436
1437 spin_lock_irqsave(&qp->sq.lock, flags);
1438
1439 /* XXX check that state is OK to post send */
1440
1441 ind = qp->sq.next_ind;
1442
1443 for (nreq = 0; wr; ++nreq, wr = wr->next) {
1444 if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
1445 mthca_err(dev, "SQ %06x full (%u head, %u tail,"
1446 " %d max, %d nreq)\n", qp->qpn,
1447 qp->sq.head, qp->sq.tail,
1448 qp->sq.max, nreq);
1449 err = -ENOMEM;
1450 *bad_wr = wr;
1451 goto out;
1452 }
1453
1454 wqe = get_send_wqe(qp, ind);
1455 prev_wqe = qp->sq.last;
1456 qp->sq.last = wqe;
1457
1458 ((struct mthca_next_seg *) wqe)->nda_op = 0;
1459 ((struct mthca_next_seg *) wqe)->ee_nds = 0;
1460 ((struct mthca_next_seg *) wqe)->flags =
1461 ((wr->send_flags & IB_SEND_SIGNALED) ?
1462 cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
1463 ((wr->send_flags & IB_SEND_SOLICITED) ?
1464 cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0) |
1465 cpu_to_be32(1);
1466 if (wr->opcode == IB_WR_SEND_WITH_IMM ||
1467 wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
1468			((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
1469
1470 wqe += sizeof (struct mthca_next_seg);
1471 size = sizeof (struct mthca_next_seg) / 16;
1472
1473 switch (qp->transport) {
1474 case RC:
1475 switch (wr->opcode) {
1476 case IB_WR_ATOMIC_CMP_AND_SWP:
1477 case IB_WR_ATOMIC_FETCH_AND_ADD:
1478 ((struct mthca_raddr_seg *) wqe)->raddr =
1479 cpu_to_be64(wr->wr.atomic.remote_addr);
1480 ((struct mthca_raddr_seg *) wqe)->rkey =
1481 cpu_to_be32(wr->wr.atomic.rkey);
1482 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1483
1484 wqe += sizeof (struct mthca_raddr_seg);
1485
1486 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
1487 ((struct mthca_atomic_seg *) wqe)->swap_add =
1488 cpu_to_be64(wr->wr.atomic.swap);
1489 ((struct mthca_atomic_seg *) wqe)->compare =
1490 cpu_to_be64(wr->wr.atomic.compare_add);
1491 } else {
1492 ((struct mthca_atomic_seg *) wqe)->swap_add =
1493 cpu_to_be64(wr->wr.atomic.compare_add);
1494 ((struct mthca_atomic_seg *) wqe)->compare = 0;
1495 }
1496
1497 wqe += sizeof (struct mthca_atomic_seg);
1498 size += sizeof (struct mthca_raddr_seg) / 16 +
1499				sizeof (struct mthca_atomic_seg) / 16;
1500 break;
1501
1502 case IB_WR_RDMA_WRITE:
1503 case IB_WR_RDMA_WRITE_WITH_IMM:
1504 case IB_WR_RDMA_READ:
1505 ((struct mthca_raddr_seg *) wqe)->raddr =
1506 cpu_to_be64(wr->wr.rdma.remote_addr);
1507 ((struct mthca_raddr_seg *) wqe)->rkey =
1508 cpu_to_be32(wr->wr.rdma.rkey);
1509 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1510 wqe += sizeof (struct mthca_raddr_seg);
1511 size += sizeof (struct mthca_raddr_seg) / 16;
1512 break;
1513
1514 default:
1515 /* No extra segments required for sends */
1516 break;
1517 }
1518
1519 break;
1520
1521 case UD:
1522 ((struct mthca_tavor_ud_seg *) wqe)->lkey =
1523 cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
1524 ((struct mthca_tavor_ud_seg *) wqe)->av_addr =
1525 cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
1526 ((struct mthca_tavor_ud_seg *) wqe)->dqpn =
1527 cpu_to_be32(wr->wr.ud.remote_qpn);
1528 ((struct mthca_tavor_ud_seg *) wqe)->qkey =
1529 cpu_to_be32(wr->wr.ud.remote_qkey);
1530
1531 wqe += sizeof (struct mthca_tavor_ud_seg);
1532 size += sizeof (struct mthca_tavor_ud_seg) / 16;
1533 break;
1534
1535 case MLX:
1536 err = build_mlx_header(dev, to_msqp(qp), ind, wr,
1537 wqe - sizeof (struct mthca_next_seg),
1538 wqe);
1539 if (err) {
1540 *bad_wr = wr;
1541 goto out;
1542 }
1543 wqe += sizeof (struct mthca_data_seg);
1544 size += sizeof (struct mthca_data_seg) / 16;
1545 break;
1546 }
1547
1548 if (wr->num_sge > qp->sq.max_gs) {
1549 mthca_err(dev, "too many gathers\n");
1550 err = -EINVAL;
1551 *bad_wr = wr;
1552 goto out;
1553 }
1554
1555 for (i = 0; i < wr->num_sge; ++i) {
1556 ((struct mthca_data_seg *) wqe)->byte_count =
1557 cpu_to_be32(wr->sg_list[i].length);
1558 ((struct mthca_data_seg *) wqe)->lkey =
1559 cpu_to_be32(wr->sg_list[i].lkey);
1560 ((struct mthca_data_seg *) wqe)->addr =
1561 cpu_to_be64(wr->sg_list[i].addr);
1562 wqe += sizeof (struct mthca_data_seg);
1563 size += sizeof (struct mthca_data_seg) / 16;
1564 }
1565
1566 /* Add one more inline data segment for ICRC */
1567 if (qp->transport == MLX) {
1568 ((struct mthca_data_seg *) wqe)->byte_count =
1569 cpu_to_be32((1 << 31) | 4);
1570 ((u32 *) wqe)[1] = 0;
1571 wqe += sizeof (struct mthca_data_seg);
1572 size += sizeof (struct mthca_data_seg) / 16;
1573 }
1574
1575 qp->wrid[ind + qp->rq.max] = wr->wr_id;
1576
1577 if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
1578 mthca_err(dev, "opcode invalid\n");
1579 err = -EINVAL;
1580 *bad_wr = wr;
1581 goto out;
1582 }
1583
1584 if (prev_wqe) {
1585 ((struct mthca_next_seg *) prev_wqe)->nda_op =
1586 cpu_to_be32(((ind << qp->sq.wqe_shift) +
1587 qp->send_wqe_offset) |
1588 mthca_opcode[wr->opcode]);
1589 wmb();
1590 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
1591 cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size);
1592 }
1593
1594 if (!size0) {
1595 size0 = size;
1596 op0 = mthca_opcode[wr->opcode];
1597 }
1598
1599 ++ind;
1600 if (unlikely(ind >= qp->sq.max))
1601 ind -= qp->sq.max;
1602 }
1603
1604out:
1605 if (likely(nreq)) {
1606 u32 doorbell[2];
1607
1608 doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) +
1609 qp->send_wqe_offset) | f0 | op0);
1610 doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
1611
1612 wmb();
1613
1614 mthca_write64(doorbell,
1615 dev->kar + MTHCA_SEND_DOORBELL,
1616 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1617 }
1618
1619 qp->sq.next_ind = ind;
1620 qp->sq.head += nreq;
1621
1622 spin_unlock_irqrestore(&qp->sq.lock, flags);
1623 return err;
1624}
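For context, the work requests consumed here are the generic struct ib_send_wr / struct ib_sge objects whose fields this function dereferences (wr_id, sg_list, opcode, send_flags, wr.ud.*). The sketch below is illustrative only: the QP, address handle, DMA address, lkey, remote QPN and QKey are assumed to come from the caller's own setup, and the request is routed through the generic ib_post_send() verbs entry point rather than by calling the driver routine directly.

#include <ib_verbs.h>

/* Minimal sketch: post one signaled UD send with a single gather entry. */
static int post_one_ud_send(struct ib_qp *qp, struct ib_ah *ah,
			    u64 dma_addr, u32 length, u32 lkey,
			    u32 remote_qpn, u32 remote_qkey, u64 wr_id)
{
	struct ib_sge sge = {
		.addr   = dma_addr,		/* DMA address of the payload buffer */
		.length = length,
		.lkey   = lkey,			/* lkey of an MR covering the buffer */
	};
	struct ib_send_wr wr = {
		.wr_id      = wr_id,		/* echoed back in the completion */
		.sg_list    = &sge,
		.num_sge    = 1,
		.opcode     = IB_WR_SEND,
		.send_flags = IB_SEND_SIGNALED,
	};
	struct ib_send_wr *bad_wr;

	wr.wr.ud.ah          = ah;
	wr.wr.ud.remote_qpn  = remote_qpn;
	wr.wr.ud.remote_qkey = remote_qkey;

	/* Generic verbs entry point; on mthca this ends up in the
	 * mthca_*_post_send() routines in this file. */
	return ib_post_send(qp, &wr, &bad_wr);
}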
1625
1626int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1627 struct ib_recv_wr **bad_wr)
1628{
1629 struct mthca_dev *dev = to_mdev(ibqp->device);
1630 struct mthca_qp *qp = to_mqp(ibqp);
1631 unsigned long flags;
1632 int err = 0;
1633 int nreq;
1634 int i;
1635 int size;
1636 int size0 = 0;
1637 int ind;
1638 void *wqe;
1639 void *prev_wqe;
1640
1641 spin_lock_irqsave(&qp->rq.lock, flags);
1642
1643 /* XXX check that state is OK to post receive */
1644
1645 ind = qp->rq.next_ind;
1646
1647 for (nreq = 0; wr; ++nreq, wr = wr->next) {
1648 if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
1649 mthca_err(dev, "RQ %06x full (%u head, %u tail,"
1650 " %d max, %d nreq)\n", qp->qpn,
1651 qp->rq.head, qp->rq.tail,
1652 qp->rq.max, nreq);
1653 err = -ENOMEM;
1654 *bad_wr = wr;
1655 goto out;
1656 }
1657
1658 wqe = get_recv_wqe(qp, ind);
1659 prev_wqe = qp->rq.last;
1660 qp->rq.last = wqe;
1661
1662 ((struct mthca_next_seg *) wqe)->nda_op = 0;
1663 ((struct mthca_next_seg *) wqe)->ee_nds =
1664 cpu_to_be32(MTHCA_NEXT_DBD);
1665 ((struct mthca_next_seg *) wqe)->flags = 0;
1666
1667 wqe += sizeof (struct mthca_next_seg);
1668 size = sizeof (struct mthca_next_seg) / 16;
1669
1670 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
1671 err = -EINVAL;
1672 *bad_wr = wr;
1673 goto out;
1674 }
1675
1676 for (i = 0; i < wr->num_sge; ++i) {
1677 ((struct mthca_data_seg *) wqe)->byte_count =
1678 cpu_to_be32(wr->sg_list[i].length);
1679 ((struct mthca_data_seg *) wqe)->lkey =
1680 cpu_to_be32(wr->sg_list[i].lkey);
1681 ((struct mthca_data_seg *) wqe)->addr =
1682 cpu_to_be64(wr->sg_list[i].addr);
1683 wqe += sizeof (struct mthca_data_seg);
1684 size += sizeof (struct mthca_data_seg) / 16;
1685 }
1686
1687 qp->wrid[ind] = wr->wr_id;
1688
1689 if (likely(prev_wqe)) {
1690 ((struct mthca_next_seg *) prev_wqe)->nda_op =
1691 cpu_to_be32((ind << qp->rq.wqe_shift) | 1);
1692 wmb();
1693 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
1694 cpu_to_be32(MTHCA_NEXT_DBD | size);
1695 }
1696
1697 if (!size0)
1698 size0 = size;
1699
1700 ++ind;
1701 if (unlikely(ind >= qp->rq.max))
1702 ind -= qp->rq.max;
1703 }
1704
1705out:
1706 if (likely(nreq)) {
1707 u32 doorbell[2];
1708
1709 doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
1710 doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq);
1711
1712 wmb();
1713
1714 mthca_write64(doorbell,
1715 dev->kar + MTHCA_RECEIVE_DOORBELL,
1716 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1717 }
1718
1719 qp->rq.next_ind = ind;
1720 qp->rq.head += nreq;
1721
1722 spin_unlock_irqrestore(&qp->rq.lock, flags);
1723 return err;
1724}
1725
1726int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1727 struct ib_send_wr **bad_wr)
1728{
1729 struct mthca_dev *dev = to_mdev(ibqp->device);
1730 struct mthca_qp *qp = to_mqp(ibqp);
1731 void *wqe;
1732 void *prev_wqe;
1733 unsigned long flags;
1734 int err = 0;
1735 int nreq;
1736 int i;
1737 int size;
1738 int size0 = 0;
1739 u32 f0 = 0;
1740 int ind;
1741 u8 op0 = 0;
1742
1743 spin_lock_irqsave(&qp->sq.lock, flags);
1744
1745 /* XXX check that state is OK to post send */
1746
1747 ind = qp->sq.head & (qp->sq.max - 1);
1748
1749 for (nreq = 0; wr; ++nreq, wr = wr->next) {
1750 if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
1751 mthca_err(dev, "SQ %06x full (%u head, %u tail,"
1752 " %d max, %d nreq)\n", qp->qpn,
1753 qp->sq.head, qp->sq.tail,
1754 qp->sq.max, nreq);
1755 err = -ENOMEM;
1756 *bad_wr = wr;
1757 goto out;
1758 }
1759
1760 wqe = get_send_wqe(qp, ind);
1761 prev_wqe = qp->sq.last;
1762 qp->sq.last = wqe;
1763
1764 ((struct mthca_next_seg *) wqe)->flags =
1765 ((wr->send_flags & IB_SEND_SIGNALED) ?
1766 cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
1767 ((wr->send_flags & IB_SEND_SOLICITED) ?
1768 cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0) |
1769 cpu_to_be32(1);
1770 if (wr->opcode == IB_WR_SEND_WITH_IMM ||
1771 wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
1772			((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
1773
1774 wqe += sizeof (struct mthca_next_seg);
1775 size = sizeof (struct mthca_next_seg) / 16;
1776
1777 switch (qp->transport) {
1778 case UD:
1779 memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
1780 to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
1781 ((struct mthca_arbel_ud_seg *) wqe)->dqpn =
1782 cpu_to_be32(wr->wr.ud.remote_qpn);
1783 ((struct mthca_arbel_ud_seg *) wqe)->qkey =
1784 cpu_to_be32(wr->wr.ud.remote_qkey);
1785
1786 wqe += sizeof (struct mthca_arbel_ud_seg);
1787 size += sizeof (struct mthca_arbel_ud_seg) / 16;
1788 break;
1789
1790 case MLX:
1791 err = build_mlx_header(dev, to_msqp(qp), ind, wr,
1792 wqe - sizeof (struct mthca_next_seg),
1793 wqe);
1794 if (err) {
1795 *bad_wr = wr;
1796 goto out;
1797 }
1798 wqe += sizeof (struct mthca_data_seg);
1799 size += sizeof (struct mthca_data_seg) / 16;
1800 break;
1801 }
1802
1803 if (wr->num_sge > qp->sq.max_gs) {
1804 mthca_err(dev, "too many gathers\n");
1805 err = -EINVAL;
1806 *bad_wr = wr;
1807 goto out;
1808 }
1809
1810 for (i = 0; i < wr->num_sge; ++i) {
1811 ((struct mthca_data_seg *) wqe)->byte_count =
1812 cpu_to_be32(wr->sg_list[i].length);
1813 ((struct mthca_data_seg *) wqe)->lkey =
1814 cpu_to_be32(wr->sg_list[i].lkey);
1815 ((struct mthca_data_seg *) wqe)->addr =
1816 cpu_to_be64(wr->sg_list[i].addr);
1817 wqe += sizeof (struct mthca_data_seg);
1818 size += sizeof (struct mthca_data_seg) / 16;
1819 }
1820
1821 /* Add one more inline data segment for ICRC */
1822 if (qp->transport == MLX) {
1823 ((struct mthca_data_seg *) wqe)->byte_count =
1824 cpu_to_be32((1 << 31) | 4);
1825 ((u32 *) wqe)[1] = 0;
1826 wqe += sizeof (struct mthca_data_seg);
1827 size += sizeof (struct mthca_data_seg) / 16;
1828 }
1829
1830 qp->wrid[ind + qp->rq.max] = wr->wr_id;
1831
1832 if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
1833 mthca_err(dev, "opcode invalid\n");
1834 err = -EINVAL;
1835 *bad_wr = wr;
1836 goto out;
1837 }
1838
1839 if (likely(prev_wqe)) {
1840 ((struct mthca_next_seg *) prev_wqe)->nda_op =
1841 cpu_to_be32(((ind << qp->sq.wqe_shift) +
1842 qp->send_wqe_offset) |
1843 mthca_opcode[wr->opcode]);
1844 wmb();
1845 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
1846 cpu_to_be32(MTHCA_NEXT_DBD | size);
1847 }
1848
1849 if (!size0) {
1850 size0 = size;
1851 op0 = mthca_opcode[wr->opcode];
1852 }
1853
1854 ++ind;
1855 if (unlikely(ind >= qp->sq.max))
1856 ind -= qp->sq.max;
1857 }
1858
1859out:
1860 if (likely(nreq)) {
1861 u32 doorbell[2];
1862
1863 doorbell[0] = cpu_to_be32((nreq << 24) |
1864 ((qp->sq.head & 0xffff) << 8) |
1865 f0 | op0);
1866 doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
1867
1868 qp->sq.head += nreq;
1869
1870 /*
1871 * Make sure that descriptors are written before
1872 * doorbell record.
1873 */
1874 wmb();
1875 *qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);
1876
1877 /*
1878 * Make sure doorbell record is written before we
1879 * write MMIO send doorbell.
1880 */
1881 wmb();
1882 mthca_write64(doorbell,
1883 dev->kar + MTHCA_SEND_DOORBELL,
1884 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1885 }
1886
1887 spin_unlock_irqrestore(&qp->sq.lock, flags);
1888 return err;
1889}
1890
1891int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1892 struct ib_recv_wr **bad_wr)
1893{
1894 struct mthca_dev *dev = to_mdev(ibqp->device);
1895 struct mthca_qp *qp = to_mqp(ibqp);
1896 unsigned long flags;
1897 int err = 0;
1898 int nreq;
1899 int ind;
1900 int i;
1901 void *wqe;
1902
1903 spin_lock_irqsave(&qp->rq.lock, flags);
1904
1905 /* XXX check that state is OK to post receive */
1906
1907 ind = qp->rq.head & (qp->rq.max - 1);
1908
1909 for (nreq = 0; wr; ++nreq, wr = wr->next) {
1910 if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
1911 mthca_err(dev, "RQ %06x full (%u head, %u tail,"
1912 " %d max, %d nreq)\n", qp->qpn,
1913 qp->rq.head, qp->rq.tail,
1914 qp->rq.max, nreq);
1915 err = -ENOMEM;
1916 *bad_wr = wr;
1917 goto out;
1918 }
1919
1920 wqe = get_recv_wqe(qp, ind);
1921
1922 ((struct mthca_next_seg *) wqe)->flags = 0;
1923
1924 wqe += sizeof (struct mthca_next_seg);
1925
1926 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
1927 err = -EINVAL;
1928 *bad_wr = wr;
1929 goto out;
1930 }
1931
1932 for (i = 0; i < wr->num_sge; ++i) {
1933 ((struct mthca_data_seg *) wqe)->byte_count =
1934 cpu_to_be32(wr->sg_list[i].length);
1935 ((struct mthca_data_seg *) wqe)->lkey =
1936 cpu_to_be32(wr->sg_list[i].lkey);
1937 ((struct mthca_data_seg *) wqe)->addr =
1938 cpu_to_be64(wr->sg_list[i].addr);
1939 wqe += sizeof (struct mthca_data_seg);
1940 }
1941
1942 if (i < qp->rq.max_gs) {
1943 ((struct mthca_data_seg *) wqe)->byte_count = 0;
1944 ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(0x100);
1945 ((struct mthca_data_seg *) wqe)->addr = 0;
1946 }
1947
1948 qp->wrid[ind] = wr->wr_id;
1949
1950 ++ind;
1951 if (unlikely(ind >= qp->rq.max))
1952 ind -= qp->rq.max;
1953 }
1954out:
1955 if (likely(nreq)) {
1956 qp->rq.head += nreq;
1957
1958 /*
1959 * Make sure that descriptors are written before
1960 * doorbell record.
1961 */
1962 wmb();
1963 *qp->rq.db = cpu_to_be32(qp->rq.head & 0xffff);
1964 }
1965
1966 spin_unlock_irqrestore(&qp->rq.lock, flags);
1967 return err;
1968}
1969
1970int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
1971 int index, int *dbd, u32 *new_wqe)
1972{
1973 struct mthca_next_seg *next;
1974
1975 if (is_send)
1976 next = get_send_wqe(qp, index);
1977 else
1978 next = get_recv_wqe(qp, index);
1979
1980 if (dev->hca_type == ARBEL_NATIVE)
1981 *dbd = 1;
1982 else
1983 *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD));
1984 if (next->ee_nds & cpu_to_be32(0x3f))
1985 *new_wqe = (next->nda_op & cpu_to_be32(~0x3f)) |
1986 (next->ee_nds & cpu_to_be32(0x3f));
1987 else
1988 *new_wqe = 0;
1989
1990 return 0;
1991}
1992
1993int __devinit mthca_init_qp_table(struct mthca_dev *dev)
1994{
1995 int err;
1996 u8 status;
1997 int i;
1998
1999 spin_lock_init(&dev->qp_table.lock);
2000
2001 /*
2002 * We reserve 2 extra QPs per port for the special QPs. The
2003	 * QP number of the special QP for port 1 has to be even, so round up.
2004 */
2005 dev->qp_table.sqp_start = (dev->limits.reserved_qps + 1) & ~1UL;
2006 err = mthca_alloc_init(&dev->qp_table.alloc,
2007 dev->limits.num_qps,
2008 (1 << 24) - 1,
2009 dev->qp_table.sqp_start +
2010 MTHCA_MAX_PORTS * 2);
2011 if (err)
2012 return err;
2013
2014 err = mthca_array_init(&dev->qp_table.qp,
2015 dev->limits.num_qps);
2016 if (err) {
2017 mthca_alloc_cleanup(&dev->qp_table.alloc);
2018 return err;
2019 }
2020
2021 for (i = 0; i < 2; ++i) {
2022 err = mthca_CONF_SPECIAL_QP(dev, i ? IB_QPT_GSI : IB_QPT_SMI,
2023 dev->qp_table.sqp_start + i * 2,
2024 &status);
2025 if (err)
2026 goto err_out;
2027 if (status) {
2028 mthca_warn(dev, "CONF_SPECIAL_QP returned "
2029 "status %02x, aborting.\n",
2030 status);
2031 err = -EINVAL;
2032 goto err_out;
2033 }
2034 }
2035 return 0;
2036
2037 err_out:
2038 for (i = 0; i < 2; ++i)
2039 mthca_CONF_SPECIAL_QP(dev, i, 0, &status);
2040
2041 mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
2042 mthca_alloc_cleanup(&dev->qp_table.alloc);
2043
2044 return err;
2045}
2046
2047void __devexit mthca_cleanup_qp_table(struct mthca_dev *dev)
2048{
2049 int i;
2050 u8 status;
2051
2052 for (i = 0; i < 2; ++i)
2053 mthca_CONF_SPECIAL_QP(dev, i, 0, &status);
2054
2055 mthca_alloc_cleanup(&dev->qp_table.alloc);
2056}
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
new file mode 100644
index 000000000000..ce3fff7d02b7
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -0,0 +1,232 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_reset.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <linux/config.h>
36#include <linux/init.h>
37#include <linux/errno.h>
38#include <linux/pci.h>
39#include <linux/delay.h>
40
41#include "mthca_dev.h"
42#include "mthca_cmd.h"
43
44int mthca_reset(struct mthca_dev *mdev)
45{
46 int i;
47 int err = 0;
48 u32 *hca_header = NULL;
49 u32 *bridge_header = NULL;
50 struct pci_dev *bridge = NULL;
51
52#define MTHCA_RESET_OFFSET 0xf0010
53#define MTHCA_RESET_VALUE swab32(1)
54
55 /*
56 * Reset the chip. This is somewhat ugly because we have to
57 * save off the PCI header before reset and then restore it
58 * after the chip reboots. We skip config space offsets 22
59 * and 23 since those have a special meaning.
60 *
61 * To make matters worse, for Tavor (PCI-X HCA) we have to
62 * find the associated bridge device and save off its PCI
63 * header as well.
64 */
65
66 if (mdev->hca_type == TAVOR) {
67 /* Look for the bridge -- its device ID will be 2 more
68		   than the HCA's device ID. */
69 while ((bridge = pci_get_device(mdev->pdev->vendor,
70 mdev->pdev->device + 2,
71 bridge)) != NULL) {
72 if (bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
73 bridge->subordinate == mdev->pdev->bus) {
74 mthca_dbg(mdev, "Found bridge: %s (%s)\n",
75 pci_pretty_name(bridge), pci_name(bridge));
76 break;
77 }
78 }
79
80 if (!bridge) {
81 /*
82 * Didn't find a bridge for a Tavor device --
83 * assume we're in no-bridge mode and hope for
84 * the best.
85 */
86 mthca_warn(mdev, "No bridge found for %s (%s)\n",
87 pci_pretty_name(mdev->pdev), pci_name(mdev->pdev));
88 }
89
90 }
91
92 /* For Arbel do we need to save off the full 4K PCI Express header?? */
93 hca_header = kmalloc(256, GFP_KERNEL);
94 if (!hca_header) {
95 err = -ENOMEM;
96 mthca_err(mdev, "Couldn't allocate memory to save HCA "
97 "PCI header, aborting.\n");
98 goto out;
99 }
100
101 for (i = 0; i < 64; ++i) {
102 if (i == 22 || i == 23)
103 continue;
104 if (pci_read_config_dword(mdev->pdev, i * 4, hca_header + i)) {
105 err = -ENODEV;
106 mthca_err(mdev, "Couldn't save HCA "
107 "PCI header, aborting.\n");
108 goto out;
109 }
110 }
111
112 if (bridge) {
113 bridge_header = kmalloc(256, GFP_KERNEL);
114 if (!bridge_header) {
115 err = -ENOMEM;
116 mthca_err(mdev, "Couldn't allocate memory to save HCA "
117 "bridge PCI header, aborting.\n");
118 goto out;
119 }
120
121 for (i = 0; i < 64; ++i) {
122 if (i == 22 || i == 23)
123 continue;
124 if (pci_read_config_dword(bridge, i * 4, bridge_header + i)) {
125 err = -ENODEV;
126 mthca_err(mdev, "Couldn't save HCA bridge "
127 "PCI header, aborting.\n");
128 goto out;
129 }
130 }
131 }
132
133 /* actually hit reset */
134 {
135 void __iomem *reset = ioremap(pci_resource_start(mdev->pdev, 0) +
136 MTHCA_RESET_OFFSET, 4);
137
138 if (!reset) {
139 err = -ENOMEM;
140 mthca_err(mdev, "Couldn't map HCA reset register, "
141 "aborting.\n");
142 goto out;
143 }
144
145 writel(MTHCA_RESET_VALUE, reset);
146 iounmap(reset);
147 }
148
149 /* Docs say to wait one second before accessing device */
150 msleep(1000);
151
152 /* Now wait for PCI device to start responding again */
153 {
154 u32 v;
155 int c = 0;
156
157 for (c = 0; c < 100; ++c) {
158 if (pci_read_config_dword(bridge ? bridge : mdev->pdev, 0, &v)) {
159 err = -ENODEV;
160 mthca_err(mdev, "Couldn't access HCA after reset, "
161 "aborting.\n");
162 goto out;
163 }
164
165 if (v != 0xffffffff)
166 goto good;
167
168 msleep(100);
169 }
170
171 err = -ENODEV;
172 mthca_err(mdev, "PCI device did not come back after reset, "
173 "aborting.\n");
174 goto out;
175 }
176
177good:
178 /* Now restore the PCI headers */
179 if (bridge) {
180 /*
181 * Bridge control register is at 0x3e, so we'll
182 * naturally restore it last in this loop.
183 */
184 for (i = 0; i < 16; ++i) {
185 if (i * 4 == PCI_COMMAND)
186 continue;
187
188 if (pci_write_config_dword(bridge, i * 4, bridge_header[i])) {
189 err = -ENODEV;
190 mthca_err(mdev, "Couldn't restore HCA bridge reg %x, "
191 "aborting.\n", i);
192 goto out;
193 }
194 }
195
196 if (pci_write_config_dword(bridge, PCI_COMMAND,
197 bridge_header[PCI_COMMAND / 4])) {
198 err = -ENODEV;
199 mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, "
200 "aborting.\n");
201 goto out;
202 }
203 }
204
205 for (i = 0; i < 16; ++i) {
206 if (i * 4 == PCI_COMMAND)
207 continue;
208
209 if (pci_write_config_dword(mdev->pdev, i * 4, hca_header[i])) {
210 err = -ENODEV;
211 mthca_err(mdev, "Couldn't restore HCA reg %x, "
212 "aborting.\n", i);
213 goto out;
214 }
215 }
216
217 if (pci_write_config_dword(mdev->pdev, PCI_COMMAND,
218 hca_header[PCI_COMMAND / 4])) {
219 err = -ENODEV;
220 mthca_err(mdev, "Couldn't restore HCA COMMAND, "
221 "aborting.\n");
222 goto out;
223 }
224
225out:
226 if (bridge)
227 pci_dev_put(bridge);
228 kfree(bridge_header);
229 kfree(hca_header);
230
231 return err;
232}
diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c
new file mode 100644
index 000000000000..1c8791ded6ff
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_uar.c
@@ -0,0 +1,78 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id$
33 */
34
35#include "mthca_dev.h"
36#include "mthca_memfree.h"
37
38int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar)
39{
40 uar->index = mthca_alloc(&dev->uar_table.alloc);
41 if (uar->index == -1)
42 return -ENOMEM;
43
44 uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index;
45
46 return 0;
47}
48
49void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar)
50{
51 mthca_free(&dev->uar_table.alloc, uar->index);
52}
53
54int mthca_init_uar_table(struct mthca_dev *dev)
55{
56 int ret;
57
58 ret = mthca_alloc_init(&dev->uar_table.alloc,
59 dev->limits.num_uars,
60 dev->limits.num_uars - 1,
61 dev->limits.reserved_uars);
62 if (ret)
63 return ret;
64
65 ret = mthca_init_db_tab(dev);
66 if (ret)
67 mthca_alloc_cleanup(&dev->uar_table.alloc);
68
69 return ret;
70}
71
72void mthca_cleanup_uar_table(struct mthca_dev *dev)
73{
74 mthca_cleanup_db_tab(dev);
75
76 /* XXX check if any UARs are still allocated? */
77 mthca_alloc_cleanup(&dev->uar_table.alloc);
78}
diff --git a/drivers/infiniband/include/ib_cache.h b/drivers/infiniband/include/ib_cache.h
new file mode 100644
index 000000000000..44ef6bb9b9df
--- /dev/null
+++ b/drivers/infiniband/include/ib_cache.h
@@ -0,0 +1,103 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ib_cache.h 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#ifndef _IB_CACHE_H
36#define _IB_CACHE_H
37
38#include <ib_verbs.h>
39
40/**
41 * ib_get_cached_gid - Returns a cached GID table entry
42 * @device: The device to query.
43 * @port_num: The port number of the device to query.
44 * @index: The index into the cached GID table to query.
45 * @gid: The GID value found at the specified index.
46 *
47 * ib_get_cached_gid() fetches the specified GID table entry stored in
48 * the local software cache.
49 */
50int ib_get_cached_gid(struct ib_device *device,
51 u8 port_num,
52 int index,
53 union ib_gid *gid);
54
55/**
56 * ib_find_cached_gid - Returns the port number and GID table index where
57 * a specified GID value occurs.
58 * @device: The device to query.
59 * @gid: The GID value to search for.
60 * @port_num: The port number of the device where the GID value was found.
61 * @index: The index into the cached GID table where the GID was found. This
62 * parameter may be NULL.
63 *
64 * ib_find_cached_gid() searches for the specified GID value in
65 * the local software cache.
66 */
67int ib_find_cached_gid(struct ib_device *device,
68 union ib_gid *gid,
69 u8 *port_num,
70 u16 *index);
71
72/**
73 * ib_get_cached_pkey - Returns a cached PKey table entry
74 * @device: The device to query.
75 * @port_num: The port number of the device to query.
76 * @index: The index into the cached PKey table to query.
77 * @pkey: The PKey value found at the specified index.
78 *
79 * ib_get_cached_pkey() fetches the specified PKey table entry stored in
80 * the local software cache.
81 */
82int ib_get_cached_pkey(struct ib_device *device_handle,
83 u8 port_num,
84 int index,
85 u16 *pkey);
86
87/**
88 * ib_find_cached_pkey - Returns the PKey table index where a specified
89 * PKey value occurs.
90 * @device: The device to query.
91 * @port_num: The port number of the device to search for the PKey.
92 * @pkey: The PKey value to search for.
93 * @index: The index into the cached PKey table where the PKey was found.
94 *
95 * ib_find_cached_pkey() searches the specified PKey table in
96 * the local software cache.
97 */
98int ib_find_cached_pkey(struct ib_device *device,
99 u8 port_num,
100 u16 pkey,
101 u16 *index);
102
103#endif /* _IB_CACHE_H */
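The cache accessors declared above are what build_mlx_header() in mthca_qp.c uses to fill in the BTH PKey for special-QP sends. A minimal sketch of reading the first cached GID and PKey entries, assuming a valid device pointer and an active port number (the printk formatting is arbitrary):

#include <linux/kernel.h>
#include <ib_cache.h>

/* Sketch: dump the first cached GID and PKey entry for one port. */
static void show_port_cache(struct ib_device *device, u8 port_num)
{
	union ib_gid gid;
	u16 pkey;

	if (!ib_get_cached_gid(device, port_num, 0, &gid))
		printk(KERN_INFO "port %d GID[0] starts %02x%02x\n",
		       port_num, gid.raw[0], gid.raw[1]);

	if (!ib_get_cached_pkey(device, port_num, 0, &pkey))
		printk(KERN_INFO "port %d PKey[0] 0x%04x\n", port_num, pkey);
}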
diff --git a/drivers/infiniband/include/ib_fmr_pool.h b/drivers/infiniband/include/ib_fmr_pool.h
new file mode 100644
index 000000000000..e8769657cbbb
--- /dev/null
+++ b/drivers/infiniband/include/ib_fmr_pool.h
@@ -0,0 +1,92 @@
1/*
2 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ib_fmr_pool.h 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#if !defined(IB_FMR_POOL_H)
36#define IB_FMR_POOL_H
37
38#include <ib_verbs.h>
39
40struct ib_fmr_pool;
41
42/**
43 * struct ib_fmr_pool_param - Parameters for creating FMR pool
44 * @max_pages_per_fmr:Maximum number of pages per map request.
45 * @access:Access flags for FMRs in pool.
46 * @pool_size:Number of FMRs to allocate for pool.
47 * @dirty_watermark:Flush is triggered when @dirty_watermark dirty
48 * FMRs are present.
49 * @flush_function:Callback called when unmapped FMRs are flushed and
50 * more FMRs are possibly available for mapping
51 * @flush_arg:Context passed to user's flush function.
52 * @cache:If set, FMRs may be reused after unmapping for identical map
53 * requests.
54 */
55struct ib_fmr_pool_param {
56 int max_pages_per_fmr;
57 enum ib_access_flags access;
58 int pool_size;
59 int dirty_watermark;
60 void (*flush_function)(struct ib_fmr_pool *pool,
61 void * arg);
62 void *flush_arg;
63 unsigned cache:1;
64};
65
66struct ib_pool_fmr {
67 struct ib_fmr *fmr;
68 struct ib_fmr_pool *pool;
69 struct list_head list;
70 struct hlist_node cache_node;
71 int ref_count;
72 int remap_count;
73 u64 io_virtual_address;
74 int page_list_len;
75 u64 page_list[0];
76};
77
78struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
79 struct ib_fmr_pool_param *params);
80
81int ib_destroy_fmr_pool(struct ib_fmr_pool *pool);
82
83int ib_flush_fmr_pool(struct ib_fmr_pool *pool);
84
85struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
86 u64 *page_list,
87 int list_len,
88 u64 *io_virtual_address);
89
90int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr);
91
92#endif /* IB_FMR_POOL_H */
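A hedged end-to-end sketch of the pool API declared above: create a pool on a protection domain, map a small physical page list, unmap it and destroy the pool. The PD, page array, pool sizing and the ERR_PTR error convention are assumptions of this sketch rather than requirements stated by the header.

#include <linux/err.h>
#include <ib_fmr_pool.h>

/* Illustrative only: pd and the DMA page addresses come from the caller. */
static int fmr_pool_roundtrip(struct ib_pd *pd, u64 *pages, int npages)
{
	struct ib_fmr_pool_param params = {
		.max_pages_per_fmr = 64,
		.access            = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE,
		.pool_size         = 32,
		.dirty_watermark   = 8,
		.cache             = 1,
	};
	struct ib_fmr_pool *pool;
	struct ib_pool_fmr *fmr;
	u64 io_addr = 0;	/* requested I/O virtual address (placeholder) */

	pool = ib_create_fmr_pool(pd, &params);
	if (IS_ERR(pool))
		return PTR_ERR(pool);

	fmr = ib_fmr_pool_map_phys(pool, pages, npages, &io_addr);
	if (!IS_ERR(fmr)) {
		/* ... post work requests using the keys in fmr->fmr ... */
		ib_fmr_pool_unmap(fmr);
	}

	return ib_destroy_fmr_pool(pool);
}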
diff --git a/drivers/infiniband/include/ib_mad.h b/drivers/infiniband/include/ib_mad.h
new file mode 100644
index 000000000000..4a6bf6763a97
--- /dev/null
+++ b/drivers/infiniband/include/ib_mad.h
@@ -0,0 +1,404 @@
1/*
2 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 * $Id: ib_mad.h 1389 2004-12-27 22:56:47Z roland $
37 */
38
39#if !defined( IB_MAD_H )
40#define IB_MAD_H
41
42#include <ib_verbs.h>
43
44/* Management base version */
45#define IB_MGMT_BASE_VERSION 1
46
47/* Management classes */
48#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01
49#define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81
50#define IB_MGMT_CLASS_SUBN_ADM 0x03
51#define IB_MGMT_CLASS_PERF_MGMT 0x04
52#define IB_MGMT_CLASS_BM 0x05
53#define IB_MGMT_CLASS_DEVICE_MGMT 0x06
54#define IB_MGMT_CLASS_CM 0x07
55#define IB_MGMT_CLASS_SNMP 0x08
56#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30
57#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F
58
59/* Management methods */
60#define IB_MGMT_METHOD_GET 0x01
61#define IB_MGMT_METHOD_SET 0x02
62#define IB_MGMT_METHOD_GET_RESP 0x81
63#define IB_MGMT_METHOD_SEND 0x03
64#define IB_MGMT_METHOD_TRAP 0x05
65#define IB_MGMT_METHOD_REPORT 0x06
66#define IB_MGMT_METHOD_REPORT_RESP 0x86
67#define IB_MGMT_METHOD_TRAP_REPRESS 0x07
68
69#define IB_MGMT_METHOD_RESP 0x80
70
71#define IB_MGMT_MAX_METHODS 128
72
73#define IB_QP0 0
74#define IB_QP1 __constant_htonl(1)
75#define IB_QP1_QKEY 0x80010000
76
77struct ib_grh {
78 u32 version_tclass_flow;
79 u16 paylen;
80 u8 next_hdr;
81 u8 hop_limit;
82 union ib_gid sgid;
83 union ib_gid dgid;
84} __attribute__ ((packed));
85
86struct ib_mad_hdr {
87 u8 base_version;
88 u8 mgmt_class;
89 u8 class_version;
90 u8 method;
91 u16 status;
92 u16 class_specific;
93 u64 tid;
94 u16 attr_id;
95 u16 resv;
96 u32 attr_mod;
97} __attribute__ ((packed));
98
99struct ib_rmpp_hdr {
100 u8 rmpp_version;
101 u8 rmpp_type;
102 u8 rmpp_rtime_flags;
103 u8 rmpp_status;
104 u32 seg_num;
105 u32 paylen_newwin;
106} __attribute__ ((packed));
107
108struct ib_mad {
109 struct ib_mad_hdr mad_hdr;
110 u8 data[232];
111} __attribute__ ((packed));
112
113struct ib_rmpp_mad {
114 struct ib_mad_hdr mad_hdr;
115 struct ib_rmpp_hdr rmpp_hdr;
116 u8 data[220];
117} __attribute__ ((packed));
118
119struct ib_vendor_mad {
120 struct ib_mad_hdr mad_hdr;
121 struct ib_rmpp_hdr rmpp_hdr;
122 u8 reserved;
123 u8 oui[3];
124 u8 data[216];
125} __attribute__ ((packed));
126
127struct ib_mad_agent;
128struct ib_mad_send_wc;
129struct ib_mad_recv_wc;
130
131/**
132 * ib_mad_send_handler - callback handler for a sent MAD.
133 * @mad_agent: MAD agent that sent the MAD.
134 * @mad_send_wc: Send work completion information on the sent MAD.
135 */
136typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent,
137 struct ib_mad_send_wc *mad_send_wc);
138
139/**
140 * ib_mad_snoop_handler - Callback handler for snooping sent MADs.
141 * @mad_agent: MAD agent that snooped the MAD.
142 * @send_wr: Work request information on the sent MAD.
143 * @mad_send_wc: Work completion information on the sent MAD. Valid
144 * only for snooping that occurs on a send completion.
145 *
146 * Clients snooping MADs should not modify data referenced by the @send_wr
147 * or @mad_send_wc.
148 */
149typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
150 struct ib_send_wr *send_wr,
151 struct ib_mad_send_wc *mad_send_wc);
152
153/**
154 * ib_mad_recv_handler - callback handler for a received MAD.
155 * @mad_agent: MAD agent requesting the received MAD.
156 * @mad_recv_wc: Received work completion information on the received MAD.
157 *
158 * MADs received in response to a send request operation will be handed to
159 * the user after the send operation completes. All data buffers given
160 * to registered agents through this routine are owned by the receiving
161 * client, except for snooping agents. Clients snooping MADs should not
162 * modify the data referenced by @mad_recv_wc.
163 */
164typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
165 struct ib_mad_recv_wc *mad_recv_wc);
166
167/**
168 * ib_mad_agent - Used to track MAD registration with the access layer.
169 * @device: Reference to device registration is on.
170 * @qp: Reference to QP used for sending and receiving MADs.
171 * @recv_handler: Callback handler for a received MAD.
172 * @send_handler: Callback handler for a sent MAD.
173 * @snoop_handler: Callback handler for snooped sent MADs.
174 * @context: User-specified context associated with this registration.
175 * @hi_tid: Access layer assigned transaction ID for this client.
176 * Unsolicited MADs sent by this client will have the upper 32-bits
177 * of their TID set to this value.
178 * @port_num: Port number on which QP is registered
179 */
180struct ib_mad_agent {
181 struct ib_device *device;
182 struct ib_qp *qp;
183 ib_mad_recv_handler recv_handler;
184 ib_mad_send_handler send_handler;
185 ib_mad_snoop_handler snoop_handler;
186 void *context;
187 u32 hi_tid;
188 u8 port_num;
189};
190
191/**
192 * ib_mad_send_wc - MAD send completion information.
193 * @wr_id: Work request identifier associated with the send MAD request.
194 * @status: Completion status.
195 * @vendor_err: Optional vendor error information returned with a failed
196 * request.
197 */
198struct ib_mad_send_wc {
199 u64 wr_id;
200 enum ib_wc_status status;
201 u32 vendor_err;
202};
203
204/**
205 * ib_mad_recv_buf - received MAD buffer information.
206 * @list: Reference to next data buffer for a received RMPP MAD.
207 * @grh: References a data buffer containing the global route header.
208 * The data referenced by this buffer is only valid if the GRH is
209 * valid.
210 * @mad: References the start of the received MAD.
211 */
212struct ib_mad_recv_buf {
213 struct list_head list;
214 struct ib_grh *grh;
215 struct ib_mad *mad;
216};
217
218/**
219 * ib_mad_recv_wc - received MAD information.
220 * @wc: Completion information for the received data.
221 * @recv_buf: Specifies the location of the received data buffer(s).
222 * @mad_len: The length of the received MAD, without duplicated headers.
223 *
224 * For a received response, the wr_id field of the wc is set to the wr_id
225 * for the corresponding send request.
226 */
227struct ib_mad_recv_wc {
228 struct ib_wc *wc;
229 struct ib_mad_recv_buf recv_buf;
230 int mad_len;
231};
232
233/**
234 * ib_mad_reg_req - MAD registration request
235 * @mgmt_class: Indicates which management class of MADs should be received
236 * by the caller. This field is only required if the user wishes to
237 * receive unsolicited MADs, otherwise it should be 0.
238 * @mgmt_class_version: Indicates which version of MADs for the given
239 * management class to receive.
240 * @oui: Indicates IEEE OUI when mgmt_class is a vendor class
241 * in the range from 0x30 to 0x4f. Otherwise not used.
242 * @method_mask: The caller will receive unsolicited MADs for any method
243 * whose corresponding bit in @method_mask is set.
244 */
245struct ib_mad_reg_req {
246 u8 mgmt_class;
247 u8 mgmt_class_version;
248 u8 oui[3];
249 DECLARE_BITMAP(method_mask, IB_MGMT_MAX_METHODS);
250};
251
252/**
253 * ib_register_mad_agent - Register to send/receive MADs.
254 * @device: The device to register with.
255 * @port_num: The port on the specified device to use.
256 * @qp_type: Specifies which QP to access. Must be either
257 * IB_QPT_SMI or IB_QPT_GSI.
258 * @mad_reg_req: Specifies which unsolicited MADs should be received
259 * by the caller. This parameter may be NULL if the caller only
260 * wishes to receive solicited responses.
261 * @rmpp_version: If set, indicates that the client will send
262 * and receive MADs that contain the RMPP header for the given version.
263 * If set to 0, indicates that RMPP is not used by this client.
264 * @send_handler: The completion callback routine invoked after a send
265 * request has completed.
266 * @recv_handler: The completion callback routine invoked for a received
267 * MAD.
268 * @context: User specified context associated with the registration.
269 */
270struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
271 u8 port_num,
272 enum ib_qp_type qp_type,
273 struct ib_mad_reg_req *mad_reg_req,
274 u8 rmpp_version,
275 ib_mad_send_handler send_handler,
276 ib_mad_recv_handler recv_handler,
277 void *context);
278
279enum ib_mad_snoop_flags {
280 /*IB_MAD_SNOOP_POSTED_SENDS = 1,*/
281 /*IB_MAD_SNOOP_RMPP_SENDS = (1<<1),*/
282 IB_MAD_SNOOP_SEND_COMPLETIONS = (1<<2),
283 /*IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS = (1<<3),*/
284 IB_MAD_SNOOP_RECVS = (1<<4)
285 /*IB_MAD_SNOOP_RMPP_RECVS = (1<<5),*/
286 /*IB_MAD_SNOOP_REDIRECTED_QPS = (1<<6)*/
287};
288
289/**
290 * ib_register_mad_snoop - Register to snoop sent and received MADs.
291 * @device: The device to register with.
292 * @port_num: The port on the specified device to use.
293 * @qp_type: Specifies which QP traffic to snoop. Must be either
294 * IB_QPT_SMI or IB_QPT_GSI.
295 * @mad_snoop_flags: Specifies which MAD events to snoop; see enum ib_mad_snoop_flags.
296 * @send_handler: The callback routine invoked for a snooped send.
297 * @recv_handler: The callback routine invoked for a snooped receive.
298 * @context: User specified context associated with the registration.
299 */
300struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
301 u8 port_num,
302 enum ib_qp_type qp_type,
303 int mad_snoop_flags,
304 ib_mad_snoop_handler snoop_handler,
305 ib_mad_recv_handler recv_handler,
306 void *context);
307
308/**
309 * ib_unregister_mad_agent - Unregisters a client from using MAD services.
310 * @mad_agent: Corresponding MAD registration request to deregister.
311 *
312 * After invoking this routine, MAD services are no longer usable by the
313 * client on the associated QP.
314 */
315int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent);
316
317/**
318 * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
319 * with the registered client.
320 * @mad_agent: Specifies the associated registration to post the send to.
321 * @send_wr: Specifies the information needed to send the MAD(s).
322 * @bad_send_wr: Specifies the MAD on which an error was encountered.
323 *
324 * Sent MADs are not guaranteed to complete in the order that they were posted.
325 */
326int ib_post_send_mad(struct ib_mad_agent *mad_agent,
327 struct ib_send_wr *send_wr,
328 struct ib_send_wr **bad_send_wr);
329
330/**
331 * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer.
332 * @mad_recv_wc: Work completion information for a received MAD.
333 * @buf: User-provided data buffer to receive the coalesced MAD data. The
334 * referenced buffer should be at least the size of the mad_len specified
335 * by @mad_recv_wc.
336 *
337 * This call copies a chain of received RMPP MADs into a single data buffer,
338 * removing duplicated headers.
339 */
340void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc,
341 void *buf);
342
343/**
344 * ib_free_recv_mad - Returns data buffers used to receive a MAD to the
345 * access layer.
346 * @mad_recv_wc: Work completion information for a received MAD.
347 *
348 * Clients receiving MADs through their ib_mad_recv_handler must call this
349 * routine to return the work completion buffers to the access layer.
350 */
351void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc);
352
353/**
354 * ib_cancel_mad - Cancels an outstanding send MAD operation.
355 * @mad_agent: Specifies the registration associated with sent MAD.
356 * @wr_id: Indicates the work request identifier of the MAD to cancel.
357 *
358 * MADs will be returned to the user through the corresponding
359 * ib_mad_send_handler.
360 */
361void ib_cancel_mad(struct ib_mad_agent *mad_agent,
362 u64 wr_id);
363
364/**
365 * ib_redirect_mad_qp - Registers a QP for MAD services.
366 * @qp: Reference to a QP that requires MAD services.
367 * @rmpp_version: If set, indicates that the client will send
368 * and receive MADs that contain the RMPP header for the given version.
369 * If set to 0, indicates that RMPP is not used by this client.
370 * @send_handler: The completion callback routine invoked after a send
371 * request has completed.
372 * @recv_handler: The completion callback routine invoked for a received
373 * MAD.
374 * @context: User specified context associated with the registration.
375 *
376 * Use of this call allows clients to use MAD services, such as RMPP,
377 * on user-owned QPs. After calling this routine, users may send
378 * MADs on the specified QP by calling ib_post_send_mad().
379 */
380struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
381 u8 rmpp_version,
382 ib_mad_send_handler send_handler,
383 ib_mad_recv_handler recv_handler,
384 void *context);
385
386/**
387 * ib_process_mad_wc - Processes a work completion associated with a
388 * MAD sent or received on a redirected QP.
389 * @mad_agent: Specifies the registered MAD service using the redirected QP.
390 * @wc: References a work completion associated with a sent or received
391 * MAD segment.
392 *
393 * This routine is used to complete or continue processing on a MAD request.
394 * If the work completion is associated with a send operation, calling
395 * this routine is required to continue an RMPP transfer or to wait for a
396 * corresponding response, if it is a request. If the work completion is
397 * associated with a receive operation, calling this routine is required to
398 * process an inbound or outbound RMPP transfer, or to match a response MAD
399 * with its corresponding request.
400 */
401int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
402 struct ib_wc *wc);
403
404#endif /* IB_MAD_H */
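A sketch of the registration flow this header describes: a client that wants unsolicited Performance Management GET MADs on the GSI QP fills in struct ib_mad_reg_req, sets the bit for the method it cares about and registers its handlers. The handler bodies, the class version and the use of set_bit() on @method_mask are illustrative assumptions.

#include <linux/bitops.h>
#include <ib_mad.h>

static void example_send_handler(struct ib_mad_agent *agent,
				 struct ib_mad_send_wc *mad_send_wc)
{
	/* mad_send_wc->wr_id identifies which posted MAD completed */
}

static void example_recv_handler(struct ib_mad_agent *agent,
				 struct ib_mad_recv_wc *mad_recv_wc)
{
	/* ... examine mad_recv_wc->recv_buf.mad ... */
	ib_free_recv_mad(mad_recv_wc);	/* buffers must go back to the MAD layer */
}

static struct ib_mad_agent *register_perf_agent(struct ib_device *device,
						u8 port_num)
{
	struct ib_mad_reg_req req = {
		.mgmt_class         = IB_MGMT_CLASS_PERF_MGMT,
		.mgmt_class_version = 1,
	};

	set_bit(IB_MGMT_METHOD_GET, req.method_mask);

	return ib_register_mad_agent(device, port_num, IB_QPT_GSI,
				     &req, 0 /* no RMPP */,
				     example_send_handler,
				     example_recv_handler, NULL);
}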
diff --git a/drivers/infiniband/include/ib_pack.h b/drivers/infiniband/include/ib_pack.h
new file mode 100644
index 000000000000..fe480f3e8654
--- /dev/null
+++ b/drivers/infiniband/include/ib_pack.h
@@ -0,0 +1,245 @@
1/*
2 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ib_pack.h 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#ifndef IB_PACK_H
36#define IB_PACK_H
37
38#include <ib_verbs.h>
39
40enum {
41 IB_LRH_BYTES = 8,
42 IB_GRH_BYTES = 40,
43 IB_BTH_BYTES = 12,
44 IB_DETH_BYTES = 8
45};
46
47struct ib_field {
48 size_t struct_offset_bytes;
49 size_t struct_size_bytes;
50 int offset_words;
51 int offset_bits;
52 int size_bits;
53 char *field_name;
54};
55
56#define RESERVED \
57 .field_name = "reserved"
58
59/*
60 * This macro cleans up the definitions of constants for BTH opcodes.
61 * It is used to define constants such as IB_OPCODE_UD_SEND_ONLY,
62 * which becomes IB_OPCODE_UD + IB_OPCODE_SEND_ONLY, and this gives
63 * the correct value.
64 *
65 * In short, user code should use the constants defined using the
66 * macro rather than worrying about adding together other constants.
67*/
68#define IB_OPCODE(transport, op) \
69 IB_OPCODE_ ## transport ## _ ## op = \
70 IB_OPCODE_ ## transport + IB_OPCODE_ ## op
71
72enum {
73 /* transport types -- just used to define real constants */
74 IB_OPCODE_RC = 0x00,
75 IB_OPCODE_UC = 0x20,
76 IB_OPCODE_RD = 0x40,
77 IB_OPCODE_UD = 0x60,
78
79 /* operations -- just used to define real constants */
80 IB_OPCODE_SEND_FIRST = 0x00,
81 IB_OPCODE_SEND_MIDDLE = 0x01,
82 IB_OPCODE_SEND_LAST = 0x02,
83 IB_OPCODE_SEND_LAST_WITH_IMMEDIATE = 0x03,
84 IB_OPCODE_SEND_ONLY = 0x04,
85 IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE = 0x05,
86 IB_OPCODE_RDMA_WRITE_FIRST = 0x06,
87 IB_OPCODE_RDMA_WRITE_MIDDLE = 0x07,
88 IB_OPCODE_RDMA_WRITE_LAST = 0x08,
89 IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE = 0x09,
90 IB_OPCODE_RDMA_WRITE_ONLY = 0x0a,
91 IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE = 0x0b,
92 IB_OPCODE_RDMA_READ_REQUEST = 0x0c,
93 IB_OPCODE_RDMA_READ_RESPONSE_FIRST = 0x0d,
94 IB_OPCODE_RDMA_READ_RESPONSE_MIDDLE = 0x0e,
95 IB_OPCODE_RDMA_READ_RESPONSE_LAST = 0x0f,
96 IB_OPCODE_RDMA_READ_RESPONSE_ONLY = 0x10,
97 IB_OPCODE_ACKNOWLEDGE = 0x11,
98 IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12,
99 IB_OPCODE_COMPARE_SWAP = 0x13,
100 IB_OPCODE_FETCH_ADD = 0x14,
101
102	/* real constants follow -- see the comment above about the IB_OPCODE()
103 macro for more details */
104
105 /* RC */
106 IB_OPCODE(RC, SEND_FIRST),
107 IB_OPCODE(RC, SEND_MIDDLE),
108 IB_OPCODE(RC, SEND_LAST),
109 IB_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE),
110 IB_OPCODE(RC, SEND_ONLY),
111 IB_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE),
112 IB_OPCODE(RC, RDMA_WRITE_FIRST),
113 IB_OPCODE(RC, RDMA_WRITE_MIDDLE),
114 IB_OPCODE(RC, RDMA_WRITE_LAST),
115 IB_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE),
116 IB_OPCODE(RC, RDMA_WRITE_ONLY),
117 IB_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
118 IB_OPCODE(RC, RDMA_READ_REQUEST),
119 IB_OPCODE(RC, RDMA_READ_RESPONSE_FIRST),
120 IB_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE),
121 IB_OPCODE(RC, RDMA_READ_RESPONSE_LAST),
122 IB_OPCODE(RC, RDMA_READ_RESPONSE_ONLY),
123 IB_OPCODE(RC, ACKNOWLEDGE),
124 IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE),
125 IB_OPCODE(RC, COMPARE_SWAP),
126 IB_OPCODE(RC, FETCH_ADD),
127
128 /* UC */
129 IB_OPCODE(UC, SEND_FIRST),
130 IB_OPCODE(UC, SEND_MIDDLE),
131 IB_OPCODE(UC, SEND_LAST),
132 IB_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE),
133 IB_OPCODE(UC, SEND_ONLY),
134 IB_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE),
135 IB_OPCODE(UC, RDMA_WRITE_FIRST),
136 IB_OPCODE(UC, RDMA_WRITE_MIDDLE),
137 IB_OPCODE(UC, RDMA_WRITE_LAST),
138 IB_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE),
139 IB_OPCODE(UC, RDMA_WRITE_ONLY),
140 IB_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
141
142 /* RD */
143 IB_OPCODE(RD, SEND_FIRST),
144 IB_OPCODE(RD, SEND_MIDDLE),
145 IB_OPCODE(RD, SEND_LAST),
146 IB_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE),
147 IB_OPCODE(RD, SEND_ONLY),
148 IB_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE),
149 IB_OPCODE(RD, RDMA_WRITE_FIRST),
150 IB_OPCODE(RD, RDMA_WRITE_MIDDLE),
151 IB_OPCODE(RD, RDMA_WRITE_LAST),
152 IB_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE),
153 IB_OPCODE(RD, RDMA_WRITE_ONLY),
154 IB_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
155 IB_OPCODE(RD, RDMA_READ_REQUEST),
156 IB_OPCODE(RD, RDMA_READ_RESPONSE_FIRST),
157 IB_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE),
158 IB_OPCODE(RD, RDMA_READ_RESPONSE_LAST),
159 IB_OPCODE(RD, RDMA_READ_RESPONSE_ONLY),
160 IB_OPCODE(RD, ACKNOWLEDGE),
161 IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE),
162 IB_OPCODE(RD, COMPARE_SWAP),
163 IB_OPCODE(RD, FETCH_ADD),
164
165 /* UD */
166 IB_OPCODE(UD, SEND_ONLY),
167 IB_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE)
168};
169
170enum {
171 IB_LNH_RAW = 0,
172 IB_LNH_IP = 1,
173 IB_LNH_IBA_LOCAL = 2,
174 IB_LNH_IBA_GLOBAL = 3
175};
176
177struct ib_unpacked_lrh {
178 u8 virtual_lane;
179 u8 link_version;
180 u8 service_level;
181 u8 link_next_header;
182 __be16 destination_lid;
183 __be16 packet_length;
184 __be16 source_lid;
185};
186
187struct ib_unpacked_grh {
188 u8 ip_version;
189 u8 traffic_class;
190 __be32 flow_label;
191 __be16 payload_length;
192 u8 next_header;
193 u8 hop_limit;
194 union ib_gid source_gid;
195 union ib_gid destination_gid;
196};
197
198struct ib_unpacked_bth {
199 u8 opcode;
200 u8 solicited_event;
201 u8 mig_req;
202 u8 pad_count;
203 u8 transport_header_version;
204 __be16 pkey;
205 __be32 destination_qpn;
206 u8 ack_req;
207 __be32 psn;
208};
209
210struct ib_unpacked_deth {
211 __be32 qkey;
212 __be32 source_qpn;
213};
214
215struct ib_ud_header {
216 struct ib_unpacked_lrh lrh;
217 int grh_present;
218 struct ib_unpacked_grh grh;
219 struct ib_unpacked_bth bth;
220 struct ib_unpacked_deth deth;
221 int immediate_present;
222 __be32 immediate_data;
223};
224
225void ib_pack(const struct ib_field *desc,
226 int desc_len,
227 void *structure,
228 void *buf);
229
230void ib_unpack(const struct ib_field *desc,
231 int desc_len,
232 void *buf,
233 void *structure);
234
235void ib_ud_header_init(int payload_bytes,
236 int grh_present,
237 struct ib_ud_header *header);
238
239int ib_ud_header_pack(struct ib_ud_header *header,
240 void *buf);
241
242int ib_ud_header_unpack(void *buf,
243 struct ib_ud_header *header);
244
245#endif /* IB_PACK_H */
diff --git a/drivers/infiniband/include/ib_sa.h b/drivers/infiniband/include/ib_sa.h
new file mode 100644
index 000000000000..f4f747707b30
--- /dev/null
+++ b/drivers/infiniband/include/ib_sa.h
@@ -0,0 +1,308 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ib_sa.h 1389 2004-12-27 22:56:47Z roland $
33 */
34
35#ifndef IB_SA_H
36#define IB_SA_H
37
38#include <linux/compiler.h>
39
40#include <ib_verbs.h>
41#include <ib_mad.h>
42
43enum {
44 IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */
45
46 IB_SA_METHOD_DELETE = 0x15
47};
48
49enum ib_sa_selector {
50 IB_SA_GTE = 0,
51 IB_SA_LTE = 1,
52 IB_SA_EQ = 2,
53 /*
54 * The meaning of "best" depends on the attribute: for
55 * example, for MTU best will return the largest available
56 * MTU, while for packet life time, best will return the
57 * smallest available life time.
58 */
59 IB_SA_BEST = 3
60};
61
62enum ib_sa_rate {
63 IB_SA_RATE_2_5_GBPS = 2,
64 IB_SA_RATE_5_GBPS = 5,
65 IB_SA_RATE_10_GBPS = 3,
66 IB_SA_RATE_20_GBPS = 6,
67 IB_SA_RATE_30_GBPS = 4,
68 IB_SA_RATE_40_GBPS = 7,
69 IB_SA_RATE_60_GBPS = 8,
70 IB_SA_RATE_80_GBPS = 9,
71 IB_SA_RATE_120_GBPS = 10
72};
73
74static inline int ib_sa_rate_enum_to_int(enum ib_sa_rate rate)
75{
76 switch (rate) {
77 case IB_SA_RATE_2_5_GBPS: return 1;
78 case IB_SA_RATE_5_GBPS: return 2;
79 case IB_SA_RATE_10_GBPS: return 4;
80 case IB_SA_RATE_20_GBPS: return 8;
81 case IB_SA_RATE_30_GBPS: return 12;
82 case IB_SA_RATE_40_GBPS: return 16;
83 case IB_SA_RATE_60_GBPS: return 24;
84 case IB_SA_RATE_80_GBPS: return 32;
85 case IB_SA_RATE_120_GBPS: return 48;
86 default: return -1;
87 }
88}
89
90typedef u64 __bitwise ib_sa_comp_mask;
91
92#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << n))
93
94/*
95 * Structures for SA records are named "struct ib_sa_xxx_rec." No
96 * attempt is made to pack structures to match the physical layout of
97 * SA records in SA MADs; all packing and unpacking is handled by the
98 * SA query code.
99 *
100 * For a record with structure ib_sa_xxx_rec, the naming convention
101 * for the component mask value for field yyy is IB_SA_XXX_REC_YYY (we
102 * never use different abbreviations or otherwise change the spelling
103 * of xxx/yyy between ib_sa_xxx_rec.yyy and IB_SA_XXX_REC_YYY).
104 *
105 * Reserved rows are indicated with comments to help maintainability.
106 */
107
108/* reserved: 0 */
109/* reserved: 1 */
110#define IB_SA_PATH_REC_DGID IB_SA_COMP_MASK( 2)
111#define IB_SA_PATH_REC_SGID IB_SA_COMP_MASK( 3)
112#define IB_SA_PATH_REC_DLID IB_SA_COMP_MASK( 4)
113#define IB_SA_PATH_REC_SLID IB_SA_COMP_MASK( 5)
114#define IB_SA_PATH_REC_RAW_TRAFFIC IB_SA_COMP_MASK( 6)
115/* reserved: 7 */
116#define IB_SA_PATH_REC_FLOW_LABEL IB_SA_COMP_MASK( 8)
117#define IB_SA_PATH_REC_HOP_LIMIT IB_SA_COMP_MASK( 9)
118#define IB_SA_PATH_REC_TRAFFIC_CLASS IB_SA_COMP_MASK(10)
119#define IB_SA_PATH_REC_REVERSIBLE IB_SA_COMP_MASK(11)
120#define IB_SA_PATH_REC_NUMB_PATH IB_SA_COMP_MASK(12)
121#define IB_SA_PATH_REC_PKEY IB_SA_COMP_MASK(13)
122/* reserved: 14 */
123#define IB_SA_PATH_REC_SL IB_SA_COMP_MASK(15)
124#define IB_SA_PATH_REC_MTU_SELECTOR IB_SA_COMP_MASK(16)
125#define IB_SA_PATH_REC_MTU IB_SA_COMP_MASK(17)
126#define IB_SA_PATH_REC_RATE_SELECTOR IB_SA_COMP_MASK(18)
127#define IB_SA_PATH_REC_RATE IB_SA_COMP_MASK(19)
128#define IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(20)
129#define IB_SA_PATH_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(21)
130#define IB_SA_PATH_REC_PREFERENCE IB_SA_COMP_MASK(22)
131
132struct ib_sa_path_rec {
133 /* reserved */
134 /* reserved */
135 union ib_gid dgid;
136 union ib_gid sgid;
137 u16 dlid;
138 u16 slid;
139 int raw_traffic;
140 /* reserved */
141 u32 flow_label;
142 u8 hop_limit;
143 u8 traffic_class;
144 int reversible;
145 u8 numb_path;
146 u16 pkey;
147 /* reserved */
148 u8 sl;
149 u8 mtu_selector;
150 enum ib_mtu mtu;
151 u8 rate_selector;
152 u8 rate;
153 u8 packet_life_time_selector;
154 u8 packet_life_time;
155 u8 preference;
156};
157
158#define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0)
159#define IB_SA_MCMEMBER_REC_PORT_GID IB_SA_COMP_MASK( 1)
160#define IB_SA_MCMEMBER_REC_QKEY IB_SA_COMP_MASK( 2)
161#define IB_SA_MCMEMBER_REC_MLID IB_SA_COMP_MASK( 3)
162#define IB_SA_MCMEMBER_REC_MTU_SELECTOR IB_SA_COMP_MASK( 4)
163#define IB_SA_MCMEMBER_REC_MTU IB_SA_COMP_MASK( 5)
164#define IB_SA_MCMEMBER_REC_TRAFFIC_CLASS IB_SA_COMP_MASK( 6)
165#define IB_SA_MCMEMBER_REC_PKEY IB_SA_COMP_MASK( 7)
166#define IB_SA_MCMEMBER_REC_RATE_SELECTOR IB_SA_COMP_MASK( 8)
167#define IB_SA_MCMEMBER_REC_RATE IB_SA_COMP_MASK( 9)
168#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(10)
169#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(11)
170#define IB_SA_MCMEMBER_REC_SL IB_SA_COMP_MASK(12)
171#define IB_SA_MCMEMBER_REC_FLOW_LABEL IB_SA_COMP_MASK(13)
172#define IB_SA_MCMEMBER_REC_HOP_LIMIT IB_SA_COMP_MASK(14)
173#define IB_SA_MCMEMBER_REC_SCOPE IB_SA_COMP_MASK(15)
174#define IB_SA_MCMEMBER_REC_JOIN_STATE IB_SA_COMP_MASK(16)
175#define IB_SA_MCMEMBER_REC_PROXY_JOIN IB_SA_COMP_MASK(17)
176
177struct ib_sa_mcmember_rec {
178 union ib_gid mgid;
179 union ib_gid port_gid;
180 u32 qkey;
181 u16 mlid;
182 u8 mtu_selector;
183 enum ib_mtu mtu;
184 u8 traffic_class;
185 u16 pkey;
186 u8 rate_selector;
187 u8 rate;
188 u8 packet_life_time_selector;
189 u8 packet_life_time;
190 u8 sl;
191 u32 flow_label;
192 u8 hop_limit;
193 u8 scope;
194 u8 join_state;
195 int proxy_join;
196};
197
198struct ib_sa_query;
199
200void ib_sa_cancel_query(int id, struct ib_sa_query *query);
201
202int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
203 struct ib_sa_path_rec *rec,
204 ib_sa_comp_mask comp_mask,
205 int timeout_ms, int gfp_mask,
206 void (*callback)(int status,
207 struct ib_sa_path_rec *resp,
208 void *context),
209 void *context,
210 struct ib_sa_query **query);
211
212int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
213 u8 method,
214 struct ib_sa_mcmember_rec *rec,
215 ib_sa_comp_mask comp_mask,
216 int timeout_ms, int gfp_mask,
217 void (*callback)(int status,
218 struct ib_sa_mcmember_rec *resp,
219 void *context),
220 void *context,
221 struct ib_sa_query **query);
222
223/**
224 * ib_sa_mcmember_rec_set - Start an MCMember set query
225 * @device:device to send query on
226 * @port_num: port number to send query on
227 * @rec:MCMember Record to send in query
228 * @comp_mask:component mask to send in query
229 * @timeout_ms:time to wait for response
230 * @gfp_mask:GFP mask to use for internal allocations
231 * @callback:function called when query completes, times out or is
232 * canceled
233 * @context:opaque user context passed to callback
234 * @query:query context, used to cancel query
235 *
236 * Send an MCMember Set query to the SA (e.g. to join a multicast
237 * group). The callback function will be called when the query
238 * completes (or fails); status is 0 for a successful response, -EINTR
239 * if the query is canceled, -ETIMEDOUT if the query timed out, or
240 * -EIO if an error occurred sending the query. The resp parameter of
241 * the callback is only valid if status is 0.
242 *
243 * If the return value of ib_sa_mcmember_rec_set() is negative, it is
244 * an error code. Otherwise it is a query ID that can be used to
245 * cancel the query.
246 */
247static inline int
248ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num,
249 struct ib_sa_mcmember_rec *rec,
250 ib_sa_comp_mask comp_mask,
251 int timeout_ms, int gfp_mask,
252 void (*callback)(int status,
253 struct ib_sa_mcmember_rec *resp,
254 void *context),
255 void *context,
256 struct ib_sa_query **query)
257{
258 return ib_sa_mcmember_rec_query(device, port_num,
259 IB_MGMT_METHOD_SET,
260 rec, comp_mask,
261 timeout_ms, gfp_mask, callback,
262 context, query);
263}
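As a rough sketch of how a consumer might use this helper to start a join (the GID arguments, the join_state value of 1 for full membership, the one-second timeout, and the callback body are illustrative assumptions, not dictated by this header):

static void join_done(int status, struct ib_sa_mcmember_rec *resp,
		      void *context)
{
	/* illustrative callback: resp is only valid when status == 0 */
	if (status)
		printk(KERN_ERR "multicast join failed: %d\n", status);
}

static int start_join(struct ib_device *device, u8 port_num,
		      union ib_gid *mgid, union ib_gid *port_gid,
		      struct ib_sa_query **query)
{
	struct ib_sa_mcmember_rec rec = {
		.mgid       = *mgid,
		.port_gid   = *port_gid,
		.join_state = 1,	/* assumed: full member */
	};

	/* returns a query ID (>= 0) usable with ib_sa_cancel_query() */
	return ib_sa_mcmember_rec_set(device, port_num, &rec,
				      IB_SA_MCMEMBER_REC_MGID     |
				      IB_SA_MCMEMBER_REC_PORT_GID |
				      IB_SA_MCMEMBER_REC_JOIN_STATE,
				      1000, GFP_KERNEL,
				      join_done, NULL, query);
}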
264
265/**
266 * ib_sa_mcmember_rec_delete - Start an MCMember delete query
267 * @device:device to send query on
268 * @port_num: port number to send query on
269 * @rec:MCMember Record to send in query
270 * @comp_mask:component mask to send in query
271 * @timeout_ms:time to wait for response
272 * @gfp_mask:GFP mask to use for internal allocations
273 * @callback:function called when query completes, times out or is
274 * canceled
275 * @context:opaque user context passed to callback
276 * @query:query context, used to cancel query
277 *
278 * Send an MCMember Delete query to the SA (e.g. to leave a multicast
279 * group). The callback function will be called when the query
280 * completes (or fails); status is 0 for a successful response, -EINTR
281 * if the query is canceled, -ETIMEDOUT if the query timed out, or
282 * -EIO if an error occurred sending the query. The resp parameter of
283 * the callback is only valid if status is 0.
284 *
285 * If the return value of ib_sa_mcmember_rec_delete() is negative, it
286 * is an error code. Otherwise it is a query ID that can be used to
287 * cancel the query.
288 */
289static inline int
290ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num,
291 struct ib_sa_mcmember_rec *rec,
292 ib_sa_comp_mask comp_mask,
293 int timeout_ms, int gfp_mask,
294 void (*callback)(int status,
295 struct ib_sa_mcmember_rec *resp,
296 void *context),
297 void *context,
298 struct ib_sa_query **query)
299{
300 return ib_sa_mcmember_rec_query(device, port_num,
301 IB_SA_METHOD_DELETE,
302 rec, comp_mask,
303 timeout_ms, gfp_mask, callback,
304 context, query);
305}
306
307
308#endif /* IB_SA_H */
diff --git a/drivers/infiniband/include/ib_smi.h b/drivers/infiniband/include/ib_smi.h
new file mode 100644
index 000000000000..ca8216514963
--- /dev/null
+++ b/drivers/infiniband/include/ib_smi.h
@@ -0,0 +1,96 @@
1/*
2 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 * $Id: ib_smi.h 1389 2004-12-27 22:56:47Z roland $
37 */
38
39#if !defined( IB_SMI_H )
40#define IB_SMI_H
41
42#include <ib_mad.h>
43
44#define IB_LID_PERMISSIVE 0xFFFF
45
46#define IB_SMP_DATA_SIZE 64
47#define IB_SMP_MAX_PATH_HOPS 64
48
49struct ib_smp {
50 u8 base_version;
51 u8 mgmt_class;
52 u8 class_version;
53 u8 method;
54 u16 status;
55 u8 hop_ptr;
56 u8 hop_cnt;
57 u64 tid;
58 u16 attr_id;
59 u16 resv;
60 u32 attr_mod;
61 u64 mkey;
62 u16 dr_slid;
63 u16 dr_dlid;
64 u8 reserved[28];
65 u8 data[IB_SMP_DATA_SIZE];
66 u8 initial_path[IB_SMP_MAX_PATH_HOPS];
67 u8 return_path[IB_SMP_MAX_PATH_HOPS];
68} __attribute__ ((packed));
69
70#define IB_SMP_DIRECTION __constant_htons(0x8000)
71
72/* Subnet management attributes */
73#define IB_SMP_ATTR_NOTICE __constant_htons(0x0002)
74#define IB_SMP_ATTR_NODE_DESC __constant_htons(0x0010)
75#define IB_SMP_ATTR_NODE_INFO __constant_htons(0x0011)
76#define IB_SMP_ATTR_SWITCH_INFO __constant_htons(0x0012)
77#define IB_SMP_ATTR_GUID_INFO __constant_htons(0x0014)
78#define IB_SMP_ATTR_PORT_INFO __constant_htons(0x0015)
79#define IB_SMP_ATTR_PKEY_TABLE __constant_htons(0x0016)
80#define IB_SMP_ATTR_SL_TO_VL_TABLE __constant_htons(0x0017)
81#define IB_SMP_ATTR_VL_ARB_TABLE __constant_htons(0x0018)
82#define IB_SMP_ATTR_LINEAR_FORWARD_TABLE __constant_htons(0x0019)
83#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE __constant_htons(0x001A)
84#define IB_SMP_ATTR_MCAST_FORWARD_TABLE __constant_htons(0x001B)
85#define IB_SMP_ATTR_SM_INFO __constant_htons(0x0020)
86#define IB_SMP_ATTR_VENDOR_DIAG __constant_htons(0x0030)
87#define IB_SMP_ATTR_LED_INFO __constant_htons(0x0031)
88#define IB_SMP_ATTR_VENDOR_MASK __constant_htons(0xFF00)
89
90static inline u8
91ib_get_smp_direction(struct ib_smp *smp)
92{
93 return ((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION);
94}
95
96#endif /* IB_SMI_H */
diff --git a/drivers/infiniband/include/ib_user_mad.h b/drivers/infiniband/include/ib_user_mad.h
new file mode 100644
index 000000000000..06ad4a6075fa
--- /dev/null
+++ b/drivers/infiniband/include/ib_user_mad.h
@@ -0,0 +1,123 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ib_user_mad.h 1389 2004-12-27 22:56:47Z roland $
33 */
34
35#ifndef IB_USER_MAD_H
36#define IB_USER_MAD_H
37
38#include <linux/types.h>
39#include <linux/ioctl.h>
40
41/*
42 * Increment this value if any changes that break userspace ABI
43 * compatibility are made.
44 */
45#define IB_USER_MAD_ABI_VERSION 2
46
47/*
48 * Make sure that all structs defined in this file remain laid out so
49 * that they pack the same way on 32-bit and 64-bit architectures (to
50 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
51 */
52
53/**
54 * ib_user_mad - MAD packet
55 * @data - Contents of MAD
56 * @id - ID of agent MAD received with/to be sent with
57 * @status - 0 on successful receive, ETIMEDOUT if no response
58 * received (transaction ID in data[] will be set to TID of original
59 * request) (ignored on send)
60 * @timeout_ms - Milliseconds to wait for response (unset on receive)
61 * @qpn - Remote QP number received from/to be sent to
62 * @qkey - Remote Q_Key to be sent with (unset on receive)
63 * @lid - Remote lid received from/to be sent to
64 * @sl - Service level received with/to be sent with
65 * @path_bits - Local path bits received with/to be sent with
66 * @grh_present - If set, GRH was received/should be sent
67 * @gid_index - Local GID index to send with (unset on receive)
68 * @hop_limit - Hop limit in GRH
69 * @traffic_class - Traffic class in GRH
70 * @gid - Remote GID in GRH
71 * @flow_label - Flow label in GRH
72 *
73 * All multi-byte quantities are stored in network (big endian) byte order.
74 */
75struct ib_user_mad {
76 __u8 data[256];
77 __u32 id;
78 __u32 status;
79 __u32 timeout_ms;
80 __u32 qpn;
81 __u32 qkey;
82 __u16 lid;
83 __u8 sl;
84 __u8 path_bits;
85 __u8 grh_present;
86 __u8 gid_index;
87 __u8 hop_limit;
88 __u8 traffic_class;
89 __u8 gid[16];
90 __u32 flow_label;
91};
92
93/**
94 * ib_user_mad_reg_req - MAD registration request
95 * @id - Set by the kernel; used to identify agent in future requests.
96 * @qpn - Queue pair number; must be 0 or 1.
97 * @method_mask - The caller will receive unsolicited MADs for any method
98 * where @method_mask = 1.
99 * @mgmt_class - Indicates which management class of MADs should be received
100 * by the caller. This field is only required if the user wishes to
101 * receive unsolicited MADs, otherwise it should be 0.
102 * @mgmt_class_version - Indicates which version of MADs for the given
103 * management class to receive.
104 * @oui - Indicates IEEE OUI when mgmt_class is a vendor class
105 * in the range from 0x30 to 0x4f. Otherwise not used.
106 */
107struct ib_user_mad_reg_req {
108 __u32 id;
109 __u32 method_mask[4];
110 __u8 qpn;
111 __u8 mgmt_class;
112 __u8 mgmt_class_version;
113 __u8 oui[3];
114};
115
116#define IB_IOCTL_MAGIC 0x1b
117
118#define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \
119 struct ib_user_mad_reg_req)
120
121#define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32)
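A minimal userspace sketch of this ioctl ABI follows; the device node path and the management-class value are assumptions for illustration only (they are not defined by this header):

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include "ib_user_mad.h"

int main(void)
{
	struct ib_user_mad_reg_req req;
	__u32 id;
	int fd;

	fd = open("/dev/infiniband/umad0", O_RDWR);	/* assumed device node */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&req, 0, sizeof req);
	req.qpn                = 1;	/* GSI QP */
	req.mgmt_class         = 0x03;	/* assumed: subnet administration class */
	req.mgmt_class_version = 2;
	/* method_mask left zero: no unsolicited MADs requested */

	if (ioctl(fd, IB_USER_MAD_REGISTER_AGENT, &req) < 0) {
		perror("IB_USER_MAD_REGISTER_AGENT");
		close(fd);
		return 1;
	}

	printf("registered agent id %u\n", req.id);

	id = req.id;
	ioctl(fd, IB_USER_MAD_UNREGISTER_AGENT, &id);
	close(fd);
	return 0;
}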
122
123#endif /* IB_USER_MAD_H */
diff --git a/drivers/infiniband/include/ib_verbs.h b/drivers/infiniband/include/ib_verbs.h
new file mode 100644
index 000000000000..cf01f044a223
--- /dev/null
+++ b/drivers/infiniband/include/ib_verbs.h
@@ -0,0 +1,1252 @@
1/*
2 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 * $Id: ib_verbs.h 1349 2004-12-16 21:09:43Z roland $
37 */
38
39#if !defined(IB_VERBS_H)
40#define IB_VERBS_H
41
42#include <linux/types.h>
43#include <linux/device.h>
44#include <asm/atomic.h>
45
46union ib_gid {
47 u8 raw[16];
48 struct {
49 u64 subnet_prefix;
50 u64 interface_id;
51 } global;
52};
53
54enum ib_node_type {
55 IB_NODE_CA = 1,
56 IB_NODE_SWITCH,
57 IB_NODE_ROUTER
58};
59
60enum ib_device_cap_flags {
61 IB_DEVICE_RESIZE_MAX_WR = 1,
62 IB_DEVICE_BAD_PKEY_CNTR = (1<<1),
63 IB_DEVICE_BAD_QKEY_CNTR = (1<<2),
64 IB_DEVICE_RAW_MULTI = (1<<3),
65 IB_DEVICE_AUTO_PATH_MIG = (1<<4),
66 IB_DEVICE_CHANGE_PHY_PORT = (1<<5),
67 IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6),
68 IB_DEVICE_CURR_QP_STATE_MOD = (1<<7),
69 IB_DEVICE_SHUTDOWN_PORT = (1<<8),
70 IB_DEVICE_INIT_TYPE = (1<<9),
71 IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10),
72 IB_DEVICE_SYS_IMAGE_GUID = (1<<11),
73 IB_DEVICE_RC_RNR_NAK_GEN = (1<<12),
74 IB_DEVICE_SRQ_RESIZE = (1<<13),
75 IB_DEVICE_N_NOTIFY_CQ = (1<<14),
76};
77
78enum ib_atomic_cap {
79 IB_ATOMIC_NONE,
80 IB_ATOMIC_HCA,
81 IB_ATOMIC_GLOB
82};
83
84struct ib_device_attr {
85 u64 fw_ver;
86 u64 node_guid;
87 u64 sys_image_guid;
88 u64 max_mr_size;
89 u64 page_size_cap;
90 u32 vendor_id;
91 u32 vendor_part_id;
92 u32 hw_ver;
93 int max_qp;
94 int max_qp_wr;
95 int device_cap_flags;
96 int max_sge;
97 int max_sge_rd;
98 int max_cq;
99 int max_cqe;
100 int max_mr;
101 int max_pd;
102 int max_qp_rd_atom;
103 int max_ee_rd_atom;
104 int max_res_rd_atom;
105 int max_qp_init_rd_atom;
106 int max_ee_init_rd_atom;
107 enum ib_atomic_cap atomic_cap;
108 int max_ee;
109 int max_rdd;
110 int max_mw;
111 int max_raw_ipv6_qp;
112 int max_raw_ethy_qp;
113 int max_mcast_grp;
114 int max_mcast_qp_attach;
115 int max_total_mcast_qp_attach;
116 int max_ah;
117 int max_fmr;
118 int max_map_per_fmr;
119 int max_srq;
120 int max_srq_wr;
121 int max_srq_sge;
122 u16 max_pkeys;
123 u8 local_ca_ack_delay;
124};
125
126enum ib_mtu {
127 IB_MTU_256 = 1,
128 IB_MTU_512 = 2,
129 IB_MTU_1024 = 3,
130 IB_MTU_2048 = 4,
131 IB_MTU_4096 = 5
132};
133
134static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
135{
136 switch (mtu) {
137 case IB_MTU_256: return 256;
138 case IB_MTU_512: return 512;
139 case IB_MTU_1024: return 1024;
140 case IB_MTU_2048: return 2048;
141 case IB_MTU_4096: return 4096;
142 default: return -1;
143 }
144}
145
146enum ib_port_state {
147 IB_PORT_NOP = 0,
148 IB_PORT_DOWN = 1,
149 IB_PORT_INIT = 2,
150 IB_PORT_ARMED = 3,
151 IB_PORT_ACTIVE = 4,
152 IB_PORT_ACTIVE_DEFER = 5
153};
154
155enum ib_port_cap_flags {
156 IB_PORT_SM = 1 << 1,
157 IB_PORT_NOTICE_SUP = 1 << 2,
158 IB_PORT_TRAP_SUP = 1 << 3,
159 IB_PORT_OPT_IPD_SUP = 1 << 4,
160 IB_PORT_AUTO_MIGR_SUP = 1 << 5,
161 IB_PORT_SL_MAP_SUP = 1 << 6,
162 IB_PORT_MKEY_NVRAM = 1 << 7,
163 IB_PORT_PKEY_NVRAM = 1 << 8,
164 IB_PORT_LED_INFO_SUP = 1 << 9,
165 IB_PORT_SM_DISABLED = 1 << 10,
166 IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
167 IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
168 IB_PORT_CM_SUP = 1 << 16,
169 IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
170 IB_PORT_REINIT_SUP = 1 << 18,
171 IB_PORT_DEVICE_MGMT_SUP = 1 << 19,
172 IB_PORT_VENDOR_CLASS_SUP = 1 << 20,
173 IB_PORT_DR_NOTICE_SUP = 1 << 21,
174 IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
175 IB_PORT_BOOT_MGMT_SUP = 1 << 23,
176 IB_PORT_LINK_LATENCY_SUP = 1 << 24,
177 IB_PORT_CLIENT_REG_SUP = 1 << 25
178};
179
180enum ib_port_width {
181 IB_WIDTH_1X = 1,
182 IB_WIDTH_4X = 2,
183 IB_WIDTH_8X = 4,
184 IB_WIDTH_12X = 8
185};
186
187static inline int ib_width_enum_to_int(enum ib_port_width width)
188{
189 switch (width) {
190 case IB_WIDTH_1X: return 1;
191 case IB_WIDTH_4X: return 4;
192 case IB_WIDTH_8X: return 8;
193 case IB_WIDTH_12X: return 12;
194 default: return -1;
195 }
196}
197
198struct ib_port_attr {
199 enum ib_port_state state;
200 enum ib_mtu max_mtu;
201 enum ib_mtu active_mtu;
202 int gid_tbl_len;
203 u32 port_cap_flags;
204 u32 max_msg_sz;
205 u32 bad_pkey_cntr;
206 u32 qkey_viol_cntr;
207 u16 pkey_tbl_len;
208 u16 lid;
209 u16 sm_lid;
210 u8 lmc;
211 u8 max_vl_num;
212 u8 sm_sl;
213 u8 subnet_timeout;
214 u8 init_type_reply;
215 u8 active_width;
216 u8 active_speed;
217 u8 phys_state;
218};
219
220enum ib_device_modify_flags {
221 IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1
222};
223
224struct ib_device_modify {
225 u64 sys_image_guid;
226};
227
228enum ib_port_modify_flags {
229 IB_PORT_SHUTDOWN = 1,
230 IB_PORT_INIT_TYPE = (1<<2),
231 IB_PORT_RESET_QKEY_CNTR = (1<<3)
232};
233
234struct ib_port_modify {
235 u32 set_port_cap_mask;
236 u32 clr_port_cap_mask;
237 u8 init_type;
238};
239
240enum ib_event_type {
241 IB_EVENT_CQ_ERR,
242 IB_EVENT_QP_FATAL,
243 IB_EVENT_QP_REQ_ERR,
244 IB_EVENT_QP_ACCESS_ERR,
245 IB_EVENT_COMM_EST,
246 IB_EVENT_SQ_DRAINED,
247 IB_EVENT_PATH_MIG,
248 IB_EVENT_PATH_MIG_ERR,
249 IB_EVENT_DEVICE_FATAL,
250 IB_EVENT_PORT_ACTIVE,
251 IB_EVENT_PORT_ERR,
252 IB_EVENT_LID_CHANGE,
253 IB_EVENT_PKEY_CHANGE,
254 IB_EVENT_SM_CHANGE
255};
256
257struct ib_event {
258 struct ib_device *device;
259 union {
260 struct ib_cq *cq;
261 struct ib_qp *qp;
262 u8 port_num;
263 } element;
264 enum ib_event_type event;
265};
266
267struct ib_event_handler {
268 struct ib_device *device;
269 void (*handler)(struct ib_event_handler *, struct ib_event *);
270 struct list_head list;
271};
272
273#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler) \
274 do { \
275 (_ptr)->device = _device; \
276 (_ptr)->handler = _handler; \
277 INIT_LIST_HEAD(&(_ptr)->list); \
278 } while (0)
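A minimal sketch of how a consumer might combine this macro with ib_register_event_handler(), declared later in this header (the handler name and the message text are illustrative):

static void my_event_handler(struct ib_event_handler *handler,
			     struct ib_event *event)
{
	/* element.port_num is valid for port-related events */
	if (event->event == IB_EVENT_PORT_ACTIVE)
		printk(KERN_INFO "%s: port %d is active\n",
		       event->device->name, event->element.port_num);
}

static struct ib_event_handler my_handler;

static void my_watch_device(struct ib_device *device)
{
	INIT_IB_EVENT_HANDLER(&my_handler, device, my_event_handler);
	ib_register_event_handler(&my_handler);
}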
279
280struct ib_global_route {
281 union ib_gid dgid;
282 u32 flow_label;
283 u8 sgid_index;
284 u8 hop_limit;
285 u8 traffic_class;
286};
287
288enum {
289 IB_MULTICAST_QPN = 0xffffff
290};
291
292enum ib_ah_flags {
293 IB_AH_GRH = 1
294};
295
296struct ib_ah_attr {
297 struct ib_global_route grh;
298 u16 dlid;
299 u8 sl;
300 u8 src_path_bits;
301 u8 static_rate;
302 u8 ah_flags;
303 u8 port_num;
304};
305
306enum ib_wc_status {
307 IB_WC_SUCCESS,
308 IB_WC_LOC_LEN_ERR,
309 IB_WC_LOC_QP_OP_ERR,
310 IB_WC_LOC_EEC_OP_ERR,
311 IB_WC_LOC_PROT_ERR,
312 IB_WC_WR_FLUSH_ERR,
313 IB_WC_MW_BIND_ERR,
314 IB_WC_BAD_RESP_ERR,
315 IB_WC_LOC_ACCESS_ERR,
316 IB_WC_REM_INV_REQ_ERR,
317 IB_WC_REM_ACCESS_ERR,
318 IB_WC_REM_OP_ERR,
319 IB_WC_RETRY_EXC_ERR,
320 IB_WC_RNR_RETRY_EXC_ERR,
321 IB_WC_LOC_RDD_VIOL_ERR,
322 IB_WC_REM_INV_RD_REQ_ERR,
323 IB_WC_REM_ABORT_ERR,
324 IB_WC_INV_EECN_ERR,
325 IB_WC_INV_EEC_STATE_ERR,
326 IB_WC_FATAL_ERR,
327 IB_WC_RESP_TIMEOUT_ERR,
328 IB_WC_GENERAL_ERR
329};
330
331enum ib_wc_opcode {
332 IB_WC_SEND,
333 IB_WC_RDMA_WRITE,
334 IB_WC_RDMA_READ,
335 IB_WC_COMP_SWAP,
336 IB_WC_FETCH_ADD,
337 IB_WC_BIND_MW,
338/*
339 * Set value of IB_WC_RECV so consumers can test if a completion is a
340 * receive by testing (opcode & IB_WC_RECV).
341 */
342 IB_WC_RECV = 1 << 7,
343 IB_WC_RECV_RDMA_WITH_IMM
344};
345
346enum ib_wc_flags {
347 IB_WC_GRH = 1,
348 IB_WC_WITH_IMM = (1<<1)
349};
350
351struct ib_wc {
352 u64 wr_id;
353 enum ib_wc_status status;
354 enum ib_wc_opcode opcode;
355 u32 vendor_err;
356 u32 byte_len;
357 __be32 imm_data;
358 u32 qp_num;
359 u32 src_qp;
360 int wc_flags;
361 u16 pkey_index;
362 u16 slid;
363 u8 sl;
364 u8 dlid_path_bits;
365 u8 port_num; /* valid only for DR SMPs on switches */
366};
367
368enum ib_cq_notify {
369 IB_CQ_SOLICITED,
370 IB_CQ_NEXT_COMP
371};
372
373struct ib_qp_cap {
374 u32 max_send_wr;
375 u32 max_recv_wr;
376 u32 max_send_sge;
377 u32 max_recv_sge;
378 u32 max_inline_data;
379};
380
381enum ib_sig_type {
382 IB_SIGNAL_ALL_WR,
383 IB_SIGNAL_REQ_WR
384};
385
386enum ib_qp_type {
387 /*
388 * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
389 * here (and in that order) since the MAD layer uses them as
390 * indices into a 2-entry table.
391 */
392 IB_QPT_SMI,
393 IB_QPT_GSI,
394
395 IB_QPT_RC,
396 IB_QPT_UC,
397 IB_QPT_UD,
398 IB_QPT_RAW_IPV6,
399 IB_QPT_RAW_ETY
400};
401
402struct ib_qp_init_attr {
403 void (*event_handler)(struct ib_event *, void *);
404 void *qp_context;
405 struct ib_cq *send_cq;
406 struct ib_cq *recv_cq;
407 struct ib_srq *srq;
408 struct ib_qp_cap cap;
409 enum ib_sig_type sq_sig_type;
410 enum ib_qp_type qp_type;
411 u8 port_num; /* special QP types only */
412};
413
414enum ib_rnr_timeout {
415 IB_RNR_TIMER_655_36 = 0,
416 IB_RNR_TIMER_000_01 = 1,
417 IB_RNR_TIMER_000_02 = 2,
418 IB_RNR_TIMER_000_03 = 3,
419 IB_RNR_TIMER_000_04 = 4,
420 IB_RNR_TIMER_000_06 = 5,
421 IB_RNR_TIMER_000_08 = 6,
422 IB_RNR_TIMER_000_12 = 7,
423 IB_RNR_TIMER_000_16 = 8,
424 IB_RNR_TIMER_000_24 = 9,
425 IB_RNR_TIMER_000_32 = 10,
426 IB_RNR_TIMER_000_48 = 11,
427 IB_RNR_TIMER_000_64 = 12,
428 IB_RNR_TIMER_000_96 = 13,
429 IB_RNR_TIMER_001_28 = 14,
430 IB_RNR_TIMER_001_92 = 15,
431 IB_RNR_TIMER_002_56 = 16,
432 IB_RNR_TIMER_003_84 = 17,
433 IB_RNR_TIMER_005_12 = 18,
434 IB_RNR_TIMER_007_68 = 19,
435 IB_RNR_TIMER_010_24 = 20,
436 IB_RNR_TIMER_015_36 = 21,
437 IB_RNR_TIMER_020_48 = 22,
438 IB_RNR_TIMER_030_72 = 23,
439 IB_RNR_TIMER_040_96 = 24,
440 IB_RNR_TIMER_061_44 = 25,
441 IB_RNR_TIMER_081_92 = 26,
442 IB_RNR_TIMER_122_88 = 27,
443 IB_RNR_TIMER_163_84 = 28,
444 IB_RNR_TIMER_245_76 = 29,
445 IB_RNR_TIMER_327_68 = 30,
446 IB_RNR_TIMER_491_52 = 31
447};
448
449enum ib_qp_attr_mask {
450 IB_QP_STATE = 1,
451 IB_QP_CUR_STATE = (1<<1),
452 IB_QP_EN_SQD_ASYNC_NOTIFY = (1<<2),
453 IB_QP_ACCESS_FLAGS = (1<<3),
454 IB_QP_PKEY_INDEX = (1<<4),
455 IB_QP_PORT = (1<<5),
456 IB_QP_QKEY = (1<<6),
457 IB_QP_AV = (1<<7),
458 IB_QP_PATH_MTU = (1<<8),
459 IB_QP_TIMEOUT = (1<<9),
460 IB_QP_RETRY_CNT = (1<<10),
461 IB_QP_RNR_RETRY = (1<<11),
462 IB_QP_RQ_PSN = (1<<12),
463 IB_QP_MAX_QP_RD_ATOMIC = (1<<13),
464 IB_QP_ALT_PATH = (1<<14),
465 IB_QP_MIN_RNR_TIMER = (1<<15),
466 IB_QP_SQ_PSN = (1<<16),
467 IB_QP_MAX_DEST_RD_ATOMIC = (1<<17),
468 IB_QP_PATH_MIG_STATE = (1<<18),
469 IB_QP_CAP = (1<<19),
470 IB_QP_DEST_QPN = (1<<20)
471};
472
473enum ib_qp_state {
474 IB_QPS_RESET,
475 IB_QPS_INIT,
476 IB_QPS_RTR,
477 IB_QPS_RTS,
478 IB_QPS_SQD,
479 IB_QPS_SQE,
480 IB_QPS_ERR
481};
482
483enum ib_mig_state {
484 IB_MIG_MIGRATED,
485 IB_MIG_REARM,
486 IB_MIG_ARMED
487};
488
489struct ib_qp_attr {
490 enum ib_qp_state qp_state;
491 enum ib_qp_state cur_qp_state;
492 enum ib_mtu path_mtu;
493 enum ib_mig_state path_mig_state;
494 u32 qkey;
495 u32 rq_psn;
496 u32 sq_psn;
497 u32 dest_qp_num;
498 int qp_access_flags;
499 struct ib_qp_cap cap;
500 struct ib_ah_attr ah_attr;
501 struct ib_ah_attr alt_ah_attr;
502 u16 pkey_index;
503 u16 alt_pkey_index;
504 u8 en_sqd_async_notify;
505 u8 sq_draining;
506 u8 max_rd_atomic;
507 u8 max_dest_rd_atomic;
508 u8 min_rnr_timer;
509 u8 port_num;
510 u8 timeout;
511 u8 retry_cnt;
512 u8 rnr_retry;
513 u8 alt_port_num;
514 u8 alt_timeout;
515};
516
517enum ib_wr_opcode {
518 IB_WR_RDMA_WRITE,
519 IB_WR_RDMA_WRITE_WITH_IMM,
520 IB_WR_SEND,
521 IB_WR_SEND_WITH_IMM,
522 IB_WR_RDMA_READ,
523 IB_WR_ATOMIC_CMP_AND_SWP,
524 IB_WR_ATOMIC_FETCH_AND_ADD
525};
526
527enum ib_send_flags {
528 IB_SEND_FENCE = 1,
529 IB_SEND_SIGNALED = (1<<1),
530 IB_SEND_SOLICITED = (1<<2),
531 IB_SEND_INLINE = (1<<3)
532};
533
534struct ib_sge {
535 u64 addr;
536 u32 length;
537 u32 lkey;
538};
539
540struct ib_send_wr {
541 struct ib_send_wr *next;
542 u64 wr_id;
543 struct ib_sge *sg_list;
544 int num_sge;
545 enum ib_wr_opcode opcode;
546 int send_flags;
547 u32 imm_data;
548 union {
549 struct {
550 u64 remote_addr;
551 u32 rkey;
552 } rdma;
553 struct {
554 u64 remote_addr;
555 u64 compare_add;
556 u64 swap;
557 u32 rkey;
558 } atomic;
559 struct {
560 struct ib_ah *ah;
561 struct ib_mad_hdr *mad_hdr;
562 u32 remote_qpn;
563 u32 remote_qkey;
564 int timeout_ms; /* valid for MADs only */
565 u16 pkey_index; /* valid for GSI only */
566 u8 port_num; /* valid for DR SMPs on switch only */
567 } ud;
568 } wr;
569};
570
571struct ib_recv_wr {
572 struct ib_recv_wr *next;
573 u64 wr_id;
574 struct ib_sge *sg_list;
575 int num_sge;
576};
577
578enum ib_access_flags {
579 IB_ACCESS_LOCAL_WRITE = 1,
580 IB_ACCESS_REMOTE_WRITE = (1<<1),
581 IB_ACCESS_REMOTE_READ = (1<<2),
582 IB_ACCESS_REMOTE_ATOMIC = (1<<3),
583 IB_ACCESS_MW_BIND = (1<<4)
584};
585
586struct ib_phys_buf {
587 u64 addr;
588 u64 size;
589};
590
591struct ib_mr_attr {
592 struct ib_pd *pd;
593 u64 device_virt_addr;
594 u64 size;
595 int mr_access_flags;
596 u32 lkey;
597 u32 rkey;
598};
599
600enum ib_mr_rereg_flags {
601 IB_MR_REREG_TRANS = 1,
602 IB_MR_REREG_PD = (1<<1),
603 IB_MR_REREG_ACCESS = (1<<2)
604};
605
606struct ib_mw_bind {
607 struct ib_mr *mr;
608 u64 wr_id;
609 u64 addr;
610 u32 length;
611 int send_flags;
612 int mw_access_flags;
613};
614
615struct ib_fmr_attr {
616 int max_pages;
617 int max_maps;
618 u8 page_size;
619};
620
621struct ib_pd {
622 struct ib_device *device;
623 atomic_t usecnt; /* count all resources */
624};
625
626struct ib_ah {
627 struct ib_device *device;
628 struct ib_pd *pd;
629};
630
631typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
632
633struct ib_cq {
634 struct ib_device *device;
635 ib_comp_handler comp_handler;
636 void (*event_handler)(struct ib_event *, void *);
637 void * cq_context;
638 int cqe;
639 atomic_t usecnt; /* count number of work queues */
640};
641
642struct ib_srq {
643 struct ib_device *device;
644 struct ib_pd *pd;
645 void *srq_context;
646 atomic_t usecnt;
647};
648
649struct ib_qp {
650 struct ib_device *device;
651 struct ib_pd *pd;
652 struct ib_cq *send_cq;
653 struct ib_cq *recv_cq;
654 struct ib_srq *srq;
655 void (*event_handler)(struct ib_event *, void *);
656 void *qp_context;
657 u32 qp_num;
658 enum ib_qp_type qp_type;
659};
660
661struct ib_mr {
662 struct ib_device *device;
663 struct ib_pd *pd;
664 u32 lkey;
665 u32 rkey;
666 atomic_t usecnt; /* count number of MWs */
667};
668
669struct ib_mw {
670 struct ib_device *device;
671 struct ib_pd *pd;
672 u32 rkey;
673};
674
675struct ib_fmr {
676 struct ib_device *device;
677 struct ib_pd *pd;
678 struct list_head list;
679 u32 lkey;
680 u32 rkey;
681};
682
683struct ib_mad;
684struct ib_grh;
685
686enum ib_process_mad_flags {
687 IB_MAD_IGNORE_MKEY = 1,
688 IB_MAD_IGNORE_BKEY = 2,
689 IB_MAD_IGNORE_ALL = IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY
690};
691
692enum ib_mad_result {
693 IB_MAD_RESULT_FAILURE = 0, /* (!SUCCESS is the important flag) */
694 IB_MAD_RESULT_SUCCESS = 1 << 0, /* MAD was successfully processed */
695 IB_MAD_RESULT_REPLY = 1 << 1, /* Reply packet needs to be sent */
696 IB_MAD_RESULT_CONSUMED = 1 << 2 /* Packet consumed: stop processing */
697};
698
699#define IB_DEVICE_NAME_MAX 64
700
701struct ib_cache {
702 rwlock_t lock;
703 struct ib_event_handler event_handler;
704 struct ib_pkey_cache **pkey_cache;
705 struct ib_gid_cache **gid_cache;
706};
707
708struct ib_device {
709 struct device *dma_device;
710
711 char name[IB_DEVICE_NAME_MAX];
712
713 struct list_head event_handler_list;
714 spinlock_t event_handler_lock;
715
716 struct list_head core_list;
717 struct list_head client_data_list;
718 spinlock_t client_data_lock;
719
720 struct ib_cache cache;
721
722 u32 flags;
723
724 int (*query_device)(struct ib_device *device,
725 struct ib_device_attr *device_attr);
726 int (*query_port)(struct ib_device *device,
727 u8 port_num,
728 struct ib_port_attr *port_attr);
729 int (*query_gid)(struct ib_device *device,
730 u8 port_num, int index,
731 union ib_gid *gid);
732 int (*query_pkey)(struct ib_device *device,
733 u8 port_num, u16 index, u16 *pkey);
734 int (*modify_device)(struct ib_device *device,
735 int device_modify_mask,
736 struct ib_device_modify *device_modify);
737 int (*modify_port)(struct ib_device *device,
738 u8 port_num, int port_modify_mask,
739 struct ib_port_modify *port_modify);
740 struct ib_pd * (*alloc_pd)(struct ib_device *device);
741 int (*dealloc_pd)(struct ib_pd *pd);
742 struct ib_ah * (*create_ah)(struct ib_pd *pd,
743 struct ib_ah_attr *ah_attr);
744 int (*modify_ah)(struct ib_ah *ah,
745 struct ib_ah_attr *ah_attr);
746 int (*query_ah)(struct ib_ah *ah,
747 struct ib_ah_attr *ah_attr);
748 int (*destroy_ah)(struct ib_ah *ah);
749 struct ib_qp * (*create_qp)(struct ib_pd *pd,
750 struct ib_qp_init_attr *qp_init_attr);
751 int (*modify_qp)(struct ib_qp *qp,
752 struct ib_qp_attr *qp_attr,
753 int qp_attr_mask);
754 int (*query_qp)(struct ib_qp *qp,
755 struct ib_qp_attr *qp_attr,
756 int qp_attr_mask,
757 struct ib_qp_init_attr *qp_init_attr);
758 int (*destroy_qp)(struct ib_qp *qp);
759 int (*post_send)(struct ib_qp *qp,
760 struct ib_send_wr *send_wr,
761 struct ib_send_wr **bad_send_wr);
762 int (*post_recv)(struct ib_qp *qp,
763 struct ib_recv_wr *recv_wr,
764 struct ib_recv_wr **bad_recv_wr);
765 struct ib_cq * (*create_cq)(struct ib_device *device,
766 int cqe);
767 int (*destroy_cq)(struct ib_cq *cq);
768 int (*resize_cq)(struct ib_cq *cq, int *cqe);
769 int (*poll_cq)(struct ib_cq *cq, int num_entries,
770 struct ib_wc *wc);
771 int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
772 int (*req_notify_cq)(struct ib_cq *cq,
773 enum ib_cq_notify cq_notify);
774 int (*req_ncomp_notif)(struct ib_cq *cq,
775 int wc_cnt);
776 struct ib_mr * (*get_dma_mr)(struct ib_pd *pd,
777 int mr_access_flags);
778 struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd,
779 struct ib_phys_buf *phys_buf_array,
780 int num_phys_buf,
781 int mr_access_flags,
782 u64 *iova_start);
783 int (*query_mr)(struct ib_mr *mr,
784 struct ib_mr_attr *mr_attr);
785 int (*dereg_mr)(struct ib_mr *mr);
786 int (*rereg_phys_mr)(struct ib_mr *mr,
787 int mr_rereg_mask,
788 struct ib_pd *pd,
789 struct ib_phys_buf *phys_buf_array,
790 int num_phys_buf,
791 int mr_access_flags,
792 u64 *iova_start);
793 struct ib_mw * (*alloc_mw)(struct ib_pd *pd);
794 int (*bind_mw)(struct ib_qp *qp,
795 struct ib_mw *mw,
796 struct ib_mw_bind *mw_bind);
797 int (*dealloc_mw)(struct ib_mw *mw);
798 struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd,
799 int mr_access_flags,
800 struct ib_fmr_attr *fmr_attr);
801 int (*map_phys_fmr)(struct ib_fmr *fmr,
802 u64 *page_list, int list_len,
803 u64 iova);
804 int (*unmap_fmr)(struct list_head *fmr_list);
805 int (*dealloc_fmr)(struct ib_fmr *fmr);
806 int (*attach_mcast)(struct ib_qp *qp,
807 union ib_gid *gid,
808 u16 lid);
809 int (*detach_mcast)(struct ib_qp *qp,
810 union ib_gid *gid,
811 u16 lid);
812 int (*process_mad)(struct ib_device *device,
813 int process_mad_flags,
814 u8 port_num,
815 struct ib_wc *in_wc,
816 struct ib_grh *in_grh,
817 struct ib_mad *in_mad,
818 struct ib_mad *out_mad);
819
820 struct class_device class_dev;
821 struct kobject ports_parent;
822 struct list_head port_list;
823
824 enum {
825 IB_DEV_UNINITIALIZED,
826 IB_DEV_REGISTERED,
827 IB_DEV_UNREGISTERED
828 } reg_state;
829
830 u8 node_type;
831 u8 phys_port_cnt;
832};
833
834struct ib_client {
835 char *name;
836 void (*add) (struct ib_device *);
837 void (*remove)(struct ib_device *);
838
839 struct list_head list;
840};
841
842struct ib_device *ib_alloc_device(size_t size);
843void ib_dealloc_device(struct ib_device *device);
844
845int ib_register_device (struct ib_device *device);
846void ib_unregister_device(struct ib_device *device);
847
848int ib_register_client (struct ib_client *client);
849void ib_unregister_client(struct ib_client *client);
850
851void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
852void ib_set_client_data(struct ib_device *device, struct ib_client *client,
853 void *data);
854
855int ib_register_event_handler (struct ib_event_handler *event_handler);
856int ib_unregister_event_handler(struct ib_event_handler *event_handler);
857void ib_dispatch_event(struct ib_event *event);
858
859int ib_query_device(struct ib_device *device,
860 struct ib_device_attr *device_attr);
861
862int ib_query_port(struct ib_device *device,
863 u8 port_num, struct ib_port_attr *port_attr);
864
865int ib_query_gid(struct ib_device *device,
866 u8 port_num, int index, union ib_gid *gid);
867
868int ib_query_pkey(struct ib_device *device,
869 u8 port_num, u16 index, u16 *pkey);
870
871int ib_modify_device(struct ib_device *device,
872 int device_modify_mask,
873 struct ib_device_modify *device_modify);
874
875int ib_modify_port(struct ib_device *device,
876 u8 port_num, int port_modify_mask,
877 struct ib_port_modify *port_modify);
878
879/**
880 * ib_alloc_pd - Allocates an unused protection domain.
881 * @device: The device on which to allocate the protection domain.
882 *
883 * A protection domain object provides an association between QPs, shared
884 * receive queues, address handles, memory regions, and memory windows.
885 */
886struct ib_pd *ib_alloc_pd(struct ib_device *device);
887
888/**
889 * ib_dealloc_pd - Deallocates a protection domain.
890 * @pd: The protection domain to deallocate.
891 */
892int ib_dealloc_pd(struct ib_pd *pd);
893
894/**
895 * ib_create_ah - Creates an address handle for the given address vector.
896 * @pd: The protection domain associated with the address handle.
897 * @ah_attr: The attributes of the address vector.
898 *
899 * The address handle is used to reference a local or global destination
900 * in all UD QP post sends.
901 */
902struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
903
904/**
905 * ib_modify_ah - Modifies the address vector associated with an address
906 * handle.
907 * @ah: The address handle to modify.
908 * @ah_attr: The new address vector attributes to associate with the
909 * address handle.
910 */
911int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
912
913/**
914 * ib_query_ah - Queries the address vector associated with an address
915 * handle.
916 * @ah: The address handle to query.
917 * @ah_attr: The address vector attributes associated with the address
918 * handle.
919 */
920int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
921
922/**
923 * ib_destroy_ah - Destroys an address handle.
924 * @ah: The address handle to destroy.
925 */
926int ib_destroy_ah(struct ib_ah *ah);
927
928/**
929 * ib_create_qp - Creates a QP associated with the specified protection
930 * domain.
931 * @pd: The protection domain associated with the QP.
932 * @qp_init_attr: A list of initial attributes required to create the QP.
933 */
934struct ib_qp *ib_create_qp(struct ib_pd *pd,
935 struct ib_qp_init_attr *qp_init_attr);
936
937/**
938 * ib_modify_qp - Modifies the attributes for the specified QP and then
939 * transitions the QP to the given state.
940 * @qp: The QP to modify.
941 * @qp_attr: On input, specifies the QP attributes to modify. On output,
942 * the current values of selected QP attributes are returned.
943 * @qp_attr_mask: A bit-mask used to specify which attributes of the QP
944 * are being modified.
945 */
946int ib_modify_qp(struct ib_qp *qp,
947 struct ib_qp_attr *qp_attr,
948 int qp_attr_mask);
949
950/**
951 * ib_query_qp - Returns the attribute list and current values for the
952 * specified QP.
953 * @qp: The QP to query.
954 * @qp_attr: The attributes of the specified QP.
955 * @qp_attr_mask: A bit-mask used to select specific attributes to query.
956 * @qp_init_attr: Additional attributes of the selected QP.
957 *
958 * The qp_attr_mask may be used to limit the query to gathering only the
959 * selected attributes.
960 */
961int ib_query_qp(struct ib_qp *qp,
962 struct ib_qp_attr *qp_attr,
963 int qp_attr_mask,
964 struct ib_qp_init_attr *qp_init_attr);
965
966/**
967 * ib_destroy_qp - Destroys the specified QP.
968 * @qp: The QP to destroy.
969 */
970int ib_destroy_qp(struct ib_qp *qp);
971
972/**
973 * ib_post_send - Posts a list of work requests to the send queue of
974 * the specified QP.
975 * @qp: The QP to post the work request on.
976 * @send_wr: A list of work requests to post on the send queue.
977 * @bad_send_wr: On an immediate failure, this parameter will reference
978 * the work request that failed to be posted on the QP.
979 */
980static inline int ib_post_send(struct ib_qp *qp,
981 struct ib_send_wr *send_wr,
982 struct ib_send_wr **bad_send_wr)
983{
984 return qp->device->post_send(qp, send_wr, bad_send_wr);
985}
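For example, a consumer might post a signaled RDMA write through this wrapper as follows (the scatter/gather entry and the remote address/rkey are assumed to come from earlier memory registration and connection setup; names are illustrative):

static int post_rdma_write(struct ib_qp *qp, struct ib_sge *sge,
			   u64 remote_addr, u32 rkey)
{
	struct ib_send_wr *bad_wr;
	struct ib_send_wr wr = {
		.wr_id      = 1,		/* returned in the completion */
		.sg_list    = sge,
		.num_sge    = 1,
		.opcode     = IB_WR_RDMA_WRITE,
		.send_flags = IB_SEND_SIGNALED,
	};

	wr.wr.rdma.remote_addr = remote_addr;
	wr.wr.rdma.rkey        = rkey;

	return ib_post_send(qp, &wr, &bad_wr);
}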
986
987/**
988 * ib_post_recv - Posts a list of work requests to the receive queue of
989 * the specified QP.
990 * @qp: The QP to post the work request on.
991 * @recv_wr: A list of work requests to post on the receive queue.
992 * @bad_recv_wr: On an immediate failure, this parameter will reference
993 * the work request that failed to be posted on the QP.
994 */
995static inline int ib_post_recv(struct ib_qp *qp,
996 struct ib_recv_wr *recv_wr,
997 struct ib_recv_wr **bad_recv_wr)
998{
999 return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
1000}
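Similarly, posting a single receive buffer might look like this (the DMA address and lkey are assumed to come from a previously registered memory region; names are illustrative):

static int post_one_recv(struct ib_qp *qp, u64 dma_addr, u32 length, u32 lkey)
{
	struct ib_recv_wr *bad_wr;
	struct ib_sge sge = {
		.addr   = dma_addr,
		.length = length,
		.lkey   = lkey,
	};
	struct ib_recv_wr wr = {
		.wr_id   = dma_addr,	/* cookie returned in the completion */
		.sg_list = &sge,
		.num_sge = 1,
	};

	return ib_post_recv(qp, &wr, &bad_wr);
}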
1001
1002/**
1003 * ib_create_cq - Creates a CQ on the specified device.
1004 * @device: The device on which to create the CQ.
1005 * @comp_handler: A user-specified callback that is invoked when a
1006 * completion event occurs on the CQ.
1007 * @event_handler: A user-specified callback that is invoked when an
1008 * asynchronous event not associated with a completion occurs on the CQ.
1009 * @cq_context: Context associated with the CQ returned to the user via
1010 * the associated completion and event handlers.
1011 * @cqe: The minimum size of the CQ.
1012 *
1013 * Users can examine the cq structure to determine the actual CQ size.
1014 */
1015struct ib_cq *ib_create_cq(struct ib_device *device,
1016 ib_comp_handler comp_handler,
1017 void (*event_handler)(struct ib_event *, void *),
1018 void *cq_context, int cqe);
1019
1020/**
1021 * ib_resize_cq - Modifies the capacity of the CQ.
1022 * @cq: The CQ to resize.
1023 * @cqe: The minimum size of the CQ.
1024 *
1025 * Users can examine the cq structure to determine the actual CQ size.
1026 */
1027int ib_resize_cq(struct ib_cq *cq, int cqe);
1028
1029/**
1030 * ib_destroy_cq - Destroys the specified CQ.
1031 * @cq: The CQ to destroy.
1032 */
1033int ib_destroy_cq(struct ib_cq *cq);
1034
1035/**
1036 * ib_poll_cq - poll a CQ for completion(s)
1037 * @cq:the CQ being polled
1038 * @num_entries:maximum number of completions to return
1039 * @wc:array of at least @num_entries &struct ib_wc where completions
1040 * will be returned
1041 *
1042 * Poll a CQ for (possibly multiple) completions. If the return value
1043 * is < 0, an error occurred. If the return value is >= 0, it is the
1044 * number of completions returned. If the return value is
1045 * non-negative and < num_entries, then the CQ was emptied.
1046 */
1047static inline int ib_poll_cq(struct ib_cq *cq, int num_entries,
1048 struct ib_wc *wc)
1049{
1050 return cq->device->poll_cq(cq, num_entries, wc);
1051}
1052
1053/**
1054 * ib_peek_cq - Returns the number of unreaped completions currently
1055 * on the specified CQ.
1056 * @cq: The CQ to peek.
1057 * @wc_cnt: A minimum number of unreaped completions to check for.
1058 *
1059 * If the number of unreaped completions is greater than or equal to wc_cnt,
1060 * this function returns wc_cnt; otherwise it returns the actual number of
1061 * unreaped completions.
1062 */
1063int ib_peek_cq(struct ib_cq *cq, int wc_cnt);
1064
1065/**
1066 * ib_req_notify_cq - Request completion notification on a CQ.
1067 * @cq: The CQ to generate an event for.
1068 * @cq_notify: If set to %IB_CQ_SOLICITED, completion notification will
1069 * occur on the next solicited event. If set to %IB_CQ_NEXT_COMP,
1070 * notification will occur on the next completion.
1071 */
1072static inline int ib_req_notify_cq(struct ib_cq *cq,
1073 enum ib_cq_notify cq_notify)
1074{
1075 return cq->device->req_notify_cq(cq, cq_notify);
1076}
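A common consumption pattern built on ib_req_notify_cq() and ib_poll_cq() is sketched below as a completion handler suitable for ib_create_cq() (simplified; real consumers typically re-poll after re-arming to close the race with completions that arrive in between):

static void my_completion_handler(struct ib_cq *cq, void *cq_context)
{
	struct ib_wc wc;

	/* ask for another event, then drain whatever is already queued */
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);

	while (ib_poll_cq(cq, 1, &wc) > 0) {
		if (wc.status != IB_WC_SUCCESS) {
			printk(KERN_ERR "wr_id 0x%llx failed with status %d\n",
			       (unsigned long long) wc.wr_id, wc.status);
			continue;
		}
		/* dispatch on wc.opcode, e.g. IB_WC_SEND vs. IB_WC_RECV */
	}
}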
1077
1078/**
1079 * ib_req_ncomp_notif - Request completion notification when there are
1080 * at least the specified number of unreaped completions on the CQ.
1081 * @cq: The CQ to generate an event for.
1082 * @wc_cnt: The number of unreaped completions that should be on the
1083 * CQ before an event is generated.
1084 */
1085static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
1086{
1087 return cq->device->req_ncomp_notif ?
1088 cq->device->req_ncomp_notif(cq, wc_cnt) :
1089 -ENOSYS;
1090}
1091
1092/**
1093 * ib_get_dma_mr - Returns a memory region for system memory that is
1094 * usable for DMA.
1095 * @pd: The protection domain associated with the memory region.
1096 * @mr_access_flags: Specifies the memory access rights.
1097 */
1098struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
1099
1100/**
1101 * ib_reg_phys_mr - Prepares a virtually addressed memory region for use
1102 * by an HCA.
1103 * @pd: The protection domain assigned to the registered region.
1104 * @phys_buf_array: Specifies a list of physical buffers to use in the
1105 * memory region.
1106 * @num_phys_buf: Specifies the size of the phys_buf_array.
1107 * @mr_access_flags: Specifies the memory access rights.
1108 * @iova_start: The offset of the region's starting I/O virtual address.
1109 */
1110struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
1111 struct ib_phys_buf *phys_buf_array,
1112 int num_phys_buf,
1113 int mr_access_flags,
1114 u64 *iova_start);
1115
1116/**
1117 * ib_rereg_phys_mr - Modifies the attributes of an existing memory region.
1118 * Conceptually, this call deregisters the memory region and then
1119 * registers a physical memory region in its place. Where possible,
1120 * resources are reused instead of deallocated and reallocated.
1121 * @mr: The memory region to modify.
1122 * @mr_rereg_mask: A bit-mask used to indicate which of the following
1123 * properties of the memory region are being modified.
1124 * @pd: If %IB_MR_REREG_PD is set in mr_rereg_mask, this field specifies
1125 * the new protection domain to associated with the memory region,
1126 * otherwise, this parameter is ignored.
1127 * @phys_buf_array: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this
1128 * field specifies a list of physical buffers to use in the new
1129 * translation, otherwise, this parameter is ignored.
1130 * @num_phys_buf: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this
1131 * field specifies the size of the phys_buf_array, otherwise, this
1132 * parameter is ignored.
1133 * @mr_access_flags: If %IB_MR_REREG_ACCESS is set in mr_rereg_mask, this
1134 * field specifies the new memory access rights, otherwise, this
1135 * parameter is ignored.
1136 * @iova_start: The offset of the region's starting I/O virtual address.
1137 */
1138int ib_rereg_phys_mr(struct ib_mr *mr,
1139 int mr_rereg_mask,
1140 struct ib_pd *pd,
1141 struct ib_phys_buf *phys_buf_array,
1142 int num_phys_buf,
1143 int mr_access_flags,
1144 u64 *iova_start);
1145
1146/**
1147 * ib_query_mr - Retrieves information about a specific memory region.
1148 * @mr: The memory region to retrieve information about.
1149 * @mr_attr: The attributes of the specified memory region.
1150 */
1151int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
1152
1153/**
1154 * ib_dereg_mr - Deregisters a memory region and removes it from the
1155 * HCA translation table.
1156 * @mr: The memory region to deregister.
1157 */
1158int ib_dereg_mr(struct ib_mr *mr);
1159
1160/**
1161 * ib_alloc_mw - Allocates a memory window.
1162 * @pd: The protection domain associated with the memory window.
1163 */
1164struct ib_mw *ib_alloc_mw(struct ib_pd *pd);
1165
1166/**
1167 * ib_bind_mw - Posts a work request to the send queue of the specified
1168 * QP, which binds the memory window to the given address range and
1169 * remote access attributes.
1170 * @qp: QP to post the bind work request on.
1171 * @mw: The memory window to bind.
1172 * @mw_bind: Specifies information about the memory window, including
1173 * its address range, remote access rights, and associated memory region.
1174 */
1175static inline int ib_bind_mw(struct ib_qp *qp,
1176 struct ib_mw *mw,
1177 struct ib_mw_bind *mw_bind)
1178{
1179 /* XXX reference counting in corresponding MR? */
1180 return mw->device->bind_mw ?
1181 mw->device->bind_mw(qp, mw, mw_bind) :
1182 -ENOSYS;
1183}
1184
1185/**
1186 * ib_dealloc_mw - Deallocates a memory window.
1187 * @mw: The memory window to deallocate.
1188 */
1189int ib_dealloc_mw(struct ib_mw *mw);
1190
1191/**
1192 * ib_alloc_fmr - Allocates an unmapped fast memory region.
1193 * @pd: The protection domain associated with the unmapped region.
1194 * @mr_access_flags: Specifies the memory access rights.
1195 * @fmr_attr: Attributes of the unmapped region.
1196 *
1197 * A fast memory region must be mapped before it can be used as part of
1198 * a work request.
1199 */
1200struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
1201 int mr_access_flags,
1202 struct ib_fmr_attr *fmr_attr);
1203
1204/**
1205 * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region.
1206 * @fmr: The fast memory region to associate with the pages.
1207 * @page_list: An array of physical pages to map to the fast memory region.
1208 * @list_len: The number of pages in page_list.
1209 * @iova: The I/O virtual address to use with the mapped region.
1210 */
1211static inline int ib_map_phys_fmr(struct ib_fmr *fmr,
1212 u64 *page_list, int list_len,
1213 u64 iova)
1214{
1215 return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova);
1216}
1217
1218/**
1219 * ib_unmap_fmr - Removes the mapping from a list of fast memory regions.
1220 * @fmr_list: A linked list of fast memory regions to unmap.
1221 */
1222int ib_unmap_fmr(struct list_head *fmr_list);
1223
1224/**
1225 * ib_dealloc_fmr - Deallocates a fast memory region.
1226 * @fmr: The fast memory region to deallocate.
1227 */
1228int ib_dealloc_fmr(struct ib_fmr *fmr);
1229
1230/**
1231 * ib_attach_mcast - Attaches the specified QP to a multicast group.
1232 * @qp: QP to attach to the multicast group. The QP must be type
1233 * IB_QPT_UD.
1234 * @gid: Multicast group GID.
1235 * @lid: Multicast group LID in host byte order.
1236 *
1237 * In order to send and receive multicast packets, subnet
1238 * administration must have created the multicast group and configured
1239 * the fabric appropriately. The port associated with the specified
1240 * QP must also be a member of the multicast group.
1241 */
1242int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
1243
1244/**
1245 * ib_detach_mcast - Detaches the specified QP from a multicast group.
1246 * @qp: QP to detach from the multicast group.
1247 * @gid: Multicast group GID.
1248 * @lid: Multicast group LID in host byte order.
1249 */
1250int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
1251
1252#endif /* IB_VERBS_H */
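
The fast memory region (FMR) interface above follows a strict allocate/map/use/unmap/deallocate lifecycle: ib_alloc_fmr() returns a region that cannot yet be referenced, ib_map_phys_fmr() attaches a physical page list and I/O virtual address before the region may appear in a work request, and ib_unmap_fmr() takes a list head so several regions can be unmapped in one call. The sketch below is illustrative only and is not part of this patch; it assumes the caller already owns a protection domain, a page list, and a filled-in struct ib_fmr_attr, and it relies on the list member of struct ib_fmr that ib_unmap_fmr() consumes.

#include <linux/err.h>
#include <linux/list.h>
#include <ib_verbs.h>

/* Illustrative consumer of the FMR verbs declared above. */
static int example_fmr_cycle(struct ib_pd *pd, struct ib_fmr_attr *attr,
			     u64 *pages, int npages, u64 iova)
{
	struct ib_fmr *fmr;
	LIST_HEAD(unmap_list);
	int ret;

	fmr = ib_alloc_fmr(pd, IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE,
			   attr);
	if (IS_ERR(fmr))
		return PTR_ERR(fmr);

	/* A fast memory region must be mapped before it can be used. */
	ret = ib_map_phys_fmr(fmr, pages, npages, iova);
	if (ret)
		goto out;

	/* ... post work requests that reference fmr->lkey / fmr->rkey ... */

	/* Unmapping is batched: queue the FMR on a list, then unmap the list. */
	list_add_tail(&fmr->list, &unmap_list);
	ib_unmap_fmr(&unmap_list);

out:
	ib_dealloc_fmr(fmr);	/* return value ignored in this sketch */
	return ret;
}
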
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
new file mode 100644
index 000000000000..8d2e04cac68e
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -0,0 +1,33 @@
1config INFINIBAND_IPOIB
2 tristate "IP-over-InfiniBand"
3 depends on INFINIBAND && NETDEVICES && INET
4 ---help---
5 Support for the IP-over-InfiniBand protocol (IPoIB). This
6 transports IP packets over InfiniBand so you can use your IB
7 device as a fancy NIC.
8
9 The IPoIB protocol is defined by the IETF ipoib working
10 group: <http://www.ietf.org/html.charters/ipoib-charter.html>.
11
12config INFINIBAND_IPOIB_DEBUG
13 bool "IP-over-InfiniBand debugging"
14 depends on INFINIBAND_IPOIB
15 ---help---
16 This option causes debugging code to be compiled into the
17 IPoIB driver. The output can be turned on via the
18 debug_level and mcast_debug_level module parameters (which
19 can also be set after the driver is loaded through sysfs).
20
21	  This option also creates an "ipoib_debugfs" filesystem, which can be
22 mounted to expose debugging information about IB multicast
23 groups used by the IPoIB driver.
24
25config INFINIBAND_IPOIB_DEBUG_DATA
26 bool "IP-over-InfiniBand data path debugging"
27 depends on INFINIBAND_IPOIB_DEBUG
28 ---help---
29	  This option compiles debugging code into the data path
30 of the IPoIB driver. The output can be turned on via the
31 data_debug_level module parameter; however, even with output
32 turned off, this debugging code will have some performance
33 impact.
diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile
new file mode 100644
index 000000000000..394bc08abc6f
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/Makefile
@@ -0,0 +1,11 @@
1EXTRA_CFLAGS += -Idrivers/infiniband/include
2
3obj-$(CONFIG_INFINIBAND_IPOIB) += ib_ipoib.o
4
5ib_ipoib-y := ipoib_main.o \
6 ipoib_ib.o \
7 ipoib_multicast.o \
8 ipoib_verbs.o \
9 ipoib_vlan.o
10ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o
11
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
new file mode 100644
index 000000000000..04c98f54e9c4
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -0,0 +1,353 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ipoib.h 1358 2004-12-17 22:00:11Z roland $
33 */
34
35#ifndef _IPOIB_H
36#define _IPOIB_H
37
38#include <linux/list.h>
39#include <linux/skbuff.h>
40#include <linux/netdevice.h>
41#include <linux/workqueue.h>
42#include <linux/pci.h>
43#include <linux/config.h>
44#include <linux/kref.h>
45#include <linux/if_infiniband.h>
46
47#include <net/neighbour.h>
48
49#include <asm/atomic.h>
50#include <asm/semaphore.h>
51
52#include <ib_verbs.h>
53#include <ib_pack.h>
54#include <ib_sa.h>
55
56/* constants */
57
58enum {
59 IPOIB_PACKET_SIZE = 2048,
60 IPOIB_BUF_SIZE = IPOIB_PACKET_SIZE + IB_GRH_BYTES,
61
62 IPOIB_ENCAP_LEN = 4,
63
64 IPOIB_RX_RING_SIZE = 128,
65 IPOIB_TX_RING_SIZE = 64,
66
67 IPOIB_NUM_WC = 4,
68
69 IPOIB_MAX_PATH_REC_QUEUE = 3,
70 IPOIB_MAX_MCAST_QUEUE = 3,
71
72 IPOIB_FLAG_OPER_UP = 0,
73 IPOIB_FLAG_ADMIN_UP = 1,
74 IPOIB_PKEY_ASSIGNED = 2,
75 IPOIB_PKEY_STOP = 3,
76 IPOIB_FLAG_SUBINTERFACE = 4,
77 IPOIB_MCAST_RUN = 5,
78 IPOIB_STOP_REAPER = 6,
79
80 IPOIB_MAX_BACKOFF_SECONDS = 16,
81
82 IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */
83 IPOIB_MCAST_FLAG_SENDONLY = 1,
84 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
85 IPOIB_MCAST_FLAG_ATTACHED = 3,
86};
87
88/* structs */
89
90struct ipoib_header {
91 u16 proto;
92 u16 reserved;
93};
94
95struct ipoib_pseudoheader {
96 u8 hwaddr[INFINIBAND_ALEN];
97};
98
99struct ipoib_mcast;
100
101struct ipoib_buf {
102 struct sk_buff *skb;
103 DECLARE_PCI_UNMAP_ADDR(mapping)
104};
105
106/*
107 * Device private locking: tx_lock protects members used in TX fast
108 * path (and we use LLTX so upper layers don't do extra locking).
109 * lock protects everything else. lock nests inside of tx_lock (i.e.,
110 * tx_lock must be acquired first when both are needed).
111 */
112struct ipoib_dev_priv {
113 spinlock_t lock;
114
115 struct net_device *dev;
116
117 unsigned long flags;
118
119 struct semaphore mcast_mutex;
120 struct semaphore vlan_mutex;
121
122 struct rb_root path_tree;
123 struct list_head path_list;
124
125 struct ipoib_mcast *broadcast;
126 struct list_head multicast_list;
127 struct rb_root multicast_tree;
128
129 struct work_struct pkey_task;
130 struct work_struct mcast_task;
131 struct work_struct flush_task;
132 struct work_struct restart_task;
133 struct work_struct ah_reap_task;
134
135 struct ib_device *ca;
136 u8 port;
137 u16 pkey;
138 struct ib_pd *pd;
139 struct ib_mr *mr;
140 struct ib_cq *cq;
141 struct ib_qp *qp;
142 u32 qkey;
143
144 union ib_gid local_gid;
145 u16 local_lid;
146 u8 local_rate;
147
148 unsigned int admin_mtu;
149 unsigned int mcast_mtu;
150
151 struct ipoib_buf *rx_ring;
152
153 spinlock_t tx_lock;
154 struct ipoib_buf *tx_ring;
155 unsigned tx_head;
156 unsigned tx_tail;
157 struct ib_sge tx_sge;
158 struct ib_send_wr tx_wr;
159
160 struct ib_wc ibwc[IPOIB_NUM_WC];
161
162 struct list_head dead_ahs;
163
164 struct ib_event_handler event_handler;
165
166 struct net_device_stats stats;
167
168 struct net_device *parent;
169 struct list_head child_intfs;
170 struct list_head list;
171
172#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
173 struct list_head fs_list;
174 struct dentry *mcg_dentry;
175#endif
176};
177
178struct ipoib_ah {
179 struct net_device *dev;
180 struct ib_ah *ah;
181 struct list_head list;
182 struct kref ref;
183 unsigned last_send;
184};
185
186struct ipoib_path {
187 struct net_device *dev;
188 struct ib_sa_path_rec pathrec;
189 struct ipoib_ah *ah;
190 struct sk_buff_head queue;
191
192 struct list_head neigh_list;
193
194 int query_id;
195 struct ib_sa_query *query;
196 struct completion done;
197
198 struct rb_node rb_node;
199 struct list_head list;
200};
201
202struct ipoib_neigh {
203 struct ipoib_ah *ah;
204 struct sk_buff_head queue;
205
206 struct neighbour *neighbour;
207
208 struct list_head list;
209};
210
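/*
 * An ipoib_neigh pointer is stashed in the neighbour's hardware-address
 * storage just past the 20-byte IPoIB address; the offsetof() adjustment
 * below keeps that stashed pointer 8-byte aligned no matter where ha
 * lands inside struct neighbour.
 */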
211static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh)
212{
213 return (struct ipoib_neigh **) (neigh->ha + 24 -
214 (offsetof(struct neighbour, ha) & 4));
215}
216
217extern struct workqueue_struct *ipoib_workqueue;
218
219/* functions */
220
221void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
222
223struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
224 struct ib_pd *pd, struct ib_ah_attr *attr);
225void ipoib_free_ah(struct kref *kref);
226static inline void ipoib_put_ah(struct ipoib_ah *ah)
227{
228 kref_put(&ah->ref, ipoib_free_ah);
229}
230
231int ipoib_add_pkey_attr(struct net_device *dev);
232
233void ipoib_send(struct net_device *dev, struct sk_buff *skb,
234 struct ipoib_ah *address, u32 qpn);
235void ipoib_reap_ah(void *dev_ptr);
236
237void ipoib_flush_paths(struct net_device *dev);
238struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
239
240int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
241void ipoib_ib_dev_flush(void *dev);
242void ipoib_ib_dev_cleanup(struct net_device *dev);
243
244int ipoib_ib_dev_open(struct net_device *dev);
245int ipoib_ib_dev_up(struct net_device *dev);
246int ipoib_ib_dev_down(struct net_device *dev);
247int ipoib_ib_dev_stop(struct net_device *dev);
248
249int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
250void ipoib_dev_cleanup(struct net_device *dev);
251
252void ipoib_mcast_join_task(void *dev_ptr);
253void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid,
254 struct sk_buff *skb);
255
256void ipoib_mcast_restart_task(void *dev_ptr);
257int ipoib_mcast_start_thread(struct net_device *dev);
258int ipoib_mcast_stop_thread(struct net_device *dev);
259
260void ipoib_mcast_dev_down(struct net_device *dev);
261void ipoib_mcast_dev_flush(struct net_device *dev);
262
263struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev);
264void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter);
265int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter);
266void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
267 union ib_gid *gid,
268 unsigned long *created,
269 unsigned int *queuelen,
270 unsigned int *complete,
271 unsigned int *send_only);
272
273int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
274 union ib_gid *mgid);
275int ipoib_mcast_detach(struct net_device *dev, u16 mlid,
276 union ib_gid *mgid);
277
278int ipoib_qp_create(struct net_device *dev);
279int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
280void ipoib_transport_dev_cleanup(struct net_device *dev);
281
282void ipoib_event(struct ib_event_handler *handler,
283 struct ib_event *record);
284
285int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey);
286int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
287
288void ipoib_pkey_poll(void *dev);
289int ipoib_pkey_dev_delay_open(struct net_device *dev);
290
291#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
292int ipoib_create_debug_file(struct net_device *dev);
293void ipoib_delete_debug_file(struct net_device *dev);
294int ipoib_register_debugfs(void);
295void ipoib_unregister_debugfs(void);
296#else
297static inline int ipoib_create_debug_file(struct net_device *dev) { return 0; }
298static inline void ipoib_delete_debug_file(struct net_device *dev) { }
299static inline int ipoib_register_debugfs(void) { return 0; }
300static inline void ipoib_unregister_debugfs(void) { }
301#endif
302
303
304#define ipoib_printk(level, priv, format, arg...) \
305 printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg)
306#define ipoib_warn(priv, format, arg...) \
307 ipoib_printk(KERN_WARNING, priv, format , ## arg)
308
309
310#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
311extern int ipoib_debug_level;
312
313#define ipoib_dbg(priv, format, arg...) \
314 do { \
315 if (ipoib_debug_level > 0) \
316 ipoib_printk(KERN_DEBUG, priv, format , ## arg); \
317 } while (0)
318#define ipoib_dbg_mcast(priv, format, arg...) \
319 do { \
320 if (mcast_debug_level > 0) \
321 ipoib_printk(KERN_DEBUG, priv, format , ## arg); \
322 } while (0)
323#else /* CONFIG_INFINIBAND_IPOIB_DEBUG */
324#define ipoib_dbg(priv, format, arg...) \
325 do { (void) (priv); } while (0)
326#define ipoib_dbg_mcast(priv, format, arg...) \
327 do { (void) (priv); } while (0)
328#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
329
330#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
331#define ipoib_dbg_data(priv, format, arg...) \
332 do { \
333 if (data_debug_level > 0) \
334 ipoib_printk(KERN_DEBUG, priv, format , ## arg); \
335 } while (0)
336#else /* CONFIG_INFINIBAND_IPOIB_DEBUG_DATA */
337#define ipoib_dbg_data(priv, format, arg...) \
338 do { (void) (priv); } while (0)
339#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG_DATA */
340
341
342#define IPOIB_GID_FMT "%x:%x:%x:%x:%x:%x:%x:%x"
343
344#define IPOIB_GID_ARG(gid) be16_to_cpup((__be16 *) ((gid).raw + 0)), \
345 be16_to_cpup((__be16 *) ((gid).raw + 2)), \
346 be16_to_cpup((__be16 *) ((gid).raw + 4)), \
347 be16_to_cpup((__be16 *) ((gid).raw + 6)), \
348 be16_to_cpup((__be16 *) ((gid).raw + 8)), \
349 be16_to_cpup((__be16 *) ((gid).raw + 10)), \
350 be16_to_cpup((__be16 *) ((gid).raw + 12)), \
351 be16_to_cpup((__be16 *) ((gid).raw + 14))
352
353#endif /* _IPOIB_H */
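
The locking comment above ipoib_dev_priv pins down the nesting order used throughout the driver: tx_lock is the outer lock guarding TX fast-path state (the device uses LLTX, so the network stack does no locking of its own), and lock, which protects everything else, may only be taken inside it. A minimal sketch of that ordering, assuming only the definitions in this header; the helper itself is hypothetical:

#include "ipoib.h"

/* Hypothetical helper showing the lock nesting documented above. */
static void example_update_under_both_locks(struct ipoib_dev_priv *priv)
{
	unsigned long flags;

	spin_lock_irqsave(&priv->tx_lock, flags);	/* outer: TX fast path */
	spin_lock(&priv->lock);				/* inner: everything else */

	/* ... touch state guarded by both locks ... */

	spin_unlock(&priv->lock);
	spin_unlock_irqrestore(&priv->tx_lock, flags);
}
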
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
new file mode 100644
index 000000000000..044f2c78ef15
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -0,0 +1,287 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ipoib_fs.c 1389 2004-12-27 22:56:47Z roland $
33 */
34
35#include <linux/pagemap.h>
36#include <linux/seq_file.h>
37
38#include "ipoib.h"
39
40enum {
41 IPOIB_MAGIC = 0x49504942 /* "IPIB" */
42};
43
44static DECLARE_MUTEX(ipoib_fs_mutex);
45static struct dentry *ipoib_root;
46static struct super_block *ipoib_sb;
47static LIST_HEAD(ipoib_device_list);
48
49static void *ipoib_mcg_seq_start(struct seq_file *file, loff_t *pos)
50{
51 struct ipoib_mcast_iter *iter;
52 loff_t n = *pos;
53
54 iter = ipoib_mcast_iter_init(file->private);
55 if (!iter)
56 return NULL;
57
58 while (n--) {
59 if (ipoib_mcast_iter_next(iter)) {
60 ipoib_mcast_iter_free(iter);
61 return NULL;
62 }
63 }
64
65 return iter;
66}
67
68static void *ipoib_mcg_seq_next(struct seq_file *file, void *iter_ptr,
69 loff_t *pos)
70{
71 struct ipoib_mcast_iter *iter = iter_ptr;
72
73 (*pos)++;
74
75 if (ipoib_mcast_iter_next(iter)) {
76 ipoib_mcast_iter_free(iter);
77 return NULL;
78 }
79
80 return iter;
81}
82
83static void ipoib_mcg_seq_stop(struct seq_file *file, void *iter_ptr)
84{
85 /* nothing for now */
86}
87
88static int ipoib_mcg_seq_show(struct seq_file *file, void *iter_ptr)
89{
90 struct ipoib_mcast_iter *iter = iter_ptr;
91 char gid_buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"];
92 union ib_gid mgid;
93 int i, n;
94 unsigned long created;
95 unsigned int queuelen, complete, send_only;
96
97 if (iter) {
98 ipoib_mcast_iter_read(iter, &mgid, &created, &queuelen,
99 &complete, &send_only);
100
101 for (n = 0, i = 0; i < sizeof mgid / 2; ++i) {
102 n += sprintf(gid_buf + n, "%x",
103 be16_to_cpu(((u16 *)mgid.raw)[i]));
104 if (i < sizeof mgid / 2 - 1)
105 gid_buf[n++] = ':';
106 }
107 }
108
109 seq_printf(file, "GID: %*s", -(1 + (int) sizeof gid_buf), gid_buf);
110
111 seq_printf(file,
112 " created: %10ld queuelen: %4d complete: %d send_only: %d\n",
113 created, queuelen, complete, send_only);
114
115 return 0;
116}
117
118static struct seq_operations ipoib_seq_ops = {
119 .start = ipoib_mcg_seq_start,
120 .next = ipoib_mcg_seq_next,
121 .stop = ipoib_mcg_seq_stop,
122 .show = ipoib_mcg_seq_show,
123};
124
125static int ipoib_mcg_open(struct inode *inode, struct file *file)
126{
127 struct seq_file *seq;
128 int ret;
129
130 ret = seq_open(file, &ipoib_seq_ops);
131 if (ret)
132 return ret;
133
134 seq = file->private_data;
135 seq->private = inode->u.generic_ip;
136
137 return 0;
138}
139
140static struct file_operations ipoib_fops = {
141 .owner = THIS_MODULE,
142 .open = ipoib_mcg_open,
143 .read = seq_read,
144 .llseek = seq_lseek,
145 .release = seq_release
146};
147
148static struct inode *ipoib_get_inode(void)
149{
150 struct inode *inode = new_inode(ipoib_sb);
151
152 if (inode) {
153 inode->i_mode = S_IFREG | S_IRUGO;
154 inode->i_uid = 0;
155 inode->i_gid = 0;
156 inode->i_blksize = PAGE_CACHE_SIZE;
157 inode->i_blocks = 0;
158 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
159 inode->i_fop = &ipoib_fops;
160 }
161
162 return inode;
163}
164
165static int __ipoib_create_debug_file(struct net_device *dev)
166{
167 struct ipoib_dev_priv *priv = netdev_priv(dev);
168 struct dentry *dentry;
169 struct inode *inode;
170 char name[IFNAMSIZ + sizeof "_mcg"];
171
172 snprintf(name, sizeof name, "%s_mcg", dev->name);
173
174 dentry = d_alloc_name(ipoib_root, name);
175 if (!dentry)
176 return -ENOMEM;
177
178 inode = ipoib_get_inode();
179 if (!inode) {
180 dput(dentry);
181 return -ENOMEM;
182 }
183
184 inode->u.generic_ip = dev;
185 priv->mcg_dentry = dentry;
186
187 d_add(dentry, inode);
188
189 return 0;
190}
191
192int ipoib_create_debug_file(struct net_device *dev)
193{
194 struct ipoib_dev_priv *priv = netdev_priv(dev);
195
196 down(&ipoib_fs_mutex);
197
198 list_add_tail(&priv->fs_list, &ipoib_device_list);
199
200 if (!ipoib_sb) {
201 up(&ipoib_fs_mutex);
202 return 0;
203 }
204
205 up(&ipoib_fs_mutex);
206
207 return __ipoib_create_debug_file(dev);
208}
209
210void ipoib_delete_debug_file(struct net_device *dev)
211{
212 struct ipoib_dev_priv *priv = netdev_priv(dev);
213
214 down(&ipoib_fs_mutex);
215 list_del(&priv->fs_list);
216 if (!ipoib_sb) {
217 up(&ipoib_fs_mutex);
218 return;
219 }
220 up(&ipoib_fs_mutex);
221
222 if (priv->mcg_dentry) {
223 d_drop(priv->mcg_dentry);
224 simple_unlink(ipoib_root->d_inode, priv->mcg_dentry);
225 }
226}
227
228static int ipoib_fill_super(struct super_block *sb, void *data, int silent)
229{
230 static struct tree_descr ipoib_files[] = {
231 { "" }
232 };
233 struct ipoib_dev_priv *priv;
234 int ret;
235
236 ret = simple_fill_super(sb, IPOIB_MAGIC, ipoib_files);
237 if (ret)
238 return ret;
239
240 ipoib_root = sb->s_root;
241
242 down(&ipoib_fs_mutex);
243
244 ipoib_sb = sb;
245
246 list_for_each_entry(priv, &ipoib_device_list, fs_list) {
247 ret = __ipoib_create_debug_file(priv->dev);
248 if (ret)
249 break;
250 }
251
252 up(&ipoib_fs_mutex);
253
254 return ret;
255}
256
257static struct super_block *ipoib_get_sb(struct file_system_type *fs_type,
258 int flags, const char *dev_name, void *data)
259{
260 return get_sb_single(fs_type, flags, data, ipoib_fill_super);
261}
262
263static void ipoib_kill_sb(struct super_block *sb)
264{
265 down(&ipoib_fs_mutex);
266 ipoib_sb = NULL;
267 up(&ipoib_fs_mutex);
268
269 kill_litter_super(sb);
270}
271
272static struct file_system_type ipoib_fs_type = {
273 .owner = THIS_MODULE,
274 .name = "ipoib_debugfs",
275 .get_sb = ipoib_get_sb,
276 .kill_sb = ipoib_kill_sb,
277};
278
279int ipoib_register_debugfs(void)
280{
281 return register_filesystem(&ipoib_fs_type);
282}
283
284void ipoib_unregister_debugfs(void)
285{
286 unregister_filesystem(&ipoib_fs_type);
287}
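
Taken together, the hooks in this file work as follows: ipoib_register_debugfs() registers the single-mount "ipoib_debugfs" filesystem type, ipoib_create_debug_file() queues each interface on ipoib_device_list, and the per-interface "<ifname>_mcg" file backed by the seq_file operations above only materializes once the filesystem is mounted and ipoib_fill_super() walks that list. The caller below is hypothetical and only illustrates the intended call order; in the driver, registration happens from module init and device setup, not from one function.

#include "ipoib.h"

/* Hypothetical wiring of the debugfs hooks declared in ipoib.h. */
static int example_debug_wireup(struct net_device *dev)
{
	int ret;

	ret = ipoib_register_debugfs();		/* once, at module load */
	if (ret)
		return ret;

	ret = ipoib_create_debug_file(dev);	/* per interface; the _mcg file
						 * appears after ipoib_debugfs
						 * is mounted */
	if (ret)
		ipoib_unregister_debugfs();

	return ret;
}
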
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
new file mode 100644
index 000000000000..c5a1d45e0ac5
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -0,0 +1,668 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ipoib_ib.c 1386 2004-12-27 16:23:17Z roland $
33 */
34
35#include <linux/delay.h>
36#include <linux/dma-mapping.h>
37
38#include <ib_cache.h>
39
40#include "ipoib.h"
41
42#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
43static int data_debug_level;
44
45module_param(data_debug_level, int, 0644);
46MODULE_PARM_DESC(data_debug_level,
47 "Enable data path debug tracing if > 0");
48#endif
49
50#define IPOIB_OP_RECV (1ul << 31)
51
52static DECLARE_MUTEX(pkey_sem);
53
54struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
55 struct ib_pd *pd, struct ib_ah_attr *attr)
56{
57 struct ipoib_ah *ah;
58
59 ah = kmalloc(sizeof *ah, GFP_KERNEL);
60 if (!ah)
61 return NULL;
62
63 ah->dev = dev;
64 ah->last_send = 0;
65 kref_init(&ah->ref);
66
67 ah->ah = ib_create_ah(pd, attr);
68 if (IS_ERR(ah->ah)) {
69 kfree(ah);
70 ah = NULL;
71 } else
72 ipoib_dbg(netdev_priv(dev), "Created ah %p\n", ah->ah);
73
74 return ah;
75}
76
77void ipoib_free_ah(struct kref *kref)
78{
79 struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref);
80 struct ipoib_dev_priv *priv = netdev_priv(ah->dev);
81
82 unsigned long flags;
83
84 if (ah->last_send <= priv->tx_tail) {
85 ipoib_dbg(priv, "Freeing ah %p\n", ah->ah);
86 ib_destroy_ah(ah->ah);
87 kfree(ah);
88 } else {
89 spin_lock_irqsave(&priv->lock, flags);
90 list_add_tail(&ah->list, &priv->dead_ahs);
91 spin_unlock_irqrestore(&priv->lock, flags);
92 }
93}
94
95static inline int ipoib_ib_receive(struct ipoib_dev_priv *priv,
96 unsigned int wr_id,
97 dma_addr_t addr)
98{
99 struct ib_sge list = {
100 .addr = addr,
101 .length = IPOIB_BUF_SIZE,
102 .lkey = priv->mr->lkey,
103 };
104 struct ib_recv_wr param = {
105 .wr_id = wr_id | IPOIB_OP_RECV,
106 .sg_list = &list,
107 .num_sge = 1,
108 };
109 struct ib_recv_wr *bad_wr;
110
111 return ib_post_recv(priv->qp, &param, &bad_wr);
112}
113
114static int ipoib_ib_post_receive(struct net_device *dev, int id)
115{
116 struct ipoib_dev_priv *priv = netdev_priv(dev);
117 struct sk_buff *skb;
118 dma_addr_t addr;
119 int ret;
120
121 skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4);
122 if (!skb) {
123 ipoib_warn(priv, "failed to allocate receive buffer\n");
124
125 priv->rx_ring[id].skb = NULL;
126 return -ENOMEM;
127 }
128 skb_reserve(skb, 4); /* 16 byte align IP header */
129 priv->rx_ring[id].skb = skb;
130 addr = dma_map_single(priv->ca->dma_device,
131 skb->data, IPOIB_BUF_SIZE,
132 DMA_FROM_DEVICE);
133 pci_unmap_addr_set(&priv->rx_ring[id], mapping, addr);
134
135 ret = ipoib_ib_receive(priv, id, addr);
136 if (ret) {
137 ipoib_warn(priv, "ipoib_ib_receive failed for buf %d (%d)\n",
138 id, ret);
139 dma_unmap_single(priv->ca->dma_device, addr,
140 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
141 dev_kfree_skb_any(skb);
142 priv->rx_ring[id].skb = NULL;
143 }
144
145 return ret;
146}
147
148static int ipoib_ib_post_receives(struct net_device *dev)
149{
150 struct ipoib_dev_priv *priv = netdev_priv(dev);
151 int i;
152
153 for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
154 if (ipoib_ib_post_receive(dev, i)) {
155 ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
156 return -EIO;
157 }
158 }
159
160 return 0;
161}
162
163static void ipoib_ib_handle_wc(struct net_device *dev,
164 struct ib_wc *wc)
165{
166 struct ipoib_dev_priv *priv = netdev_priv(dev);
167 unsigned int wr_id = wc->wr_id;
168
169 ipoib_dbg_data(priv, "called: id %d, op %d, status: %d\n",
170 wr_id, wc->opcode, wc->status);
171
172 if (wr_id & IPOIB_OP_RECV) {
173 wr_id &= ~IPOIB_OP_RECV;
174
175 if (wr_id < IPOIB_RX_RING_SIZE) {
176 struct sk_buff *skb = priv->rx_ring[wr_id].skb;
177
178 priv->rx_ring[wr_id].skb = NULL;
179
180 dma_unmap_single(priv->ca->dma_device,
181 pci_unmap_addr(&priv->rx_ring[wr_id],
182 mapping),
183 IPOIB_BUF_SIZE,
184 DMA_FROM_DEVICE);
185
186 if (wc->status != IB_WC_SUCCESS) {
187 if (wc->status != IB_WC_WR_FLUSH_ERR)
188 ipoib_warn(priv, "failed recv event "
189 "(status=%d, wrid=%d vend_err %x)\n",
190 wc->status, wr_id, wc->vendor_err);
191 dev_kfree_skb_any(skb);
192 return;
193 }
194
195 ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
196 wc->byte_len, wc->slid);
197
198 skb_put(skb, wc->byte_len);
199 skb_pull(skb, IB_GRH_BYTES);
200
201 if (wc->slid != priv->local_lid ||
202 wc->src_qp != priv->qp->qp_num) {
203 skb->protocol = ((struct ipoib_header *) skb->data)->proto;
204
205 skb_pull(skb, IPOIB_ENCAP_LEN);
206
207 dev->last_rx = jiffies;
208 ++priv->stats.rx_packets;
209 priv->stats.rx_bytes += skb->len;
210
211 skb->dev = dev;
212 /* XXX get correct PACKET_ type here */
213 skb->pkt_type = PACKET_HOST;
214 netif_rx_ni(skb);
215 } else {
216 ipoib_dbg_data(priv, "dropping loopback packet\n");
217 dev_kfree_skb_any(skb);
218 }
219
220 /* repost receive */
221 if (ipoib_ib_post_receive(dev, wr_id))
222 ipoib_warn(priv, "ipoib_ib_post_receive failed "
223 "for buf %d\n", wr_id);
224 } else
225 ipoib_warn(priv, "completion event with wrid %d\n",
226 wr_id);
227
228 } else {
229 struct ipoib_buf *tx_req;
230 unsigned long flags;
231
232 if (wr_id >= IPOIB_TX_RING_SIZE) {
233 ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
234 wr_id, IPOIB_TX_RING_SIZE);
235 return;
236 }
237
238 ipoib_dbg_data(priv, "send complete, wrid %d\n", wr_id);
239
240 tx_req = &priv->tx_ring[wr_id];
241
242 dma_unmap_single(priv->ca->dma_device,
243 pci_unmap_addr(tx_req, mapping),
244 tx_req->skb->len,
245 DMA_TO_DEVICE);
246
247 ++priv->stats.tx_packets;
248 priv->stats.tx_bytes += tx_req->skb->len;
249
250 dev_kfree_skb_any(tx_req->skb);
251
252 spin_lock_irqsave(&priv->tx_lock, flags);
253 ++priv->tx_tail;
254 if (netif_queue_stopped(dev) &&
255 priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2)
256 netif_wake_queue(dev);
257 spin_unlock_irqrestore(&priv->tx_lock, flags);
258
259 if (wc->status != IB_WC_SUCCESS &&
260 wc->status != IB_WC_WR_FLUSH_ERR)
261 ipoib_warn(priv, "failed send event "
262 "(status=%d, wrid=%d vend_err %x)\n",
263 wc->status, wr_id, wc->vendor_err);
264 }
265}
266
267void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
268{
269 struct net_device *dev = (struct net_device *) dev_ptr;
270 struct ipoib_dev_priv *priv = netdev_priv(dev);
271 int n, i;
272
273 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
274 do {
275 n = ib_poll_cq(cq, IPOIB_NUM_WC, priv->ibwc);
276 for (i = 0; i < n; ++i)
277 ipoib_ib_handle_wc(dev, priv->ibwc + i);
278 } while (n == IPOIB_NUM_WC);
279}
280
281static inline int post_send(struct ipoib_dev_priv *priv,
282 unsigned int wr_id,
283 struct ib_ah *address, u32 qpn,
284 dma_addr_t addr, int len)
285{
286 struct ib_send_wr *bad_wr;
287
288 priv->tx_sge.addr = addr;
289 priv->tx_sge.length = len;
290
291 priv->tx_wr.wr_id = wr_id;
292 priv->tx_wr.wr.ud.remote_qpn = qpn;
293 priv->tx_wr.wr.ud.ah = address;
294
295 return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
296}
297
298void ipoib_send(struct net_device *dev, struct sk_buff *skb,
299 struct ipoib_ah *address, u32 qpn)
300{
301 struct ipoib_dev_priv *priv = netdev_priv(dev);
302 struct ipoib_buf *tx_req;
303 dma_addr_t addr;
304
305 if (skb->len > dev->mtu + INFINIBAND_ALEN) {
306 ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
307 skb->len, dev->mtu + INFINIBAND_ALEN);
308 ++priv->stats.tx_dropped;
309 ++priv->stats.tx_errors;
310 dev_kfree_skb_any(skb);
311 return;
312 }
313
314 ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
315 skb->len, address, qpn);
316
317 /*
318 * We put the skb into the tx_ring _before_ we call post_send()
319 * because it's entirely possible that the completion handler will
320 * run before we execute anything after the post_send(). That
321 * means we have to make sure everything is properly recorded and
322 * our state is consistent before we call post_send().
323 */
324 tx_req = &priv->tx_ring[priv->tx_head & (IPOIB_TX_RING_SIZE - 1)];
325 tx_req->skb = skb;
326 addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len,
327 DMA_TO_DEVICE);
328 pci_unmap_addr_set(tx_req, mapping, addr);
329
330 if (unlikely(post_send(priv, priv->tx_head & (IPOIB_TX_RING_SIZE - 1),
331 address->ah, qpn, addr, skb->len))) {
332 ipoib_warn(priv, "post_send failed\n");
333 ++priv->stats.tx_errors;
334 dma_unmap_single(priv->ca->dma_device, addr, skb->len,
335 DMA_TO_DEVICE);
336 dev_kfree_skb_any(skb);
337 } else {
338 dev->trans_start = jiffies;
339
340 address->last_send = priv->tx_head;
341 ++priv->tx_head;
342
343 if (priv->tx_head - priv->tx_tail == IPOIB_TX_RING_SIZE) {
344 ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
345 netif_stop_queue(dev);
346 }
347 }
348}
349
350static void __ipoib_reap_ah(struct net_device *dev)
351{
352 struct ipoib_dev_priv *priv = netdev_priv(dev);
353 struct ipoib_ah *ah, *tah;
354 LIST_HEAD(remove_list);
355
356 spin_lock_irq(&priv->lock);
357 list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
358 if (ah->last_send <= priv->tx_tail) {
359 list_del(&ah->list);
360 list_add_tail(&ah->list, &remove_list);
361 }
362 spin_unlock_irq(&priv->lock);
363
364 list_for_each_entry_safe(ah, tah, &remove_list, list) {
365 ipoib_dbg(priv, "Reaping ah %p\n", ah->ah);
366 ib_destroy_ah(ah->ah);
367 kfree(ah);
368 }
369}
370
371void ipoib_reap_ah(void *dev_ptr)
372{
373 struct net_device *dev = dev_ptr;
374 struct ipoib_dev_priv *priv = netdev_priv(dev);
375
376 __ipoib_reap_ah(dev);
377
378 if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
379 queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
380}
381
382int ipoib_ib_dev_open(struct net_device *dev)
383{
384 struct ipoib_dev_priv *priv = netdev_priv(dev);
385 int ret;
386
387 ret = ipoib_qp_create(dev);
388 if (ret) {
389 ipoib_warn(priv, "ipoib_qp_create returned %d\n", ret);
390 return -1;
391 }
392
393 ret = ipoib_ib_post_receives(dev);
394 if (ret) {
395 ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
396 return -1;
397 }
398
399 clear_bit(IPOIB_STOP_REAPER, &priv->flags);
400 queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
401
402 return 0;
403}
404
405int ipoib_ib_dev_up(struct net_device *dev)
406{
407 struct ipoib_dev_priv *priv = netdev_priv(dev);
408
409 set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
410
411 return ipoib_mcast_start_thread(dev);
412}
413
414int ipoib_ib_dev_down(struct net_device *dev)
415{
416 struct ipoib_dev_priv *priv = netdev_priv(dev);
417
418 ipoib_dbg(priv, "downing ib_dev\n");
419
420 clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
421 netif_carrier_off(dev);
422
423 /* Shutdown the P_Key thread if still active */
424 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
425 down(&pkey_sem);
426 set_bit(IPOIB_PKEY_STOP, &priv->flags);
427 cancel_delayed_work(&priv->pkey_task);
428 up(&pkey_sem);
429 flush_workqueue(ipoib_workqueue);
430 }
431
432 ipoib_mcast_stop_thread(dev);
433
434 /*
435 * Flush the multicast groups first so we stop any multicast joins. The
436 * completion thread may have already died and we may deadlock waiting
437 * for the completion thread to finish some multicast joins.
438 */
439 ipoib_mcast_dev_flush(dev);
440
441 /* Delete broadcast and local addresses since they will be recreated */
442 ipoib_mcast_dev_down(dev);
443
444 ipoib_flush_paths(dev);
445
446 return 0;
447}
448
449static int recvs_pending(struct net_device *dev)
450{
451 struct ipoib_dev_priv *priv = netdev_priv(dev);
452 int pending = 0;
453 int i;
454
455 for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
456 if (priv->rx_ring[i].skb)
457 ++pending;
458
459 return pending;
460}
461
462int ipoib_ib_dev_stop(struct net_device *dev)
463{
464 struct ipoib_dev_priv *priv = netdev_priv(dev);
465 struct ib_qp_attr qp_attr;
466 int attr_mask;
467 unsigned long begin;
468 struct ipoib_buf *tx_req;
469 int i;
470
471 /* Kill the existing QP and allocate a new one */
472 qp_attr.qp_state = IB_QPS_ERR;
473 attr_mask = IB_QP_STATE;
474 if (ib_modify_qp(priv->qp, &qp_attr, attr_mask))
475 ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
476
477 /* Wait for all sends and receives to complete */
478 begin = jiffies;
479
480 while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
481 if (time_after(jiffies, begin + 5 * HZ)) {
482 ipoib_warn(priv, "timing out; %d sends %d receives not completed\n",
483 priv->tx_head - priv->tx_tail, recvs_pending(dev));
484
485 /*
486 * assume the HW is wedged and just free up
487 * all our pending work requests.
488 */
489 while (priv->tx_tail < priv->tx_head) {
490 tx_req = &priv->tx_ring[priv->tx_tail &
491 (IPOIB_TX_RING_SIZE - 1)];
492 dma_unmap_single(priv->ca->dma_device,
493 pci_unmap_addr(tx_req, mapping),
494 tx_req->skb->len,
495 DMA_TO_DEVICE);
496 dev_kfree_skb_any(tx_req->skb);
497 ++priv->tx_tail;
498 }
499
500 for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
501 if (priv->rx_ring[i].skb) {
502 dma_unmap_single(priv->ca->dma_device,
503 pci_unmap_addr(&priv->rx_ring[i],
504 mapping),
505 IPOIB_BUF_SIZE,
506 DMA_FROM_DEVICE);
507 dev_kfree_skb_any(priv->rx_ring[i].skb);
508 priv->rx_ring[i].skb = NULL;
509 }
510
511 goto timeout;
512 }
513
514 msleep(1);
515 }
516
517 ipoib_dbg(priv, "All sends and receives done.\n");
518
519timeout:
520 qp_attr.qp_state = IB_QPS_RESET;
521 attr_mask = IB_QP_STATE;
522 if (ib_modify_qp(priv->qp, &qp_attr, attr_mask))
523 ipoib_warn(priv, "Failed to modify QP to RESET state\n");
524
525 /* Wait for all AHs to be reaped */
526 set_bit(IPOIB_STOP_REAPER, &priv->flags);
527 cancel_delayed_work(&priv->ah_reap_task);
528 flush_workqueue(ipoib_workqueue);
529
530 begin = jiffies;
531
532 while (!list_empty(&priv->dead_ahs)) {
533 __ipoib_reap_ah(dev);
534
535 if (time_after(jiffies, begin + HZ)) {
536 ipoib_warn(priv, "timing out; will leak address handles\n");
537 break;
538 }
539
540 msleep(1);
541 }
542
543 return 0;
544}
545
546int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
547{
548 struct ipoib_dev_priv *priv = netdev_priv(dev);
549
550 priv->ca = ca;
551 priv->port = port;
552 priv->qp = NULL;
553
554 if (ipoib_transport_dev_init(dev, ca)) {
555 printk(KERN_WARNING "%s: ipoib_transport_dev_init failed\n", ca->name);
556 return -ENODEV;
557 }
558
559 if (dev->flags & IFF_UP) {
560 if (ipoib_ib_dev_open(dev)) {
561 ipoib_transport_dev_cleanup(dev);
562 return -ENODEV;
563 }
564 }
565
566 return 0;
567}
568
569void ipoib_ib_dev_flush(void *_dev)
570{
571 struct net_device *dev = (struct net_device *)_dev;
572 struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv;
573
574 if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
575 return;
576
577 ipoib_dbg(priv, "flushing\n");
578
579 ipoib_ib_dev_down(dev);
580
581 /*
582 * The device could have been brought down between the start and when
583	 * we get here; don't bring it back up if it's not configured up
584 */
585 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
586 ipoib_ib_dev_up(dev);
587
588 /* Flush any child interfaces too */
589 list_for_each_entry(cpriv, &priv->child_intfs, list)
590 ipoib_ib_dev_flush(&cpriv->dev);
591}
592
593void ipoib_ib_dev_cleanup(struct net_device *dev)
594{
595 struct ipoib_dev_priv *priv = netdev_priv(dev);
596
597 ipoib_dbg(priv, "cleaning up ib_dev\n");
598
599 ipoib_mcast_stop_thread(dev);
600
601 /* Delete the broadcast address and the local address */
602 ipoib_mcast_dev_down(dev);
603
604 ipoib_transport_dev_cleanup(dev);
605}
606
607/*
608 * Delayed P_Key Assignment Interim Support
609 *
610 * The following is an initial implementation of the delayed P_Key assignment
611 * mechanism. It uses the same approach implemented for the multicast
612 * group join. The single goal of this implementation is to quickly address
613 * Bug #2507. This implementation will probably be removed when the P_Key
614 * change async notification is available.
615 */
616int ipoib_open(struct net_device *dev);
617
618static void ipoib_pkey_dev_check_presence(struct net_device *dev)
619{
620 struct ipoib_dev_priv *priv = netdev_priv(dev);
621 u16 pkey_index = 0;
622
623 if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index))
624 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
625 else
626 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
627}
628
629void ipoib_pkey_poll(void *dev_ptr)
630{
631 struct net_device *dev = dev_ptr;
632 struct ipoib_dev_priv *priv = netdev_priv(dev);
633
634 ipoib_pkey_dev_check_presence(dev);
635
636 if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
637 ipoib_open(dev);
638 else {
639 down(&pkey_sem);
640 if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
641 queue_delayed_work(ipoib_workqueue,
642 &priv->pkey_task,
643 HZ);
644 up(&pkey_sem);
645 }
646}
647
648int ipoib_pkey_dev_delay_open(struct net_device *dev)
649{
650 struct ipoib_dev_priv *priv = netdev_priv(dev);
651
652 /* Look for the interface pkey value in the IB Port P_Key table and */
653	/* set the interface pkey assignment flag */
654 ipoib_pkey_dev_check_presence(dev);
655
656 /* P_Key value not assigned yet - start polling */
657 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
658 down(&pkey_sem);
659 clear_bit(IPOIB_PKEY_STOP, &priv->flags);
660 queue_delayed_work(ipoib_workqueue,
661 &priv->pkey_task,
662 HZ);
663 up(&pkey_sem);
664 return 1;
665 }
666
667 return 0;
668}
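
Send and receive completions share a single CQ in this file, so work-request IDs do double duty: the ring slot goes in the low bits (the send path masks tx_head with IPOIB_TX_RING_SIZE - 1 before posting), and receives are tagged with the IPOIB_OP_RECV bit that ipoib_ib_handle_wc() strips before indexing rx_ring. The helpers below are hypothetical and merely restate that encoding; the driver open-codes it at the post and completion sites.

/* Hypothetical helpers restating the wr_id encoding used above. */
static inline u64 example_recv_wr_id(unsigned int rx_slot)
{
	return rx_slot | IPOIB_OP_RECV;		/* bit 31 marks a receive */
}

static inline int example_wc_is_recv(u64 wr_id)
{
	return (wr_id & IPOIB_OP_RECV) != 0;
}

static inline unsigned int example_rx_slot(u64 wr_id)
{
	return wr_id & ~IPOIB_OP_RECV;		/* low bits index rx_ring[] */
}
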
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
new file mode 100644
index 000000000000..5a3b5c6a4494
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -0,0 +1,1103 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ipoib_main.c 1377 2004-12-23 19:57:12Z roland $
33 */
34
35#include "ipoib.h"
36
37#include <linux/version.h>
38#include <linux/module.h>
39
40#include <linux/init.h>
41#include <linux/slab.h>
42#include <linux/vmalloc.h>
43
44#include <linux/if_arp.h> /* For ARPHRD_xxx */
45
46#include <linux/ip.h>
47#include <linux/in.h>
48
49MODULE_AUTHOR("Roland Dreier");
50MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
51MODULE_LICENSE("Dual BSD/GPL");
52
53#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
54int ipoib_debug_level;
55
56module_param_named(debug_level, ipoib_debug_level, int, 0644);
57MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
58#endif
59
60static const u8 ipv4_bcast_addr[] = {
61 0x00, 0xff, 0xff, 0xff,
62 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
63 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
64};
65
66struct workqueue_struct *ipoib_workqueue;
67
68static void ipoib_add_one(struct ib_device *device);
69static void ipoib_remove_one(struct ib_device *device);
70
71static struct ib_client ipoib_client = {
72 .name = "ipoib",
73 .add = ipoib_add_one,
74 .remove = ipoib_remove_one
75};
76
77int ipoib_open(struct net_device *dev)
78{
79 struct ipoib_dev_priv *priv = netdev_priv(dev);
80
81 ipoib_dbg(priv, "bringing up interface\n");
82
83 set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
84
85 if (ipoib_pkey_dev_delay_open(dev))
86 return 0;
87
88 if (ipoib_ib_dev_open(dev))
89 return -EINVAL;
90
91 if (ipoib_ib_dev_up(dev))
92 return -EINVAL;
93
94 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
95 struct ipoib_dev_priv *cpriv;
96
97 /* Bring up any child interfaces too */
98 down(&priv->vlan_mutex);
99 list_for_each_entry(cpriv, &priv->child_intfs, list) {
100 int flags;
101
102 flags = cpriv->dev->flags;
103 if (flags & IFF_UP)
104 continue;
105
106 dev_change_flags(cpriv->dev, flags | IFF_UP);
107 }
108 up(&priv->vlan_mutex);
109 }
110
111 netif_start_queue(dev);
112
113 return 0;
114}
115
116static int ipoib_stop(struct net_device *dev)
117{
118 struct ipoib_dev_priv *priv = netdev_priv(dev);
119
120 ipoib_dbg(priv, "stopping interface\n");
121
122 clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
123
124 netif_stop_queue(dev);
125
126 ipoib_ib_dev_down(dev);
127 ipoib_ib_dev_stop(dev);
128
129 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
130 struct ipoib_dev_priv *cpriv;
131
132 /* Bring down any child interfaces too */
133 down(&priv->vlan_mutex);
134 list_for_each_entry(cpriv, &priv->child_intfs, list) {
135 int flags;
136
137 flags = cpriv->dev->flags;
138 if (!(flags & IFF_UP))
139 continue;
140
141 dev_change_flags(cpriv->dev, flags & ~IFF_UP);
142 }
143 up(&priv->vlan_mutex);
144 }
145
146 return 0;
147}
148
149static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
150{
151 struct ipoib_dev_priv *priv = netdev_priv(dev);
152
153 if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN)
154 return -EINVAL;
155
156 priv->admin_mtu = new_mtu;
157
158 dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
159
160 return 0;
161}
162
163static struct ipoib_path *__path_find(struct net_device *dev,
164 union ib_gid *gid)
165{
166 struct ipoib_dev_priv *priv = netdev_priv(dev);
167 struct rb_node *n = priv->path_tree.rb_node;
168 struct ipoib_path *path;
169 int ret;
170
171 while (n) {
172 path = rb_entry(n, struct ipoib_path, rb_node);
173
174 ret = memcmp(gid->raw, path->pathrec.dgid.raw,
175 sizeof (union ib_gid));
176
177 if (ret < 0)
178 n = n->rb_left;
179 else if (ret > 0)
180 n = n->rb_right;
181 else
182 return path;
183 }
184
185 return NULL;
186}
187
188static int __path_add(struct net_device *dev, struct ipoib_path *path)
189{
190 struct ipoib_dev_priv *priv = netdev_priv(dev);
191 struct rb_node **n = &priv->path_tree.rb_node;
192 struct rb_node *pn = NULL;
193 struct ipoib_path *tpath;
194 int ret;
195
196 while (*n) {
197 pn = *n;
198 tpath = rb_entry(pn, struct ipoib_path, rb_node);
199
200 ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw,
201 sizeof (union ib_gid));
202 if (ret < 0)
203 n = &pn->rb_left;
204 else if (ret > 0)
205 n = &pn->rb_right;
206 else
207 return -EEXIST;
208 }
209
210 rb_link_node(&path->rb_node, pn, n);
211 rb_insert_color(&path->rb_node, &priv->path_tree);
212
213 list_add_tail(&path->list, &priv->path_list);
214
215 return 0;
216}
217
218static void path_free(struct net_device *dev, struct ipoib_path *path)
219{
220 struct ipoib_dev_priv *priv = netdev_priv(dev);
221 struct ipoib_neigh *neigh, *tn;
222 struct sk_buff *skb;
223 unsigned long flags;
224
225 while ((skb = __skb_dequeue(&path->queue)))
226 dev_kfree_skb_irq(skb);
227
228 spin_lock_irqsave(&priv->lock, flags);
229
230 list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
231 /*
232 * It's safe to call ipoib_put_ah() inside priv->lock
233 * here, because we know that path->ah will always
234 * hold one more reference, so ipoib_put_ah() will
235 * never do more than decrement the ref count.
236 */
237 if (neigh->ah)
238 ipoib_put_ah(neigh->ah);
239 *to_ipoib_neigh(neigh->neighbour) = NULL;
240 neigh->neighbour->ops->destructor = NULL;
241 kfree(neigh);
242 }
243
244 spin_unlock_irqrestore(&priv->lock, flags);
245
246 if (path->ah)
247 ipoib_put_ah(path->ah);
248
249 kfree(path);
250}
251
252void ipoib_flush_paths(struct net_device *dev)
253{
254 struct ipoib_dev_priv *priv = netdev_priv(dev);
255 struct ipoib_path *path, *tp;
256 LIST_HEAD(remove_list);
257 unsigned long flags;
258
259 spin_lock_irqsave(&priv->lock, flags);
260
261 list_splice(&priv->path_list, &remove_list);
262 INIT_LIST_HEAD(&priv->path_list);
263
264 list_for_each_entry(path, &remove_list, list)
265 rb_erase(&path->rb_node, &priv->path_tree);
266
267 spin_unlock_irqrestore(&priv->lock, flags);
268
269 list_for_each_entry_safe(path, tp, &remove_list, list) {
270 if (path->query)
271 ib_sa_cancel_query(path->query_id, path->query);
272 wait_for_completion(&path->done);
273 path_free(dev, path);
274 }
275}
276
277static void path_rec_completion(int status,
278 struct ib_sa_path_rec *pathrec,
279 void *path_ptr)
280{
281 struct ipoib_path *path = path_ptr;
282 struct net_device *dev = path->dev;
283 struct ipoib_dev_priv *priv = netdev_priv(dev);
284 struct ipoib_ah *ah = NULL;
285 struct ipoib_neigh *neigh;
286 struct sk_buff_head skqueue;
287 struct sk_buff *skb;
288 unsigned long flags;
289
290 if (pathrec)
291 ipoib_dbg(priv, "PathRec LID 0x%04x for GID " IPOIB_GID_FMT "\n",
292 be16_to_cpu(pathrec->dlid), IPOIB_GID_ARG(pathrec->dgid));
293 else
294 ipoib_dbg(priv, "PathRec status %d for GID " IPOIB_GID_FMT "\n",
295 status, IPOIB_GID_ARG(path->pathrec.dgid));
296
297 skb_queue_head_init(&skqueue);
298
299 if (!status) {
300 struct ib_ah_attr av = {
301 .dlid = be16_to_cpu(pathrec->dlid),
302 .sl = pathrec->sl,
303 .port_num = priv->port
304 };
305
306 if (ib_sa_rate_enum_to_int(pathrec->rate) > 0)
307 av.static_rate = (2 * priv->local_rate -
308 ib_sa_rate_enum_to_int(pathrec->rate) - 1) /
309 (priv->local_rate ? priv->local_rate : 1);
310
311 ipoib_dbg(priv, "static_rate %d for local port %dX, path %dX\n",
312 av.static_rate, priv->local_rate,
313 ib_sa_rate_enum_to_int(pathrec->rate));
314
315 ah = ipoib_create_ah(dev, priv->pd, &av);
316 }
317
318 spin_lock_irqsave(&priv->lock, flags);
319
320 path->ah = ah;
321
322 if (ah) {
323 path->pathrec = *pathrec;
324
325 ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
326 ah, be16_to_cpu(pathrec->dlid), pathrec->sl);
327
328 while ((skb = __skb_dequeue(&path->queue)))
329 __skb_queue_tail(&skqueue, skb);
330
331 list_for_each_entry(neigh, &path->neigh_list, list) {
332 kref_get(&path->ah->ref);
333 neigh->ah = path->ah;
334
335 while ((skb = __skb_dequeue(&neigh->queue)))
336 __skb_queue_tail(&skqueue, skb);
337 }
338 } else
339 path->query = NULL;
340
341 complete(&path->done);
342
343 spin_unlock_irqrestore(&priv->lock, flags);
344
345 while ((skb = __skb_dequeue(&skqueue))) {
346 skb->dev = dev;
347 if (dev_queue_xmit(skb))
348 ipoib_warn(priv, "dev_queue_xmit failed "
349 "to requeue packet\n");
350 }
351}
352
353static struct ipoib_path *path_rec_create(struct net_device *dev,
354 union ib_gid *gid)
355{
356 struct ipoib_dev_priv *priv = netdev_priv(dev);
357 struct ipoib_path *path;
358
359 path = kmalloc(sizeof *path, GFP_ATOMIC);
360 if (!path)
361 return NULL;
362
363 path->dev = dev;
364 path->pathrec.dlid = 0;
365 path->ah = NULL;
366
367 skb_queue_head_init(&path->queue);
368
369 INIT_LIST_HEAD(&path->neigh_list);
370 path->query = NULL;
371 init_completion(&path->done);
372
373 memcpy(path->pathrec.dgid.raw, gid->raw, sizeof (union ib_gid));
374 path->pathrec.sgid = priv->local_gid;
375 path->pathrec.pkey = cpu_to_be16(priv->pkey);
376 path->pathrec.numb_path = 1;
377
378 return path;
379}
380
381static int path_rec_start(struct net_device *dev,
382 struct ipoib_path *path)
383{
384 struct ipoib_dev_priv *priv = netdev_priv(dev);
385
386 ipoib_dbg(priv, "Start path record lookup for " IPOIB_GID_FMT "\n",
387 IPOIB_GID_ARG(path->pathrec.dgid));
388
389 path->query_id =
390 ib_sa_path_rec_get(priv->ca, priv->port,
391 &path->pathrec,
392 IB_SA_PATH_REC_DGID |
393 IB_SA_PATH_REC_SGID |
394 IB_SA_PATH_REC_NUMB_PATH |
395 IB_SA_PATH_REC_PKEY,
396 1000, GFP_ATOMIC,
397 path_rec_completion,
398 path, &path->query);
399 if (path->query_id < 0) {
400 ipoib_warn(priv, "ib_sa_path_rec_get failed\n");
401 path->query = NULL;
402 return path->query_id;
403 }
404
405 return 0;
406}
407
408static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
409{
410 struct ipoib_dev_priv *priv = netdev_priv(dev);
411 struct ipoib_path *path;
412 struct ipoib_neigh *neigh;
413
414 neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
415 if (!neigh) {
416 ++priv->stats.tx_dropped;
417 dev_kfree_skb_any(skb);
418 return;
419 }
420
421 skb_queue_head_init(&neigh->queue);
422 neigh->neighbour = skb->dst->neighbour;
423 *to_ipoib_neigh(skb->dst->neighbour) = neigh;
424
425 /*
426 * We can only be called from ipoib_start_xmit, so we're
427 * inside tx_lock -- no need to save/restore flags.
428 */
429 spin_lock(&priv->lock);
430
431 path = __path_find(dev, (union ib_gid *) (skb->dst->neighbour->ha + 4));
432 if (!path) {
433 path = path_rec_create(dev,
434 (union ib_gid *) (skb->dst->neighbour->ha + 4));
435 if (!path)
436 goto err;
437
438 __path_add(dev, path);
439 }
440
441 list_add_tail(&neigh->list, &path->neigh_list);
442
443 if (path->pathrec.dlid) {
444 kref_get(&path->ah->ref);
445 neigh->ah = path->ah;
446
447 ipoib_send(dev, skb, path->ah,
448 be32_to_cpup((__be32 *) skb->dst->neighbour->ha));
449 } else {
450 neigh->ah = NULL;
451 if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
452 __skb_queue_tail(&neigh->queue, skb);
453 } else {
454 ++priv->stats.tx_dropped;
455 dev_kfree_skb_any(skb);
456 }
457
458 if (!path->query && path_rec_start(dev, path))
459 goto err;
460 }
461
462 spin_unlock(&priv->lock);
463 return;
464
465err:
466 *to_ipoib_neigh(skb->dst->neighbour) = NULL;
467 list_del(&neigh->list);
468 neigh->neighbour->ops->destructor = NULL;
469 kfree(neigh);
470
471 ++priv->stats.tx_dropped;
472 dev_kfree_skb_any(skb);
473
474 spin_unlock(&priv->lock);
475}
476
477static void path_lookup(struct sk_buff *skb, struct net_device *dev)
478{
479 struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
480
481 /* Look up path record for unicasts */
482 if (skb->dst->neighbour->ha[4] != 0xff) {
483 neigh_add_path(skb, dev);
484 return;
485 }
486
487 /* Add in the P_Key for multicasts */
488 skb->dst->neighbour->ha[8] = (priv->pkey >> 8) & 0xff;
489 skb->dst->neighbour->ha[9] = priv->pkey & 0xff;
490 ipoib_mcast_send(dev, (union ib_gid *) (skb->dst->neighbour->ha + 4), skb);
491}
492
493static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
494 struct ipoib_pseudoheader *phdr)
495{
496 struct ipoib_dev_priv *priv = netdev_priv(dev);
497 struct ipoib_path *path;
498
499 /*
500 * We can only be called from ipoib_start_xmit, so we're
501 * inside tx_lock -- no need to save/restore flags.
502 */
503 spin_lock(&priv->lock);
504
505 path = __path_find(dev, (union ib_gid *) (phdr->hwaddr + 4));
506 if (!path) {
507 path = path_rec_create(dev,
508 (union ib_gid *) (phdr->hwaddr + 4));
509 if (path) {
510 /* put pseudoheader back on for next time */
511 skb_push(skb, sizeof *phdr);
512 __skb_queue_tail(&path->queue, skb);
513
514 if (path_rec_start(dev, path)) {
515 spin_unlock(&priv->lock);
516 path_free(dev, path);
517 return;
518 } else
519 __path_add(dev, path);
520 } else {
521 ++priv->stats.tx_dropped;
522 dev_kfree_skb_any(skb);
523 }
524
525 spin_unlock(&priv->lock);
526 return;
527 }
528
529 if (path->pathrec.dlid) {
530 ipoib_dbg(priv, "Send unicast ARP to %04x\n",
531 be16_to_cpu(path->pathrec.dlid));
532
533 ipoib_send(dev, skb, path->ah,
534 be32_to_cpup((__be32 *) phdr->hwaddr));
535 } else if ((path->query || !path_rec_start(dev, path)) &&
536 skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
537 /* put pseudoheader back on for next time */
538 skb_push(skb, sizeof *phdr);
539 __skb_queue_tail(&path->queue, skb);
540 } else {
541 ++priv->stats.tx_dropped;
542 dev_kfree_skb_any(skb);
543 }
544
545 spin_unlock(&priv->lock);
546}
547
548static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
549{
550 struct ipoib_dev_priv *priv = netdev_priv(dev);
551 struct ipoib_neigh *neigh;
552 unsigned long flags;
553
554 local_irq_save(flags);
555 if (!spin_trylock(&priv->tx_lock)) {
556 local_irq_restore(flags);
557 return NETDEV_TX_LOCKED;
558 }
559
560 /*
561 * Check if our queue is stopped. Since we have the LLTX bit
562 * set, we can't rely on netif_stop_queue() preventing our
563 * xmit function from being called with a full queue.
564 */
565 if (unlikely(netif_queue_stopped(dev))) {
566 spin_unlock_irqrestore(&priv->tx_lock, flags);
567 return NETDEV_TX_BUSY;
568 }
569
570 if (skb->dst && skb->dst->neighbour) {
571 if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
572 path_lookup(skb, dev);
573 goto out;
574 }
575
576 neigh = *to_ipoib_neigh(skb->dst->neighbour);
577
578 if (likely(neigh->ah)) {
579 ipoib_send(dev, skb, neigh->ah,
580 be32_to_cpup((__be32 *) skb->dst->neighbour->ha));
581 goto out;
582 }
583
584 if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
585 spin_lock(&priv->lock);
586 __skb_queue_tail(&neigh->queue, skb);
587 spin_unlock(&priv->lock);
588 } else {
589 ++priv->stats.tx_dropped;
590 dev_kfree_skb_any(skb);
591 }
592 } else {
593 struct ipoib_pseudoheader *phdr =
594 (struct ipoib_pseudoheader *) skb->data;
595 skb_pull(skb, sizeof *phdr);
596
597 if (phdr->hwaddr[4] == 0xff) {
598			/* Add in the P_Key for multicast */
599 phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
600 phdr->hwaddr[9] = priv->pkey & 0xff;
601
602 ipoib_mcast_send(dev, (union ib_gid *) (phdr->hwaddr + 4), skb);
603 } else {
604 /* unicast GID -- should be ARP reply */
605
606 if (be16_to_cpup((u16 *) skb->data) != ETH_P_ARP) {
607 ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x "
608 IPOIB_GID_FMT "\n",
609 skb->dst ? "neigh" : "dst",
610 be16_to_cpup((u16 *) skb->data),
611 be32_to_cpup((u32 *) phdr->hwaddr),
612 IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4)));
613 dev_kfree_skb_any(skb);
614 ++priv->stats.tx_dropped;
615 goto out;
616 }
617
618 unicast_arp_send(skb, dev, phdr);
619 }
620 }
621
622out:
623 spin_unlock_irqrestore(&priv->tx_lock, flags);
624
625 return NETDEV_TX_OK;
626}
627
628static struct net_device_stats *ipoib_get_stats(struct net_device *dev)
629{
630 struct ipoib_dev_priv *priv = netdev_priv(dev);
631
632 return &priv->stats;
633}
634
635static void ipoib_timeout(struct net_device *dev)
636{
637 struct ipoib_dev_priv *priv = netdev_priv(dev);
638
639 ipoib_warn(priv, "transmit timeout: latency %ld\n",
640 jiffies - dev->trans_start);
641 /* XXX reset QP, etc. */
642}
643
644static int ipoib_hard_header(struct sk_buff *skb,
645 struct net_device *dev,
646 unsigned short type,
647 void *daddr, void *saddr, unsigned len)
648{
649 struct ipoib_header *header;
650
651 header = (struct ipoib_header *) skb_push(skb, sizeof *header);
652
653 header->proto = htons(type);
654 header->reserved = 0;
655
656 /*
657 * If we don't have a neighbour structure, stuff the
658 * destination address onto the front of the skb so we can
659 * figure out where to send the packet later.
660 */
661 if (!skb->dst || !skb->dst->neighbour) {
662 struct ipoib_pseudoheader *phdr =
663 (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr);
664 memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
665 }
666
667 return 0;
668}
669
670static void ipoib_set_mcast_list(struct net_device *dev)
671{
672 struct ipoib_dev_priv *priv = netdev_priv(dev);
673
674 schedule_work(&priv->restart_task);
675}
676
677static void ipoib_neigh_destructor(struct neighbour *n)
678{
679 struct ipoib_neigh *neigh;
680 struct ipoib_dev_priv *priv = netdev_priv(n->dev);
681 unsigned long flags;
682 struct ipoib_ah *ah = NULL;
683
684 ipoib_dbg(priv,
685 "neigh_destructor for %06x " IPOIB_GID_FMT "\n",
686 be32_to_cpup((__be32 *) n->ha),
687 IPOIB_GID_ARG(*((union ib_gid *) (n->ha + 4))));
688
689 spin_lock_irqsave(&priv->lock, flags);
690
691 neigh = *to_ipoib_neigh(n);
692 if (neigh) {
693 if (neigh->ah)
694 ah = neigh->ah;
695 list_del(&neigh->list);
696 *to_ipoib_neigh(n) = NULL;
697 kfree(neigh);
698 }
699
700 spin_unlock_irqrestore(&priv->lock, flags);
701
702 if (ah)
703 ipoib_put_ah(ah);
704}
705
706static int ipoib_neigh_setup(struct neighbour *neigh)
707{
708 /*
709 * Is this kosher? I can't find anybody in the kernel that
710 * sets neigh->destructor, so we should be able to set it here
711 * without trouble.
712 */
713 neigh->ops->destructor = ipoib_neigh_destructor;
714
715 return 0;
716}
717
718static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
719{
720 parms->neigh_setup = ipoib_neigh_setup;
721
722 return 0;
723}
724
725int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
726{
727 struct ipoib_dev_priv *priv = netdev_priv(dev);
728
729 /* Allocate RX/TX "rings" to hold queued skbs */
730
731 priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf),
732 GFP_KERNEL);
733 if (!priv->rx_ring) {
734 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
735 ca->name, IPOIB_RX_RING_SIZE);
736 goto out;
737 }
738 memset(priv->rx_ring, 0,
739 IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf));
740
741 priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf),
742 GFP_KERNEL);
743 if (!priv->tx_ring) {
744 printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
745 ca->name, IPOIB_TX_RING_SIZE);
746 goto out_rx_ring_cleanup;
747 }
748 memset(priv->tx_ring, 0,
749 IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf));
750
751 /* priv->tx_head & tx_tail are already 0 */
752
753 if (ipoib_ib_dev_init(dev, ca, port))
754 goto out_tx_ring_cleanup;
755
756 return 0;
757
758out_tx_ring_cleanup:
759 kfree(priv->tx_ring);
760
761out_rx_ring_cleanup:
762 kfree(priv->rx_ring);
763
764out:
765 return -ENOMEM;
766}
767
768void ipoib_dev_cleanup(struct net_device *dev)
769{
770 struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;
771
772 ipoib_delete_debug_file(dev);
773
774 /* Delete any child interfaces first */
775 list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
776 unregister_netdev(cpriv->dev);
777 ipoib_dev_cleanup(cpriv->dev);
778 free_netdev(cpriv->dev);
779 }
780
781 ipoib_ib_dev_cleanup(dev);
782
783 if (priv->rx_ring) {
784 kfree(priv->rx_ring);
785 priv->rx_ring = NULL;
786 }
787
788 if (priv->tx_ring) {
789 kfree(priv->tx_ring);
790 priv->tx_ring = NULL;
791 }
792}
793
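/*
 * Fill in the net_device methods and IPoIB link parameters (hardware
 * header length including the pseudoheader, 20-byte address length,
 * ARPHRD_INFINIBAND type, LLTX) and initialize the private locks,
 * semaphores, lists and work items.  Called back from alloc_netdev().
 */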
794static void ipoib_setup(struct net_device *dev)
795{
796 struct ipoib_dev_priv *priv = netdev_priv(dev);
797
798 dev->open = ipoib_open;
799 dev->stop = ipoib_stop;
800 dev->change_mtu = ipoib_change_mtu;
801 dev->hard_start_xmit = ipoib_start_xmit;
802 dev->get_stats = ipoib_get_stats;
803 dev->tx_timeout = ipoib_timeout;
804 dev->hard_header = ipoib_hard_header;
805 dev->set_multicast_list = ipoib_set_mcast_list;
806 dev->neigh_setup = ipoib_neigh_setup_dev;
807
808 dev->watchdog_timeo = HZ;
809
810 dev->rebuild_header = NULL;
811 dev->set_mac_address = NULL;
812 dev->header_cache_update = NULL;
813
814 dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
815
816 /*
817 * We add in INFINIBAND_ALEN to allow for the destination
818 * address "pseudoheader" for skbs without neighbour struct.
819 */
820 dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN;
821 dev->addr_len = INFINIBAND_ALEN;
822 dev->type = ARPHRD_INFINIBAND;
823 dev->tx_queue_len = IPOIB_TX_RING_SIZE * 2;
824 dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
825
826 /* MTU will be reset when mcast join happens */
827 dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN;
828 priv->mcast_mtu = priv->admin_mtu = dev->mtu;
829
830 memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
831
832 netif_carrier_off(dev);
833
834 SET_MODULE_OWNER(dev);
835
836 priv->dev = dev;
837
838 spin_lock_init(&priv->lock);
839 spin_lock_init(&priv->tx_lock);
840
841 init_MUTEX(&priv->mcast_mutex);
842 init_MUTEX(&priv->vlan_mutex);
843
844 INIT_LIST_HEAD(&priv->path_list);
845 INIT_LIST_HEAD(&priv->child_intfs);
846 INIT_LIST_HEAD(&priv->dead_ahs);
847 INIT_LIST_HEAD(&priv->multicast_list);
848
849 INIT_WORK(&priv->pkey_task, ipoib_pkey_poll, priv->dev);
850 INIT_WORK(&priv->mcast_task, ipoib_mcast_join_task, priv->dev);
851 INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush, priv->dev);
852 INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task, priv->dev);
853 INIT_WORK(&priv->ah_reap_task, ipoib_reap_ah, priv->dev);
854}
855
856struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
857{
858 struct net_device *dev;
859
860 dev = alloc_netdev((int) sizeof (struct ipoib_dev_priv), name,
861 ipoib_setup);
862 if (!dev)
863 return NULL;
864
865 return netdev_priv(dev);
866}
867
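/*
 * sysfs attributes on the net device's class device: "pkey" is
 * read-only; "create_child" and "delete_child" take a P_Key and
 * create or remove a child interface via ipoib_vlan_add() and
 * ipoib_vlan_delete().
 */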
868static ssize_t show_pkey(struct class_device *cdev, char *buf)
869{
870 struct ipoib_dev_priv *priv =
871 netdev_priv(container_of(cdev, struct net_device, class_dev));
872
873 return sprintf(buf, "0x%04x\n", priv->pkey);
874}
875static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
876
877static ssize_t create_child(struct class_device *cdev,
878 const char *buf, size_t count)
879{
880 int pkey;
881 int ret;
882
883 if (sscanf(buf, "%i", &pkey) != 1)
884 return -EINVAL;
885
886 if (pkey < 0 || pkey > 0xffff)
887 return -EINVAL;
888
889 ret = ipoib_vlan_add(container_of(cdev, struct net_device, class_dev),
890 pkey);
891
892 return ret ? ret : count;
893}
894static CLASS_DEVICE_ATTR(create_child, S_IWUGO, NULL, create_child);
895
896static ssize_t delete_child(struct class_device *cdev,
897 const char *buf, size_t count)
898{
899 int pkey;
900 int ret;
901
902 if (sscanf(buf, "%i", &pkey) != 1)
903 return -EINVAL;
904
905 if (pkey < 0 || pkey > 0xffff)
906 return -EINVAL;
907
908 ret = ipoib_vlan_delete(container_of(cdev, struct net_device, class_dev),
909 pkey);
910
911 return ret ? ret : count;
912
913}
914static CLASS_DEVICE_ATTR(delete_child, S_IWUGO, NULL, delete_child);
915
916int ipoib_add_pkey_attr(struct net_device *dev)
917{
918 return class_device_create_file(&dev->class_dev,
919 &class_device_attr_pkey);
920}
921
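/*
 * Bring up one IPoIB interface for an HCA port: allocate the net
 * device, read the port's default P_Key (stored in the broadcast
 * address) and GID (stored in the interface hardware address),
 * initialize device resources, register the IB event handler and the
 * net device, then create the debugfs and sysfs entries.  Each
 * failure unwinds the steps taken so far.
 */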
922static struct net_device *ipoib_add_port(const char *format,
923 struct ib_device *hca, u8 port)
924{
925 struct ipoib_dev_priv *priv;
926 int result = -ENOMEM;
927
928 priv = ipoib_intf_alloc(format);
929 if (!priv)
930 goto alloc_mem_failed;
931
932 SET_NETDEV_DEV(priv->dev, hca->dma_device);
933
934 result = ib_query_pkey(hca, port, 0, &priv->pkey);
935 if (result) {
936 printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
937 hca->name, port, result);
938		goto device_init_failed;
939 }
940
941 priv->dev->broadcast[8] = priv->pkey >> 8;
942 priv->dev->broadcast[9] = priv->pkey & 0xff;
943
944 result = ib_query_gid(hca, port, 0, &priv->local_gid);
945 if (result) {
946 printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
947 hca->name, port, result);
948		goto device_init_failed;
949 } else
950 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
951
952
953 result = ipoib_dev_init(priv->dev, hca, port);
954 if (result < 0) {
955 printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
956 hca->name, port, result);
957 goto device_init_failed;
958 }
959
960 INIT_IB_EVENT_HANDLER(&priv->event_handler,
961 priv->ca, ipoib_event);
962 result = ib_register_event_handler(&priv->event_handler);
963 if (result < 0) {
964 printk(KERN_WARNING "%s: ib_register_event_handler failed for "
965 "port %d (ret = %d)\n",
966 hca->name, port, result);
967 goto event_failed;
968 }
969
970 result = register_netdev(priv->dev);
971 if (result) {
972 printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n",
973 hca->name, port, result);
974 goto register_failed;
975 }
976
977 if (ipoib_create_debug_file(priv->dev))
978 goto debug_failed;
979
980 if (ipoib_add_pkey_attr(priv->dev))
981 goto sysfs_failed;
982 if (class_device_create_file(&priv->dev->class_dev,
983 &class_device_attr_create_child))
984 goto sysfs_failed;
985 if (class_device_create_file(&priv->dev->class_dev,
986 &class_device_attr_delete_child))
987 goto sysfs_failed;
988
989 return priv->dev;
990
991sysfs_failed:
992 ipoib_delete_debug_file(priv->dev);
993
994debug_failed:
995 unregister_netdev(priv->dev);
996
997register_failed:
998 ib_unregister_event_handler(&priv->event_handler);
999
1000event_failed:
1001 ipoib_dev_cleanup(priv->dev);
1002
1003device_init_failed:
1004 free_netdev(priv->dev);
1005
1006alloc_mem_failed:
1007 return ERR_PTR(result);
1008}
1009
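/*
 * Per-device client hook: a switch only has management port 0, an HCA
 * has ports 1..phys_port_cnt.  Successfully created interfaces are
 * collected on a list stored as client data for ipoib_remove_one().
 */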
1010static void ipoib_add_one(struct ib_device *device)
1011{
1012 struct list_head *dev_list;
1013 struct net_device *dev;
1014 struct ipoib_dev_priv *priv;
1015 int s, e, p;
1016
1017 dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
1018 if (!dev_list)
1019 return;
1020
1021 INIT_LIST_HEAD(dev_list);
1022
1023 if (device->node_type == IB_NODE_SWITCH) {
1024 s = 0;
1025 e = 0;
1026 } else {
1027 s = 1;
1028 e = device->phys_port_cnt;
1029 }
1030
1031 for (p = s; p <= e; ++p) {
1032 dev = ipoib_add_port("ib%d", device, p);
1033 if (!IS_ERR(dev)) {
1034 priv = netdev_priv(dev);
1035 list_add_tail(&priv->list, dev_list);
1036 }
1037 }
1038
1039 ib_set_client_data(device, &ipoib_client, dev_list);
1040}
1041
1042static void ipoib_remove_one(struct ib_device *device)
1043{
1044 struct ipoib_dev_priv *priv, *tmp;
1045 struct list_head *dev_list;
1046
1047 dev_list = ib_get_client_data(device, &ipoib_client);
1048
1049 list_for_each_entry_safe(priv, tmp, dev_list, list) {
1050 ib_unregister_event_handler(&priv->event_handler);
1051
1052 unregister_netdev(priv->dev);
1053 ipoib_dev_cleanup(priv->dev);
1054 free_netdev(priv->dev);
1055 }
1056}
1057
1058static int __init ipoib_init_module(void)
1059{
1060 int ret;
1061
1062 ret = ipoib_register_debugfs();
1063 if (ret)
1064 return ret;
1065
1066 /*
1067 * We create our own workqueue mainly because we want to be
1068 * able to flush it when devices are being removed. We can't
1069 * use schedule_work()/flush_scheduled_work() because both
1070 * unregister_netdev() and linkwatch_event take the rtnl lock,
1071 * so flush_scheduled_work() can deadlock during device
1072 * removal.
1073 */
1074 ipoib_workqueue = create_singlethread_workqueue("ipoib");
1075 if (!ipoib_workqueue) {
1076 ret = -ENOMEM;
1077 goto err_fs;
1078 }
1079
1080 ret = ib_register_client(&ipoib_client);
1081 if (ret)
1082 goto err_wq;
1083
1084 return 0;
1085
1086err_wq:
1087	destroy_workqueue(ipoib_workqueue);
1088
1089err_fs:
1090	ipoib_unregister_debugfs();
1091
1092 return ret;
1093}
1094
1095static void __exit ipoib_cleanup_module(void)
1096{
1097 ipoib_unregister_debugfs();
1098 ib_unregister_client(&ipoib_client);
1099 destroy_workqueue(ipoib_workqueue);
1100}
1101
1102module_init(ipoib_init_module);
1103module_exit(ipoib_cleanup_module);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
new file mode 100644
index 000000000000..f46932dc81c9
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -0,0 +1,991 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $
33 */
34
35#include <linux/skbuff.h>
36#include <linux/rtnetlink.h>
37#include <linux/ip.h>
38#include <linux/in.h>
39#include <linux/igmp.h>
40#include <linux/inetdevice.h>
41#include <linux/delay.h>
42#include <linux/completion.h>
43
44#include "ipoib.h"
45
46#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
47static int mcast_debug_level;
48
49module_param(mcast_debug_level, int, 0644);
50MODULE_PARM_DESC(mcast_debug_level,
51 "Enable multicast debug tracing if > 0");
52#endif
53
54static DECLARE_MUTEX(mcast_mutex);
55
56/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
57struct ipoib_mcast {
58 struct ib_sa_mcmember_rec mcmember;
59 struct ipoib_ah *ah;
60
61 struct rb_node rb_node;
62 struct list_head list;
63 struct completion done;
64
65 int query_id;
66 struct ib_sa_query *query;
67
68 unsigned long created;
69 unsigned long backoff;
70
71 unsigned long flags;
72 unsigned char logcount;
73
74 struct list_head neigh_list;
75
76 struct sk_buff_head pkt_queue;
77
78 struct net_device *dev;
79};
80
81struct ipoib_mcast_iter {
82 struct net_device *dev;
83 union ib_gid mgid;
84 unsigned long created;
85 unsigned int queuelen;
86 unsigned int complete;
87 unsigned int send_only;
88};
89
90static void ipoib_mcast_free(struct ipoib_mcast *mcast)
91{
92 struct net_device *dev = mcast->dev;
93 struct ipoib_dev_priv *priv = netdev_priv(dev);
94 struct ipoib_neigh *neigh, *tmp;
95 unsigned long flags;
96 LIST_HEAD(ah_list);
97 struct ipoib_ah *ah, *tah;
98
99 ipoib_dbg_mcast(netdev_priv(dev),
100 "deleting multicast group " IPOIB_GID_FMT "\n",
101 IPOIB_GID_ARG(mcast->mcmember.mgid));
102
103 spin_lock_irqsave(&priv->lock, flags);
104
105 list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) {
106 if (neigh->ah)
107 list_add_tail(&neigh->ah->list, &ah_list);
108 *to_ipoib_neigh(neigh->neighbour) = NULL;
109 neigh->neighbour->ops->destructor = NULL;
110 kfree(neigh);
111 }
112
113 spin_unlock_irqrestore(&priv->lock, flags);
114
115 list_for_each_entry_safe(ah, tah, &ah_list, list)
116 ipoib_put_ah(ah);
117
118 if (mcast->ah)
119 ipoib_put_ah(mcast->ah);
120
121 while (!skb_queue_empty(&mcast->pkt_queue)) {
122 struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
123
124 skb->dev = dev;
125 dev_kfree_skb_any(skb);
126 }
127
128 kfree(mcast);
129}
130
131static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
132 int can_sleep)
133{
134 struct ipoib_mcast *mcast;
135
136 mcast = kmalloc(sizeof (*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC);
137 if (!mcast)
138 return NULL;
139
140 memset(mcast, 0, sizeof (*mcast));
141
142 init_completion(&mcast->done);
143
144 mcast->dev = dev;
145 mcast->created = jiffies;
146 mcast->backoff = HZ;
147 mcast->logcount = 0;
148
149 INIT_LIST_HEAD(&mcast->list);
150 INIT_LIST_HEAD(&mcast->neigh_list);
151 skb_queue_head_init(&mcast->pkt_queue);
152
153 mcast->ah = NULL;
154 mcast->query = NULL;
155
156 return mcast;
157}
158
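/*
 * Multicast groups are kept in an rb-tree hung off the device private
 * data, keyed by MGID; these two helpers do lookup and insertion and
 * are called with priv->lock held.
 */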
159static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, union ib_gid *mgid)
160{
161 struct ipoib_dev_priv *priv = netdev_priv(dev);
162 struct rb_node *n = priv->multicast_tree.rb_node;
163
164 while (n) {
165 struct ipoib_mcast *mcast;
166 int ret;
167
168 mcast = rb_entry(n, struct ipoib_mcast, rb_node);
169
170 ret = memcmp(mgid->raw, mcast->mcmember.mgid.raw,
171 sizeof (union ib_gid));
172 if (ret < 0)
173 n = n->rb_left;
174 else if (ret > 0)
175 n = n->rb_right;
176 else
177 return mcast;
178 }
179
180 return NULL;
181}
182
183static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast)
184{
185 struct ipoib_dev_priv *priv = netdev_priv(dev);
186 struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
187
188 while (*n) {
189 struct ipoib_mcast *tmcast;
190 int ret;
191
192 pn = *n;
193 tmcast = rb_entry(pn, struct ipoib_mcast, rb_node);
194
195 ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,
196 sizeof (union ib_gid));
197 if (ret < 0)
198 n = &pn->rb_left;
199 else if (ret > 0)
200 n = &pn->rb_right;
201 else
202 return -EEXIST;
203 }
204
205 rb_link_node(&mcast->rb_node, pn, n);
206 rb_insert_color(&mcast->rb_node, &priv->multicast_tree);
207
208 return 0;
209}
210
211static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
212 struct ib_sa_mcmember_rec *mcmember)
213{
214 struct net_device *dev = mcast->dev;
215 struct ipoib_dev_priv *priv = netdev_priv(dev);
216 int ret;
217
218 mcast->mcmember = *mcmember;
219
220 /* Set the cached Q_Key before we attach if it's the broadcast group */
221 if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
222 sizeof (union ib_gid))) {
223 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
224 priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
225 }
226
227 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
228 if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
229 ipoib_warn(priv, "multicast group " IPOIB_GID_FMT
230 " already attached\n",
231 IPOIB_GID_ARG(mcast->mcmember.mgid));
232
233 return 0;
234 }
235
236 ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
237 &mcast->mcmember.mgid);
238 if (ret < 0) {
239 ipoib_warn(priv, "couldn't attach QP to multicast group "
240 IPOIB_GID_FMT "\n",
241 IPOIB_GID_ARG(mcast->mcmember.mgid));
242
243 clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
244 return ret;
245 }
246 }
247
248 {
249 struct ib_ah_attr av = {
250 .dlid = be16_to_cpu(mcast->mcmember.mlid),
251 .port_num = priv->port,
252 .sl = mcast->mcmember.sl,
253 .ah_flags = IB_AH_GRH,
254 .grh = {
255 .flow_label = be32_to_cpu(mcast->mcmember.flow_label),
256 .hop_limit = mcast->mcmember.hop_limit,
257 .sgid_index = 0,
258 .traffic_class = mcast->mcmember.traffic_class
259 }
260 };
261
262 av.grh.dgid = mcast->mcmember.mgid;
263
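		/*
		 * Presumably this scales our sends down to the group's
		 * rate: static_rate is a relative encoding (0 = full
		 * local rate), derived here from the ratio of the local
		 * port rate to the member rate.
		 */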
264 if (ib_sa_rate_enum_to_int(mcast->mcmember.rate) > 0)
265 av.static_rate = (2 * priv->local_rate -
266 ib_sa_rate_enum_to_int(mcast->mcmember.rate) - 1) /
267 (priv->local_rate ? priv->local_rate : 1);
268
269 ipoib_dbg_mcast(priv, "static_rate %d for local port %dX, mcmember %dX\n",
270 av.static_rate, priv->local_rate,
271 ib_sa_rate_enum_to_int(mcast->mcmember.rate));
272
273 mcast->ah = ipoib_create_ah(dev, priv->pd, &av);
274 if (!mcast->ah) {
275			ipoib_warn(priv, "ipoib_create_ah failed\n");
276 } else {
277 ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT
278 " AV %p, LID 0x%04x, SL %d\n",
279 IPOIB_GID_ARG(mcast->mcmember.mgid),
280 mcast->ah->ah,
281 be16_to_cpu(mcast->mcmember.mlid),
282 mcast->mcmember.sl);
283 }
284 }
285
286 /* actually send any queued packets */
287 while (!skb_queue_empty(&mcast->pkt_queue)) {
288 struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
289
290 skb->dev = dev;
291
292 if (!skb->dst || !skb->dst->neighbour) {
293 /* put pseudoheader back on for next time */
294 skb_push(skb, sizeof (struct ipoib_pseudoheader));
295 }
296
297 if (dev_queue_xmit(skb))
298 ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
299 }
300
301 return 0;
302}
303
304static void
305ipoib_mcast_sendonly_join_complete(int status,
306 struct ib_sa_mcmember_rec *mcmember,
307 void *mcast_ptr)
308{
309 struct ipoib_mcast *mcast = mcast_ptr;
310 struct net_device *dev = mcast->dev;
311
312 if (!status)
313 ipoib_mcast_join_finish(mcast, mcmember);
314 else {
315 if (mcast->logcount++ < 20)
316 ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for "
317 IPOIB_GID_FMT ", status %d\n",
318 IPOIB_GID_ARG(mcast->mcmember.mgid), status);
319
320 /* Flush out any queued packets */
321 while (!skb_queue_empty(&mcast->pkt_queue)) {
322 struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
323
324 skb->dev = dev;
325
326 dev_kfree_skb_any(skb);
327 }
328
329 /* Clear the busy flag so we try again */
330 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
331 }
332
333 complete(&mcast->done);
334}
335
336static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
337{
338 struct net_device *dev = mcast->dev;
339 struct ipoib_dev_priv *priv = netdev_priv(dev);
340 struct ib_sa_mcmember_rec rec = {
341#if 0 /* Some SMs don't support send-only yet */
342 .join_state = 4
343#else
344 .join_state = 1
345#endif
346 };
347 int ret = 0;
348
349 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
350 ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
351 return -ENODEV;
352 }
353
354 if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
355 ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
356 return -EBUSY;
357 }
358
359 rec.mgid = mcast->mcmember.mgid;
360 rec.port_gid = priv->local_gid;
361	rec.pkey = cpu_to_be16(priv->pkey);
362
363 ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec,
364 IB_SA_MCMEMBER_REC_MGID |
365 IB_SA_MCMEMBER_REC_PORT_GID |
366 IB_SA_MCMEMBER_REC_PKEY |
367 IB_SA_MCMEMBER_REC_JOIN_STATE,
368 1000, GFP_ATOMIC,
369 ipoib_mcast_sendonly_join_complete,
370 mcast, &mcast->query);
371 if (ret < 0) {
372 ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n",
373 ret);
374 } else {
375 ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT
376 ", starting join\n",
377 IPOIB_GID_ARG(mcast->mcmember.mgid));
378
379 mcast->query_id = ret;
380 }
381
382 return ret;
383}
384
385static void ipoib_mcast_join_complete(int status,
386 struct ib_sa_mcmember_rec *mcmember,
387 void *mcast_ptr)
388{
389 struct ipoib_mcast *mcast = mcast_ptr;
390 struct net_device *dev = mcast->dev;
391 struct ipoib_dev_priv *priv = netdev_priv(dev);
392
393 ipoib_dbg_mcast(priv, "join completion for " IPOIB_GID_FMT
394 " (status %d)\n",
395 IPOIB_GID_ARG(mcast->mcmember.mgid), status);
396
397 if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) {
398 mcast->backoff = HZ;
399 down(&mcast_mutex);
400 if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
401 queue_work(ipoib_workqueue, &priv->mcast_task);
402 up(&mcast_mutex);
403 complete(&mcast->done);
404 return;
405 }
406
407 if (status == -EINTR) {
408 complete(&mcast->done);
409 return;
410 }
411
412 if (status && mcast->logcount++ < 20) {
413 if (status == -ETIMEDOUT || status == -EINTR) {
414 ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT
415 ", status %d\n",
416 IPOIB_GID_ARG(mcast->mcmember.mgid),
417 status);
418 } else {
419 ipoib_warn(priv, "multicast join failed for "
420 IPOIB_GID_FMT ", status %d\n",
421 IPOIB_GID_ARG(mcast->mcmember.mgid),
422 status);
423 }
424 }
425
426 mcast->backoff *= 2;
427 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
428 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
429
430 mcast->query = NULL;
431
432 down(&mcast_mutex);
433 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) {
434 if (status == -ETIMEDOUT)
435 queue_work(ipoib_workqueue, &priv->mcast_task);
436 else
437 queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
438 mcast->backoff * HZ);
439 } else
440 complete(&mcast->done);
441 up(&mcast_mutex);
442
443 return;
444}
445
446static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
447 int create)
448{
449 struct ipoib_dev_priv *priv = netdev_priv(dev);
450 struct ib_sa_mcmember_rec rec = {
451 .join_state = 1
452 };
453 ib_sa_comp_mask comp_mask;
454 int ret = 0;
455
456 ipoib_dbg_mcast(priv, "joining MGID " IPOIB_GID_FMT "\n",
457 IPOIB_GID_ARG(mcast->mcmember.mgid));
458
459 rec.mgid = mcast->mcmember.mgid;
460 rec.port_gid = priv->local_gid;
461	rec.pkey = cpu_to_be16(priv->pkey);
462
463 comp_mask =
464 IB_SA_MCMEMBER_REC_MGID |
465 IB_SA_MCMEMBER_REC_PORT_GID |
466 IB_SA_MCMEMBER_REC_PKEY |
467 IB_SA_MCMEMBER_REC_JOIN_STATE;
468
469 if (create) {
470 comp_mask |=
471 IB_SA_MCMEMBER_REC_QKEY |
472 IB_SA_MCMEMBER_REC_SL |
473 IB_SA_MCMEMBER_REC_FLOW_LABEL |
474 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
475
476 rec.qkey = priv->broadcast->mcmember.qkey;
477 rec.sl = priv->broadcast->mcmember.sl;
478 rec.flow_label = priv->broadcast->mcmember.flow_label;
479 rec.traffic_class = priv->broadcast->mcmember.traffic_class;
480 }
481
482 ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask,
483 mcast->backoff * 1000, GFP_ATOMIC,
484 ipoib_mcast_join_complete,
485 mcast, &mcast->query);
486
487 if (ret < 0) {
488 ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret);
489
490 mcast->backoff *= 2;
491 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
492 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
493
494 down(&mcast_mutex);
495 if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
496 queue_delayed_work(ipoib_workqueue,
497 &priv->mcast_task,
498 mcast->backoff);
499 up(&mcast_mutex);
500 } else
501 mcast->query_id = ret;
502}
503
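/*
 * Multicast join worker: refresh the local GID, LID and link rate,
 * make sure the broadcast group exists and is joined first (its
 * parameters seed every other join), then walk the multicast list
 * joining one unattached group per invocation.  Once everything is
 * joined, set the multicast MTU and turn the carrier on.
 */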
504void ipoib_mcast_join_task(void *dev_ptr)
505{
506 struct net_device *dev = dev_ptr;
507 struct ipoib_dev_priv *priv = netdev_priv(dev);
508
509 if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
510 return;
511
512 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
513		ipoib_warn(priv, "ib_query_gid() failed\n");
514 else
515 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
516
517 {
518 struct ib_port_attr attr;
519
520 if (!ib_query_port(priv->ca, priv->port, &attr)) {
521 priv->local_lid = attr.lid;
522 priv->local_rate = attr.active_speed *
523 ib_width_enum_to_int(attr.active_width);
524 } else
525 ipoib_warn(priv, "ib_query_port failed\n");
526 }
527
528 if (!priv->broadcast) {
529 priv->broadcast = ipoib_mcast_alloc(dev, 1);
530 if (!priv->broadcast) {
531 ipoib_warn(priv, "failed to allocate broadcast group\n");
532 down(&mcast_mutex);
533 if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
534 queue_delayed_work(ipoib_workqueue,
535 &priv->mcast_task, HZ);
536 up(&mcast_mutex);
537 return;
538 }
539
540 memcpy(priv->broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
541 sizeof (union ib_gid));
542
543 spin_lock_irq(&priv->lock);
544 __ipoib_mcast_add(dev, priv->broadcast);
545 spin_unlock_irq(&priv->lock);
546 }
547
548 if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
549 ipoib_mcast_join(dev, priv->broadcast, 0);
550 return;
551 }
552
553 while (1) {
554 struct ipoib_mcast *mcast = NULL;
555
556 spin_lock_irq(&priv->lock);
557 list_for_each_entry(mcast, &priv->multicast_list, list) {
558 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
559 && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
560 && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
561 /* Found the next unjoined group */
562 break;
563 }
564 }
565 spin_unlock_irq(&priv->lock);
566
567 if (&mcast->list == &priv->multicast_list) {
568 /* All done */
569 break;
570 }
571
572 ipoib_mcast_join(dev, mcast, 1);
573 return;
574 }
575
576 priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) -
577 IPOIB_ENCAP_LEN;
578 dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
579
580 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
581
582 clear_bit(IPOIB_MCAST_RUN, &priv->flags);
583 netif_carrier_on(dev);
584}
585
586int ipoib_mcast_start_thread(struct net_device *dev)
587{
588 struct ipoib_dev_priv *priv = netdev_priv(dev);
589
590 ipoib_dbg_mcast(priv, "starting multicast thread\n");
591
592 down(&mcast_mutex);
593 if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
594 queue_work(ipoib_workqueue, &priv->mcast_task);
595 up(&mcast_mutex);
596
597 return 0;
598}
599
600int ipoib_mcast_stop_thread(struct net_device *dev)
601{
602 struct ipoib_dev_priv *priv = netdev_priv(dev);
603 struct ipoib_mcast *mcast;
604
605 ipoib_dbg_mcast(priv, "stopping multicast thread\n");
606
607 down(&mcast_mutex);
608 clear_bit(IPOIB_MCAST_RUN, &priv->flags);
609 cancel_delayed_work(&priv->mcast_task);
610 up(&mcast_mutex);
611
612 flush_workqueue(ipoib_workqueue);
613
614 if (priv->broadcast && priv->broadcast->query) {
615 ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query);
616 priv->broadcast->query = NULL;
617 ipoib_dbg_mcast(priv, "waiting for bcast\n");
618 wait_for_completion(&priv->broadcast->done);
619 }
620
621 list_for_each_entry(mcast, &priv->multicast_list, list) {
622 if (mcast->query) {
623 ib_sa_cancel_query(mcast->query_id, mcast->query);
624 mcast->query = NULL;
625 ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
626 IPOIB_GID_ARG(mcast->mcmember.mgid));
627 wait_for_completion(&mcast->done);
628 }
629 }
630
631 return 0;
632}
633
634static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
635{
636 struct ipoib_dev_priv *priv = netdev_priv(dev);
637 struct ib_sa_mcmember_rec rec = {
638 .join_state = 1
639 };
640 int ret = 0;
641
642 if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags))
643 return 0;
644
645 ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n",
646 IPOIB_GID_ARG(mcast->mcmember.mgid));
647
648 rec.mgid = mcast->mcmember.mgid;
649 rec.port_gid = priv->local_gid;
650	rec.pkey = cpu_to_be16(priv->pkey);
651
652 /* Remove ourselves from the multicast group */
653 ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
654 &mcast->mcmember.mgid);
655 if (ret)
656 ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret);
657
658 /*
659 * Just make one shot at leaving and don't wait for a reply;
660 * if we fail, too bad.
661 */
662 ret = ib_sa_mcmember_rec_delete(priv->ca, priv->port, &rec,
663 IB_SA_MCMEMBER_REC_MGID |
664 IB_SA_MCMEMBER_REC_PORT_GID |
665 IB_SA_MCMEMBER_REC_PKEY |
666 IB_SA_MCMEMBER_REC_JOIN_STATE,
667 0, GFP_ATOMIC, NULL,
668 mcast, &mcast->query);
669 if (ret < 0)
670 ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed "
671 "for leave (result = %d)\n", ret);
672
673 return 0;
674}
675
676void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid,
677 struct sk_buff *skb)
678{
679 struct ipoib_dev_priv *priv = netdev_priv(dev);
680 struct ipoib_mcast *mcast;
681
682 /*
683 * We can only be called from ipoib_start_xmit, so we're
684 * inside tx_lock -- no need to save/restore flags.
685 */
686 spin_lock(&priv->lock);
687
688 mcast = __ipoib_mcast_find(dev, mgid);
689 if (!mcast) {
690 /* Let's create a new send only group now */
691 ipoib_dbg_mcast(priv, "setting up send only multicast group for "
692 IPOIB_GID_FMT "\n", IPOIB_GID_ARG(*mgid));
693
694 mcast = ipoib_mcast_alloc(dev, 0);
695 if (!mcast) {
696 ipoib_warn(priv, "unable to allocate memory for "
697 "multicast structure\n");
698 dev_kfree_skb_any(skb);
699 goto out;
700 }
701
702 set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
703 mcast->mcmember.mgid = *mgid;
704 __ipoib_mcast_add(dev, mcast);
705 list_add_tail(&mcast->list, &priv->multicast_list);
706 }
707
708 if (!mcast->ah) {
709 if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
710 skb_queue_tail(&mcast->pkt_queue, skb);
711 else
712 dev_kfree_skb_any(skb);
713
714 if (mcast->query)
715 ipoib_dbg_mcast(priv, "no address vector, "
716 "but multicast join already started\n");
717 else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
718 ipoib_mcast_sendonly_join(mcast);
719
720 /*
721		 * If the lookup completes between here and out:, we don't
722		 * want to send the packet twice.
723 */
724 mcast = NULL;
725 }
726
727out:
728 if (mcast && mcast->ah) {
729 if (skb->dst &&
730 skb->dst->neighbour &&
731 !*to_ipoib_neigh(skb->dst->neighbour)) {
732 struct ipoib_neigh *neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
733
734 if (neigh) {
735 kref_get(&mcast->ah->ref);
736 neigh->ah = mcast->ah;
737 neigh->neighbour = skb->dst->neighbour;
738 *to_ipoib_neigh(skb->dst->neighbour) = neigh;
739 list_add_tail(&neigh->list, &mcast->neigh_list);
740 }
741 }
742
743 ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
744 }
745
746 spin_unlock(&priv->lock);
747}
748
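/*
 * Replace each multicast group (including broadcast) with a fresh,
 * unattached copy under the lock, then leave and free the old groups
 * outside it, since ipoib_mcast_leave() and ipoib_mcast_free() cannot
 * run with priv->lock held.
 */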
749void ipoib_mcast_dev_flush(struct net_device *dev)
750{
751 struct ipoib_dev_priv *priv = netdev_priv(dev);
752 LIST_HEAD(remove_list);
753 struct ipoib_mcast *mcast, *tmcast, *nmcast;
754 unsigned long flags;
755
756 ipoib_dbg_mcast(priv, "flushing multicast list\n");
757
758 spin_lock_irqsave(&priv->lock, flags);
759 list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
760 nmcast = ipoib_mcast_alloc(dev, 0);
761 if (nmcast) {
762 nmcast->flags =
763 mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY);
764
765 nmcast->mcmember.mgid = mcast->mcmember.mgid;
766
767 /* Add the new group in before the to-be-destroyed group */
768 list_add_tail(&nmcast->list, &mcast->list);
769 list_del_init(&mcast->list);
770
771 rb_replace_node(&mcast->rb_node, &nmcast->rb_node,
772 &priv->multicast_tree);
773
774 list_add_tail(&mcast->list, &remove_list);
775 } else {
776 ipoib_warn(priv, "could not reallocate multicast group "
777 IPOIB_GID_FMT "\n",
778 IPOIB_GID_ARG(mcast->mcmember.mgid));
779 }
780 }
781
782 if (priv->broadcast) {
783 nmcast = ipoib_mcast_alloc(dev, 0);
784 if (nmcast) {
785 nmcast->mcmember.mgid = priv->broadcast->mcmember.mgid;
786
787 rb_replace_node(&priv->broadcast->rb_node,
788 &nmcast->rb_node,
789 &priv->multicast_tree);
790
791 list_add_tail(&priv->broadcast->list, &remove_list);
792 }
793
794 priv->broadcast = nmcast;
795 }
796
797 spin_unlock_irqrestore(&priv->lock, flags);
798
799 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
800 ipoib_mcast_leave(dev, mcast);
801 ipoib_mcast_free(mcast);
802 }
803}
804
805void ipoib_mcast_dev_down(struct net_device *dev)
806{
807 struct ipoib_dev_priv *priv = netdev_priv(dev);
808 unsigned long flags;
809
810 /* Delete broadcast since it will be recreated */
811 if (priv->broadcast) {
812 ipoib_dbg_mcast(priv, "deleting broadcast group\n");
813
814 spin_lock_irqsave(&priv->lock, flags);
815 rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
816 spin_unlock_irqrestore(&priv->lock, flags);
817 ipoib_mcast_leave(dev, priv->broadcast);
818 ipoib_mcast_free(priv->broadcast);
819 priv->broadcast = NULL;
820 }
821}
822
823void ipoib_mcast_restart_task(void *dev_ptr)
824{
825 struct net_device *dev = dev_ptr;
826 struct ipoib_dev_priv *priv = netdev_priv(dev);
827 struct dev_mc_list *mclist;
828 struct ipoib_mcast *mcast, *tmcast;
829 LIST_HEAD(remove_list);
830 unsigned long flags;
831
832 ipoib_dbg_mcast(priv, "restarting multicast task\n");
833
834 ipoib_mcast_stop_thread(dev);
835
836 spin_lock_irqsave(&priv->lock, flags);
837
838 /*
839 * Unfortunately, the networking core only gives us a list of all of
840 * the multicast hardware addresses. We need to figure out which ones
841	 * are new and which ones have been removed.
842 */
843
844 /* Clear out the found flag */
845 list_for_each_entry(mcast, &priv->multicast_list, list)
846 clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
847
848 /* Mark all of the entries that are found or don't exist */
849 for (mclist = dev->mc_list; mclist; mclist = mclist->next) {
850 union ib_gid mgid;
851
852 memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid);
853
854 /* Add in the P_Key */
855 mgid.raw[4] = (priv->pkey >> 8) & 0xff;
856 mgid.raw[5] = priv->pkey & 0xff;
857
858 mcast = __ipoib_mcast_find(dev, &mgid);
859 if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
860 struct ipoib_mcast *nmcast;
861
862 /* Not found or send-only group, let's add a new entry */
863 ipoib_dbg_mcast(priv, "adding multicast entry for mgid "
864 IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid));
865
866 nmcast = ipoib_mcast_alloc(dev, 0);
867 if (!nmcast) {
868 ipoib_warn(priv, "unable to allocate memory for multicast structure\n");
869 continue;
870 }
871
872 set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags);
873
874 nmcast->mcmember.mgid = mgid;
875
876 if (mcast) {
877 /* Destroy the send only entry */
878 list_del(&mcast->list);
879 list_add_tail(&mcast->list, &remove_list);
880
881 rb_replace_node(&mcast->rb_node,
882 &nmcast->rb_node,
883 &priv->multicast_tree);
884 } else
885 __ipoib_mcast_add(dev, nmcast);
886
887 list_add_tail(&nmcast->list, &priv->multicast_list);
888 }
889
890 if (mcast)
891 set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
892 }
893
894	/* Remove all of the entries that don't exist anymore */
895 list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
896 if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) &&
897 !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
898 ipoib_dbg_mcast(priv, "deleting multicast group " IPOIB_GID_FMT "\n",
899 IPOIB_GID_ARG(mcast->mcmember.mgid));
900
901 rb_erase(&mcast->rb_node, &priv->multicast_tree);
902
903 /* Move to the remove list */
904 list_del(&mcast->list);
905 list_add_tail(&mcast->list, &remove_list);
906 }
907 }
908 spin_unlock_irqrestore(&priv->lock, flags);
909
910 /* We have to cancel outside of the spinlock */
911 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
912 ipoib_mcast_leave(mcast->dev, mcast);
913 ipoib_mcast_free(mcast);
914 }
915
916 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
917 ipoib_mcast_start_thread(dev);
918}
919
920struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
921{
922 struct ipoib_mcast_iter *iter;
923
924 iter = kmalloc(sizeof *iter, GFP_KERNEL);
925 if (!iter)
926 return NULL;
927
928 iter->dev = dev;
929 memset(iter->mgid.raw, 0, sizeof iter->mgid);
930
931 if (ipoib_mcast_iter_next(iter)) {
932 ipoib_mcast_iter_free(iter);
933 return NULL;
934 }
935
936 return iter;
937}
938
939void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter)
940{
941 kfree(iter);
942}
943
944int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
945{
946 struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
947 struct rb_node *n;
948 struct ipoib_mcast *mcast;
949 int ret = 1;
950
951 spin_lock_irq(&priv->lock);
952
953 n = rb_first(&priv->multicast_tree);
954
955 while (n) {
956 mcast = rb_entry(n, struct ipoib_mcast, rb_node);
957
958 if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw,
959 sizeof (union ib_gid)) < 0) {
960 iter->mgid = mcast->mcmember.mgid;
961 iter->created = mcast->created;
962 iter->queuelen = skb_queue_len(&mcast->pkt_queue);
963 iter->complete = !!mcast->ah;
964 iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY));
965
966 ret = 0;
967
968 break;
969 }
970
971 n = rb_next(n);
972 }
973
974 spin_unlock_irq(&priv->lock);
975
976 return ret;
977}
978
979void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
980 union ib_gid *mgid,
981 unsigned long *created,
982 unsigned int *queuelen,
983 unsigned int *complete,
984 unsigned int *send_only)
985{
986 *mgid = iter->mgid;
987 *created = iter->created;
988 *queuelen = iter->queuelen;
989 *complete = iter->complete;
990 *send_only = iter->send_only;
991}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
new file mode 100644
index 000000000000..4933edf062c2
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -0,0 +1,260 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <ib_cache.h>
36
37#include "ipoib.h"
38
39int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
40{
41 struct ipoib_dev_priv *priv = netdev_priv(dev);
42 struct ib_qp_attr *qp_attr;
43 int attr_mask;
44 int ret;
45 u16 pkey_index;
46
47 ret = -ENOMEM;
48 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
49 if (!qp_attr)
50 goto out;
51
52 if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
53 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
54 ret = -ENXIO;
55 goto out;
56 }
57 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
58
59 /* set correct QKey for QP */
60 qp_attr->qkey = priv->qkey;
61 attr_mask = IB_QP_QKEY;
62 ret = ib_modify_qp(priv->qp, qp_attr, attr_mask);
63 if (ret) {
64 ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
65 goto out;
66 }
67
68 /* attach QP to multicast group */
69 down(&priv->mcast_mutex);
70 ret = ib_attach_mcast(priv->qp, mgid, mlid);
71 up(&priv->mcast_mutex);
72 if (ret)
73 ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret);
74
75out:
76 kfree(qp_attr);
77 return ret;
78}
79
80int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
81{
82 struct ipoib_dev_priv *priv = netdev_priv(dev);
83 int ret;
84
85 down(&priv->mcast_mutex);
86 ret = ib_detach_mcast(priv->qp, mgid, mlid);
87 up(&priv->mcast_mutex);
88 if (ret)
89 ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
90
91 return ret;
92}
93
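/*
 * Move the UD QP through the INIT -> RTR -> RTS states so it is ready
 * to post sends and receives; on any failure the QP is destroyed.
 */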
94int ipoib_qp_create(struct net_device *dev)
95{
96 struct ipoib_dev_priv *priv = netdev_priv(dev);
97 int ret;
98 u16 pkey_index;
99 struct ib_qp_attr qp_attr;
100 int attr_mask;
101
102 /*
103 * Search through the port P_Key table for the requested pkey value.
104 * The port has to be assigned to the respective IB partition in
105 * advance.
106 */
107 ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
108 if (ret) {
109 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
110 return ret;
111 }
112 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
113
114 qp_attr.qp_state = IB_QPS_INIT;
115 qp_attr.qkey = 0;
116 qp_attr.port_num = priv->port;
117 qp_attr.pkey_index = pkey_index;
118 attr_mask =
119 IB_QP_QKEY |
120 IB_QP_PORT |
121 IB_QP_PKEY_INDEX |
122 IB_QP_STATE;
123 ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
124 if (ret) {
125 ipoib_warn(priv, "failed to modify QP to init, ret = %d\n", ret);
126 goto out_fail;
127 }
128
129 qp_attr.qp_state = IB_QPS_RTR;
130	/* Can't set this in an INIT->RTR transition */
131 attr_mask &= ~IB_QP_PORT;
132 ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
133 if (ret) {
134 ipoib_warn(priv, "failed to modify QP to RTR, ret = %d\n", ret);
135 goto out_fail;
136 }
137
138 qp_attr.qp_state = IB_QPS_RTS;
139 qp_attr.sq_psn = 0;
140 attr_mask |= IB_QP_SQ_PSN;
141 attr_mask &= ~IB_QP_PKEY_INDEX;
142 ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
143 if (ret) {
144 ipoib_warn(priv, "failed to modify QP to RTS, ret = %d\n", ret);
145 goto out_fail;
146 }
147
148 return 0;
149
150out_fail:
151 ib_destroy_qp(priv->qp);
152 priv->qp = NULL;
153
154 return -EINVAL;
155}
156
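/*
 * Allocate the IB resources for one interface: a PD, a single CQ
 * shared by send and receive, a local DMA MR and the UD QP.  Bytes
 * 1..3 of the interface hardware address are filled in from the QP
 * number.
 */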
157int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
158{
159 struct ipoib_dev_priv *priv = netdev_priv(dev);
160 struct ib_qp_init_attr init_attr = {
161 .cap = {
162 .max_send_wr = IPOIB_TX_RING_SIZE,
163 .max_recv_wr = IPOIB_RX_RING_SIZE,
164 .max_send_sge = 1,
165 .max_recv_sge = 1
166 },
167 .sq_sig_type = IB_SIGNAL_ALL_WR,
168 .qp_type = IB_QPT_UD
169 };
170
171 priv->pd = ib_alloc_pd(priv->ca);
172 if (IS_ERR(priv->pd)) {
173 printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
174 return -ENODEV;
175 }
176
177 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev,
178 IPOIB_TX_RING_SIZE + IPOIB_RX_RING_SIZE + 1);
179 if (IS_ERR(priv->cq)) {
180 printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
181 goto out_free_pd;
182 }
183
184 if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP))
185 goto out_free_cq;
186
187 priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
188 if (IS_ERR(priv->mr)) {
189 printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name);
190 goto out_free_cq;
191 }
192
193 init_attr.send_cq = priv->cq;
194	init_attr.recv_cq = priv->cq;
195
196 priv->qp = ib_create_qp(priv->pd, &init_attr);
197 if (IS_ERR(priv->qp)) {
198 printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
199 goto out_free_mr;
200 }
201
202 priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
203 priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
204 priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff;
205
206 priv->tx_sge.lkey = priv->mr->lkey;
207
208 priv->tx_wr.opcode = IB_WR_SEND;
209 priv->tx_wr.sg_list = &priv->tx_sge;
210 priv->tx_wr.num_sge = 1;
211 priv->tx_wr.send_flags = IB_SEND_SIGNALED;
212
213 return 0;
214
215out_free_mr:
216 ib_dereg_mr(priv->mr);
217
218out_free_cq:
219 ib_destroy_cq(priv->cq);
220
221out_free_pd:
222 ib_dealloc_pd(priv->pd);
223 return -ENODEV;
224}
225
226void ipoib_transport_dev_cleanup(struct net_device *dev)
227{
228 struct ipoib_dev_priv *priv = netdev_priv(dev);
229
230 if (priv->qp) {
231 if (ib_destroy_qp(priv->qp))
232			ipoib_warn(priv, "ib_destroy_qp failed\n");
233
234 priv->qp = NULL;
235 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
236 }
237
238 if (ib_dereg_mr(priv->mr))
239 ipoib_warn(priv, "ib_dereg_mr failed\n");
240
241 if (ib_destroy_cq(priv->cq))
242		ipoib_warn(priv, "ib_destroy_cq failed\n");
243
244 if (ib_dealloc_pd(priv->pd))
245 ipoib_warn(priv, "ib_dealloc_pd failed\n");
246}
247
248void ipoib_event(struct ib_event_handler *handler,
249 struct ib_event *record)
250{
251 struct ipoib_dev_priv *priv =
252 container_of(handler, struct ipoib_dev_priv, event_handler);
253
254 if (record->event == IB_EVENT_PORT_ACTIVE ||
255 record->event == IB_EVENT_LID_CHANGE ||
256 record->event == IB_EVENT_SM_CHANGE) {
257 ipoib_dbg(priv, "Port active event\n");
258 schedule_work(&priv->flush_task);
259 }
260}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
new file mode 100644
index 000000000000..94b8ea812fef
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -0,0 +1,177 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ipoib_vlan.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <linux/version.h>
36#include <linux/module.h>
37
38#include <linux/init.h>
39#include <linux/slab.h>
40#include <linux/seq_file.h>
41
42#include <asm/uaccess.h>
43
44#include "ipoib.h"
45
46static ssize_t show_parent(struct class_device *class_dev, char *buf)
47{
48 struct net_device *dev =
49 container_of(class_dev, struct net_device, class_dev);
50 struct ipoib_dev_priv *priv = netdev_priv(dev);
51
52 return sprintf(buf, "%s\n", priv->parent->name);
53}
54static CLASS_DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL);
55
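/*
 * Create a child interface named "<parent>.<pkey>" that shares the
 * parent's HCA port but uses a different P_Key, after checking that
 * no existing interface on this port already uses that P_Key.
 */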
56int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
57{
58 struct ipoib_dev_priv *ppriv, *priv;
59 char intf_name[IFNAMSIZ];
60 int result;
61
62 if (!capable(CAP_NET_ADMIN))
63 return -EPERM;
64
65 ppriv = netdev_priv(pdev);
66
67 down(&ppriv->vlan_mutex);
68
69 /*
70 * First ensure this isn't a duplicate. We check the parent device and
71 * then all of the child interfaces to make sure the Pkey doesn't match.
72 */
73 if (ppriv->pkey == pkey) {
74 result = -ENOTUNIQ;
75 goto err;
76 }
77
78 list_for_each_entry(priv, &ppriv->child_intfs, list) {
79 if (priv->pkey == pkey) {
80 result = -ENOTUNIQ;
81 goto err;
82 }
83 }
84
85 snprintf(intf_name, sizeof intf_name, "%s.%04x",
86 ppriv->dev->name, pkey);
87 priv = ipoib_intf_alloc(intf_name);
88 if (!priv) {
89 result = -ENOMEM;
90 goto err;
91 }
92
93 set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
94
95 priv->pkey = pkey;
96
97 memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
98 priv->dev->broadcast[8] = pkey >> 8;
99 priv->dev->broadcast[9] = pkey & 0xff;
100
101 result = ipoib_dev_init(priv->dev, ppriv->ca, ppriv->port);
102 if (result < 0) {
103 ipoib_warn(ppriv, "failed to initialize subinterface: "
104 "device %s, port %d",
105 ppriv->ca->name, ppriv->port);
106 goto device_init_failed;
107 }
108
109 result = register_netdev(priv->dev);
110 if (result) {
111 ipoib_warn(priv, "failed to initialize; error %i", result);
112 goto register_failed;
113 }
114
115 priv->parent = ppriv->dev;
116
117 if (ipoib_create_debug_file(priv->dev))
118 goto debug_failed;
119
120 if (ipoib_add_pkey_attr(priv->dev))
121 goto sysfs_failed;
122
123 if (class_device_create_file(&priv->dev->class_dev,
124 &class_device_attr_parent))
125 goto sysfs_failed;
126
127 list_add_tail(&priv->list, &ppriv->child_intfs);
128
129 up(&ppriv->vlan_mutex);
130
131 return 0;
132
133sysfs_failed:
134 ipoib_delete_debug_file(priv->dev);
135
136debug_failed:
137 unregister_netdev(priv->dev);
138
139register_failed:
140 ipoib_dev_cleanup(priv->dev);
141
142device_init_failed:
143 free_netdev(priv->dev);
144
145err:
146 up(&ppriv->vlan_mutex);
147 return result;
148}
149
150int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
151{
152 struct ipoib_dev_priv *ppriv, *priv, *tpriv;
153 int ret = -ENOENT;
154
155 if (!capable(CAP_NET_ADMIN))
156 return -EPERM;
157
158 ppriv = netdev_priv(pdev);
159
160 down(&ppriv->vlan_mutex);
161 list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
162 if (priv->pkey == pkey) {
163 unregister_netdev(priv->dev);
164 ipoib_dev_cleanup(priv->dev);
165
166 list_del(&priv->list);
167
168			free_netdev(priv->dev);
169
170 ret = 0;
171 break;
172 }
173 }
174 up(&ppriv->vlan_mutex);
175
176 return ret;
177}