Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/Kconfig                    |   10
-rw-r--r--  drivers/infiniband/core/Makefile              |   12
-rw-r--r--  drivers/infiniband/core/agent.c               |   22
-rw-r--r--  drivers/infiniband/core/agent_priv.h          |    3
-rw-r--r--  drivers/infiniband/core/cm.c                  | 3324
-rw-r--r--  drivers/infiniband/core/cm_msgs.h             |  819
-rw-r--r--  drivers/infiniband/core/fmr_pool.c            |    7
-rw-r--r--  drivers/infiniband/core/mad.c                 |  600
-rw-r--r--  drivers/infiniband/core/mad_priv.h            |   33
-rw-r--r--  drivers/infiniband/core/mad_rmpp.c            |  765
-rw-r--r--  drivers/infiniband/core/mad_rmpp.h            |   58
-rw-r--r--  drivers/infiniband/core/packer.c              |    4
-rw-r--r--  drivers/infiniband/core/sa_query.c            |  224
-rw-r--r--  drivers/infiniband/core/ucm.c                 | 1387
-rw-r--r--  drivers/infiniband/core/ucm.h                 |   89
-rw-r--r--  drivers/infiniband/core/user_mad.c            |  299
-rw-r--r--  drivers/infiniband/core/uverbs.h              |  133
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c          | 1006
-rw-r--r--  drivers/infiniband/core/uverbs_main.c         |  710
-rw-r--r--  drivers/infiniband/core/uverbs_mem.c          |  221
-rw-r--r--  drivers/infiniband/core/verbs.c               |   67
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_av.c        |    1
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.c       |  531
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.h       |   48
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cq.c        |  183
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_dev.h       |   18
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_doorbell.h  |    1
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_eq.c        |   58
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_main.c      |   34
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mcg.c       |   63
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.c   |  151
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.h   |   14
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mr.c        |  367
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_pd.c        |   24
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c  |  334
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.h  |   30
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c        |  344
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_user.h      |   81
-rw-r--r--  drivers/infiniband/include/ib_cm.h            |  569
-rw-r--r--  drivers/infiniband/include/ib_fmr_pool.h      |    5
-rw-r--r--  drivers/infiniband/include/ib_mad.h           |  213
-rw-r--r--  drivers/infiniband/include/ib_sa.h            |   87
-rw-r--r--  drivers/infiniband/include/ib_user_cm.h       |  328
-rw-r--r--  drivers/infiniband/include/ib_user_mad.h      |   28
-rw-r--r--  drivers/infiniband/include/ib_user_verbs.h    |  389
-rw-r--r--  drivers/infiniband/include/ib_verbs.h         |  149
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c       |    6
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c     |    5
48 files changed, 12521 insertions, 1333 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 3cc3ff0cccb1..79c8e2dd9c33 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -7,6 +7,16 @@ config INFINIBAND
 	  any protocols you wish to use as well as drivers for your
 	  InfiniBand hardware.
 
+config INFINIBAND_USER_VERBS
+	tristate "InfiniBand userspace verbs support"
+	depends on INFINIBAND
+	---help---
+	  Userspace InfiniBand verbs support.  This is the kernel side
+	  of userspace verbs, which allows userspace processes to
+	  directly access InfiniBand hardware for fast-path
+	  operations.  You will also need libibverbs and a hardware
+	  driver library from <http://www.openib.org>.
+
 source "drivers/infiniband/hw/mthca/Kconfig"
 
 source "drivers/infiniband/ulp/ipoib/Kconfig"
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index d2dbfb52c0a3..10be36731ed7 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,12 +1,20 @@
 EXTRA_CFLAGS += -Idrivers/infiniband/include
 
-obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o ib_umad.o
+obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \
+				ib_cm.o ib_umad.o ib_ucm.o
+obj-$(CONFIG_INFINIBAND_USER_VERBS) += ib_uverbs.o
 
 ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
 	     device.o fmr_pool.o cache.o
 
-ib_mad-y := mad.o smi.o agent.o
+ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
 
 ib_sa-y := sa_query.o
 
+ib_cm-y := cm.o
+
 ib_umad-y := user_mad.o
+
+ib_ucm-y := ucm.o
+
+ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 23d1957c4b29..729f0b0d983a 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -134,7 +134,7 @@ static int agent_mad_send(struct ib_mad_agent *mad_agent,
 					  sizeof(mad_priv->mad),
 					  DMA_TO_DEVICE);
 	gather_list.length = sizeof(mad_priv->mad);
-	gather_list.lkey = (*port_priv->mr).lkey;
+	gather_list.lkey = mad_agent->mr->lkey;
 
 	send_wr.next = NULL;
 	send_wr.opcode = IB_WR_SEND;
@@ -156,10 +156,10 @@ static int agent_mad_send(struct ib_mad_agent *mad_agent,
 		/* Should sgid be looked up ? */
 		ah_attr.grh.sgid_index = 0;
 		ah_attr.grh.hop_limit = grh->hop_limit;
-		ah_attr.grh.flow_label = be32_to_cpup(
-					&grh->version_tclass_flow) & 0xfffff;
-		ah_attr.grh.traffic_class = (be32_to_cpup(
-					&grh->version_tclass_flow) >> 20) & 0xff;
+		ah_attr.grh.flow_label = be32_to_cpu(
+					grh->version_tclass_flow) & 0xfffff;
+		ah_attr.grh.traffic_class = (be32_to_cpu(
+					grh->version_tclass_flow) >> 20) & 0xff;
 		memcpy(ah_attr.grh.dgid.raw,
 		       grh->sgid.raw,
 		       sizeof(ah_attr.grh.dgid));
@@ -322,22 +322,12 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
 		goto error3;
 	}
 
-	port_priv->mr = ib_get_dma_mr(port_priv->smp_agent->qp->pd,
-				      IB_ACCESS_LOCAL_WRITE);
-	if (IS_ERR(port_priv->mr)) {
-		printk(KERN_ERR SPFX "Couldn't get DMA MR\n");
-		ret = PTR_ERR(port_priv->mr);
-		goto error4;
-	}
-
 	spin_lock_irqsave(&ib_agent_port_list_lock, flags);
 	list_add_tail(&port_priv->port_list, &ib_agent_port_list);
 	spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
 
 	return 0;
 
-error4:
-	ib_unregister_mad_agent(port_priv->perf_mgmt_agent);
 error3:
 	ib_unregister_mad_agent(port_priv->smp_agent);
 error2:
@@ -361,8 +351,6 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
 	list_del(&port_priv->port_list);
 	spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
 
-	ib_dereg_mr(port_priv->mr);
-
 	ib_unregister_mad_agent(port_priv->perf_mgmt_agent);
 	ib_unregister_mad_agent(port_priv->smp_agent);
 	kfree(port_priv);
diff --git a/drivers/infiniband/core/agent_priv.h b/drivers/infiniband/core/agent_priv.h
index 17a0cce5813c..17435af1e914 100644
--- a/drivers/infiniband/core/agent_priv.h
+++ b/drivers/infiniband/core/agent_priv.h
@@ -33,7 +33,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: agent_priv.h 1389 2004-12-27 22:56:47Z roland $
+ * $Id: agent_priv.h 1640 2005-01-24 22:39:02Z halr $
  */
 
 #ifndef __IB_AGENT_PRIV_H__
@@ -57,7 +57,6 @@ struct ib_agent_port_private {
 	int port_num;
 	struct ib_mad_agent *smp_agent;       /* SM class */
 	struct ib_mad_agent *perf_mgmt_agent; /* PerfMgmt class */
-	struct ib_mr *mr;
 };
 
 #endif	/* __IB_AGENT_PRIV_H__ */
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
new file mode 100644
index 000000000000..403ed125d8f4
--- /dev/null
+++ b/drivers/infiniband/core/cm.c
@@ -0,0 +1,3324 @@
+/*
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: cm.c 2821 2005-07-08 17:07:28Z sean.hefty $
+ */
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include <ib_cache.h>
+#include <ib_cm.h>
+#include "cm_msgs.h"
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("InfiniBand CM");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static void cm_add_one(struct ib_device *device);
+static void cm_remove_one(struct ib_device *device);
+
+static struct ib_client cm_client = {
+	.name   = "cm",
+	.add    = cm_add_one,
+	.remove = cm_remove_one
+};
+
+static struct ib_cm {
+	spinlock_t lock;
+	struct list_head device_list;
+	rwlock_t device_lock;
+	struct rb_root listen_service_table;
+	u64 listen_service_id;
+	/* struct rb_root peer_service_table; todo: fix peer to peer */
+	struct rb_root remote_qp_table;
+	struct rb_root remote_id_table;
+	struct rb_root remote_sidr_table;
+	struct idr local_id_table;
+	struct workqueue_struct *wq;
+} cm;
+
+struct cm_port {
+	struct cm_device *cm_dev;
+	struct ib_mad_agent *mad_agent;
+	u8 port_num;
+};
+
+struct cm_device {
+	struct list_head list;
+	struct ib_device *device;
+	u64 ca_guid;
+	struct cm_port port[0];
+};
+
+struct cm_av {
+	struct cm_port *port;
+	union ib_gid dgid;
+	struct ib_ah_attr ah_attr;
+	u16 pkey_index;
+	u8 packet_life_time;
+};
+
+struct cm_work {
+	struct work_struct work;
+	struct list_head list;
+	struct cm_port *port;
+	struct ib_mad_recv_wc *mad_recv_wc;	/* Received MADs */
+	u32 local_id;				/* Established / timewait */
+	u32 remote_id;
+	struct ib_cm_event cm_event;
+	struct ib_sa_path_rec path[0];
+};
+
+struct cm_timewait_info {
+	struct cm_work work;			/* Must be first. */
+	struct rb_node remote_qp_node;
+	struct rb_node remote_id_node;
+	u64 remote_ca_guid;
+	u32 remote_qpn;
+	u8 inserted_remote_qp;
+	u8 inserted_remote_id;
+};
+
+struct cm_id_private {
+	struct ib_cm_id	id;
+
+	struct rb_node service_node;
+	struct rb_node sidr_id_node;
+	spinlock_t lock;
+	wait_queue_head_t wait;
+	atomic_t refcount;
+
+	struct ib_mad_send_buf *msg;
+	struct cm_timewait_info *timewait_info;
+	/* todo: use alternate port on send failure */
+	struct cm_av av;
+	struct cm_av alt_av;
+
+	void *private_data;
+	u64 tid;
+	u32 local_qpn;
+	u32 remote_qpn;
+	u32 sq_psn;
+	u32 rq_psn;
+	int timeout_ms;
+	enum ib_mtu path_mtu;
+	u8 private_data_len;
+	u8 max_cm_retries;
+	u8 peer_to_peer;
+	u8 responder_resources;
+	u8 initiator_depth;
+	u8 local_ack_timeout;
+	u8 retry_count;
+	u8 rnr_retry_count;
+	u8 service_timeout;
+
+	struct list_head work_list;
+	atomic_t work_count;
+};
+
+static void cm_work_handler(void *data);
+
+static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
+{
+	if (atomic_dec_and_test(&cm_id_priv->refcount))
+		wake_up(&cm_id_priv->wait);
+}
+
+static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
+			struct ib_mad_send_buf **msg)
+{
+	struct ib_mad_agent *mad_agent;
+	struct ib_mad_send_buf *m;
+	struct ib_ah *ah;
+
+	mad_agent = cm_id_priv->av.port->mad_agent;
+	ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
+	if (IS_ERR(ah))
+		return PTR_ERR(ah);
+
+	m = ib_create_send_mad(mad_agent, 1, cm_id_priv->av.pkey_index,
+			       ah, 0, sizeof(struct ib_mad_hdr),
+			       sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr),
+			       GFP_ATOMIC);
+	if (IS_ERR(m)) {
+		ib_destroy_ah(ah);
+		return PTR_ERR(m);
+	}
+
+	/* Timeout set by caller if response is expected. */
+	m->send_wr.wr.ud.retries = cm_id_priv->max_cm_retries;
+
+	atomic_inc(&cm_id_priv->refcount);
+	m->context[0] = cm_id_priv;
+	*msg = m;
+	return 0;
+}
+
+static int cm_alloc_response_msg(struct cm_port *port,
+				 struct ib_mad_recv_wc *mad_recv_wc,
+				 struct ib_mad_send_buf **msg)
+{
+	struct ib_mad_send_buf *m;
+	struct ib_ah *ah;
+
+	ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
+				  mad_recv_wc->recv_buf.grh, port->port_num);
+	if (IS_ERR(ah))
+		return PTR_ERR(ah);
+
+	m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
+			       ah, 0, sizeof(struct ib_mad_hdr),
+			       sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr),
+			       GFP_ATOMIC);
+	if (IS_ERR(m)) {
+		ib_destroy_ah(ah);
+		return PTR_ERR(m);
+	}
+	*msg = m;
+	return 0;
+}
+
+static void cm_free_msg(struct ib_mad_send_buf *msg)
+{
+	ib_destroy_ah(msg->send_wr.wr.ud.ah);
+	if (msg->context[0])
+		cm_deref_id(msg->context[0]);
+	ib_free_send_mad(msg);
+}
+
+static void * cm_copy_private_data(const void *private_data,
+				   u8 private_data_len)
+{
+	void *data;
+
+	if (!private_data || !private_data_len)
+		return NULL;
+
+	data = kmalloc(private_data_len, GFP_KERNEL);
+	if (!data)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(data, private_data, private_data_len);
+	return data;
+}
+
+static void cm_set_private_data(struct cm_id_private *cm_id_priv,
+				void *private_data, u8 private_data_len)
+{
+	if (cm_id_priv->private_data && cm_id_priv->private_data_len)
+		kfree(cm_id_priv->private_data);
+
+	cm_id_priv->private_data = private_data;
+	cm_id_priv->private_data_len = private_data_len;
+}
+
+static void cm_set_ah_attr(struct ib_ah_attr *ah_attr, u8 port_num,
+			   u16 dlid, u8 sl, u16 src_path_bits)
+{
+	memset(ah_attr, 0, sizeof ah_attr);
+	ah_attr->dlid = be16_to_cpu(dlid);
+	ah_attr->sl = sl;
+	ah_attr->src_path_bits = src_path_bits;
+	ah_attr->port_num = port_num;
+}
+
+static void cm_init_av_for_response(struct cm_port *port,
+				    struct ib_wc *wc, struct cm_av *av)
+{
+	av->port = port;
+	av->pkey_index = wc->pkey_index;
+	cm_set_ah_attr(&av->ah_attr, port->port_num, cpu_to_be16(wc->slid),
+		       wc->sl, wc->dlid_path_bits);
+}
+
+static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
+{
+	struct cm_device *cm_dev;
+	struct cm_port *port = NULL;
+	unsigned long flags;
+	int ret;
+	u8 p;
+
+	read_lock_irqsave(&cm.device_lock, flags);
+	list_for_each_entry(cm_dev, &cm.device_list, list) {
+		if (!ib_find_cached_gid(cm_dev->device, &path->sgid,
+					&p, NULL)) {
+			port = &cm_dev->port[p-1];
+			break;
+		}
+	}
+	read_unlock_irqrestore(&cm.device_lock, flags);
+
+	if (!port)
+		return -EINVAL;
+
+	ret = ib_find_cached_pkey(cm_dev->device, port->port_num,
+				  be16_to_cpu(path->pkey), &av->pkey_index);
+	if (ret)
+		return ret;
+
+	av->port = port;
+	cm_set_ah_attr(&av->ah_attr, av->port->port_num, path->dlid,
+		       path->sl, path->slid & 0x7F);
+	av->packet_life_time = path->packet_life_time;
+	return 0;
+}
+
+static int cm_alloc_id(struct cm_id_private *cm_id_priv)
+{
+	unsigned long flags;
+	int ret;
+
+	do {
+		spin_lock_irqsave(&cm.lock, flags);
+		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, 1,
+					(int *) &cm_id_priv->id.local_id);
+		spin_unlock_irqrestore(&cm.lock, flags);
+	} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
+	return ret;
+}
+
+static void cm_free_id(u32 local_id)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cm.lock, flags);
+	idr_remove(&cm.local_id_table, (int) local_id);
+	spin_unlock_irqrestore(&cm.lock, flags);
+}
+
+static struct cm_id_private * cm_get_id(u32 local_id, u32 remote_id)
+{
+	struct cm_id_private *cm_id_priv;
+
+	cm_id_priv = idr_find(&cm.local_id_table, (int) local_id);
+	if (cm_id_priv) {
+		if (cm_id_priv->id.remote_id == remote_id)
+			atomic_inc(&cm_id_priv->refcount);
+		else
+			cm_id_priv = NULL;
+	}
+
+	return cm_id_priv;
+}
+
+static struct cm_id_private * cm_acquire_id(u32 local_id, u32 remote_id)
+{
+	struct cm_id_private *cm_id_priv;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cm.lock, flags);
+	cm_id_priv = cm_get_id(local_id, remote_id);
+	spin_unlock_irqrestore(&cm.lock, flags);
+
+	return cm_id_priv;
+}
+
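+/*
+ * Listening cm_id's live in an rb-tree keyed by service id.  The mask
+ * check below treats a listen as a range: a listen on service id
+ * 0x1000 with a mask of ~0xffULL, for example, covers ids
+ * 0x1000-0x10ff, and inserting an overlapping listen is refused.
+ */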
+static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
+{
+	struct rb_node **link = &cm.listen_service_table.rb_node;
+	struct rb_node *parent = NULL;
+	struct cm_id_private *cur_cm_id_priv;
+	u64 service_id = cm_id_priv->id.service_id;
+	u64 service_mask = cm_id_priv->id.service_mask;
+
+	while (*link) {
+		parent = *link;
+		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
+					  service_node);
+		if ((cur_cm_id_priv->id.service_mask & service_id) ==
+		    (service_mask & cur_cm_id_priv->id.service_id))
+			return cm_id_priv;
+		if (service_id < cur_cm_id_priv->id.service_id)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+	rb_link_node(&cm_id_priv->service_node, parent, link);
+	rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
+	return NULL;
+}
+
+static struct cm_id_private * cm_find_listen(u64 service_id)
+{
+	struct rb_node *node = cm.listen_service_table.rb_node;
+	struct cm_id_private *cm_id_priv;
+
+	while (node) {
+		cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
+		if ((cm_id_priv->id.service_mask & service_id) ==
+		    (cm_id_priv->id.service_mask & cm_id_priv->id.service_id))
+			return cm_id_priv;
+		if (service_id < cm_id_priv->id.service_id)
+			node = node->rb_left;
+		else
+			node = node->rb_right;
+	}
+	return NULL;
+}
+
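+/*
+ * Connections are also tracked by <remote CA GUID, remote comm id> and
+ * by <remote CA GUID, remote QPN> in the two rb-trees below.  A REQ or
+ * REP that collides with an existing entry is treated as a duplicate
+ * or a stale connection rather than being matched to a listener.
+ */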
+static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
+						     *timewait_info)
+{
+	struct rb_node **link = &cm.remote_id_table.rb_node;
+	struct rb_node *parent = NULL;
+	struct cm_timewait_info *cur_timewait_info;
+	u64 remote_ca_guid = timewait_info->remote_ca_guid;
+	u32 remote_id = timewait_info->work.remote_id;
+
+	while (*link) {
+		parent = *link;
+		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
+					     remote_id_node);
+		if (remote_id < cur_timewait_info->work.remote_id)
+			link = &(*link)->rb_left;
+		else if (remote_id > cur_timewait_info->work.remote_id)
+			link = &(*link)->rb_right;
+		else if (remote_ca_guid < cur_timewait_info->remote_ca_guid)
+			link = &(*link)->rb_left;
+		else if (remote_ca_guid > cur_timewait_info->remote_ca_guid)
+			link = &(*link)->rb_right;
+		else
+			return cur_timewait_info;
+	}
+	timewait_info->inserted_remote_id = 1;
+	rb_link_node(&timewait_info->remote_id_node, parent, link);
+	rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
+	return NULL;
+}
+
+static struct cm_timewait_info * cm_find_remote_id(u64 remote_ca_guid,
+						   u32 remote_id)
+{
+	struct rb_node *node = cm.remote_id_table.rb_node;
+	struct cm_timewait_info *timewait_info;
+
+	while (node) {
+		timewait_info = rb_entry(node, struct cm_timewait_info,
+					 remote_id_node);
+		if (remote_id < timewait_info->work.remote_id)
+			node = node->rb_left;
+		else if (remote_id > timewait_info->work.remote_id)
+			node = node->rb_right;
+		else if (remote_ca_guid < timewait_info->remote_ca_guid)
+			node = node->rb_left;
+		else if (remote_ca_guid > timewait_info->remote_ca_guid)
+			node = node->rb_right;
+		else
+			return timewait_info;
+	}
+	return NULL;
+}
+
+static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
+						      *timewait_info)
+{
+	struct rb_node **link = &cm.remote_qp_table.rb_node;
+	struct rb_node *parent = NULL;
+	struct cm_timewait_info *cur_timewait_info;
+	u64 remote_ca_guid = timewait_info->remote_ca_guid;
+	u32 remote_qpn = timewait_info->remote_qpn;
+
+	while (*link) {
+		parent = *link;
+		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
+					     remote_qp_node);
+		if (remote_qpn < cur_timewait_info->remote_qpn)
+			link = &(*link)->rb_left;
+		else if (remote_qpn > cur_timewait_info->remote_qpn)
+			link = &(*link)->rb_right;
+		else if (remote_ca_guid < cur_timewait_info->remote_ca_guid)
+			link = &(*link)->rb_left;
+		else if (remote_ca_guid > cur_timewait_info->remote_ca_guid)
+			link = &(*link)->rb_right;
+		else
+			return cur_timewait_info;
+	}
+	timewait_info->inserted_remote_qp = 1;
+	rb_link_node(&timewait_info->remote_qp_node, parent, link);
+	rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
+	return NULL;
+}
+
+static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
+						    *cm_id_priv)
+{
+	struct rb_node **link = &cm.remote_sidr_table.rb_node;
+	struct rb_node *parent = NULL;
+	struct cm_id_private *cur_cm_id_priv;
+	union ib_gid *port_gid = &cm_id_priv->av.dgid;
+	u32 remote_id = cm_id_priv->id.remote_id;
+
+	while (*link) {
+		parent = *link;
+		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
+					  sidr_id_node);
+		if (remote_id < cur_cm_id_priv->id.remote_id)
+			link = &(*link)->rb_left;
+		else if (remote_id > cur_cm_id_priv->id.remote_id)
+			link = &(*link)->rb_right;
+		else {
+			int cmp;
+			cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
+				     sizeof *port_gid);
+			if (cmp < 0)
+				link = &(*link)->rb_left;
+			else if (cmp > 0)
+				link = &(*link)->rb_right;
+			else
+				return cur_cm_id_priv;
+		}
+	}
+	rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
+	rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
+	return NULL;
+}
+
+static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
+			       enum ib_cm_sidr_status status)
+{
+	struct ib_cm_sidr_rep_param param;
+
+	memset(&param, 0, sizeof param);
+	param.status = status;
+	ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
+}
+
+struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler,
+				 void *context)
+{
+	struct cm_id_private *cm_id_priv;
+	int ret;
+
+	cm_id_priv = kmalloc(sizeof *cm_id_priv, GFP_KERNEL);
+	if (!cm_id_priv)
+		return ERR_PTR(-ENOMEM);
+
+	memset(cm_id_priv, 0, sizeof *cm_id_priv);
+	cm_id_priv->id.state = IB_CM_IDLE;
+	cm_id_priv->id.cm_handler = cm_handler;
+	cm_id_priv->id.context = context;
+	ret = cm_alloc_id(cm_id_priv);
+	if (ret)
+		goto error;
+
+	spin_lock_init(&cm_id_priv->lock);
+	init_waitqueue_head(&cm_id_priv->wait);
+	INIT_LIST_HEAD(&cm_id_priv->work_list);
+	atomic_set(&cm_id_priv->work_count, -1);
+	atomic_set(&cm_id_priv->refcount, 1);
+	return &cm_id_priv->id;
+
+error:
+	kfree(cm_id_priv);
+	return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL(ib_create_cm_id);
+
+static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
+{
+	struct cm_work *work;
+
+	if (list_empty(&cm_id_priv->work_list))
+		return NULL;
+
+	work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
+	list_del(&work->list);
+	return work;
+}
+
+static void cm_free_work(struct cm_work *work)
+{
+	if (work->mad_recv_wc)
+		ib_free_recv_mad(work->mad_recv_wc);
+	kfree(work);
+}
+
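+/*
+ * IBA encodes timeouts as exponents: a field value of t means
+ * 4.096us * 2^t on the wire.  The helper below approximates this in
+ * milliseconds as 2^(t-8); e.g. t = 18 gives 1 << 10 = 1024 ms versus
+ * an exact 1073 ms.
+ */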
+static inline int cm_convert_to_ms(int iba_time)
+{
+	/* approximate conversion to ms from 4.096us x 2^iba_time */
+	return 1 << max(iba_time - 8, 0);
+}
+
+static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
+{
+	unsigned long flags;
+
+	if (!timewait_info->inserted_remote_id &&
+	    !timewait_info->inserted_remote_qp)
+		return;
+
+	spin_lock_irqsave(&cm.lock, flags);
+	if (timewait_info->inserted_remote_id) {
+		rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
+		timewait_info->inserted_remote_id = 0;
+	}
+
+	if (timewait_info->inserted_remote_qp) {
+		rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
+		timewait_info->inserted_remote_qp = 0;
+	}
+	spin_unlock_irqrestore(&cm.lock, flags);
+}
+
+static struct cm_timewait_info * cm_create_timewait_info(u32 local_id)
+{
+	struct cm_timewait_info *timewait_info;
+
+	timewait_info = kmalloc(sizeof *timewait_info, GFP_KERNEL);
+	if (!timewait_info)
+		return ERR_PTR(-ENOMEM);
+	memset(timewait_info, 0, sizeof *timewait_info);
+
+	timewait_info->work.local_id = local_id;
+	INIT_WORK(&timewait_info->work.work, cm_work_handler,
+		  &timewait_info->work);
+	timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
+	return timewait_info;
+}
+
+static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
+{
+	int wait_time;
+
+	/*
+	 * The cm_id could be destroyed by the user before we exit timewait.
+	 * To protect against this, we search for the cm_id after exiting
+	 * timewait before notifying the user that we've exited timewait.
+	 */
+	cm_id_priv->id.state = IB_CM_TIMEWAIT;
+	wait_time = cm_convert_to_ms(cm_id_priv->local_ack_timeout);
+	queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
+			   msecs_to_jiffies(wait_time));
+	cm_id_priv->timewait_info = NULL;
+}
+
+static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
+{
+	cm_id_priv->id.state = IB_CM_IDLE;
+	if (cm_id_priv->timewait_info) {
+		cm_cleanup_timewait(cm_id_priv->timewait_info);
+		kfree(cm_id_priv->timewait_info);
+		cm_id_priv->timewait_info = NULL;
+	}
+}
+
+void ib_destroy_cm_id(struct ib_cm_id *cm_id)
+{
+	struct cm_id_private *cm_id_priv;
+	struct cm_work *work;
+	unsigned long flags;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+retest:
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id->state) {
+	case IB_CM_LISTEN:
+		cm_id->state = IB_CM_IDLE;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_lock_irqsave(&cm.lock, flags);
+		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
+		spin_unlock_irqrestore(&cm.lock, flags);
+		break;
+	case IB_CM_SIDR_REQ_SENT:
+		cm_id->state = IB_CM_IDLE;
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent,
+			      (unsigned long) cm_id_priv->msg);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		break;
+	case IB_CM_SIDR_REQ_RCVD:
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
+		break;
+	case IB_CM_REQ_SENT:
+	case IB_CM_MRA_REQ_RCVD:
+	case IB_CM_REP_SENT:
+	case IB_CM_MRA_REP_RCVD:
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent,
+			      (unsigned long) cm_id_priv->msg);
+		/* Fall through */
+	case IB_CM_REQ_RCVD:
+	case IB_CM_MRA_REQ_SENT:
+	case IB_CM_REP_RCVD:
+	case IB_CM_MRA_REP_SENT:
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
+			       &cm_id_priv->av.port->cm_dev->ca_guid,
+			       sizeof cm_id_priv->av.port->cm_dev->ca_guid,
+			       NULL, 0);
+		break;
+	case IB_CM_ESTABLISHED:
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ib_send_cm_dreq(cm_id, NULL, 0);
+		goto retest;
+	case IB_CM_DREQ_SENT:
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent,
+			      (unsigned long) cm_id_priv->msg);
+		cm_enter_timewait(cm_id_priv);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		break;
+	case IB_CM_DREQ_RCVD:
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ib_send_cm_drep(cm_id, NULL, 0);
+		break;
+	default:
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		break;
+	}
+
+	cm_free_id(cm_id->local_id);
+	atomic_dec(&cm_id_priv->refcount);
+	wait_event(cm_id_priv->wait, !atomic_read(&cm_id_priv->refcount));
+	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
+		cm_free_work(work);
+	if (cm_id_priv->private_data && cm_id_priv->private_data_len)
+		kfree(cm_id_priv->private_data);
+	kfree(cm_id_priv);
+}
+EXPORT_SYMBOL(ib_destroy_cm_id);
+
+int ib_cm_listen(struct ib_cm_id *cm_id,
+		 u64 service_id,
+		 u64 service_mask)
+{
+	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
+	unsigned long flags;
+	int ret = 0;
+
+	service_mask = service_mask ? service_mask : ~0ULL;
+	service_id &= service_mask;
+	if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
+	    (service_id != IB_CM_ASSIGN_SERVICE_ID))
+		return -EINVAL;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	BUG_ON(cm_id->state != IB_CM_IDLE);
+
+	cm_id->state = IB_CM_LISTEN;
+
+	spin_lock_irqsave(&cm.lock, flags);
+	if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
+		cm_id->service_id = __cpu_to_be64(cm.listen_service_id++);
+		cm_id->service_mask = ~0ULL;
+	} else {
+		cm_id->service_id = service_id;
+		cm_id->service_mask = service_mask;
+	}
+	cur_cm_id_priv = cm_insert_listen(cm_id_priv);
+	spin_unlock_irqrestore(&cm.lock, flags);
+
+	if (cur_cm_id_priv) {
+		cm_id->state = IB_CM_IDLE;
+		ret = -EBUSY;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(ib_cm_listen);
+
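+/*
+ * MAD transaction ids are built so that a reply can be routed straight
+ * back to its cm_id: bits 63-32 hold the sending mad agent's hi_tid,
+ * bits 31-30 the message sequence and bits 29-0 the local comm id.
+ */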
+static u64 cm_form_tid(struct cm_id_private *cm_id_priv,
+		       enum cm_msg_sequence msg_seq)
+{
+	u64 hi_tid, low_tid;
+
+	hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
+	low_tid  = (u64) (cm_id_priv->id.local_id | (msg_seq << 30));
+	return cpu_to_be64(hi_tid | low_tid);
+}
+
+static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
+			      enum cm_msg_attr_id attr_id, u64 tid)
+{
+	hdr->base_version  = IB_MGMT_BASE_VERSION;
+	hdr->mgmt_class	   = IB_MGMT_CLASS_CM;
+	hdr->class_version = IB_CM_CLASS_VERSION;
+	hdr->method	   = IB_MGMT_METHOD_SEND;
+	hdr->attr_id	   = attr_id;
+	hdr->tid	   = tid;
+}
+
+static void cm_format_req(struct cm_req_msg *req_msg,
+			  struct cm_id_private *cm_id_priv,
+			  struct ib_cm_req_param *param)
+{
+	cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
+			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
+
+	req_msg->local_comm_id = cm_id_priv->id.local_id;
+	req_msg->service_id = param->service_id;
+	req_msg->local_ca_guid = cm_id_priv->av.port->cm_dev->ca_guid;
+	cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
+	cm_req_set_resp_res(req_msg, param->responder_resources);
+	cm_req_set_init_depth(req_msg, param->initiator_depth);
+	cm_req_set_remote_resp_timeout(req_msg,
+				       param->remote_cm_response_timeout);
+	cm_req_set_qp_type(req_msg, param->qp_type);
+	cm_req_set_flow_ctrl(req_msg, param->flow_control);
+	cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
+	cm_req_set_local_resp_timeout(req_msg,
+				      param->local_cm_response_timeout);
+	cm_req_set_retry_count(req_msg, param->retry_count);
+	req_msg->pkey = param->primary_path->pkey;
+	cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
+	cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
+	cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
+	cm_req_set_srq(req_msg, param->srq);
+
+	req_msg->primary_local_lid = param->primary_path->slid;
+	req_msg->primary_remote_lid = param->primary_path->dlid;
+	req_msg->primary_local_gid = param->primary_path->sgid;
+	req_msg->primary_remote_gid = param->primary_path->dgid;
+	cm_req_set_primary_flow_label(req_msg, param->primary_path->flow_label);
+	cm_req_set_primary_packet_rate(req_msg, param->primary_path->rate);
+	req_msg->primary_traffic_class = param->primary_path->traffic_class;
+	req_msg->primary_hop_limit = param->primary_path->hop_limit;
+	cm_req_set_primary_sl(req_msg, param->primary_path->sl);
+	cm_req_set_primary_subnet_local(req_msg, 1); /* local only... */
+	cm_req_set_primary_local_ack_timeout(req_msg,
+		min(31, param->primary_path->packet_life_time + 1));
+
+	if (param->alternate_path) {
+		req_msg->alt_local_lid = param->alternate_path->slid;
+		req_msg->alt_remote_lid = param->alternate_path->dlid;
+		req_msg->alt_local_gid = param->alternate_path->sgid;
+		req_msg->alt_remote_gid = param->alternate_path->dgid;
+		cm_req_set_alt_flow_label(req_msg,
+					  param->alternate_path->flow_label);
+		cm_req_set_alt_packet_rate(req_msg, param->alternate_path->rate);
+		req_msg->alt_traffic_class = param->alternate_path->traffic_class;
+		req_msg->alt_hop_limit = param->alternate_path->hop_limit;
+		cm_req_set_alt_sl(req_msg, param->alternate_path->sl);
+		cm_req_set_alt_subnet_local(req_msg, 1); /* local only... */
+		cm_req_set_alt_local_ack_timeout(req_msg,
+			min(31, param->alternate_path->packet_life_time + 1));
+	}
+
+	if (param->private_data && param->private_data_len)
+		memcpy(req_msg->private_data, param->private_data,
+		       param->private_data_len);
+}
+
+static inline int cm_validate_req_param(struct ib_cm_req_param *param)
+{
+	/* peer-to-peer not supported */
+	if (param->peer_to_peer)
+		return -EINVAL;
+
+	if (!param->primary_path)
+		return -EINVAL;
+
+	if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC)
+		return -EINVAL;
+
+	if (param->private_data &&
+	    param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
+		return -EINVAL;
+
+	if (param->alternate_path &&
+	    (param->alternate_path->pkey != param->primary_path->pkey ||
+	     param->alternate_path->mtu != param->primary_path->mtu))
+		return -EINVAL;
+
+	return 0;
+}
+
+int ib_send_cm_req(struct ib_cm_id *cm_id,
+		   struct ib_cm_req_param *param)
+{
+	struct cm_id_private *cm_id_priv;
+	struct ib_send_wr *bad_send_wr;
+	struct cm_req_msg *req_msg;
+	unsigned long flags;
+	int ret;
+
+	ret = cm_validate_req_param(param);
+	if (ret)
+		return ret;
+
+	/* Verify that we're not in timewait. */
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id->state != IB_CM_IDLE) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ret = -EINVAL;
+		goto out;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
+							    id.local_id);
+	if (IS_ERR(cm_id_priv->timewait_info))
+		goto out;
+
+	ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
+	if (ret)
+		goto error1;
+	if (param->alternate_path) {
+		ret = cm_init_av_by_path(param->alternate_path,
+					 &cm_id_priv->alt_av);
+		if (ret)
+			goto error1;
+	}
+	cm_id->service_id = param->service_id;
+	cm_id->service_mask = ~0ULL;
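+	/*
+	 * The REQ timeout must cover a round trip through the fabric
+	 * (twice the packet lifetime) plus the remote CM's response
+	 * delay, with both IBA exponents converted to milliseconds.
+	 */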
+	cm_id_priv->timeout_ms = cm_convert_to_ms(
+				    param->primary_path->packet_life_time) * 2 +
+				 cm_convert_to_ms(
+				    param->remote_cm_response_timeout);
+	cm_id_priv->max_cm_retries = param->max_cm_retries;
+	cm_id_priv->initiator_depth = param->initiator_depth;
+	cm_id_priv->responder_resources = param->responder_resources;
+	cm_id_priv->retry_count = param->retry_count;
+	cm_id_priv->path_mtu = param->primary_path->mtu;
+
+	ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
+	if (ret)
+		goto error1;
+
+	req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
+	cm_format_req(req_msg, cm_id_priv, param);
+	cm_id_priv->tid = req_msg->hdr.tid;
+	cm_id_priv->msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+	cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
+
+	cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
+	cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
+	cm_id_priv->local_ack_timeout =
+				cm_req_get_primary_local_ack_timeout(req_msg);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
+				&cm_id_priv->msg->send_wr, &bad_send_wr);
+	if (ret) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		goto error2;
+	}
+	BUG_ON(cm_id->state != IB_CM_IDLE);
+	cm_id->state = IB_CM_REQ_SENT;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return 0;
+
+error2:	cm_free_msg(cm_id_priv->msg);
+error1:	kfree(cm_id_priv->timewait_info);
+out:	return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_req);
+
+static int cm_issue_rej(struct cm_port *port,
+			struct ib_mad_recv_wc *mad_recv_wc,
+			enum ib_cm_rej_reason reason,
+			enum cm_msg_response msg_rejected,
+			void *ari, u8 ari_length)
+{
+	struct ib_mad_send_buf *msg = NULL;
+	struct ib_send_wr *bad_send_wr;
+	struct cm_rej_msg *rej_msg, *rcv_msg;
+	int ret;
+
+	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
+	if (ret)
+		return ret;
+
+	/* We just need common CM header information.  Cast to any message. */
+	rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
+	rej_msg = (struct cm_rej_msg *) msg->mad;
+
+	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
+	rej_msg->remote_comm_id = rcv_msg->local_comm_id;
+	rej_msg->local_comm_id = rcv_msg->remote_comm_id;
+	cm_rej_set_msg_rejected(rej_msg, msg_rejected);
+	rej_msg->reason = reason;
+
+	if (ari && ari_length) {
+		cm_rej_set_reject_info_len(rej_msg, ari_length);
+		memcpy(rej_msg->ari, ari, ari_length);
+	}
+
+	ret = ib_post_send_mad(port->mad_agent, &msg->send_wr, &bad_send_wr);
+	if (ret)
+		cm_free_msg(msg);
+
+	return ret;
+}
+
+static inline int cm_is_active_peer(u64 local_ca_guid, u64 remote_ca_guid,
+				    u32 local_qpn, u32 remote_qpn)
+{
+	return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
+		((local_ca_guid == remote_ca_guid) &&
+		 (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
+}
+
+static inline void cm_format_paths_from_req(struct cm_req_msg *req_msg,
+					    struct ib_sa_path_rec *primary_path,
+					    struct ib_sa_path_rec *alt_path)
+{
+	memset(primary_path, 0, sizeof *primary_path);
+	primary_path->dgid = req_msg->primary_local_gid;
+	primary_path->sgid = req_msg->primary_remote_gid;
+	primary_path->dlid = req_msg->primary_local_lid;
+	primary_path->slid = req_msg->primary_remote_lid;
+	primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
+	primary_path->hop_limit = req_msg->primary_hop_limit;
+	primary_path->traffic_class = req_msg->primary_traffic_class;
+	primary_path->reversible = 1;
+	primary_path->pkey = req_msg->pkey;
+	primary_path->sl = cm_req_get_primary_sl(req_msg);
+	primary_path->mtu_selector = IB_SA_EQ;
+	primary_path->mtu = cm_req_get_path_mtu(req_msg);
+	primary_path->rate_selector = IB_SA_EQ;
+	primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
+	primary_path->packet_life_time_selector = IB_SA_EQ;
+	primary_path->packet_life_time =
+		cm_req_get_primary_local_ack_timeout(req_msg);
+	primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
+
+	if (req_msg->alt_local_lid) {
+		memset(alt_path, 0, sizeof *alt_path);
+		alt_path->dgid = req_msg->alt_local_gid;
+		alt_path->sgid = req_msg->alt_remote_gid;
+		alt_path->dlid = req_msg->alt_local_lid;
+		alt_path->slid = req_msg->alt_remote_lid;
+		alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
+		alt_path->hop_limit = req_msg->alt_hop_limit;
+		alt_path->traffic_class = req_msg->alt_traffic_class;
+		alt_path->reversible = 1;
+		alt_path->pkey = req_msg->pkey;
+		alt_path->sl = cm_req_get_alt_sl(req_msg);
+		alt_path->mtu_selector = IB_SA_EQ;
+		alt_path->mtu = cm_req_get_path_mtu(req_msg);
+		alt_path->rate_selector = IB_SA_EQ;
+		alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
+		alt_path->packet_life_time_selector = IB_SA_EQ;
+		alt_path->packet_life_time =
+			cm_req_get_alt_local_ack_timeout(req_msg);
+		alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
+	}
+}
+
+static void cm_format_req_event(struct cm_work *work,
+				struct cm_id_private *cm_id_priv,
+				struct ib_cm_id *listen_id)
+{
+	struct cm_req_msg *req_msg;
+	struct ib_cm_req_event_param *param;
+
+	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
+	param = &work->cm_event.param.req_rcvd;
+	param->listen_id = listen_id;
+	param->device = cm_id_priv->av.port->mad_agent->device;
+	param->port = cm_id_priv->av.port->port_num;
+	param->primary_path = &work->path[0];
+	if (req_msg->alt_local_lid)
+		param->alternate_path = &work->path[1];
+	else
+		param->alternate_path = NULL;
+	param->remote_ca_guid = req_msg->local_ca_guid;
+	param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
+	param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
+	param->qp_type = cm_req_get_qp_type(req_msg);
+	param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
+	param->responder_resources = cm_req_get_init_depth(req_msg);
+	param->initiator_depth = cm_req_get_resp_res(req_msg);
+	param->local_cm_response_timeout =
+					cm_req_get_remote_resp_timeout(req_msg);
+	param->flow_control = cm_req_get_flow_ctrl(req_msg);
+	param->remote_cm_response_timeout =
+					cm_req_get_local_resp_timeout(req_msg);
+	param->retry_count = cm_req_get_retry_count(req_msg);
+	param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
+	param->srq = cm_req_get_srq(req_msg);
+	work->cm_event.private_data = &req_msg->private_data;
+}
+
+static void cm_process_work(struct cm_id_private *cm_id_priv,
+			    struct cm_work *work)
+{
+	unsigned long flags;
+	int ret;
+
+	/* We will typically only have the current event to report. */
+	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
+	cm_free_work(work);
+
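+	/*
+	 * work_count starts at -1 and is incremented once per queued
+	 * event, so the atomic_add_negative(-1, ...) below goes
+	 * negative exactly when the event queue has been drained.
+	 */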
+	while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		work = cm_dequeue_work(cm_id_priv);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		BUG_ON(!work);
+		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
+						&work->cm_event);
+		cm_free_work(work);
+	}
+	cm_deref_id(cm_id_priv);
+	if (ret)
+		ib_destroy_cm_id(&cm_id_priv->id);
+}
+
+static void cm_format_mra(struct cm_mra_msg *mra_msg,
+			  struct cm_id_private *cm_id_priv,
+			  enum cm_msg_response msg_mraed, u8 service_timeout,
+			  const void *private_data, u8 private_data_len)
+{
+	cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
+	cm_mra_set_msg_mraed(mra_msg, msg_mraed);
+	mra_msg->local_comm_id = cm_id_priv->id.local_id;
+	mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
+	cm_mra_set_service_timeout(mra_msg, service_timeout);
+
+	if (private_data && private_data_len)
+		memcpy(mra_msg->private_data, private_data, private_data_len);
+}
+
+static void cm_format_rej(struct cm_rej_msg *rej_msg,
+			  struct cm_id_private *cm_id_priv,
+			  enum ib_cm_rej_reason reason,
+			  void *ari,
+			  u8 ari_length,
+			  const void *private_data,
+			  u8 private_data_len)
+{
+	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
+	rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
+
+	switch(cm_id_priv->id.state) {
+	case IB_CM_REQ_RCVD:
+		rej_msg->local_comm_id = 0;
+		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
+		break;
+	case IB_CM_MRA_REQ_SENT:
+		rej_msg->local_comm_id = cm_id_priv->id.local_id;
+		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
+		break;
+	case IB_CM_REP_RCVD:
+	case IB_CM_MRA_REP_SENT:
+		rej_msg->local_comm_id = cm_id_priv->id.local_id;
+		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
+		break;
+	default:
+		rej_msg->local_comm_id = cm_id_priv->id.local_id;
+		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
+		break;
+	}
+
+	rej_msg->reason = reason;
+	if (ari && ari_length) {
+		cm_rej_set_reject_info_len(rej_msg, ari_length);
+		memcpy(rej_msg->ari, ari, ari_length);
+	}
+
+	if (private_data && private_data_len)
+		memcpy(rej_msg->private_data, private_data, private_data_len);
+}
+
+static void cm_dup_req_handler(struct cm_work *work,
+			       struct cm_id_private *cm_id_priv)
+{
+	struct ib_mad_send_buf *msg = NULL;
+	struct ib_send_wr *bad_send_wr;
+	unsigned long flags;
+	int ret;
+
+	/* Quick state check to discard duplicate REQs. */
+	if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
+		return;
+
+	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
+	if (ret)
+		return;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->id.state) {
+	case IB_CM_MRA_REQ_SENT:
+		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
+			      CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
+			      cm_id_priv->private_data,
+			      cm_id_priv->private_data_len);
+		break;
+	case IB_CM_TIMEWAIT:
+		cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
+			      IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
+		break;
+	default:
+		goto unlock;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr,
+			       &bad_send_wr);
+	if (ret)
+		goto free;
+	return;
+
+unlock:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+free:	cm_free_msg(msg);
+}
+
+static struct cm_id_private * cm_match_req(struct cm_work *work,
+					   struct cm_id_private *cm_id_priv)
+{
+	struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
+	struct cm_timewait_info *timewait_info;
+	struct cm_req_msg *req_msg;
+	unsigned long flags;
+
+	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
+
+	/* Check for duplicate REQ and stale connections. */
+	spin_lock_irqsave(&cm.lock, flags);
+	timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
+	if (!timewait_info)
+		timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
+
+	if (timewait_info) {
+		cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+					   timewait_info->work.remote_id);
+		spin_unlock_irqrestore(&cm.lock, flags);
+		if (cur_cm_id_priv) {
+			cm_dup_req_handler(work, cur_cm_id_priv);
+			cm_deref_id(cur_cm_id_priv);
+		} else
+			cm_issue_rej(work->port, work->mad_recv_wc,
+				     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
+				     NULL, 0);
+		goto error;
+	}
+
+	/* Find matching listen request. */
+	listen_cm_id_priv = cm_find_listen(req_msg->service_id);
+	if (!listen_cm_id_priv) {
+		spin_unlock_irqrestore(&cm.lock, flags);
+		cm_issue_rej(work->port, work->mad_recv_wc,
+			     IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
+			     NULL, 0);
+		goto error;
+	}
+	atomic_inc(&listen_cm_id_priv->refcount);
+	atomic_inc(&cm_id_priv->refcount);
+	cm_id_priv->id.state = IB_CM_REQ_RCVD;
+	atomic_inc(&cm_id_priv->work_count);
+	spin_unlock_irqrestore(&cm.lock, flags);
+	return listen_cm_id_priv;
+
+error:	cm_cleanup_timewait(cm_id_priv->timewait_info);
+	return NULL;
+}
+
+static int cm_req_handler(struct cm_work *work)
+{
+	struct ib_cm_id *cm_id;
+	struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
+	struct cm_req_msg *req_msg;
+	int ret;
+
+	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
+
+	cm_id = ib_create_cm_id(NULL, NULL);
+	if (IS_ERR(cm_id))
+		return PTR_ERR(cm_id);
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	cm_id_priv->id.remote_id = req_msg->local_comm_id;
+	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+				&cm_id_priv->av);
+	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
+							    id.local_id);
+	if (IS_ERR(cm_id_priv->timewait_info)) {
+		ret = PTR_ERR(cm_id_priv->timewait_info);
+		goto error1;
+	}
+	cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
+	cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
+	cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
+
+	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
+	if (!listen_cm_id_priv) {
+		ret = -EINVAL;
+		goto error2;
+	}
+
+	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
+	cm_id_priv->id.context = listen_cm_id_priv->id.context;
+	cm_id_priv->id.service_id = req_msg->service_id;
+	cm_id_priv->id.service_mask = ~0ULL;
+
+	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
+	ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+	if (ret)
+		goto error3;
+	if (req_msg->alt_local_lid) {
+		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
+		if (ret)
+			goto error3;
+	}
+	cm_id_priv->tid = req_msg->hdr.tid;
+	cm_id_priv->timeout_ms = cm_convert_to_ms(
+					cm_req_get_local_resp_timeout(req_msg));
+	cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
+	cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
+	cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
+	cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
+	cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
+	cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
+	cm_id_priv->local_ack_timeout =
+				cm_req_get_primary_local_ack_timeout(req_msg);
+	cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
+	cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
+
+	cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
+	cm_process_work(cm_id_priv, work);
+	cm_deref_id(listen_cm_id_priv);
+	return 0;
+
+error3:	atomic_dec(&cm_id_priv->refcount);
+	cm_deref_id(listen_cm_id_priv);
+	cm_cleanup_timewait(cm_id_priv->timewait_info);
+error2:	kfree(cm_id_priv->timewait_info);
+error1:	ib_destroy_cm_id(&cm_id_priv->id);
+	return ret;
+}
+
+static void cm_format_rep(struct cm_rep_msg *rep_msg,
+			  struct cm_id_private *cm_id_priv,
+			  struct ib_cm_rep_param *param)
+{
+	cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
+	rep_msg->local_comm_id = cm_id_priv->id.local_id;
+	rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
+	cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
+	cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
+	rep_msg->resp_resources = param->responder_resources;
+	rep_msg->initiator_depth = param->initiator_depth;
+	cm_rep_set_target_ack_delay(rep_msg, param->target_ack_delay);
+	cm_rep_set_failover(rep_msg, param->failover_accepted);
+	cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
+	cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
+	cm_rep_set_srq(rep_msg, param->srq);
+	rep_msg->local_ca_guid = cm_id_priv->av.port->cm_dev->ca_guid;
+
+	if (param->private_data && param->private_data_len)
+		memcpy(rep_msg->private_data, param->private_data,
+		       param->private_data_len);
+}
+
+int ib_send_cm_rep(struct ib_cm_id *cm_id,
+		   struct ib_cm_rep_param *param)
+{
+	struct cm_id_private *cm_id_priv;
+	struct ib_mad_send_buf *msg;
+	struct cm_rep_msg *rep_msg;
+	struct ib_send_wr *bad_send_wr;
+	unsigned long flags;
+	int ret;
+
+	if (param->private_data &&
+	    param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
+		return -EINVAL;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id->state != IB_CM_REQ_RCVD &&
+	    cm_id->state != IB_CM_MRA_REQ_SENT) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = cm_alloc_msg(cm_id_priv, &msg);
+	if (ret)
+		goto out;
+
+	rep_msg = (struct cm_rep_msg *) msg->mad;
+	cm_format_rep(rep_msg, cm_id_priv, param);
+	msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+	msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
+
+	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
+			       &msg->send_wr, &bad_send_wr);
+	if (ret) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		cm_free_msg(msg);
+		return ret;
+	}
+
+	cm_id->state = IB_CM_REP_SENT;
+	cm_id_priv->msg = msg;
+	cm_id_priv->initiator_depth = param->initiator_depth;
+	cm_id_priv->responder_resources = param->responder_resources;
+	cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
+	cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg);
+
+out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_rep);
+
+static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
+			  struct cm_id_private *cm_id_priv,
+			  const void *private_data,
+			  u8 private_data_len)
+{
+	cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
+	rtu_msg->local_comm_id = cm_id_priv->id.local_id;
+	rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
+
+	if (private_data && private_data_len)
+		memcpy(rtu_msg->private_data, private_data, private_data_len);
+}
+
+int ib_send_cm_rtu(struct ib_cm_id *cm_id,
+		   const void *private_data,
+		   u8 private_data_len)
+{
+	struct cm_id_private *cm_id_priv;
+	struct ib_mad_send_buf *msg;
+	struct ib_send_wr *bad_send_wr;
+	unsigned long flags;
+	void *data;
+	int ret;
+
+	if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
+		return -EINVAL;
+
+	data = cm_copy_private_data(private_data, private_data_len);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id->state != IB_CM_REP_RCVD &&
+	    cm_id->state != IB_CM_MRA_REP_SENT) {
+		ret = -EINVAL;
+		goto error;
+	}
+
+	ret = cm_alloc_msg(cm_id_priv, &msg);
+	if (ret)
+		goto error;
+
+	cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
+		      private_data, private_data_len);
+
+	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
+			       &msg->send_wr, &bad_send_wr);
+	if (ret) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		cm_free_msg(msg);
+		kfree(data);
+		return ret;
+	}
+
+	cm_id->state = IB_CM_ESTABLISHED;
+	cm_set_private_data(cm_id_priv, data, private_data_len);
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return 0;
+
+error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	kfree(data);
+	return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_rtu);
+
+static void cm_format_rep_event(struct cm_work *work)
+{
+	struct cm_rep_msg *rep_msg;
+	struct ib_cm_rep_event_param *param;
+
+	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
+	param = &work->cm_event.param.rep_rcvd;
+	param->remote_ca_guid = rep_msg->local_ca_guid;
+	param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
+	param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg));
+	param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
+	param->responder_resources = rep_msg->initiator_depth;
+	param->initiator_depth = rep_msg->resp_resources;
+	param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
+	param->failover_accepted = cm_rep_get_failover(rep_msg);
+	param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
+	param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
+	param->srq = cm_rep_get_srq(rep_msg);
+	work->cm_event.private_data = &rep_msg->private_data;
+}
+
+static void cm_dup_rep_handler(struct cm_work *work)
+{
+	struct cm_id_private *cm_id_priv;
+	struct cm_rep_msg *rep_msg;
+	struct ib_mad_send_buf *msg = NULL;
+	struct ib_send_wr *bad_send_wr;
+	unsigned long flags;
+	int ret;
+
+	rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
+	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
+				   rep_msg->local_comm_id);
+	if (!cm_id_priv)
+		return;
+
+	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
+	if (ret)
+		goto deref;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
+		cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
+			      cm_id_priv->private_data,
+			      cm_id_priv->private_data_len);
+	else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
+		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
+			      CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
+			      cm_id_priv->private_data,
+			      cm_id_priv->private_data_len);
+	else
+		goto unlock;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr,
+			       &bad_send_wr);
+	if (ret)
+		goto free;
+	goto deref;
+
+unlock:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+free:	cm_free_msg(msg);
+deref:	cm_deref_id(cm_id_priv);
+}
+
+static int cm_rep_handler(struct cm_work *work)
+{
+	struct cm_id_private *cm_id_priv;
+	struct cm_rep_msg *rep_msg;
+	unsigned long flags;
+	int ret;
+
+	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
+	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
+	if (!cm_id_priv) {
+		cm_dup_rep_handler(work);
+		return -EINVAL;
+	}
+
+	cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
+	cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
+	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
+
+	spin_lock_irqsave(&cm.lock, flags);
+	/* Check for duplicate REP. */
+	if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
+		spin_unlock_irqrestore(&cm.lock, flags);
+		ret = -EINVAL;
+		goto error;
+	}
+	/* Check for a stale connection. */
+	if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
+		spin_unlock_irqrestore(&cm.lock, flags);
+		cm_issue_rej(work->port, work->mad_recv_wc,
+			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
+			     NULL, 0);
+		ret = -EINVAL;
+		goto error;
+	}
+	spin_unlock_irqrestore(&cm.lock, flags);
+
+	cm_format_rep_event(work);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->id.state) {
+	case IB_CM_REQ_SENT:
+	case IB_CM_MRA_REQ_RCVD:
+		break;
+	default:
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1567 ret = -EINVAL;
1568 goto error;
1569 }
1570 cm_id_priv->id.state = IB_CM_REP_RCVD;
1571 cm_id_priv->id.remote_id = rep_msg->local_comm_id;
1572 cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
1573 cm_id_priv->initiator_depth = rep_msg->resp_resources;
1574 cm_id_priv->responder_resources = rep_msg->initiator_depth;
1575 cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
1576 cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1577
1578 /* todo: handle peer_to_peer */
1579
1580 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
1581 (unsigned long) cm_id_priv->msg);
1582 ret = atomic_inc_and_test(&cm_id_priv->work_count);
1583 if (!ret)
1584 list_add_tail(&work->list, &cm_id_priv->work_list);
1585 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1586
1587 if (ret)
1588 cm_process_work(cm_id_priv, work);
1589 else
1590 cm_deref_id(cm_id_priv);
1591 return 0;
1592
1593error: cm_cleanup_timewait(cm_id_priv->timewait_info);
1594 cm_deref_id(cm_id_priv);
1595 return ret;
1596}
1597
1598static int cm_establish_handler(struct cm_work *work)
1599{
1600 struct cm_id_private *cm_id_priv;
1601 unsigned long flags;
1602 int ret;
1603
1604 /* See comment in ib_cm_establish about lookup. */
1605 cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
1606 if (!cm_id_priv)
1607 return -EINVAL;
1608
1609 spin_lock_irqsave(&cm_id_priv->lock, flags);
1610 if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
1611 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1612 goto out;
1613 }
1614
1615 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
1616 (unsigned long) cm_id_priv->msg);
1617 ret = atomic_inc_and_test(&cm_id_priv->work_count);
1618 if (!ret)
1619 list_add_tail(&work->list, &cm_id_priv->work_list);
1620 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1621
1622 if (ret)
1623 cm_process_work(cm_id_priv, work);
1624 else
1625 cm_deref_id(cm_id_priv);
1626 return 0;
1627out:
1628 cm_deref_id(cm_id_priv);
1629 return -EINVAL;
1630}
1631
1632static int cm_rtu_handler(struct cm_work *work)
1633{
1634 struct cm_id_private *cm_id_priv;
1635 struct cm_rtu_msg *rtu_msg;
1636 unsigned long flags;
1637 int ret;
1638
1639 rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
1640 cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
1641 rtu_msg->local_comm_id);
1642 if (!cm_id_priv)
1643 return -EINVAL;
1644
1645 work->cm_event.private_data = &rtu_msg->private_data;
1646
1647 spin_lock_irqsave(&cm_id_priv->lock, flags);
1648 if (cm_id_priv->id.state != IB_CM_REP_SENT &&
1649 cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
1650 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1651 goto out;
1652 }
1653 cm_id_priv->id.state = IB_CM_ESTABLISHED;
1654
1655 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
1656 (unsigned long) cm_id_priv->msg);
1657 ret = atomic_inc_and_test(&cm_id_priv->work_count);
1658 if (!ret)
1659 list_add_tail(&work->list, &cm_id_priv->work_list);
1660 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1661
1662 if (ret)
1663 cm_process_work(cm_id_priv, work);
1664 else
1665 cm_deref_id(cm_id_priv);
1666 return 0;
1667out:
1668 cm_deref_id(cm_id_priv);
1669 return -EINVAL;
1670}
1671
1672static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
1673 struct cm_id_private *cm_id_priv,
1674 const void *private_data,
1675 u8 private_data_len)
1676{
1677 cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
1678 cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
1679 dreq_msg->local_comm_id = cm_id_priv->id.local_id;
1680 dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
1681 cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
1682
1683 if (private_data && private_data_len)
1684 memcpy(dreq_msg->private_data, private_data, private_data_len);
1685}
1686
1687int ib_send_cm_dreq(struct ib_cm_id *cm_id,
1688 const void *private_data,
1689 u8 private_data_len)
1690{
1691 struct cm_id_private *cm_id_priv;
1692 struct ib_mad_send_buf *msg;
1693 struct ib_send_wr *bad_send_wr;
1694 unsigned long flags;
1695 int ret;
1696
1697 if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
1698 return -EINVAL;
1699
1700 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1701 spin_lock_irqsave(&cm_id_priv->lock, flags);
1702 if (cm_id->state != IB_CM_ESTABLISHED) {
1703 ret = -EINVAL;
1704 goto out;
1705 }
1706
1707 ret = cm_alloc_msg(cm_id_priv, &msg);
1708 if (ret) {
1709 cm_enter_timewait(cm_id_priv);
1710 goto out;
1711 }
1712
1713 cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
1714 private_data, private_data_len);
1715 msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
1716 msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
1717
1718 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
1719 &msg->send_wr, &bad_send_wr);
1720 if (ret) {
1721 cm_enter_timewait(cm_id_priv);
1722 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1723 cm_free_msg(msg);
1724 return ret;
1725 }
1726
1727 cm_id->state = IB_CM_DREQ_SENT;
1728 cm_id_priv->msg = msg;
1729out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1730 return ret;
1731}
1732EXPORT_SYMBOL(ib_send_cm_dreq);
1733
1734static void cm_format_drep(struct cm_drep_msg *drep_msg,
1735 struct cm_id_private *cm_id_priv,
1736 const void *private_data,
1737 u8 private_data_len)
1738{
1739 cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
1740 drep_msg->local_comm_id = cm_id_priv->id.local_id;
1741 drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
1742
1743 if (private_data && private_data_len)
1744 memcpy(drep_msg->private_data, private_data, private_data_len);
1745}
1746
1747int ib_send_cm_drep(struct ib_cm_id *cm_id,
1748 const void *private_data,
1749 u8 private_data_len)
1750{
1751 struct cm_id_private *cm_id_priv;
1752 struct ib_mad_send_buf *msg;
1753 struct ib_send_wr *bad_send_wr;
1754 unsigned long flags;
1755 void *data;
1756 int ret;
1757
1758 if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
1759 return -EINVAL;
1760
1761 data = cm_copy_private_data(private_data, private_data_len);
1762 if (IS_ERR(data))
1763 return PTR_ERR(data);
1764
1765 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1766 spin_lock_irqsave(&cm_id_priv->lock, flags);
1767 if (cm_id->state != IB_CM_DREQ_RCVD) {
1768 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1769 kfree(data);
1770 return -EINVAL;
1771 }
1772
1773 cm_set_private_data(cm_id_priv, data, private_data_len);
1774 cm_enter_timewait(cm_id_priv);
1775
1776 ret = cm_alloc_msg(cm_id_priv, &msg);
1777 if (ret)
1778 goto out;
1779
1780 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
1781 private_data, private_data_len);
1782
1783 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr,
1784 &bad_send_wr);
1785 if (ret) {
1786 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1787 cm_free_msg(msg);
1788 return ret;
1789 }
1790
1791out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1792 return ret;
1793}
1794EXPORT_SYMBOL(ib_send_cm_drep);
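Disconnecting is a two-message handshake: the side leaving IB_CM_ESTABLISHED posts a DREQ, the peer answers with a DREP from IB_CM_DREQ_RCVD, and both ids then sit out the timewait period. A sketch of the two halves (the responder lines are a cm_handler fragment):

/* Initiator: legal only while IB_CM_ESTABLISHED. */
ret = ib_send_cm_dreq(cm_id, NULL, 0);

/* Responder, inside its cm_handler: */
case IB_CM_DREQ_RECEIVED:
	/* cm_dreq_handler has already moved the id to IB_CM_DREQ_RCVD. */
	ret = ib_send_cm_drep(cm_id, NULL, 0);
	break;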
1795
1796static int cm_dreq_handler(struct cm_work *work)
1797{
1798 struct cm_id_private *cm_id_priv;
1799 struct cm_dreq_msg *dreq_msg;
1800 struct ib_mad_send_buf *msg = NULL;
1801 struct ib_send_wr *bad_send_wr;
1802 unsigned long flags;
1803 int ret;
1804
1805 dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
1806 cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
1807 dreq_msg->local_comm_id);
1808 if (!cm_id_priv)
1809 return -EINVAL;
1810
1811 work->cm_event.private_data = &dreq_msg->private_data;
1812
1813 spin_lock_irqsave(&cm_id_priv->lock, flags);
1814 if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
1815 goto unlock;
1816
1817 switch (cm_id_priv->id.state) {
1818 case IB_CM_REP_SENT:
1819 case IB_CM_DREQ_SENT:
1820 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
1821 (unsigned long) cm_id_priv->msg);
1822 break;
1823 case IB_CM_ESTABLISHED:
1824 case IB_CM_MRA_REP_RCVD:
1825 break;
1826 case IB_CM_TIMEWAIT:
1827 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
1828 goto unlock;
1829
1830 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
1831 cm_id_priv->private_data,
1832 cm_id_priv->private_data_len);
1833 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1834
1835 if (ib_post_send_mad(cm_id_priv->av.port->mad_agent,
1836 &msg->send_wr, &bad_send_wr))
1837 cm_free_msg(msg);
1838 goto deref;
1839 default:
1840 goto unlock;
1841 }
1842 cm_id_priv->id.state = IB_CM_DREQ_RCVD;
1843 cm_id_priv->tid = dreq_msg->hdr.tid;
1844 ret = atomic_inc_and_test(&cm_id_priv->work_count);
1845 if (!ret)
1846 list_add_tail(&work->list, &cm_id_priv->work_list);
1847 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1848
1849 if (ret)
1850 cm_process_work(cm_id_priv, work);
1851 else
1852 cm_deref_id(cm_id_priv);
1853 return 0;
1854
1855unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1856deref: cm_deref_id(cm_id_priv);
1857 return -EINVAL;
1858}
1859
1860static int cm_drep_handler(struct cm_work *work)
1861{
1862 struct cm_id_private *cm_id_priv;
1863 struct cm_drep_msg *drep_msg;
1864 unsigned long flags;
1865 int ret;
1866
1867 drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
1868 cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
1869 drep_msg->local_comm_id);
1870 if (!cm_id_priv)
1871 return -EINVAL;
1872
1873 work->cm_event.private_data = &drep_msg->private_data;
1874
1875 spin_lock_irqsave(&cm_id_priv->lock, flags);
1876 if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
1877 cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
1878 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1879 goto out;
1880 }
1881 cm_enter_timewait(cm_id_priv);
1882
1883 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
1884 (unsigned long) cm_id_priv->msg);
1885 ret = atomic_inc_and_test(&cm_id_priv->work_count);
1886 if (!ret)
1887 list_add_tail(&work->list, &cm_id_priv->work_list);
1888 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1889
1890 if (ret)
1891 cm_process_work(cm_id_priv, work);
1892 else
1893 cm_deref_id(cm_id_priv);
1894 return 0;
1895out:
1896 cm_deref_id(cm_id_priv);
1897 return -EINVAL;
1898}
1899
1900int ib_send_cm_rej(struct ib_cm_id *cm_id,
1901 enum ib_cm_rej_reason reason,
1902 void *ari,
1903 u8 ari_length,
1904 const void *private_data,
1905 u8 private_data_len)
1906{
1907 struct cm_id_private *cm_id_priv;
1908 struct ib_mad_send_buf *msg;
1909 struct ib_send_wr *bad_send_wr;
1910 unsigned long flags;
1911 int ret;
1912
1913 if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
1914 (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
1915 return -EINVAL;
1916
1917 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1918
1919 spin_lock_irqsave(&cm_id_priv->lock, flags);
1920 switch (cm_id->state) {
1921 case IB_CM_REQ_SENT:
1922 case IB_CM_MRA_REQ_RCVD:
1923 case IB_CM_REQ_RCVD:
1924 case IB_CM_MRA_REQ_SENT:
1925 case IB_CM_REP_RCVD:
1926 case IB_CM_MRA_REP_SENT:
1927 ret = cm_alloc_msg(cm_id_priv, &msg);
1928 if (!ret)
1929 cm_format_rej((struct cm_rej_msg *) msg->mad,
1930 cm_id_priv, reason, ari, ari_length,
1931 private_data, private_data_len);
1932
1933 cm_reset_to_idle(cm_id_priv);
1934 break;
1935 case IB_CM_REP_SENT:
1936 case IB_CM_MRA_REP_RCVD:
1937 ret = cm_alloc_msg(cm_id_priv, &msg);
1938 if (!ret)
1939 cm_format_rej((struct cm_rej_msg *) msg->mad,
1940 cm_id_priv, reason, ari, ari_length,
1941 private_data, private_data_len);
1942
1943 cm_enter_timewait(cm_id_priv);
1944 break;
1945 default:
1946 ret = -EINVAL;
1947 goto out;
1948 }
1949
1950 if (ret)
1951 goto out;
1952
1953 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
1954 &msg->send_wr, &bad_send_wr);
1955 if (ret)
1956 cm_free_msg(msg);
1957
1958out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1959 return ret;
1960}
1961EXPORT_SYMBOL(ib_send_cm_rej);
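A sketch of declining an incoming REQ, assuming the consumer-defined reject reason from ib_cm.h; depending on the state above, the CM resets the id to idle or parks it in timewait:

/* From the IB_CM_REQ_RECEIVED handler: no ARI, no private data. */
ret = ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
		     NULL, 0, NULL, 0);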
1962
1963static void cm_format_rej_event(struct cm_work *work)
1964{
1965 struct cm_rej_msg *rej_msg;
1966 struct ib_cm_rej_event_param *param;
1967
1968 rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
1969 param = &work->cm_event.param.rej_rcvd;
1970 param->ari = rej_msg->ari;
1971 param->ari_length = cm_rej_get_reject_info_len(rej_msg);
1972 param->reason = rej_msg->reason;
1973 work->cm_event.private_data = &rej_msg->private_data;
1974}
1975
1976static struct cm_id_private *cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
1977{
1978 struct cm_timewait_info *timewait_info;
1979 struct cm_id_private *cm_id_priv;
1980 unsigned long flags;
1981 u32 remote_id;
1982
1983 remote_id = rej_msg->local_comm_id;
1984
1985 if (rej_msg->reason == IB_CM_REJ_TIMEOUT) {
1986 spin_lock_irqsave(&cm.lock, flags);
1987		timewait_info = cm_find_remote_id(*((u64 *) rej_msg->ari),
1988 remote_id);
1989 if (!timewait_info) {
1990 spin_unlock_irqrestore(&cm.lock, flags);
1991 return NULL;
1992 }
1993 cm_id_priv = idr_find(&cm.local_id_table,
1994 (int) timewait_info->work.local_id);
1995 if (cm_id_priv) {
1996 if (cm_id_priv->id.remote_id == remote_id)
1997 atomic_inc(&cm_id_priv->refcount);
1998 else
1999 cm_id_priv = NULL;
2000 }
2001 spin_unlock_irqrestore(&cm.lock, flags);
2002 } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2003 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2004 else
2005 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
2006
2007 return cm_id_priv;
2008}
2009
2010static int cm_rej_handler(struct cm_work *work)
2011{
2012 struct cm_id_private *cm_id_priv;
2013 struct cm_rej_msg *rej_msg;
2014 unsigned long flags;
2015 int ret;
2016
2017 rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2018 cm_id_priv = cm_acquire_rejected_id(rej_msg);
2019 if (!cm_id_priv)
2020 return -EINVAL;
2021
2022 cm_format_rej_event(work);
2023
2024 spin_lock_irqsave(&cm_id_priv->lock, flags);
2025 switch (cm_id_priv->id.state) {
2026 case IB_CM_REQ_SENT:
2027 case IB_CM_MRA_REQ_RCVD:
2028 case IB_CM_REP_SENT:
2029 case IB_CM_MRA_REP_RCVD:
2030 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2031 (unsigned long) cm_id_priv->msg);
2032 /* fall through */
2033 case IB_CM_REQ_RCVD:
2034 case IB_CM_MRA_REQ_SENT:
2035 if (rej_msg->reason == IB_CM_REJ_STALE_CONN)
2036 cm_enter_timewait(cm_id_priv);
2037 else
2038 cm_reset_to_idle(cm_id_priv);
2039 break;
2040 case IB_CM_DREQ_SENT:
2041 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2042 (unsigned long) cm_id_priv->msg);
2043 /* fall through */
2044 case IB_CM_REP_RCVD:
2045 case IB_CM_MRA_REP_SENT:
2046 case IB_CM_ESTABLISHED:
2047 cm_enter_timewait(cm_id_priv);
2048 break;
2049 default:
2050 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2051 ret = -EINVAL;
2052 goto out;
2053 }
2054
2055 ret = atomic_inc_and_test(&cm_id_priv->work_count);
2056 if (!ret)
2057 list_add_tail(&work->list, &cm_id_priv->work_list);
2058 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2059
2060 if (ret)
2061 cm_process_work(cm_id_priv, work);
2062 else
2063 cm_deref_id(cm_id_priv);
2064 return 0;
2065out:
2066 cm_deref_id(cm_id_priv);
2067 return -EINVAL;
2068}
2069
2070int ib_send_cm_mra(struct ib_cm_id *cm_id,
2071 u8 service_timeout,
2072 const void *private_data,
2073 u8 private_data_len)
2074{
2075 struct cm_id_private *cm_id_priv;
2076 struct ib_mad_send_buf *msg;
2077 struct ib_send_wr *bad_send_wr;
2078 void *data;
2079 unsigned long flags;
2080 int ret;
2081
2082 if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
2083 return -EINVAL;
2084
2085 data = cm_copy_private_data(private_data, private_data_len);
2086 if (IS_ERR(data))
2087 return PTR_ERR(data);
2088
2089 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2090
2091 spin_lock_irqsave(&cm_id_priv->lock, flags);
2092	switch (cm_id_priv->id.state) {
2093 case IB_CM_REQ_RCVD:
2094 ret = cm_alloc_msg(cm_id_priv, &msg);
2095 if (ret)
2096 goto error1;
2097
2098 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2099 CM_MSG_RESPONSE_REQ, service_timeout,
2100 private_data, private_data_len);
2101 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
2102 &msg->send_wr, &bad_send_wr);
2103 if (ret)
2104 goto error2;
2105 cm_id->state = IB_CM_MRA_REQ_SENT;
2106 break;
2107 case IB_CM_REP_RCVD:
2108 ret = cm_alloc_msg(cm_id_priv, &msg);
2109 if (ret)
2110 goto error1;
2111
2112 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2113 CM_MSG_RESPONSE_REP, service_timeout,
2114 private_data, private_data_len);
2115 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
2116 &msg->send_wr, &bad_send_wr);
2117 if (ret)
2118 goto error2;
2119 cm_id->state = IB_CM_MRA_REP_SENT;
2120 break;
2121 case IB_CM_ESTABLISHED:
2122 ret = cm_alloc_msg(cm_id_priv, &msg);
2123 if (ret)
2124 goto error1;
2125
2126 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2127 CM_MSG_RESPONSE_OTHER, service_timeout,
2128 private_data, private_data_len);
2129 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
2130 &msg->send_wr, &bad_send_wr);
2131 if (ret)
2132 goto error2;
2133 cm_id->lap_state = IB_CM_MRA_LAP_SENT;
2134 break;
2135 default:
2136 ret = -EINVAL;
2137 goto error1;
2138 }
2139 cm_id_priv->service_timeout = service_timeout;
2140 cm_set_private_data(cm_id_priv, data, private_data_len);
2141 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2142 return 0;
2143
2144error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2145 kfree(data);
2146 return ret;
2147
2148error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2149 kfree(data);
2150 cm_free_msg(msg);
2151 return ret;
2152}
2153EXPORT_SYMBOL(ib_send_cm_mra);
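When connection setup will outlast the peer's retry timeout, the handler can stall it with an MRA. A fragment of a hypothetical REQ handler; service_timeout is the usual 5-bit encoding of 4.096 us * 2^t, so 20 buys roughly four seconds:

case IB_CM_REQ_RECEIVED:
	if (!resources_ready)	/* hypothetical readiness flag */
		/* Moves the id to IB_CM_MRA_REQ_SENT. */
		return ib_send_cm_mra(cm_id, 20, NULL, 0);
	break;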
2154
2155static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
2156{
2157 switch (cm_mra_get_msg_mraed(mra_msg)) {
2158 case CM_MSG_RESPONSE_REQ:
2159 return cm_acquire_id(mra_msg->remote_comm_id, 0);
2160 case CM_MSG_RESPONSE_REP:
2161 case CM_MSG_RESPONSE_OTHER:
2162 return cm_acquire_id(mra_msg->remote_comm_id,
2163 mra_msg->local_comm_id);
2164 default:
2165 return NULL;
2166 }
2167}
2168
2169static int cm_mra_handler(struct cm_work *work)
2170{
2171 struct cm_id_private *cm_id_priv;
2172 struct cm_mra_msg *mra_msg;
2173 unsigned long flags;
2174 int timeout, ret;
2175
2176 mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
2177 cm_id_priv = cm_acquire_mraed_id(mra_msg);
2178 if (!cm_id_priv)
2179 return -EINVAL;
2180
2181 work->cm_event.private_data = &mra_msg->private_data;
2182 work->cm_event.param.mra_rcvd.service_timeout =
2183 cm_mra_get_service_timeout(mra_msg);
2184 timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
2185 cm_convert_to_ms(cm_id_priv->av.packet_life_time);
2186
2187 spin_lock_irqsave(&cm_id_priv->lock, flags);
2188 switch (cm_id_priv->id.state) {
2189 case IB_CM_REQ_SENT:
2190 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
2191 ib_modify_mad(cm_id_priv->av.port->mad_agent,
2192 (unsigned long) cm_id_priv->msg, timeout))
2193 goto out;
2194 cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
2195 break;
2196 case IB_CM_REP_SENT:
2197 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
2198 ib_modify_mad(cm_id_priv->av.port->mad_agent,
2199 (unsigned long) cm_id_priv->msg, timeout))
2200 goto out;
2201 cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
2202 break;
2203 case IB_CM_ESTABLISHED:
2204 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
2205 cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
2206 ib_modify_mad(cm_id_priv->av.port->mad_agent,
2207 (unsigned long) cm_id_priv->msg, timeout))
2208 goto out;
2209 cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
2210 break;
2211 default:
2212 goto out;
2213 }
2214
2215 cm_id_priv->msg->context[1] = (void *) (unsigned long)
2216 cm_id_priv->id.state;
2217 ret = atomic_inc_and_test(&cm_id_priv->work_count);
2218 if (!ret)
2219 list_add_tail(&work->list, &cm_id_priv->work_list);
2220 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2221
2222 if (ret)
2223 cm_process_work(cm_id_priv, work);
2224 else
2225 cm_deref_id(cm_id_priv);
2226 return 0;
2227out:
2228 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2229 cm_deref_id(cm_id_priv);
2230 return -EINVAL;
2231}
2232
2233static void cm_format_lap(struct cm_lap_msg *lap_msg,
2234 struct cm_id_private *cm_id_priv,
2235 struct ib_sa_path_rec *alternate_path,
2236 const void *private_data,
2237 u8 private_data_len)
2238{
2239 cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
2240 cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
2241 lap_msg->local_comm_id = cm_id_priv->id.local_id;
2242 lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
2243 cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
2244 /* todo: need remote CM response timeout */
2245 cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
2246 lap_msg->alt_local_lid = alternate_path->slid;
2247 lap_msg->alt_remote_lid = alternate_path->dlid;
2248 lap_msg->alt_local_gid = alternate_path->sgid;
2249 lap_msg->alt_remote_gid = alternate_path->dgid;
2250 cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
2251 cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
2252 lap_msg->alt_hop_limit = alternate_path->hop_limit;
2253 cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
2254 cm_lap_set_sl(lap_msg, alternate_path->sl);
2255 cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
2256 cm_lap_set_local_ack_timeout(lap_msg,
2257 min(31, alternate_path->packet_life_time + 1));
2258
2259 if (private_data && private_data_len)
2260 memcpy(lap_msg->private_data, private_data, private_data_len);
2261}
2262
2263int ib_send_cm_lap(struct ib_cm_id *cm_id,
2264 struct ib_sa_path_rec *alternate_path,
2265 const void *private_data,
2266 u8 private_data_len)
2267{
2268 struct cm_id_private *cm_id_priv;
2269 struct ib_mad_send_buf *msg;
2270 struct ib_send_wr *bad_send_wr;
2271 unsigned long flags;
2272 int ret;
2273
2274 if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
2275 return -EINVAL;
2276
2277 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2278 spin_lock_irqsave(&cm_id_priv->lock, flags);
2279 if (cm_id->state != IB_CM_ESTABLISHED ||
2280 cm_id->lap_state != IB_CM_LAP_IDLE) {
2281 ret = -EINVAL;
2282 goto out;
2283 }
2284
2285 ret = cm_alloc_msg(cm_id_priv, &msg);
2286 if (ret)
2287 goto out;
2288
2289 cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
2290 alternate_path, private_data, private_data_len);
2291 msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
2292 msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
2293
2294 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
2295 &msg->send_wr, &bad_send_wr);
2296 if (ret) {
2297 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2298 cm_free_msg(msg);
2299 return ret;
2300 }
2301
2302 cm_id->lap_state = IB_CM_LAP_SENT;
2303 cm_id_priv->msg = msg;
2304
2305out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2306 return ret;
2307}
2308EXPORT_SYMBOL(ib_send_cm_lap);
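A sketch of proposing failover onto an alternate path, assuming alt_path is an ib_sa_path_rec already resolved through an SA query:

/* Legal only while IB_CM_ESTABLISHED with lap_state IB_CM_LAP_IDLE;
 * on success the id waits in IB_CM_LAP_SENT for the peer's APR. */
ret = ib_send_cm_lap(cm_id, &alt_path, NULL, 0);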
2309
2310static void cm_format_path_from_lap(struct ib_sa_path_rec *path,
2311 struct cm_lap_msg *lap_msg)
2312{
2313 memset(path, 0, sizeof *path);
2314 path->dgid = lap_msg->alt_local_gid;
2315 path->sgid = lap_msg->alt_remote_gid;
2316 path->dlid = lap_msg->alt_local_lid;
2317 path->slid = lap_msg->alt_remote_lid;
2318 path->flow_label = cm_lap_get_flow_label(lap_msg);
2319 path->hop_limit = lap_msg->alt_hop_limit;
2320 path->traffic_class = cm_lap_get_traffic_class(lap_msg);
2321 path->reversible = 1;
2322 /* pkey is same as in REQ */
2323 path->sl = cm_lap_get_sl(lap_msg);
2324 path->mtu_selector = IB_SA_EQ;
2325 /* mtu is same as in REQ */
2326 path->rate_selector = IB_SA_EQ;
2327 path->rate = cm_lap_get_packet_rate(lap_msg);
2328 path->packet_life_time_selector = IB_SA_EQ;
2329 path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
2330 path->packet_life_time -= (path->packet_life_time > 0);
2331}
2332
2333static int cm_lap_handler(struct cm_work *work)
2334{
2335 struct cm_id_private *cm_id_priv;
2336 struct cm_lap_msg *lap_msg;
2337 struct ib_cm_lap_event_param *param;
2338 struct ib_mad_send_buf *msg = NULL;
2339 struct ib_send_wr *bad_send_wr;
2340 unsigned long flags;
2341 int ret;
2342
2343 /* todo: verify LAP request and send reject APR if invalid. */
2344 lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
2345 cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
2346 lap_msg->local_comm_id);
2347 if (!cm_id_priv)
2348 return -EINVAL;
2349
2350 param = &work->cm_event.param.lap_rcvd;
2351 param->alternate_path = &work->path[0];
2352 cm_format_path_from_lap(param->alternate_path, lap_msg);
2353 work->cm_event.private_data = &lap_msg->private_data;
2354
2355 spin_lock_irqsave(&cm_id_priv->lock, flags);
2356 if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
2357 goto unlock;
2358
2359 switch (cm_id_priv->id.lap_state) {
2360 case IB_CM_LAP_IDLE:
2361 break;
2362 case IB_CM_MRA_LAP_SENT:
2363 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2364 goto unlock;
2365
2366 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2367 CM_MSG_RESPONSE_OTHER,
2368 cm_id_priv->service_timeout,
2369 cm_id_priv->private_data,
2370 cm_id_priv->private_data_len);
2371 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2372
2373 if (ib_post_send_mad(cm_id_priv->av.port->mad_agent,
2374 &msg->send_wr, &bad_send_wr))
2375 cm_free_msg(msg);
2376 goto deref;
2377 default:
2378 goto unlock;
2379 }
2380
2381 cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
2382 cm_id_priv->tid = lap_msg->hdr.tid;
2383 ret = atomic_inc_and_test(&cm_id_priv->work_count);
2384 if (!ret)
2385 list_add_tail(&work->list, &cm_id_priv->work_list);
2386 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2387
2388 if (ret)
2389 cm_process_work(cm_id_priv, work);
2390 else
2391 cm_deref_id(cm_id_priv);
2392 return 0;
2393
2394unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2395deref: cm_deref_id(cm_id_priv);
2396 return -EINVAL;
2397}
2398
2399static void cm_format_apr(struct cm_apr_msg *apr_msg,
2400 struct cm_id_private *cm_id_priv,
2401 enum ib_cm_apr_status status,
2402 void *info,
2403 u8 info_length,
2404 const void *private_data,
2405 u8 private_data_len)
2406{
2407 cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
2408 apr_msg->local_comm_id = cm_id_priv->id.local_id;
2409 apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
2410 apr_msg->ap_status = (u8) status;
2411
2412 if (info && info_length) {
2413 apr_msg->info_length = info_length;
2414 memcpy(apr_msg->info, info, info_length);
2415 }
2416
2417 if (private_data && private_data_len)
2418 memcpy(apr_msg->private_data, private_data, private_data_len);
2419}
2420
2421int ib_send_cm_apr(struct ib_cm_id *cm_id,
2422 enum ib_cm_apr_status status,
2423 void *info,
2424 u8 info_length,
2425 const void *private_data,
2426 u8 private_data_len)
2427{
2428 struct cm_id_private *cm_id_priv;
2429 struct ib_mad_send_buf *msg;
2430 struct ib_send_wr *bad_send_wr;
2431 unsigned long flags;
2432 int ret;
2433
2434 if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
2435 (info && info_length > IB_CM_APR_INFO_LENGTH))
2436 return -EINVAL;
2437
2438 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2439 spin_lock_irqsave(&cm_id_priv->lock, flags);
2440 if (cm_id->state != IB_CM_ESTABLISHED ||
2441 (cm_id->lap_state != IB_CM_LAP_RCVD &&
2442 cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
2443 ret = -EINVAL;
2444 goto out;
2445 }
2446
2447 ret = cm_alloc_msg(cm_id_priv, &msg);
2448 if (ret)
2449 goto out;
2450
2451 cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
2452 info, info_length, private_data, private_data_len);
2453 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
2454 &msg->send_wr, &bad_send_wr);
2455 if (ret) {
2456 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2457 cm_free_msg(msg);
2458 return ret;
2459 }
2460
2461 cm_id->lap_state = IB_CM_LAP_IDLE;
2462out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2463 return ret;
2464}
2465EXPORT_SYMBOL(ib_send_cm_apr);
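The peer accepts (or rejects) the proposed path from its handler; a fragment, assuming the IB_CM_APR_SUCCESS status from ib_cm.h:

case IB_CM_LAP_RECEIVED:
	/* Accept the alternate path; lap_state returns to
	 * IB_CM_LAP_IDLE once the APR is posted. */
	ret = ib_send_cm_apr(cm_id, IB_CM_APR_SUCCESS,
			     NULL, 0, NULL, 0);
	break;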
2466
2467static int cm_apr_handler(struct cm_work *work)
2468{
2469 struct cm_id_private *cm_id_priv;
2470 struct cm_apr_msg *apr_msg;
2471 unsigned long flags;
2472 int ret;
2473
2474 apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
2475 cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
2476 apr_msg->local_comm_id);
2477 if (!cm_id_priv)
2478 return -EINVAL; /* Unmatched reply. */
2479
2480 work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
2481 work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
2482 work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
2483 work->cm_event.private_data = &apr_msg->private_data;
2484
2485 spin_lock_irqsave(&cm_id_priv->lock, flags);
2486 if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
2487 (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
2488 cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
2489 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2490 goto out;
2491 }
2492 cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
2493 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2494 (unsigned long) cm_id_priv->msg);
2495 cm_id_priv->msg = NULL;
2496
2497 ret = atomic_inc_and_test(&cm_id_priv->work_count);
2498 if (!ret)
2499 list_add_tail(&work->list, &cm_id_priv->work_list);
2500 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2501
2502 if (ret)
2503 cm_process_work(cm_id_priv, work);
2504 else
2505 cm_deref_id(cm_id_priv);
2506 return 0;
2507out:
2508 cm_deref_id(cm_id_priv);
2509 return -EINVAL;
2510}
2511
2512static int cm_timewait_handler(struct cm_work *work)
2513{
2514 struct cm_timewait_info *timewait_info;
2515 struct cm_id_private *cm_id_priv;
2516 unsigned long flags;
2517 int ret;
2518
2519 timewait_info = (struct cm_timewait_info *)work;
2520 cm_cleanup_timewait(timewait_info);
2521
2522 cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
2523 timewait_info->work.remote_id);
2524 if (!cm_id_priv)
2525 return -EINVAL;
2526
2527 spin_lock_irqsave(&cm_id_priv->lock, flags);
2528 if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
2529 cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
2530 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2531 goto out;
2532 }
2533 cm_id_priv->id.state = IB_CM_IDLE;
2534 ret = atomic_inc_and_test(&cm_id_priv->work_count);
2535 if (!ret)
2536 list_add_tail(&work->list, &cm_id_priv->work_list);
2537 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2538
2539 if (ret)
2540 cm_process_work(cm_id_priv, work);
2541 else
2542 cm_deref_id(cm_id_priv);
2543 return 0;
2544out:
2545 cm_deref_id(cm_id_priv);
2546 return -EINVAL;
2547}
2548
2549static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
2550 struct cm_id_private *cm_id_priv,
2551 struct ib_cm_sidr_req_param *param)
2552{
2553 cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
2554 cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
2555 sidr_req_msg->request_id = cm_id_priv->id.local_id;
2556 sidr_req_msg->pkey = param->pkey;
2557 sidr_req_msg->service_id = param->service_id;
2558
2559 if (param->private_data && param->private_data_len)
2560 memcpy(sidr_req_msg->private_data, param->private_data,
2561 param->private_data_len);
2562}
2563
2564int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
2565 struct ib_cm_sidr_req_param *param)
2566{
2567 struct cm_id_private *cm_id_priv;
2568 struct ib_mad_send_buf *msg;
2569 struct ib_send_wr *bad_send_wr;
2570 unsigned long flags;
2571 int ret;
2572
2573 if (!param->path || (param->private_data &&
2574 param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
2575 return -EINVAL;
2576
2577 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2578 ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
2579 if (ret)
2580 goto out;
2581
2582 cm_id->service_id = param->service_id;
2583 cm_id->service_mask = ~0ULL;
2584 cm_id_priv->timeout_ms = param->timeout_ms;
2585 cm_id_priv->max_cm_retries = param->max_cm_retries;
2586 ret = cm_alloc_msg(cm_id_priv, &msg);
2587 if (ret)
2588 goto out;
2589
2590 cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
2591 param);
2592 msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
2593 msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
2594
2595 spin_lock_irqsave(&cm_id_priv->lock, flags);
2596 if (cm_id->state == IB_CM_IDLE)
2597 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
2598 &msg->send_wr, &bad_send_wr);
2599 else
2600 ret = -EINVAL;
2601
2602 if (ret) {
2603 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2604 cm_free_msg(msg);
2605 goto out;
2606 }
2607 cm_id->state = IB_CM_SIDR_REQ_SENT;
2608 cm_id_priv->msg = msg;
2609 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2610out:
2611 return ret;
2612}
2613EXPORT_SYMBOL(ib_send_cm_sidr_req);
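A sketch of a SIDR request; each field below is consumed by the send path above, with path_rec assumed to be a resolved ib_sa_path_rec and my_service_id hypothetical:

struct ib_cm_sidr_req_param param = {
	.path		= &path_rec,
	.service_id	= my_service_id,
	.pkey		= path_rec.pkey,
	.timeout_ms	= 1000,		/* per-MAD response timeout */
	.max_cm_retries	= 3,
};

/* The id must still be IB_CM_IDLE when the MAD is posted. */
ret = ib_send_cm_sidr_req(cm_id, &param);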
2614
2615static void cm_format_sidr_req_event(struct cm_work *work,
2616 struct ib_cm_id *listen_id)
2617{
2618 struct cm_sidr_req_msg *sidr_req_msg;
2619 struct ib_cm_sidr_req_event_param *param;
2620
2621 sidr_req_msg = (struct cm_sidr_req_msg *)
2622 work->mad_recv_wc->recv_buf.mad;
2623 param = &work->cm_event.param.sidr_req_rcvd;
2624 param->pkey = sidr_req_msg->pkey;
2625 param->listen_id = listen_id;
2626 param->device = work->port->mad_agent->device;
2627 param->port = work->port->port_num;
2628 work->cm_event.private_data = &sidr_req_msg->private_data;
2629}
2630
2631static int cm_sidr_req_handler(struct cm_work *work)
2632{
2633 struct ib_cm_id *cm_id;
2634 struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
2635 struct cm_sidr_req_msg *sidr_req_msg;
2636 struct ib_wc *wc;
2637 unsigned long flags;
2638
2639 cm_id = ib_create_cm_id(NULL, NULL);
2640 if (IS_ERR(cm_id))
2641 return PTR_ERR(cm_id);
2642 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2643
2644 /* Record SGID/SLID and request ID for lookup. */
2645 sidr_req_msg = (struct cm_sidr_req_msg *)
2646 work->mad_recv_wc->recv_buf.mad;
2647 wc = work->mad_recv_wc->wc;
2648 cm_id_priv->av.dgid.global.subnet_prefix = wc->slid;
2649 cm_id_priv->av.dgid.global.interface_id = 0;
2650 cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
2651 &cm_id_priv->av);
2652 cm_id_priv->id.remote_id = sidr_req_msg->request_id;
2653 cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
2654 cm_id_priv->tid = sidr_req_msg->hdr.tid;
2655 atomic_inc(&cm_id_priv->work_count);
2656
2657 spin_lock_irqsave(&cm.lock, flags);
2658 cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
2659 if (cur_cm_id_priv) {
2660 spin_unlock_irqrestore(&cm.lock, flags);
2661 goto out; /* Duplicate message. */
2662 }
2663 cur_cm_id_priv = cm_find_listen(sidr_req_msg->service_id);
2664 if (!cur_cm_id_priv) {
2665 rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
2666 spin_unlock_irqrestore(&cm.lock, flags);
2667 /* todo: reply with no match */
2668 goto out; /* No match. */
2669 }
2670 atomic_inc(&cur_cm_id_priv->refcount);
2671 spin_unlock_irqrestore(&cm.lock, flags);
2672
2673 cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
2674 cm_id_priv->id.context = cur_cm_id_priv->id.context;
2675 cm_id_priv->id.service_id = sidr_req_msg->service_id;
2676 cm_id_priv->id.service_mask = ~0ULL;
2677
2678 cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
2679 cm_process_work(cm_id_priv, work);
2680 cm_deref_id(cur_cm_id_priv);
2681 return 0;
2682out:
2683 ib_destroy_cm_id(&cm_id_priv->id);
2684 return -EINVAL;
2685}
2686
2687static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
2688 struct cm_id_private *cm_id_priv,
2689 struct ib_cm_sidr_rep_param *param)
2690{
2691 cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
2692 cm_id_priv->tid);
2693 sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
2694 sidr_rep_msg->status = param->status;
2695 cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
2696 sidr_rep_msg->service_id = cm_id_priv->id.service_id;
2697 sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
2698
2699 if (param->info && param->info_length)
2700 memcpy(sidr_rep_msg->info, param->info, param->info_length);
2701
2702 if (param->private_data && param->private_data_len)
2703 memcpy(sidr_rep_msg->private_data, param->private_data,
2704 param->private_data_len);
2705}
2706
2707int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
2708 struct ib_cm_sidr_rep_param *param)
2709{
2710 struct cm_id_private *cm_id_priv;
2711 struct ib_mad_send_buf *msg;
2712 struct ib_send_wr *bad_send_wr;
2713 unsigned long flags;
2714 int ret;
2715
2716 if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
2717 (param->private_data &&
2718 param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
2719 return -EINVAL;
2720
2721 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2722 spin_lock_irqsave(&cm_id_priv->lock, flags);
2723 if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
2724 ret = -EINVAL;
2725 goto error;
2726 }
2727
2728 ret = cm_alloc_msg(cm_id_priv, &msg);
2729 if (ret)
2730 goto error;
2731
2732 cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
2733 param);
2734 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
2735 &msg->send_wr, &bad_send_wr);
2736 if (ret) {
2737 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2738 cm_free_msg(msg);
2739 return ret;
2740 }
2741 cm_id->state = IB_CM_IDLE;
2742 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2743
2744 spin_lock_irqsave(&cm.lock, flags);
2745 rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
2746 spin_unlock_irqrestore(&cm.lock, flags);
2747 return 0;
2748
2749error: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2750 return ret;
2751}
2752EXPORT_SYMBOL(ib_send_cm_sidr_rep);
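The listener answers from its IB_CM_SIDR_REQ_RECEIVED handler with the QPN and Q_Key of the service's UD queue pair; a sketch, assuming the IB_SIDR_SUCCESS status from ib_cm.h and hypothetical ud_qp/MY_QKEY names:

struct ib_cm_sidr_rep_param rep = {
	.status	= IB_SIDR_SUCCESS,
	.qp_num	= ud_qp->qp_num,
	.qkey	= MY_QKEY,
};

/* Sends the reply and returns the id to IB_CM_IDLE. */
ret = ib_send_cm_sidr_rep(cm_id, &rep);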
2753
2754static void cm_format_sidr_rep_event(struct cm_work *work)
2755{
2756 struct cm_sidr_rep_msg *sidr_rep_msg;
2757 struct ib_cm_sidr_rep_event_param *param;
2758
2759 sidr_rep_msg = (struct cm_sidr_rep_msg *)
2760 work->mad_recv_wc->recv_buf.mad;
2761 param = &work->cm_event.param.sidr_rep_rcvd;
2762 param->status = sidr_rep_msg->status;
2763 param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
2764 param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
2765 param->info = &sidr_rep_msg->info;
2766 param->info_len = sidr_rep_msg->info_length;
2767 work->cm_event.private_data = &sidr_rep_msg->private_data;
2768}
2769
2770static int cm_sidr_rep_handler(struct cm_work *work)
2771{
2772 struct cm_sidr_rep_msg *sidr_rep_msg;
2773 struct cm_id_private *cm_id_priv;
2774 unsigned long flags;
2775
2776 sidr_rep_msg = (struct cm_sidr_rep_msg *)
2777 work->mad_recv_wc->recv_buf.mad;
2778 cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
2779 if (!cm_id_priv)
2780 return -EINVAL; /* Unmatched reply. */
2781
2782 spin_lock_irqsave(&cm_id_priv->lock, flags);
2783 if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
2784 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2785 goto out;
2786 }
2787 cm_id_priv->id.state = IB_CM_IDLE;
2788 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2789 (unsigned long) cm_id_priv->msg);
2790 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2791
2792 cm_format_sidr_rep_event(work);
2793 cm_process_work(cm_id_priv, work);
2794 return 0;
2795out:
2796 cm_deref_id(cm_id_priv);
2797 return -EINVAL;
2798}
2799
2800static void cm_process_send_error(struct ib_mad_send_buf *msg,
2801 enum ib_wc_status wc_status)
2802{
2803 struct cm_id_private *cm_id_priv;
2804 struct ib_cm_event cm_event;
2805 enum ib_cm_state state;
2806 unsigned long flags;
2807 int ret;
2808
2809 memset(&cm_event, 0, sizeof cm_event);
2810 cm_id_priv = msg->context[0];
2811
2812 /* Discard old sends or ones without a response. */
2813 spin_lock_irqsave(&cm_id_priv->lock, flags);
2814 state = (enum ib_cm_state) (unsigned long) msg->context[1];
2815 if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
2816 goto discard;
2817
2818 switch (state) {
2819 case IB_CM_REQ_SENT:
2820 case IB_CM_MRA_REQ_RCVD:
2821 cm_reset_to_idle(cm_id_priv);
2822 cm_event.event = IB_CM_REQ_ERROR;
2823 break;
2824 case IB_CM_REP_SENT:
2825 case IB_CM_MRA_REP_RCVD:
2826 cm_reset_to_idle(cm_id_priv);
2827 cm_event.event = IB_CM_REP_ERROR;
2828 break;
2829 case IB_CM_DREQ_SENT:
2830 cm_enter_timewait(cm_id_priv);
2831 cm_event.event = IB_CM_DREQ_ERROR;
2832 break;
2833 case IB_CM_SIDR_REQ_SENT:
2834 cm_id_priv->id.state = IB_CM_IDLE;
2835 cm_event.event = IB_CM_SIDR_REQ_ERROR;
2836 break;
2837 default:
2838 goto discard;
2839 }
2840 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2841 cm_event.param.send_status = wc_status;
2842
2843 /* No other events can occur on the cm_id at this point. */
2844 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
2845 cm_free_msg(msg);
2846 if (ret)
2847 ib_destroy_cm_id(&cm_id_priv->id);
2848 return;
2849discard:
2850 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2851 cm_free_msg(msg);
2852}
2853
2854static void cm_send_handler(struct ib_mad_agent *mad_agent,
2855 struct ib_mad_send_wc *mad_send_wc)
2856{
2857 struct ib_mad_send_buf *msg;
2858
2859 msg = (struct ib_mad_send_buf *)(unsigned long)mad_send_wc->wr_id;
2860
2861 switch (mad_send_wc->status) {
2862 case IB_WC_SUCCESS:
2863 case IB_WC_WR_FLUSH_ERR:
2864 cm_free_msg(msg);
2865 break;
2866 default:
2867 if (msg->context[0] && msg->context[1])
2868 cm_process_send_error(msg, mad_send_wc->status);
2869 else
2870 cm_free_msg(msg);
2871 break;
2872 }
2873}
2874
2875static void cm_work_handler(void *data)
2876{
2877 struct cm_work *work = data;
2878 int ret;
2879
2880 switch (work->cm_event.event) {
2881 case IB_CM_REQ_RECEIVED:
2882 ret = cm_req_handler(work);
2883 break;
2884 case IB_CM_MRA_RECEIVED:
2885 ret = cm_mra_handler(work);
2886 break;
2887 case IB_CM_REJ_RECEIVED:
2888 ret = cm_rej_handler(work);
2889 break;
2890 case IB_CM_REP_RECEIVED:
2891 ret = cm_rep_handler(work);
2892 break;
2893 case IB_CM_RTU_RECEIVED:
2894 ret = cm_rtu_handler(work);
2895 break;
2896 case IB_CM_USER_ESTABLISHED:
2897 ret = cm_establish_handler(work);
2898 break;
2899 case IB_CM_DREQ_RECEIVED:
2900 ret = cm_dreq_handler(work);
2901 break;
2902 case IB_CM_DREP_RECEIVED:
2903 ret = cm_drep_handler(work);
2904 break;
2905 case IB_CM_SIDR_REQ_RECEIVED:
2906 ret = cm_sidr_req_handler(work);
2907 break;
2908 case IB_CM_SIDR_REP_RECEIVED:
2909 ret = cm_sidr_rep_handler(work);
2910 break;
2911 case IB_CM_LAP_RECEIVED:
2912 ret = cm_lap_handler(work);
2913 break;
2914 case IB_CM_APR_RECEIVED:
2915 ret = cm_apr_handler(work);
2916 break;
2917 case IB_CM_TIMEWAIT_EXIT:
2918 ret = cm_timewait_handler(work);
2919 break;
2920 default:
2921 ret = -EINVAL;
2922 break;
2923 }
2924 if (ret)
2925 cm_free_work(work);
2926}
2927
2928int ib_cm_establish(struct ib_cm_id *cm_id)
2929{
2930 struct cm_id_private *cm_id_priv;
2931 struct cm_work *work;
2932 unsigned long flags;
2933 int ret = 0;
2934
2935 work = kmalloc(sizeof *work, GFP_ATOMIC);
2936 if (!work)
2937 return -ENOMEM;
2938
2939 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2940 spin_lock_irqsave(&cm_id_priv->lock, flags);
2941	switch (cm_id->state) {
2943 case IB_CM_REP_SENT:
2944 case IB_CM_MRA_REP_RCVD:
2945 cm_id->state = IB_CM_ESTABLISHED;
2946 break;
2947 case IB_CM_ESTABLISHED:
2948 ret = -EISCONN;
2949 break;
2950 default:
2951 ret = -EINVAL;
2952 break;
2953 }
2954 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2955
2956 if (ret) {
2957 kfree(work);
2958 goto out;
2959 }
2960
2961 /*
2962 * The CM worker thread may try to destroy the cm_id before it
2963 * can execute this work item. To prevent potential deadlock,
2964 * we need to find the cm_id once we're in the context of the
2965 * worker thread, rather than holding a reference on it.
2966 */
2967 INIT_WORK(&work->work, cm_work_handler, work);
2968 work->local_id = cm_id->local_id;
2969 work->remote_id = cm_id->remote_id;
2970 work->mad_recv_wc = NULL;
2971 work->cm_event.event = IB_CM_USER_ESTABLISHED;
2972 queue_work(cm.wq, &work->work);
2973out:
2974 return ret;
2975}
2976EXPORT_SYMBOL(ib_cm_establish);
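A passive-side consumer calls this when traffic lands on the new QP before the peer's RTU has been processed; a sketch:

/* First receive completed on the connected QP: force the
 * REP_SENT/MRA_REP_RCVD -> ESTABLISHED transition ourselves. */
ret = ib_cm_establish(cm_id);
if (ret && ret != -EISCONN)	/* -EISCONN: the RTU won the race */
	goto fail;		/* hypothetical error path */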
2977
2978static void cm_recv_handler(struct ib_mad_agent *mad_agent,
2979 struct ib_mad_recv_wc *mad_recv_wc)
2980{
2981 struct cm_work *work;
2982 enum ib_cm_event_type event;
2983 int paths = 0;
2984
2985 switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
2986 case CM_REQ_ATTR_ID:
2987 paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)->
2988 alt_local_lid != 0);
2989 event = IB_CM_REQ_RECEIVED;
2990 break;
2991 case CM_MRA_ATTR_ID:
2992 event = IB_CM_MRA_RECEIVED;
2993 break;
2994 case CM_REJ_ATTR_ID:
2995 event = IB_CM_REJ_RECEIVED;
2996 break;
2997 case CM_REP_ATTR_ID:
2998 event = IB_CM_REP_RECEIVED;
2999 break;
3000 case CM_RTU_ATTR_ID:
3001 event = IB_CM_RTU_RECEIVED;
3002 break;
3003 case CM_DREQ_ATTR_ID:
3004 event = IB_CM_DREQ_RECEIVED;
3005 break;
3006 case CM_DREP_ATTR_ID:
3007 event = IB_CM_DREP_RECEIVED;
3008 break;
3009 case CM_SIDR_REQ_ATTR_ID:
3010 event = IB_CM_SIDR_REQ_RECEIVED;
3011 break;
3012 case CM_SIDR_REP_ATTR_ID:
3013 event = IB_CM_SIDR_REP_RECEIVED;
3014 break;
3015 case CM_LAP_ATTR_ID:
3016 paths = 1;
3017 event = IB_CM_LAP_RECEIVED;
3018 break;
3019 case CM_APR_ATTR_ID:
3020 event = IB_CM_APR_RECEIVED;
3021 break;
3022 default:
3023 ib_free_recv_mad(mad_recv_wc);
3024 return;
3025 }
3026
3027 work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
3028 GFP_KERNEL);
3029 if (!work) {
3030 ib_free_recv_mad(mad_recv_wc);
3031 return;
3032 }
3033
3034 INIT_WORK(&work->work, cm_work_handler, work);
3035 work->cm_event.event = event;
3036 work->mad_recv_wc = mad_recv_wc;
3037 work->port = (struct cm_port *)mad_agent->context;
3038 queue_work(cm.wq, &work->work);
3039}
3040
3041static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
3042 struct ib_qp_attr *qp_attr,
3043 int *qp_attr_mask)
3044{
3045 unsigned long flags;
3046 int ret;
3047
3048 spin_lock_irqsave(&cm_id_priv->lock, flags);
3049 switch (cm_id_priv->id.state) {
3050 case IB_CM_REQ_SENT:
3051 case IB_CM_MRA_REQ_RCVD:
3052 case IB_CM_REQ_RCVD:
3053 case IB_CM_MRA_REQ_SENT:
3054 case IB_CM_REP_RCVD:
3055 case IB_CM_MRA_REP_SENT:
3056 case IB_CM_REP_SENT:
3057 case IB_CM_MRA_REP_RCVD:
3058 case IB_CM_ESTABLISHED:
3059 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
3060 IB_QP_PKEY_INDEX | IB_QP_PORT;
3061 qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
3062 if (cm_id_priv->responder_resources)
3063 qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE |
3064 IB_ACCESS_REMOTE_READ;
3065 qp_attr->pkey_index = cm_id_priv->av.pkey_index;
3066 qp_attr->port_num = cm_id_priv->av.port->port_num;
3067 ret = 0;
3068 break;
3069 default:
3070 ret = -EINVAL;
3071 break;
3072 }
3073 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3074 return ret;
3075}
3076
3077static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3078 struct ib_qp_attr *qp_attr,
3079 int *qp_attr_mask)
3080{
3081 unsigned long flags;
3082 int ret;
3083
3084 spin_lock_irqsave(&cm_id_priv->lock, flags);
3085 switch (cm_id_priv->id.state) {
3086 case IB_CM_REQ_RCVD:
3087 case IB_CM_MRA_REQ_SENT:
3088 case IB_CM_REP_RCVD:
3089 case IB_CM_MRA_REP_SENT:
3090 case IB_CM_REP_SENT:
3091 case IB_CM_MRA_REP_RCVD:
3092 case IB_CM_ESTABLISHED:
3093 *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
3094 IB_QP_DEST_QPN | IB_QP_RQ_PSN |
3095 IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
3096 qp_attr->ah_attr = cm_id_priv->av.ah_attr;
3097 qp_attr->path_mtu = cm_id_priv->path_mtu;
3098 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
3099 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
3100 qp_attr->max_dest_rd_atomic = cm_id_priv->responder_resources;
3101 qp_attr->min_rnr_timer = 0;
3102 if (cm_id_priv->alt_av.ah_attr.dlid) {
3103 *qp_attr_mask |= IB_QP_ALT_PATH;
3104 qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3105 }
3106 ret = 0;
3107 break;
3108 default:
3109 ret = -EINVAL;
3110 break;
3111 }
3112 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3113 return ret;
3114}
3115
3116static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3117 struct ib_qp_attr *qp_attr,
3118 int *qp_attr_mask)
3119{
3120 unsigned long flags;
3121 int ret;
3122
3123 spin_lock_irqsave(&cm_id_priv->lock, flags);
3124 switch (cm_id_priv->id.state) {
3125 case IB_CM_REP_RCVD:
3126 case IB_CM_MRA_REP_SENT:
3127 case IB_CM_REP_SENT:
3128 case IB_CM_MRA_REP_RCVD:
3129 case IB_CM_ESTABLISHED:
3130 *qp_attr_mask = IB_QP_STATE | IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
3131 IB_QP_RNR_RETRY | IB_QP_SQ_PSN |
3132 IB_QP_MAX_QP_RD_ATOMIC;
3133 qp_attr->timeout = cm_id_priv->local_ack_timeout;
3134 qp_attr->retry_cnt = cm_id_priv->retry_count;
3135 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3136 qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
3137 qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
3138 if (cm_id_priv->alt_av.ah_attr.dlid) {
3139 *qp_attr_mask |= IB_QP_PATH_MIG_STATE;
3140 qp_attr->path_mig_state = IB_MIG_REARM;
3141 }
3142 ret = 0;
3143 break;
3144 default:
3145 ret = -EINVAL;
3146 break;
3147 }
3148 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3149 return ret;
3150}
3151
3152int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
3153 struct ib_qp_attr *qp_attr,
3154 int *qp_attr_mask)
3155{
3156 struct cm_id_private *cm_id_priv;
3157 int ret;
3158
3159 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3160 switch (qp_attr->qp_state) {
3161 case IB_QPS_INIT:
3162 ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
3163 break;
3164 case IB_QPS_RTR:
3165 ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
3166 break;
3167 case IB_QPS_RTS:
3168 ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
3169 break;
3170 default:
3171 ret = -EINVAL;
3172 break;
3173 }
3174 return ret;
3175}
3176EXPORT_SYMBOL(ib_cm_init_qp_attr);
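Tying the three helpers above together, a consumer picks only the target state and lets the CM fill in every state-dependent attribute; a sketch with a hypothetical wrapper:

static int my_modify_qp(struct ib_cm_id *cm_id, struct ib_qp *qp,
			enum ib_qp_state state)
{
	struct ib_qp_attr attr;
	int mask, ret;

	memset(&attr, 0, sizeof attr);
	attr.qp_state = state;		/* IB_QPS_INIT, _RTR or _RTS */
	ret = ib_cm_init_qp_attr(cm_id, &attr, &mask);
	if (ret)
		return ret;
	return ib_modify_qp(qp, &attr, mask);
}

Called once per state on the way up: INIT after creating the QP, RTR once the REQ/REP exchange has supplied the remote QPN and PSN, and RTS before posting the RTU.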
3177
3178static u64 cm_get_ca_guid(struct ib_device *device)
3179{
3180 struct ib_device_attr *device_attr;
3181 u64 guid;
3182 int ret;
3183
3184 device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
3185 if (!device_attr)
3186 return 0;
3187
3188 ret = ib_query_device(device, device_attr);
3189 guid = ret ? 0 : device_attr->node_guid;
3190 kfree(device_attr);
3191 return guid;
3192}
3193
3194static void cm_add_one(struct ib_device *device)
3195{
3196 struct cm_device *cm_dev;
3197 struct cm_port *port;
3198 struct ib_mad_reg_req reg_req = {
3199 .mgmt_class = IB_MGMT_CLASS_CM,
3200 .mgmt_class_version = IB_CM_CLASS_VERSION
3201 };
3202 struct ib_port_modify port_modify = {
3203 .set_port_cap_mask = IB_PORT_CM_SUP
3204 };
3205 unsigned long flags;
3206 int ret;
3207 u8 i;
3208
3209 cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) *
3210 device->phys_port_cnt, GFP_KERNEL);
3211 if (!cm_dev)
3212 return;
3213
3214 cm_dev->device = device;
3215 cm_dev->ca_guid = cm_get_ca_guid(device);
3216 if (!cm_dev->ca_guid)
3217 goto error1;
3218
3219 set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
3220 for (i = 1; i <= device->phys_port_cnt; i++) {
3221 port = &cm_dev->port[i-1];
3222 port->cm_dev = cm_dev;
3223 port->port_num = i;
3224 port->mad_agent = ib_register_mad_agent(device, i,
3225 IB_QPT_GSI,
3226 &reg_req,
3227 0,
3228 cm_send_handler,
3229 cm_recv_handler,
3230 port);
3231 if (IS_ERR(port->mad_agent))
3232 goto error2;
3233
3234 ret = ib_modify_port(device, i, 0, &port_modify);
3235 if (ret)
3236 goto error3;
3237 }
3238 ib_set_client_data(device, &cm_client, cm_dev);
3239
3240 write_lock_irqsave(&cm.device_lock, flags);
3241 list_add_tail(&cm_dev->list, &cm.device_list);
3242 write_unlock_irqrestore(&cm.device_lock, flags);
3243 return;
3244
3245error3:
3246 ib_unregister_mad_agent(port->mad_agent);
3247error2:
3248 port_modify.set_port_cap_mask = 0;
3249 port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
3250 while (--i) {
3251 port = &cm_dev->port[i-1];
3252 ib_modify_port(device, port->port_num, 0, &port_modify);
3253 ib_unregister_mad_agent(port->mad_agent);
3254 }
3255error1:
3256 kfree(cm_dev);
3257}
3258
3259static void cm_remove_one(struct ib_device *device)
3260{
3261 struct cm_device *cm_dev;
3262 struct cm_port *port;
3263 struct ib_port_modify port_modify = {
3264 .clr_port_cap_mask = IB_PORT_CM_SUP
3265 };
3266 unsigned long flags;
3267 int i;
3268
3269 cm_dev = ib_get_client_data(device, &cm_client);
3270 if (!cm_dev)
3271 return;
3272
3273 write_lock_irqsave(&cm.device_lock, flags);
3274 list_del(&cm_dev->list);
3275 write_unlock_irqrestore(&cm.device_lock, flags);
3276
3277 for (i = 1; i <= device->phys_port_cnt; i++) {
3278 port = &cm_dev->port[i-1];
3279 ib_modify_port(device, port->port_num, 0, &port_modify);
3280 ib_unregister_mad_agent(port->mad_agent);
3281 }
3282 kfree(cm_dev);
3283}
3284
3285static int __init ib_cm_init(void)
3286{
3287 int ret;
3288
3289 memset(&cm, 0, sizeof cm);
3290 INIT_LIST_HEAD(&cm.device_list);
3291 rwlock_init(&cm.device_lock);
3292 spin_lock_init(&cm.lock);
3293 cm.listen_service_table = RB_ROOT;
3294 cm.listen_service_id = __constant_be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
3295 cm.remote_id_table = RB_ROOT;
3296 cm.remote_qp_table = RB_ROOT;
3297 cm.remote_sidr_table = RB_ROOT;
3298 idr_init(&cm.local_id_table);
3299 idr_pre_get(&cm.local_id_table, GFP_KERNEL);
3300
3301 cm.wq = create_workqueue("ib_cm");
3302 if (!cm.wq)
3303 return -ENOMEM;
3304
3305 ret = ib_register_client(&cm_client);
3306 if (ret)
3307 goto error;
3308
3309 return 0;
3310error:
3311 destroy_workqueue(cm.wq);
3312 return ret;
3313}
3314
3315static void __exit ib_cm_cleanup(void)
3316{
3317 flush_workqueue(cm.wq);
3318 destroy_workqueue(cm.wq);
3319 ib_unregister_client(&cm_client);
3320}
3321
3322module_init(ib_cm_init);
3323module_exit(ib_cm_cleanup);
3324
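The unwind in cm_add_one() above depends on ports being numbered from 1: when setup fails at port i, ports 1 through i-1 are fully initialized and are torn down in reverse by the while (--i) loop, which stops before touching the unused index 0. A minimal standalone sketch of the same idiom; setup_port() and teardown_port() are hypothetical stand-ins, not kernel API:

#include <stdio.h>

/* Hypothetical per-port setup/teardown; not kernel API. */
static int setup_port(int i)
{
	printf("setup port %d\n", i);
	return i == 3 ? -1 : 0;		/* simulate a failure at port 3 */
}

static void teardown_port(int i)
{
	printf("teardown port %d\n", i);
}

static int init_ports(int nports)
{
	int i;

	for (i = 1; i <= nports; i++)
		if (setup_port(i))
			goto error;
	return 0;
error:
	while (--i)			/* unwind ports i-1 .. 1 in reverse */
		teardown_port(i);
	return -1;
}

int main(void)
{
	return init_ports(4) ? 1 : 0;
}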
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
new file mode 100644
index 000000000000..15a309a77b2b
--- /dev/null
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -0,0 +1,819 @@
1/*
2 * Copyright (c) 2004 Intel Corporation. All rights reserved.
3 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
4 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34#if !defined(CM_MSGS_H)
35#define CM_MSGS_H
36
37#include <ib_mad.h>
38
39/*
40 * Parameters to routines below should be in network-byte order, and values
41 * are returned in network-byte order.
42 */
43
44#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */
45
46enum cm_msg_attr_id {
47 CM_REQ_ATTR_ID = __constant_htons(0x0010),
48 CM_MRA_ATTR_ID = __constant_htons(0x0011),
49 CM_REJ_ATTR_ID = __constant_htons(0x0012),
50 CM_REP_ATTR_ID = __constant_htons(0x0013),
51 CM_RTU_ATTR_ID = __constant_htons(0x0014),
52 CM_DREQ_ATTR_ID = __constant_htons(0x0015),
53 CM_DREP_ATTR_ID = __constant_htons(0x0016),
54 CM_SIDR_REQ_ATTR_ID = __constant_htons(0x0017),
55 CM_SIDR_REP_ATTR_ID = __constant_htons(0x0018),
56 CM_LAP_ATTR_ID = __constant_htons(0x0019),
57 CM_APR_ATTR_ID = __constant_htons(0x001A)
58};
59
60enum cm_msg_sequence {
61 CM_MSG_SEQUENCE_REQ,
62 CM_MSG_SEQUENCE_LAP,
63 CM_MSG_SEQUENCE_DREQ,
64 CM_MSG_SEQUENCE_SIDR
65};
66
67struct cm_req_msg {
68 struct ib_mad_hdr hdr;
69
70 u32 local_comm_id;
71 u32 rsvd4;
72 u64 service_id;
73 u64 local_ca_guid;
74 u32 rsvd24;
75 u32 local_qkey;
76 /* local QPN:24, responder resources:8 */
77 u32 offset32;
78 /* local EECN:24, initiator depth:8 */
79 u32 offset36;
80 /*
81 * remote EECN:24, remote CM response timeout:5,
82 * transport service type:2, end-to-end flow control:1
83 */
84 u32 offset40;
85 /* starting PSN:24, local CM response timeout:5, retry count:3 */
86 u32 offset44;
87 u16 pkey;
88 /* path MTU:4, RDC exists:1, RNR retry count:3. */
89 u8 offset50;
90 /* max CM Retries:4, SRQ:1, rsvd:3 */
91 u8 offset51;
92
93 u16 primary_local_lid;
94 u16 primary_remote_lid;
95 union ib_gid primary_local_gid;
96 union ib_gid primary_remote_gid;
97 /* flow label:20, rsvd:6, packet rate:6 */
98 u32 primary_offset88;
99 u8 primary_traffic_class;
100 u8 primary_hop_limit;
101 /* SL:4, subnet local:1, rsvd:3 */
102 u8 primary_offset94;
103 /* local ACK timeout:5, rsvd:3 */
104 u8 primary_offset95;
105
106 u16 alt_local_lid;
107 u16 alt_remote_lid;
108 union ib_gid alt_local_gid;
109 union ib_gid alt_remote_gid;
110 /* flow label:20, rsvd:6, packet rate:6 */
111 u32 alt_offset132;
112 u8 alt_traffic_class;
113 u8 alt_hop_limit;
114 /* SL:4, subnet local:1, rsvd:3 */
115 u8 alt_offset138;
116 /* local ACK timeout:5, rsvd:3 */
117 u8 alt_offset139;
118
119 u8 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE];
120
121} __attribute__ ((packed));
122
123static inline u32 cm_req_get_local_qpn(struct cm_req_msg *req_msg)
124{
125 return cpu_to_be32(be32_to_cpu(req_msg->offset32) >> 8);
126}
127
128static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, u32 qpn)
129{
130 req_msg->offset32 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
131 (be32_to_cpu(req_msg->offset32) &
132 0x000000FF));
133}
134
135static inline u8 cm_req_get_resp_res(struct cm_req_msg *req_msg)
136{
137 return (u8) be32_to_cpu(req_msg->offset32);
138}
139
140static inline void cm_req_set_resp_res(struct cm_req_msg *req_msg, u8 resp_res)
141{
142 req_msg->offset32 = cpu_to_be32(resp_res |
143 (be32_to_cpu(req_msg->offset32) &
144 0xFFFFFF00));
145}
146
147static inline u8 cm_req_get_init_depth(struct cm_req_msg *req_msg)
148{
149 return (u8) be32_to_cpu(req_msg->offset36);
150}
151
152static inline void cm_req_set_init_depth(struct cm_req_msg *req_msg,
153 u8 init_depth)
154{
155 req_msg->offset36 = cpu_to_be32(init_depth |
156 (be32_to_cpu(req_msg->offset36) &
157 0xFFFFFF00));
158}
159
160static inline u8 cm_req_get_remote_resp_timeout(struct cm_req_msg *req_msg)
161{
162 return (u8) ((be32_to_cpu(req_msg->offset40) & 0xF8) >> 3);
163}
164
165static inline void cm_req_set_remote_resp_timeout(struct cm_req_msg *req_msg,
166 u8 resp_timeout)
167{
168 req_msg->offset40 = cpu_to_be32((resp_timeout << 3) |
169 (be32_to_cpu(req_msg->offset40) &
170 0xFFFFFF07));
171}
172
173static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
174{
175 u8 transport_type = (u8) (be32_to_cpu(req_msg->offset40) & 0x06) >> 1;
176 switch(transport_type) {
177 case 0: return IB_QPT_RC;
178 case 1: return IB_QPT_UC;
179 default: return 0;
180 }
181}
182
183static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
184 enum ib_qp_type qp_type)
185{
186 switch(qp_type) {
187 case IB_QPT_UC:
188 req_msg->offset40 = cpu_to_be32((be32_to_cpu(
189 req_msg->offset40) &
190						0xFFFFFFF9) | 0x2);
		break;
191	default:
192 req_msg->offset40 = cpu_to_be32(be32_to_cpu(
193 req_msg->offset40) &
194 0xFFFFFFF9);
195 }
196}
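/*
 * Note on the two routines above: bits 2:1 of offset40 carry the REQ's
 * transport service type (0 = RC, 1 = UC); the 0x06 mask in the getter
 * and the 0xFFFFFFF9 mask in the setter isolate exactly those two bits
 * while leaving the timeout and flow-control fields untouched.
 */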
197
198static inline u8 cm_req_get_flow_ctrl(struct cm_req_msg *req_msg)
199{
200 return be32_to_cpu(req_msg->offset40) & 0x1;
201}
202
203static inline void cm_req_set_flow_ctrl(struct cm_req_msg *req_msg,
204 u8 flow_ctrl)
205{
206 req_msg->offset40 = cpu_to_be32((flow_ctrl & 0x1) |
207 (be32_to_cpu(req_msg->offset40) &
208 0xFFFFFFFE));
209}
210
211static inline u32 cm_req_get_starting_psn(struct cm_req_msg *req_msg)
212{
213 return cpu_to_be32(be32_to_cpu(req_msg->offset44) >> 8);
214}
215
216static inline void cm_req_set_starting_psn(struct cm_req_msg *req_msg,
217 u32 starting_psn)
218{
219 req_msg->offset44 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
220 (be32_to_cpu(req_msg->offset44) & 0x000000FF));
221}
222
223static inline u8 cm_req_get_local_resp_timeout(struct cm_req_msg *req_msg)
224{
225 return (u8) ((be32_to_cpu(req_msg->offset44) & 0xF8) >> 3);
226}
227
228static inline void cm_req_set_local_resp_timeout(struct cm_req_msg *req_msg,
229 u8 resp_timeout)
230{
231 req_msg->offset44 = cpu_to_be32((resp_timeout << 3) |
232 (be32_to_cpu(req_msg->offset44) & 0xFFFFFF07));
233}
234
235static inline u8 cm_req_get_retry_count(struct cm_req_msg *req_msg)
236{
237 return (u8) (be32_to_cpu(req_msg->offset44) & 0x7);
238}
239
240static inline void cm_req_set_retry_count(struct cm_req_msg *req_msg,
241 u8 retry_count)
242{
243 req_msg->offset44 = cpu_to_be32((retry_count & 0x7) |
244 (be32_to_cpu(req_msg->offset44) & 0xFFFFFFF8));
245}
246
247static inline u8 cm_req_get_path_mtu(struct cm_req_msg *req_msg)
248{
249 return req_msg->offset50 >> 4;
250}
251
252static inline void cm_req_set_path_mtu(struct cm_req_msg *req_msg, u8 path_mtu)
253{
254 req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF) | (path_mtu << 4));
255}
256
257static inline u8 cm_req_get_rnr_retry_count(struct cm_req_msg *req_msg)
258{
259 return req_msg->offset50 & 0x7;
260}
261
262static inline void cm_req_set_rnr_retry_count(struct cm_req_msg *req_msg,
263 u8 rnr_retry_count)
264{
265 req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF8) |
266 (rnr_retry_count & 0x7));
267}
268
269static inline u8 cm_req_get_max_cm_retries(struct cm_req_msg *req_msg)
270{
271 return req_msg->offset51 >> 4;
272}
273
274static inline void cm_req_set_max_cm_retries(struct cm_req_msg *req_msg,
275 u8 retries)
276{
277 req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF) | (retries << 4));
278}
279
280static inline u8 cm_req_get_srq(struct cm_req_msg *req_msg)
281{
282 return (req_msg->offset51 & 0x8) >> 3;
283}
284
285static inline void cm_req_set_srq(struct cm_req_msg *req_msg, u8 srq)
286{
287 req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF7) |
288 ((srq & 0x1) << 3));
289}
290
291static inline u32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg)
292{
293 return cpu_to_be32((be32_to_cpu(req_msg->primary_offset88) >> 12));
294}
295
296static inline void cm_req_set_primary_flow_label(struct cm_req_msg *req_msg,
297 u32 flow_label)
298{
299 req_msg->primary_offset88 = cpu_to_be32(
300 (be32_to_cpu(req_msg->primary_offset88) &
301 0x00000FFF) |
302 (be32_to_cpu(flow_label) << 12));
303}
304
305static inline u8 cm_req_get_primary_packet_rate(struct cm_req_msg *req_msg)
306{
307 return (u8) (be32_to_cpu(req_msg->primary_offset88) & 0x3F);
308}
309
310static inline void cm_req_set_primary_packet_rate(struct cm_req_msg *req_msg,
311 u8 rate)
312{
313 req_msg->primary_offset88 = cpu_to_be32(
314 (be32_to_cpu(req_msg->primary_offset88) &
315 0xFFFFFFC0) | (rate & 0x3F));
316}
317
318static inline u8 cm_req_get_primary_sl(struct cm_req_msg *req_msg)
319{
320 return (u8) (req_msg->primary_offset94 >> 4);
321}
322
323static inline void cm_req_set_primary_sl(struct cm_req_msg *req_msg, u8 sl)
324{
325 req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0x0F) |
326 (sl << 4));
327}
328
329static inline u8 cm_req_get_primary_subnet_local(struct cm_req_msg *req_msg)
330{
331 return (u8) ((req_msg->primary_offset94 & 0x08) >> 3);
332}
333
334static inline void cm_req_set_primary_subnet_local(struct cm_req_msg *req_msg,
335 u8 subnet_local)
336{
337 req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0xF7) |
338 ((subnet_local & 0x1) << 3));
339}
340
341static inline u8 cm_req_get_primary_local_ack_timeout(struct cm_req_msg *req_msg)
342{
343 return (u8) (req_msg->primary_offset95 >> 3);
344}
345
346static inline void cm_req_set_primary_local_ack_timeout(struct cm_req_msg *req_msg,
347 u8 local_ack_timeout)
348{
349 req_msg->primary_offset95 = (u8) ((req_msg->primary_offset95 & 0x07) |
350 (local_ack_timeout << 3));
351}
352
353static inline u32 cm_req_get_alt_flow_label(struct cm_req_msg *req_msg)
354{
355 return cpu_to_be32((be32_to_cpu(req_msg->alt_offset132) >> 12));
356}
357
358static inline void cm_req_set_alt_flow_label(struct cm_req_msg *req_msg,
359 u32 flow_label)
360{
361 req_msg->alt_offset132 = cpu_to_be32(
362 (be32_to_cpu(req_msg->alt_offset132) &
363 0x00000FFF) |
364 (be32_to_cpu(flow_label) << 12));
365}
366
367static inline u8 cm_req_get_alt_packet_rate(struct cm_req_msg *req_msg)
368{
369 return (u8) (be32_to_cpu(req_msg->alt_offset132) & 0x3F);
370}
371
372static inline void cm_req_set_alt_packet_rate(struct cm_req_msg *req_msg,
373 u8 rate)
374{
375 req_msg->alt_offset132 = cpu_to_be32(
376 (be32_to_cpu(req_msg->alt_offset132) &
377 0xFFFFFFC0) | (rate & 0x3F));
378}
379
380static inline u8 cm_req_get_alt_sl(struct cm_req_msg *req_msg)
381{
382 return (u8) (req_msg->alt_offset138 >> 4);
383}
384
385static inline void cm_req_set_alt_sl(struct cm_req_msg *req_msg, u8 sl)
386{
387 req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0x0F) |
388 (sl << 4));
389}
390
391static inline u8 cm_req_get_alt_subnet_local(struct cm_req_msg *req_msg)
392{
393 return (u8) ((req_msg->alt_offset138 & 0x08) >> 3);
394}
395
396static inline void cm_req_set_alt_subnet_local(struct cm_req_msg *req_msg,
397 u8 subnet_local)
398{
399 req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0xF7) |
400 ((subnet_local & 0x1) << 3));
401}
402
403static inline u8 cm_req_get_alt_local_ack_timeout(struct cm_req_msg *req_msg)
404{
405 return (u8) (req_msg->alt_offset139 >> 3);
406}
407
408static inline void cm_req_set_alt_local_ack_timeout(struct cm_req_msg *req_msg,
409 u8 local_ack_timeout)
410{
411 req_msg->alt_offset139 = (u8) ((req_msg->alt_offset139 & 0x07) |
412 (local_ack_timeout << 3));
413}
414
415/* Message REJected or MRAed */
416enum cm_msg_response {
417 CM_MSG_RESPONSE_REQ = 0x0,
418 CM_MSG_RESPONSE_REP = 0x1,
419 CM_MSG_RESPONSE_OTHER = 0x2
420};
421
422struct cm_mra_msg {
423 struct ib_mad_hdr hdr;
424
425 u32 local_comm_id;
426 u32 remote_comm_id;
427 /* message MRAed:2, rsvd:6 */
428 u8 offset8;
429 /* service timeout:5, rsvd:3 */
430 u8 offset9;
431
432 u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE];
433
434} __attribute__ ((packed));
435
436static inline u8 cm_mra_get_msg_mraed(struct cm_mra_msg *mra_msg)
437{
438 return (u8) (mra_msg->offset8 >> 6);
439}
440
441static inline void cm_mra_set_msg_mraed(struct cm_mra_msg *mra_msg, u8 msg)
442{
443 mra_msg->offset8 = (u8) ((mra_msg->offset8 & 0x3F) | (msg << 6));
444}
445
446static inline u8 cm_mra_get_service_timeout(struct cm_mra_msg *mra_msg)
447{
448 return (u8) (mra_msg->offset9 >> 3);
449}
450
451static inline void cm_mra_set_service_timeout(struct cm_mra_msg *mra_msg,
452 u8 service_timeout)
453{
454 mra_msg->offset9 = (u8) ((mra_msg->offset9 & 0x07) |
455 (service_timeout << 3));
456}
457
458struct cm_rej_msg {
459 struct ib_mad_hdr hdr;
460
461 u32 local_comm_id;
462 u32 remote_comm_id;
463 /* message REJected:2, rsvd:6 */
464 u8 offset8;
465 /* reject info length:7, rsvd:1. */
466 u8 offset9;
467 u16 reason;
468 u8 ari[IB_CM_REJ_ARI_LENGTH];
469
470 u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE];
471
472} __attribute__ ((packed));
473
474static inline u8 cm_rej_get_msg_rejected(struct cm_rej_msg *rej_msg)
475{
476 return (u8) (rej_msg->offset8 >> 6);
477}
478
479static inline void cm_rej_set_msg_rejected(struct cm_rej_msg *rej_msg, u8 msg)
480{
481 rej_msg->offset8 = (u8) ((rej_msg->offset8 & 0x3F) | (msg << 6));
482}
483
484static inline u8 cm_rej_get_reject_info_len(struct cm_rej_msg *rej_msg)
485{
486 return (u8) (rej_msg->offset9 >> 1);
487}
488
489static inline void cm_rej_set_reject_info_len(struct cm_rej_msg *rej_msg,
490 u8 len)
491{
492 rej_msg->offset9 = (u8) ((rej_msg->offset9 & 0x1) | (len << 1));
493}
494
495struct cm_rep_msg {
496 struct ib_mad_hdr hdr;
497
498 u32 local_comm_id;
499 u32 remote_comm_id;
500 u32 local_qkey;
501 /* local QPN:24, rsvd:8 */
502 u32 offset12;
503 /* local EECN:24, rsvd:8 */
504 u32 offset16;
505 /* starting PSN:24 rsvd:8 */
506 u32 offset20;
507 u8 resp_resources;
508 u8 initiator_depth;
509 /* target ACK delay:5, failover accepted:2, end-to-end flow control:1 */
510 u8 offset26;
511 /* RNR retry count:3, SRQ:1, rsvd:5 */
512 u8 offset27;
513 u64 local_ca_guid;
514
515 u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE];
516
517} __attribute__ ((packed));
518
519static inline u32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg)
520{
521 return cpu_to_be32(be32_to_cpu(rep_msg->offset12) >> 8);
522}
523
524static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, u32 qpn)
525{
526 rep_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
527 (be32_to_cpu(rep_msg->offset12) & 0x000000FF));
528}
529
530static inline u32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)
531{
532 return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8);
533}
534
535static inline void cm_rep_set_starting_psn(struct cm_rep_msg *rep_msg,
536 u32 starting_psn)
537{
538 rep_msg->offset20 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
539 (be32_to_cpu(rep_msg->offset20) & 0x000000FF));
540}
541
542static inline u8 cm_rep_get_target_ack_delay(struct cm_rep_msg *rep_msg)
543{
544 return (u8) (rep_msg->offset26 >> 3);
545}
546
547static inline void cm_rep_set_target_ack_delay(struct cm_rep_msg *rep_msg,
548 u8 target_ack_delay)
549{
550 rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0x07) |
551 (target_ack_delay << 3));
552}
553
554static inline u8 cm_rep_get_failover(struct cm_rep_msg *rep_msg)
555{
556 return (u8) ((rep_msg->offset26 & 0x06) >> 1);
557}
558
559static inline void cm_rep_set_failover(struct cm_rep_msg *rep_msg, u8 failover)
560{
561 rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xF9) |
562 ((failover & 0x3) << 1));
563}
564
565static inline u8 cm_rep_get_flow_ctrl(struct cm_rep_msg *rep_msg)
566{
567 return (u8) (rep_msg->offset26 & 0x01);
568}
569
570static inline void cm_rep_set_flow_ctrl(struct cm_rep_msg *rep_msg,
571 u8 flow_ctrl)
572{
573 rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xFE) |
574 (flow_ctrl & 0x1));
575}
576
577static inline u8 cm_rep_get_rnr_retry_count(struct cm_rep_msg *rep_msg)
578{
579 return (u8) (rep_msg->offset27 >> 5);
580}
581
582static inline void cm_rep_set_rnr_retry_count(struct cm_rep_msg *rep_msg,
583 u8 rnr_retry_count)
584{
585 rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0x1F) |
586 (rnr_retry_count << 5));
587}
588
589static inline u8 cm_rep_get_srq(struct cm_rep_msg *rep_msg)
590{
591 return (u8) ((rep_msg->offset27 >> 4) & 0x1);
592}
593
594static inline void cm_rep_set_srq(struct cm_rep_msg *rep_msg, u8 srq)
595{
596 rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0xEF) |
597 ((srq & 0x1) << 4));
598}
599
600struct cm_rtu_msg {
601 struct ib_mad_hdr hdr;
602
603 u32 local_comm_id;
604 u32 remote_comm_id;
605
606 u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE];
607
608} __attribute__ ((packed));
609
610struct cm_dreq_msg {
611 struct ib_mad_hdr hdr;
612
613 u32 local_comm_id;
614 u32 remote_comm_id;
615 /* remote QPN/EECN:24, rsvd:8 */
616 u32 offset8;
617
618 u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE];
619
620} __attribute__ ((packed));
621
622static inline u32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg)
623{
624 return cpu_to_be32(be32_to_cpu(dreq_msg->offset8) >> 8);
625}
626
627static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, u32 qpn)
628{
629 dreq_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
630 (be32_to_cpu(dreq_msg->offset8) & 0x000000FF));
631}
632
633struct cm_drep_msg {
634 struct ib_mad_hdr hdr;
635
636 u32 local_comm_id;
637 u32 remote_comm_id;
638
639 u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE];
640
641} __attribute__ ((packed));
642
643struct cm_lap_msg {
644 struct ib_mad_hdr hdr;
645
646 u32 local_comm_id;
647 u32 remote_comm_id;
648
649 u32 rsvd8;
650 /* remote QPN/EECN:24, remote CM response timeout:5, rsvd:3 */
651 u32 offset12;
652 u32 rsvd16;
653
654 u16 alt_local_lid;
655 u16 alt_remote_lid;
656 union ib_gid alt_local_gid;
657 union ib_gid alt_remote_gid;
658 /* flow label:20, rsvd:4, traffic class:8 */
659 u32 offset56;
660 u8 alt_hop_limit;
661 /* rsvd:2, packet rate:6 */
662	u8 offset61;
663	/* SL:4, subnet local:1, rsvd:3 */
664	u8 offset62;
665	/* local ACK timeout:5, rsvd:3 */
666	u8 offset63;
667
668 u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE];
669} __attribute__ ((packed));
670
671static inline u32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg)
672{
673 return cpu_to_be32(be32_to_cpu(lap_msg->offset12) >> 8);
674}
675
676static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, u32 qpn)
677{
678 lap_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
679 (be32_to_cpu(lap_msg->offset12) &
680 0x000000FF));
681}
682
683static inline u8 cm_lap_get_remote_resp_timeout(struct cm_lap_msg *lap_msg)
684{
685 return (u8) ((be32_to_cpu(lap_msg->offset12) & 0xF8) >> 3);
686}
687
688static inline void cm_lap_set_remote_resp_timeout(struct cm_lap_msg *lap_msg,
689 u8 resp_timeout)
690{
691 lap_msg->offset12 = cpu_to_be32((resp_timeout << 3) |
692 (be32_to_cpu(lap_msg->offset12) &
693 0xFFFFFF07));
694}
695
696static inline u32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg)
697{
698 return be32_to_cpu(lap_msg->offset56) >> 12;
699}
700
701static inline void cm_lap_set_flow_label(struct cm_lap_msg *lap_msg,
702 u32 flow_label)
703{
704 lap_msg->offset56 = cpu_to_be32((flow_label << 12) |
705 (be32_to_cpu(lap_msg->offset56) &
706 0x00000FFF));
707}
708
709static inline u8 cm_lap_get_traffic_class(struct cm_lap_msg *lap_msg)
710{
711 return (u8) be32_to_cpu(lap_msg->offset56);
712}
713
714static inline void cm_lap_set_traffic_class(struct cm_lap_msg *lap_msg,
715 u8 traffic_class)
716{
717 lap_msg->offset56 = cpu_to_be32(traffic_class |
718 (be32_to_cpu(lap_msg->offset56) &
719 0xFFFFFF00));
720}
721
722static inline u8 cm_lap_get_packet_rate(struct cm_lap_msg *lap_msg)
723{
724 return lap_msg->offset61 & 0x3F;
725}
726
727static inline void cm_lap_set_packet_rate(struct cm_lap_msg *lap_msg,
728 u8 packet_rate)
729{
730 lap_msg->offset61 = (packet_rate & 0x3F) | (lap_msg->offset61 & 0xC0);
731}
732
733static inline u8 cm_lap_get_sl(struct cm_lap_msg *lap_msg)
734{
735 return lap_msg->offset62 >> 4;
736}
737
738static inline void cm_lap_set_sl(struct cm_lap_msg *lap_msg, u8 sl)
739{
740 lap_msg->offset62 = (sl << 4) | (lap_msg->offset62 & 0x0F);
741}
742
743static inline u8 cm_lap_get_subnet_local(struct cm_lap_msg *lap_msg)
744{
745 return (lap_msg->offset62 >> 3) & 0x1;
746}
747
748static inline void cm_lap_set_subnet_local(struct cm_lap_msg *lap_msg,
749 u8 subnet_local)
750{
751 lap_msg->offset62 = ((subnet_local & 0x1) << 3) |
752				(lap_msg->offset62 & 0xF7);
753}
754static inline u8 cm_lap_get_local_ack_timeout(struct cm_lap_msg *lap_msg)
755{
756 return lap_msg->offset63 >> 3;
757}
758
759static inline void cm_lap_set_local_ack_timeout(struct cm_lap_msg *lap_msg,
760 u8 local_ack_timeout)
761{
762 lap_msg->offset63 = (local_ack_timeout << 3) |
763 (lap_msg->offset63 & 0x07);
764}
765
766struct cm_apr_msg {
767 struct ib_mad_hdr hdr;
768
769 u32 local_comm_id;
770 u32 remote_comm_id;
771
772 u8 info_length;
773 u8 ap_status;
774 u8 info[IB_CM_APR_INFO_LENGTH];
775
776 u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE];
777} __attribute__ ((packed));
778
779struct cm_sidr_req_msg {
780 struct ib_mad_hdr hdr;
781
782 u32 request_id;
783 u16 pkey;
784 u16 rsvd;
785 u64 service_id;
786
787 u8 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE];
788} __attribute__ ((packed));
789
790struct cm_sidr_rep_msg {
791 struct ib_mad_hdr hdr;
792
793 u32 request_id;
794 u8 status;
795 u8 info_length;
796 u16 rsvd;
797 /* QPN:24, rsvd:8 */
798 u32 offset8;
799 u64 service_id;
800 u32 qkey;
801 u8 info[IB_CM_SIDR_REP_INFO_LENGTH];
802
803 u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE];
804} __attribute__ ((packed));
805
806static inline u32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg)
807{
808 return cpu_to_be32(be32_to_cpu(sidr_rep_msg->offset8) >> 8);
809}
810
811static inline void cm_sidr_rep_set_qpn(struct cm_sidr_rep_msg *sidr_rep_msg,
812 u32 qpn)
813{
814 sidr_rep_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
815 (be32_to_cpu(sidr_rep_msg->offset8) &
816 0x000000FF));
817}
818
819#endif /* CM_MSGS_H */
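Every accessor in this header follows the same pattern: the wire word stays in network byte order inside the packed struct, and a sub-field is isolated by shift and mask around a byte swap. The sketch below replays the pattern for the REQ word that packs local QPN:24 with responder resources:8 (offset32 above) as a standalone user-space program; it uses POSIX htonl()/ntohl() in place of the kernel's cpu_to_be32()/be32_to_cpu(), and unlike the kernel getters it returns host-order values, so it is an illustration rather than a drop-in equivalent:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

static uint32_t get_local_qpn(uint32_t offset32_be)
{
	return ntohl(offset32_be) >> 8;			/* top 24 bits */
}

static uint8_t get_resp_res(uint32_t offset32_be)
{
	return (uint8_t) ntohl(offset32_be);		/* low 8 bits */
}

static uint32_t set_local_qpn(uint32_t offset32_be, uint32_t qpn)
{
	return htonl((qpn << 8) | (ntohl(offset32_be) & 0x000000FF));
}

static uint32_t set_resp_res(uint32_t offset32_be, uint8_t resp_res)
{
	return htonl(resp_res | (ntohl(offset32_be) & 0xFFFFFF00));
}

int main(void)
{
	uint32_t w = 0;

	w = set_local_qpn(w, 0xABCDEF);
	w = set_resp_res(w, 4);
	printf("qpn=0x%x resp_res=%u\n", get_local_qpn(w), get_resp_res(w));
	return 0;
}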
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 328feae2a5be..7763b31abba7 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -29,7 +30,7 @@
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 31 * SOFTWARE.
31 * 32 *
32 * $Id: fmr_pool.c 1349 2004-12-16 21:09:43Z roland $ 33 * $Id: fmr_pool.c 2730 2005-06-28 16:43:03Z sean.hefty $
33 */ 34 */
34 35
35#include <linux/errno.h> 36#include <linux/errno.h>
@@ -329,7 +330,7 @@ EXPORT_SYMBOL(ib_create_fmr_pool);
329 * 330 *
330 * Destroy an FMR pool and free all associated resources. 331 * Destroy an FMR pool and free all associated resources.
331 */ 332 */
332int ib_destroy_fmr_pool(struct ib_fmr_pool *pool) 333void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
333{ 334{
334 struct ib_pool_fmr *fmr; 335 struct ib_pool_fmr *fmr;
335 struct ib_pool_fmr *tmp; 336 struct ib_pool_fmr *tmp;
@@ -352,8 +353,6 @@ int ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
352 353
353 kfree(pool->cache_bucket); 354 kfree(pool->cache_bucket);
354 kfree(pool); 355 kfree(pool);
355
356 return 0;
357} 356}
358EXPORT_SYMBOL(ib_destroy_fmr_pool); 357EXPORT_SYMBOL(ib_destroy_fmr_pool);
359 358
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 23628c622a50..b97e210ce9c8 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1,5 +1,7 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. 2 * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2005 Intel Corporation. All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
3 * 5 *
4 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 7 * licenses. You may choose to be licensed under the terms of the GNU
@@ -29,12 +31,12 @@
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 32 * SOFTWARE.
31 * 33 *
32 * $Id: mad.c 1389 2004-12-27 22:56:47Z roland $ 34 * $Id: mad.c 2817 2005-07-07 11:29:26Z halr $
33 */ 35 */
34
35#include <linux/dma-mapping.h> 36#include <linux/dma-mapping.h>
36 37
37#include "mad_priv.h" 38#include "mad_priv.h"
39#include "mad_rmpp.h"
38#include "smi.h" 40#include "smi.h"
39#include "agent.h" 41#include "agent.h"
40 42
@@ -45,6 +47,7 @@ MODULE_AUTHOR("Sean Hefty");
45 47
46 48
47kmem_cache_t *ib_mad_cache; 49kmem_cache_t *ib_mad_cache;
50
48static struct list_head ib_mad_port_list; 51static struct list_head ib_mad_port_list;
49static u32 ib_mad_client_id = 0; 52static u32 ib_mad_client_id = 0;
50 53
@@ -58,16 +61,12 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method,
58static void remove_mad_reg_req(struct ib_mad_agent_private *priv); 61static void remove_mad_reg_req(struct ib_mad_agent_private *priv);
59static struct ib_mad_agent_private *find_mad_agent( 62static struct ib_mad_agent_private *find_mad_agent(
60 struct ib_mad_port_private *port_priv, 63 struct ib_mad_port_private *port_priv,
61 struct ib_mad *mad, int solicited); 64 struct ib_mad *mad);
62static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, 65static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
63 struct ib_mad_private *mad); 66 struct ib_mad_private *mad);
64static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); 67static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
65static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
66 struct ib_mad_send_wc *mad_send_wc);
67static void timeout_sends(void *data); 68static void timeout_sends(void *data);
68static void cancel_sends(void *data);
69static void local_completions(void *data); 69static void local_completions(void *data);
70static int solicited_mad(struct ib_mad *mad);
71static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, 70static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
72 struct ib_mad_agent_private *agent_priv, 71 struct ib_mad_agent_private *agent_priv,
73 u8 mgmt_class); 72 u8 mgmt_class);
@@ -197,8 +196,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
197 if (qpn == -1) 196 if (qpn == -1)
198 goto error1; 197 goto error1;
199 198
200 if (rmpp_version) 199 if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION)
201 goto error1; /* XXX: until RMPP implemented */ 200 goto error1;
202 201
203 /* Validate MAD registration request if supplied */ 202 /* Validate MAD registration request if supplied */
204 if (mad_reg_req) { 203 if (mad_reg_req) {
@@ -261,22 +260,29 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
261 ret = ERR_PTR(-ENOMEM); 260 ret = ERR_PTR(-ENOMEM);
262 goto error1; 261 goto error1;
263 } 262 }
263 memset(mad_agent_priv, 0, sizeof *mad_agent_priv);
264
265 mad_agent_priv->agent.mr = ib_get_dma_mr(port_priv->qp_info[qpn].qp->pd,
266 IB_ACCESS_LOCAL_WRITE);
267 if (IS_ERR(mad_agent_priv->agent.mr)) {
268 ret = ERR_PTR(-ENOMEM);
269 goto error2;
270 }
264 271
265 if (mad_reg_req) { 272 if (mad_reg_req) {
266 reg_req = kmalloc(sizeof *reg_req, GFP_KERNEL); 273 reg_req = kmalloc(sizeof *reg_req, GFP_KERNEL);
267 if (!reg_req) { 274 if (!reg_req) {
268 ret = ERR_PTR(-ENOMEM); 275 ret = ERR_PTR(-ENOMEM);
269 goto error2; 276 goto error3;
270 } 277 }
271 /* Make a copy of the MAD registration request */ 278 /* Make a copy of the MAD registration request */
272 memcpy(reg_req, mad_reg_req, sizeof *reg_req); 279 memcpy(reg_req, mad_reg_req, sizeof *reg_req);
273 } 280 }
274 281
275 /* Now, fill in the various structures */ 282 /* Now, fill in the various structures */
276 memset(mad_agent_priv, 0, sizeof *mad_agent_priv);
277 mad_agent_priv->qp_info = &port_priv->qp_info[qpn]; 283 mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
278 mad_agent_priv->reg_req = reg_req; 284 mad_agent_priv->reg_req = reg_req;
279 mad_agent_priv->rmpp_version = rmpp_version; 285 mad_agent_priv->agent.rmpp_version = rmpp_version;
280 mad_agent_priv->agent.device = device; 286 mad_agent_priv->agent.device = device;
281 mad_agent_priv->agent.recv_handler = recv_handler; 287 mad_agent_priv->agent.recv_handler = recv_handler;
282 mad_agent_priv->agent.send_handler = send_handler; 288 mad_agent_priv->agent.send_handler = send_handler;
@@ -301,7 +307,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
301 if (method) { 307 if (method) {
302 if (method_in_use(&method, 308 if (method_in_use(&method,
303 mad_reg_req)) 309 mad_reg_req))
304 goto error3; 310 goto error4;
305 } 311 }
306 } 312 }
307 ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv, 313 ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
@@ -317,14 +323,14 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
317 if (is_vendor_method_in_use( 323 if (is_vendor_method_in_use(
318 vendor_class, 324 vendor_class,
319 mad_reg_req)) 325 mad_reg_req))
320 goto error3; 326 goto error4;
321 } 327 }
322 } 328 }
323 ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv); 329 ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
324 } 330 }
325 if (ret2) { 331 if (ret2) {
326 ret = ERR_PTR(ret2); 332 ret = ERR_PTR(ret2);
327 goto error3; 333 goto error4;
328 } 334 }
329 } 335 }
330 336
@@ -335,22 +341,24 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
335 spin_lock_init(&mad_agent_priv->lock); 341 spin_lock_init(&mad_agent_priv->lock);
336 INIT_LIST_HEAD(&mad_agent_priv->send_list); 342 INIT_LIST_HEAD(&mad_agent_priv->send_list);
337 INIT_LIST_HEAD(&mad_agent_priv->wait_list); 343 INIT_LIST_HEAD(&mad_agent_priv->wait_list);
344 INIT_LIST_HEAD(&mad_agent_priv->done_list);
345 INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
338 INIT_WORK(&mad_agent_priv->timed_work, timeout_sends, mad_agent_priv); 346 INIT_WORK(&mad_agent_priv->timed_work, timeout_sends, mad_agent_priv);
339 INIT_LIST_HEAD(&mad_agent_priv->local_list); 347 INIT_LIST_HEAD(&mad_agent_priv->local_list);
340 INIT_WORK(&mad_agent_priv->local_work, local_completions, 348 INIT_WORK(&mad_agent_priv->local_work, local_completions,
341 mad_agent_priv); 349 mad_agent_priv);
342 INIT_LIST_HEAD(&mad_agent_priv->canceled_list);
343 INIT_WORK(&mad_agent_priv->canceled_work, cancel_sends, mad_agent_priv);
344 atomic_set(&mad_agent_priv->refcount, 1); 350 atomic_set(&mad_agent_priv->refcount, 1);
345 init_waitqueue_head(&mad_agent_priv->wait); 351 init_waitqueue_head(&mad_agent_priv->wait);
346 352
347 return &mad_agent_priv->agent; 353 return &mad_agent_priv->agent;
348 354
349error3: 355error4:
350 spin_unlock_irqrestore(&port_priv->reg_lock, flags); 356 spin_unlock_irqrestore(&port_priv->reg_lock, flags);
351 kfree(reg_req); 357 kfree(reg_req);
352error2: 358error3:
359 ib_dereg_mr(mad_agent_priv->agent.mr);
360error2:
353 kfree(mad_agent_priv); 361 kfree(mad_agent_priv);
354error1: 362error1:
355 return ret; 363 return ret;
356} 364}
@@ -487,18 +495,16 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
487 * MADs, preventing us from queuing additional work 495 * MADs, preventing us from queuing additional work
488 */ 496 */
489 cancel_mads(mad_agent_priv); 497 cancel_mads(mad_agent_priv);
490
491 port_priv = mad_agent_priv->qp_info->port_priv; 498 port_priv = mad_agent_priv->qp_info->port_priv;
492
493 cancel_delayed_work(&mad_agent_priv->timed_work); 499 cancel_delayed_work(&mad_agent_priv->timed_work);
494 flush_workqueue(port_priv->wq);
495 500
496 spin_lock_irqsave(&port_priv->reg_lock, flags); 501 spin_lock_irqsave(&port_priv->reg_lock, flags);
497 remove_mad_reg_req(mad_agent_priv); 502 remove_mad_reg_req(mad_agent_priv);
498 list_del(&mad_agent_priv->agent_list); 503 list_del(&mad_agent_priv->agent_list);
499 spin_unlock_irqrestore(&port_priv->reg_lock, flags); 504 spin_unlock_irqrestore(&port_priv->reg_lock, flags);
500 505
501 /* XXX: Cleanup pending RMPP receives for this agent */ 506 flush_workqueue(port_priv->wq);
507 ib_cancel_rmpp_recvs(mad_agent_priv);
502 508
503 atomic_dec(&mad_agent_priv->refcount); 509 atomic_dec(&mad_agent_priv->refcount);
504 wait_event(mad_agent_priv->wait, 510 wait_event(mad_agent_priv->wait,
@@ -506,6 +512,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
506 512
507 if (mad_agent_priv->reg_req) 513 if (mad_agent_priv->reg_req)
508 kfree(mad_agent_priv->reg_req); 514 kfree(mad_agent_priv->reg_req);
515 ib_dereg_mr(mad_agent_priv->agent.mr);
509 kfree(mad_agent_priv); 516 kfree(mad_agent_priv);
510} 517}
511 518
@@ -551,6 +558,13 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
551} 558}
552EXPORT_SYMBOL(ib_unregister_mad_agent); 559EXPORT_SYMBOL(ib_unregister_mad_agent);
553 560
561static inline int response_mad(struct ib_mad *mad)
562{
563 /* Trap represses are responses although response bit is reset */
564 return ((mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) ||
565 (mad->mad_hdr.method & IB_MGMT_METHOD_RESP));
566}
567
554static void dequeue_mad(struct ib_mad_list_head *mad_list) 568static void dequeue_mad(struct ib_mad_list_head *mad_list)
555{ 569{
556 struct ib_mad_queue *mad_queue; 570 struct ib_mad_queue *mad_queue;
@@ -643,7 +657,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
643 struct ib_smp *smp, 657 struct ib_smp *smp,
644 struct ib_send_wr *send_wr) 658 struct ib_send_wr *send_wr)
645{ 659{
646 int ret, solicited; 660 int ret;
647 unsigned long flags; 661 unsigned long flags;
648 struct ib_mad_local_private *local; 662 struct ib_mad_local_private *local;
649 struct ib_mad_private *mad_priv; 663 struct ib_mad_private *mad_priv;
@@ -689,11 +703,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
689 switch (ret) 703 switch (ret)
690 { 704 {
691 case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: 705 case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
692 /* 706 if (response_mad(&mad_priv->mad.mad) &&
693 * See if response is solicited and
694 * there is a recv handler
695 */
696 if (solicited_mad(&mad_priv->mad.mad) &&
697 mad_agent_priv->agent.recv_handler) { 707 mad_agent_priv->agent.recv_handler) {
698 local->mad_priv = mad_priv; 708 local->mad_priv = mad_priv;
699 local->recv_mad_agent = mad_agent_priv; 709 local->recv_mad_agent = mad_agent_priv;
@@ -710,15 +720,13 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
710 break; 720 break;
711 case IB_MAD_RESULT_SUCCESS: 721 case IB_MAD_RESULT_SUCCESS:
712 /* Treat like an incoming receive MAD */ 722 /* Treat like an incoming receive MAD */
713 solicited = solicited_mad(&mad_priv->mad.mad);
714 port_priv = ib_get_mad_port(mad_agent_priv->agent.device, 723 port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
715 mad_agent_priv->agent.port_num); 724 mad_agent_priv->agent.port_num);
716 if (port_priv) { 725 if (port_priv) {
717 mad_priv->mad.mad.mad_hdr.tid = 726 mad_priv->mad.mad.mad_hdr.tid =
718 ((struct ib_mad *)smp)->mad_hdr.tid; 727 ((struct ib_mad *)smp)->mad_hdr.tid;
719 recv_mad_agent = find_mad_agent(port_priv, 728 recv_mad_agent = find_mad_agent(port_priv,
720 &mad_priv->mad.mad, 729 &mad_priv->mad.mad);
721 solicited);
722 } 730 }
723 if (!port_priv || !recv_mad_agent) { 731 if (!port_priv || !recv_mad_agent) {
724 kmem_cache_free(ib_mad_cache, mad_priv); 732 kmem_cache_free(ib_mad_cache, mad_priv);
@@ -750,43 +758,133 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
750 list_add_tail(&local->completion_list, &mad_agent_priv->local_list); 758 list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
751 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 759 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
752 queue_work(mad_agent_priv->qp_info->port_priv->wq, 760 queue_work(mad_agent_priv->qp_info->port_priv->wq,
753 &mad_agent_priv->local_work); 761 &mad_agent_priv->local_work);
754 ret = 1; 762 ret = 1;
755out: 763out:
756 return ret; 764 return ret;
757} 765}
758 766
759static int ib_send_mad(struct ib_mad_agent_private *mad_agent_priv, 767static int get_buf_length(int hdr_len, int data_len)
760 struct ib_mad_send_wr_private *mad_send_wr) 768{
769 int seg_size, pad;
770
771 seg_size = sizeof(struct ib_mad) - hdr_len;
772 if (data_len && seg_size) {
773 pad = seg_size - data_len % seg_size;
774 if (pad == seg_size)
775 pad = 0;
776 } else
777 pad = seg_size;
778 return hdr_len + data_len + pad;
779}
780
781struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
782 u32 remote_qpn, u16 pkey_index,
783 struct ib_ah *ah, int rmpp_active,
784 int hdr_len, int data_len,
785 unsigned int __nocast gfp_mask)
786{
787 struct ib_mad_agent_private *mad_agent_priv;
788 struct ib_mad_send_buf *send_buf;
789 int buf_size;
790 void *buf;
791
792 mad_agent_priv = container_of(mad_agent,
793 struct ib_mad_agent_private, agent);
794 buf_size = get_buf_length(hdr_len, data_len);
795
796 if ((!mad_agent->rmpp_version &&
797 (rmpp_active || buf_size > sizeof(struct ib_mad))) ||
798 (!rmpp_active && buf_size > sizeof(struct ib_mad)))
799 return ERR_PTR(-EINVAL);
800
801 buf = kmalloc(sizeof *send_buf + buf_size, gfp_mask);
802 if (!buf)
803 return ERR_PTR(-ENOMEM);
804 memset(buf, 0, sizeof *send_buf + buf_size);
805
806 send_buf = buf + buf_size;
807 send_buf->mad = buf;
808
809 send_buf->sge.addr = dma_map_single(mad_agent->device->dma_device,
810 buf, buf_size, DMA_TO_DEVICE);
811 pci_unmap_addr_set(send_buf, mapping, send_buf->sge.addr);
812 send_buf->sge.length = buf_size;
813 send_buf->sge.lkey = mad_agent->mr->lkey;
814
815 send_buf->send_wr.wr_id = (unsigned long) send_buf;
816 send_buf->send_wr.sg_list = &send_buf->sge;
817 send_buf->send_wr.num_sge = 1;
818 send_buf->send_wr.opcode = IB_WR_SEND;
819 send_buf->send_wr.send_flags = IB_SEND_SIGNALED;
820 send_buf->send_wr.wr.ud.ah = ah;
821 send_buf->send_wr.wr.ud.mad_hdr = &send_buf->mad->mad_hdr;
822 send_buf->send_wr.wr.ud.remote_qpn = remote_qpn;
823 send_buf->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
824 send_buf->send_wr.wr.ud.pkey_index = pkey_index;
825
826 if (rmpp_active) {
827 struct ib_rmpp_mad *rmpp_mad;
828 rmpp_mad = (struct ib_rmpp_mad *)send_buf->mad;
829 rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len -
830 offsetof(struct ib_rmpp_mad, data) + data_len);
831 rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version;
832 rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
833 ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr,
834 IB_MGMT_RMPP_FLAG_ACTIVE);
835 }
836
837 send_buf->mad_agent = mad_agent;
838 atomic_inc(&mad_agent_priv->refcount);
839 return send_buf;
840}
841EXPORT_SYMBOL(ib_create_send_mad);
842
843void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
844{
845 struct ib_mad_agent_private *mad_agent_priv;
846
847 mad_agent_priv = container_of(send_buf->mad_agent,
848 struct ib_mad_agent_private, agent);
849
850 dma_unmap_single(send_buf->mad_agent->device->dma_device,
851 pci_unmap_addr(send_buf, mapping),
852 send_buf->sge.length, DMA_TO_DEVICE);
853 kfree(send_buf->mad);
854
855 if (atomic_dec_and_test(&mad_agent_priv->refcount))
856 wake_up(&mad_agent_priv->wait);
857}
858EXPORT_SYMBOL(ib_free_send_mad);
859
860int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
761{ 861{
762 struct ib_mad_qp_info *qp_info; 862 struct ib_mad_qp_info *qp_info;
763 struct ib_send_wr *bad_send_wr; 863 struct ib_send_wr *bad_send_wr;
864 struct list_head *list;
764 unsigned long flags; 865 unsigned long flags;
765 int ret; 866 int ret;
766 867
767 /* Replace user's WR ID with our own to find WR upon completion */ 868 /* Set WR ID to find mad_send_wr upon completion */
768 qp_info = mad_agent_priv->qp_info; 869 qp_info = mad_send_wr->mad_agent_priv->qp_info;
769 mad_send_wr->wr_id = mad_send_wr->send_wr.wr_id;
770 mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list; 870 mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
771 mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; 871 mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
772 872
773 spin_lock_irqsave(&qp_info->send_queue.lock, flags); 873 spin_lock_irqsave(&qp_info->send_queue.lock, flags);
774 if (qp_info->send_queue.count++ < qp_info->send_queue.max_active) { 874 if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
775 list_add_tail(&mad_send_wr->mad_list.list, 875 ret = ib_post_send(mad_send_wr->mad_agent_priv->agent.qp,
776 &qp_info->send_queue.list);
777 spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
778 ret = ib_post_send(mad_agent_priv->agent.qp,
779 &mad_send_wr->send_wr, &bad_send_wr); 876 &mad_send_wr->send_wr, &bad_send_wr);
780 if (ret) { 877 list = &qp_info->send_queue.list;
781 printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
782 dequeue_mad(&mad_send_wr->mad_list);
783 }
784 } else { 878 } else {
785 list_add_tail(&mad_send_wr->mad_list.list,
786 &qp_info->overflow_list);
787 spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
788 ret = 0; 879 ret = 0;
880 list = &qp_info->overflow_list;
789 } 881 }
882
883 if (!ret) {
884 qp_info->send_queue.count++;
885 list_add_tail(&mad_send_wr->mad_list.list, list);
886 }
887 spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
790 return ret; 888 return ret;
791} 889}
792 890
@@ -860,18 +958,19 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent,
860 ret = -ENOMEM; 958 ret = -ENOMEM;
861 goto error2; 959 goto error2;
862 } 960 }
961 memset(mad_send_wr, 0, sizeof *mad_send_wr);
863 962
864 mad_send_wr->send_wr = *send_wr; 963 mad_send_wr->send_wr = *send_wr;
865 mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; 964 mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
866 memcpy(mad_send_wr->sg_list, send_wr->sg_list, 965 memcpy(mad_send_wr->sg_list, send_wr->sg_list,
867 sizeof *send_wr->sg_list * send_wr->num_sge); 966 sizeof *send_wr->sg_list * send_wr->num_sge);
868 mad_send_wr->send_wr.next = NULL; 967 mad_send_wr->wr_id = send_wr->wr_id;
869 mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid; 968 mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid;
870 mad_send_wr->agent = mad_agent; 969 mad_send_wr->mad_agent_priv = mad_agent_priv;
871 /* Timeout will be updated after send completes */ 970 /* Timeout will be updated after send completes */
872 mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr. 971 mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr.
873 ud.timeout_ms); 972 ud.timeout_ms);
874 mad_send_wr->retry = 0; 973 mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries;
875 /* One reference for each work request to QP + response */ 974 /* One reference for each work request to QP + response */
876 mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); 975 mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
877 mad_send_wr->status = IB_WC_SUCCESS; 976 mad_send_wr->status = IB_WC_SUCCESS;
@@ -883,8 +982,13 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent,
883 &mad_agent_priv->send_list); 982 &mad_agent_priv->send_list);
884 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 983 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
885 984
886 ret = ib_send_mad(mad_agent_priv, mad_send_wr); 985 if (mad_agent_priv->agent.rmpp_version) {
887 if (ret) { 986 ret = ib_send_rmpp_mad(mad_send_wr);
987 if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
988 ret = ib_send_mad(mad_send_wr);
989 } else
990 ret = ib_send_mad(mad_send_wr);
991 if (ret < 0) {
888 /* Fail send request */ 992 /* Fail send request */
889 spin_lock_irqsave(&mad_agent_priv->lock, flags); 993 spin_lock_irqsave(&mad_agent_priv->lock, flags);
890 list_del(&mad_send_wr->agent_list); 994 list_del(&mad_send_wr->agent_list);
@@ -910,41 +1014,28 @@ EXPORT_SYMBOL(ib_post_send_mad);
910 */ 1014 */
911void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) 1015void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
912{ 1016{
913 struct ib_mad_recv_buf *entry; 1017 struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf;
914 struct ib_mad_private_header *mad_priv_hdr; 1018 struct ib_mad_private_header *mad_priv_hdr;
915 struct ib_mad_private *priv; 1019 struct ib_mad_private *priv;
1020 struct list_head free_list;
916 1021
917 mad_priv_hdr = container_of(mad_recv_wc, 1022 INIT_LIST_HEAD(&free_list);
918 struct ib_mad_private_header, 1023 list_splice_init(&mad_recv_wc->rmpp_list, &free_list);
919 recv_wc);
920 priv = container_of(mad_priv_hdr, struct ib_mad_private, header);
921 1024
922 /* 1025 list_for_each_entry_safe(mad_recv_buf, temp_recv_buf,
923 * Walk receive buffer list associated with this WC 1026 &free_list, list) {
924 * No need to remove them from list of receive buffers 1027 mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc,
925 */ 1028 recv_buf);
926 list_for_each_entry(entry, &mad_recv_wc->recv_buf.list, list) {
927 /* Free previous receive buffer */
928 kmem_cache_free(ib_mad_cache, priv);
929 mad_priv_hdr = container_of(mad_recv_wc, 1029 mad_priv_hdr = container_of(mad_recv_wc,
930 struct ib_mad_private_header, 1030 struct ib_mad_private_header,
931 recv_wc); 1031 recv_wc);
932 priv = container_of(mad_priv_hdr, struct ib_mad_private, 1032 priv = container_of(mad_priv_hdr, struct ib_mad_private,
933 header); 1033 header);
1034 kmem_cache_free(ib_mad_cache, priv);
934 } 1035 }
935
936 /* Free last buffer */
937 kmem_cache_free(ib_mad_cache, priv);
938} 1036}
939EXPORT_SYMBOL(ib_free_recv_mad); 1037EXPORT_SYMBOL(ib_free_recv_mad);
940 1038
941void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc,
942 void *buf)
943{
944 printk(KERN_ERR PFX "ib_coalesce_recv_mad() not implemented yet\n");
945}
946EXPORT_SYMBOL(ib_coalesce_recv_mad);
947
948struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, 1039struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
949 u8 rmpp_version, 1040 u8 rmpp_version,
950 ib_mad_send_handler send_handler, 1041 ib_mad_send_handler send_handler,
@@ -1338,42 +1429,15 @@ out:
1338 return; 1429 return;
1339} 1430}
1340 1431
1341static int response_mad(struct ib_mad *mad)
1342{
1343 /* Trap represses are responses although response bit is reset */
1344 return ((mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) ||
1345 (mad->mad_hdr.method & IB_MGMT_METHOD_RESP));
1346}
1347
1348static int solicited_mad(struct ib_mad *mad)
1349{
1350 /* CM MADs are never solicited */
1351 if (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CM) {
1352 return 0;
1353 }
1354
1355 /* XXX: Determine whether MAD is using RMPP */
1356
1357 /* Not using RMPP */
1358 /* Is this MAD a response to a previous MAD ? */
1359 return response_mad(mad);
1360}
1361
1362static struct ib_mad_agent_private * 1432static struct ib_mad_agent_private *
1363find_mad_agent(struct ib_mad_port_private *port_priv, 1433find_mad_agent(struct ib_mad_port_private *port_priv,
1364 struct ib_mad *mad, 1434 struct ib_mad *mad)
1365 int solicited)
1366{ 1435{
1367 struct ib_mad_agent_private *mad_agent = NULL; 1436 struct ib_mad_agent_private *mad_agent = NULL;
1368 unsigned long flags; 1437 unsigned long flags;
1369 1438
1370 spin_lock_irqsave(&port_priv->reg_lock, flags); 1439 spin_lock_irqsave(&port_priv->reg_lock, flags);
1371 1440 if (response_mad(mad)) {
1372 /*
1373 * Whether MAD was solicited determines type of routing to
1374 * MAD client.
1375 */
1376 if (solicited) {
1377 u32 hi_tid; 1441 u32 hi_tid;
1378 struct ib_mad_agent_private *entry; 1442 struct ib_mad_agent_private *entry;
1379 1443
@@ -1477,21 +1541,20 @@ out:
1477 return valid; 1541 return valid;
1478} 1542}
1479 1543
1480/* 1544static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv,
1481 * Return start of fully reassembled MAD, or NULL, if MAD isn't assembled yet 1545 struct ib_mad_hdr *mad_hdr)
1482 */
1483static struct ib_mad_private *
1484reassemble_recv(struct ib_mad_agent_private *mad_agent_priv,
1485 struct ib_mad_private *recv)
1486{ 1546{
1487 /* Until we have RMPP, all receives are reassembled!... */ 1547 struct ib_rmpp_mad *rmpp_mad;
1488 INIT_LIST_HEAD(&recv->header.recv_wc.recv_buf.list); 1548
1489 return recv; 1549 rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
1550 return !mad_agent_priv->agent.rmpp_version ||
1551 !(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
1552 IB_MGMT_RMPP_FLAG_ACTIVE) ||
1553 (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
1490} 1554}
1491 1555
1492static struct ib_mad_send_wr_private* 1556struct ib_mad_send_wr_private*
1493find_send_req(struct ib_mad_agent_private *mad_agent_priv, 1557ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, u64 tid)
1494 u64 tid)
1495{ 1558{
1496 struct ib_mad_send_wr_private *mad_send_wr; 1559 struct ib_mad_send_wr_private *mad_send_wr;
1497 1560
@@ -1507,7 +1570,9 @@ find_send_req(struct ib_mad_agent_private *mad_agent_priv,
1507 */ 1570 */
1508 list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, 1571 list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
1509 agent_list) { 1572 agent_list) {
1510 if (mad_send_wr->tid == tid && mad_send_wr->timeout) { 1573 if (is_data_mad(mad_agent_priv,
1574 mad_send_wr->send_wr.wr.ud.mad_hdr) &&
1575 mad_send_wr->tid == tid && mad_send_wr->timeout) {
1511 /* Verify request has not been canceled */ 1576 /* Verify request has not been canceled */
1512 return (mad_send_wr->status == IB_WC_SUCCESS) ? 1577 return (mad_send_wr->status == IB_WC_SUCCESS) ?
1513 mad_send_wr : NULL; 1578 mad_send_wr : NULL;
@@ -1516,43 +1581,55 @@ find_send_req(struct ib_mad_agent_private *mad_agent_priv,
1516 return NULL; 1581 return NULL;
1517} 1582}
1518 1583
1584void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
1585{
1586 mad_send_wr->timeout = 0;
1587 if (mad_send_wr->refcount == 1) {
1588 list_del(&mad_send_wr->agent_list);
1589 list_add_tail(&mad_send_wr->agent_list,
1590 &mad_send_wr->mad_agent_priv->done_list);
1591 }
1592}
1593
1519static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, 1594static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1520 struct ib_mad_private *recv, 1595 struct ib_mad_recv_wc *mad_recv_wc)
1521 int solicited)
1522{ 1596{
1523 struct ib_mad_send_wr_private *mad_send_wr; 1597 struct ib_mad_send_wr_private *mad_send_wr;
1524 struct ib_mad_send_wc mad_send_wc; 1598 struct ib_mad_send_wc mad_send_wc;
1525 unsigned long flags; 1599 unsigned long flags;
1526 1600 u64 tid;
1527 /* Fully reassemble receive before processing */ 1601
1528 recv = reassemble_recv(mad_agent_priv, recv); 1602 INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
1529 if (!recv) { 1603 list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
1530 if (atomic_dec_and_test(&mad_agent_priv->refcount)) 1604 if (mad_agent_priv->agent.rmpp_version) {
1531 wake_up(&mad_agent_priv->wait); 1605 mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
1532 return; 1606 mad_recv_wc);
1607 if (!mad_recv_wc) {
1608 if (atomic_dec_and_test(&mad_agent_priv->refcount))
1609 wake_up(&mad_agent_priv->wait);
1610 return;
1611 }
1533 } 1612 }
1534 1613
1535 /* Complete corresponding request */ 1614 /* Complete corresponding request */
1536 if (solicited) { 1615 if (response_mad(mad_recv_wc->recv_buf.mad)) {
1616 tid = mad_recv_wc->recv_buf.mad->mad_hdr.tid;
1537 spin_lock_irqsave(&mad_agent_priv->lock, flags); 1617 spin_lock_irqsave(&mad_agent_priv->lock, flags);
1538 mad_send_wr = find_send_req(mad_agent_priv, 1618 mad_send_wr = ib_find_send_mad(mad_agent_priv, tid);
1539 recv->mad.mad.mad_hdr.tid);
1540 if (!mad_send_wr) { 1619 if (!mad_send_wr) {
1541 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 1620 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1542 ib_free_recv_mad(&recv->header.recv_wc); 1621 ib_free_recv_mad(mad_recv_wc);
1543 if (atomic_dec_and_test(&mad_agent_priv->refcount)) 1622 if (atomic_dec_and_test(&mad_agent_priv->refcount))
1544 wake_up(&mad_agent_priv->wait); 1623 wake_up(&mad_agent_priv->wait);
1545 return; 1624 return;
1546 } 1625 }
1547 /* Timeout = 0 means that we won't wait for a response */ 1626 ib_mark_mad_done(mad_send_wr);
1548 mad_send_wr->timeout = 0;
1549 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 1627 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1550 1628
1551 /* Defined behavior is to complete response before request */ 1629 /* Defined behavior is to complete response before request */
1552 recv->header.recv_wc.wc->wr_id = mad_send_wr->wr_id; 1630 mad_recv_wc->wc->wr_id = mad_send_wr->wr_id;
1553 mad_agent_priv->agent.recv_handler( 1631 mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1554 &mad_agent_priv->agent, 1632 mad_recv_wc);
1555 &recv->header.recv_wc);
1556 atomic_dec(&mad_agent_priv->refcount); 1633 atomic_dec(&mad_agent_priv->refcount);
1557 1634
1558 mad_send_wc.status = IB_WC_SUCCESS; 1635 mad_send_wc.status = IB_WC_SUCCESS;
@@ -1560,9 +1637,8 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1560 mad_send_wc.wr_id = mad_send_wr->wr_id; 1637 mad_send_wc.wr_id = mad_send_wr->wr_id;
1561 ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); 1638 ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
1562 } else { 1639 } else {
1563 mad_agent_priv->agent.recv_handler( 1640 mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1564 &mad_agent_priv->agent, 1641 mad_recv_wc);
1565 &recv->header.recv_wc);
1566 if (atomic_dec_and_test(&mad_agent_priv->refcount)) 1642 if (atomic_dec_and_test(&mad_agent_priv->refcount))
1567 wake_up(&mad_agent_priv->wait); 1643 wake_up(&mad_agent_priv->wait);
1568 } 1644 }
@@ -1576,7 +1652,6 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
1576 struct ib_mad_private *recv, *response; 1652 struct ib_mad_private *recv, *response;
1577 struct ib_mad_list_head *mad_list; 1653 struct ib_mad_list_head *mad_list;
1578 struct ib_mad_agent_private *mad_agent; 1654 struct ib_mad_agent_private *mad_agent;
1579 int solicited;
1580 1655
1581 response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); 1656 response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
1582 if (!response) 1657 if (!response)
@@ -1662,11 +1737,9 @@ local:
1662 } 1737 }
1663 } 1738 }
1664 1739
1665 /* Determine corresponding MAD agent for incoming receive MAD */ 1740 mad_agent = find_mad_agent(port_priv, &recv->mad.mad);
1666 solicited = solicited_mad(&recv->mad.mad);
1667 mad_agent = find_mad_agent(port_priv, &recv->mad.mad, solicited);
1668 if (mad_agent) { 1741 if (mad_agent) {
1669 ib_mad_complete_recv(mad_agent, recv, solicited); 1742 ib_mad_complete_recv(mad_agent, &recv->header.recv_wc);
1670 /* 1743 /*
1671 * recv is freed up in error cases in ib_mad_complete_recv 1744 * recv is freed up in error cases in ib_mad_complete_recv
1672 * or via recv_handler in ib_mad_complete_recv() 1745 * or via recv_handler in ib_mad_complete_recv()
@@ -1710,26 +1783,31 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
1710 } 1783 }
1711} 1784}
1712 1785
1713static void wait_for_response(struct ib_mad_agent_private *mad_agent_priv, 1786static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
1714 struct ib_mad_send_wr_private *mad_send_wr )
1715{ 1787{
1788 struct ib_mad_agent_private *mad_agent_priv;
1716 struct ib_mad_send_wr_private *temp_mad_send_wr; 1789 struct ib_mad_send_wr_private *temp_mad_send_wr;
1717 struct list_head *list_item; 1790 struct list_head *list_item;
1718 unsigned long delay; 1791 unsigned long delay;
1719 1792
1793 mad_agent_priv = mad_send_wr->mad_agent_priv;
1720 list_del(&mad_send_wr->agent_list); 1794 list_del(&mad_send_wr->agent_list);
1721 1795
1722 delay = mad_send_wr->timeout; 1796 delay = mad_send_wr->timeout;
1723 mad_send_wr->timeout += jiffies; 1797 mad_send_wr->timeout += jiffies;
1724 1798
1725 list_for_each_prev(list_item, &mad_agent_priv->wait_list) { 1799 if (delay) {
1726 temp_mad_send_wr = list_entry(list_item, 1800 list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
1727 struct ib_mad_send_wr_private, 1801 temp_mad_send_wr = list_entry(list_item,
1728 agent_list); 1802 struct ib_mad_send_wr_private,
1729 if (time_after(mad_send_wr->timeout, 1803 agent_list);
1730 temp_mad_send_wr->timeout)) 1804 if (time_after(mad_send_wr->timeout,
1731 break; 1805 temp_mad_send_wr->timeout))
1806 break;
1807 }
1732 } 1808 }
1809 else
1810 list_item = &mad_agent_priv->wait_list;
1733 list_add(&mad_send_wr->agent_list, list_item); 1811 list_add(&mad_send_wr->agent_list, list_item);
1734 1812
1735 /* Reschedule a work item if we have a shorter timeout */ 1813 /* Reschedule a work item if we have a shorter timeout */
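
wait_for_response() now keeps the wait list sorted by absolute expiry time and files zero-delay entries (sends that are already complete and only await release) at the head. A standalone sketch of the tail-first sorted insert, assuming a plain circular doubly linked list and plain integer deadlines (the kernel uses list_head plus time_after() so that jiffies wraparound is handled):

    #include <stdint.h>

    struct waiter {
            struct waiter *prev, *next; /* circular list; head is a sentinel */
            uint64_t deadline;          /* absolute expiry time */
    };

    /* Scan from the tail: a freshly armed timeout is usually the latest,
     * so the walk normally stops at once, the same reasoning as the
     * list_for_each_prev() loop above. */
    static void enqueue_waiter(struct waiter *head, struct waiter *w)
    {
            struct waiter *pos = head->prev;

            while (pos != head && pos->deadline > w->deadline)
                    pos = pos->prev;
            w->prev = pos;              /* insert w right after pos */
            w->next = pos->next;
            pos->next->prev = w;
            pos->next = w;
    }
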
@@ -1740,19 +1818,32 @@ static void wait_for_response(struct ib_mad_agent_private *mad_agent_priv,
1740 } 1818 }
1741} 1819}
1742 1820
1821void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
1822 int timeout_ms)
1823{
1824 mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
1825 wait_for_response(mad_send_wr);
1826}
1827
1743/* 1828/*
1744 * Process a send work completion 1829 * Process a send work completion
1745 */ 1830 */
1746static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, 1831void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
1747 struct ib_mad_send_wc *mad_send_wc) 1832 struct ib_mad_send_wc *mad_send_wc)
1748{ 1833{
1749 struct ib_mad_agent_private *mad_agent_priv; 1834 struct ib_mad_agent_private *mad_agent_priv;
1750 unsigned long flags; 1835 unsigned long flags;
1836 int ret;
1751 1837
1752 mad_agent_priv = container_of(mad_send_wr->agent, 1838 mad_agent_priv = mad_send_wr->mad_agent_priv;
1753 struct ib_mad_agent_private, agent);
1754
1755 spin_lock_irqsave(&mad_agent_priv->lock, flags); 1839 spin_lock_irqsave(&mad_agent_priv->lock, flags);
1840 if (mad_agent_priv->agent.rmpp_version) {
1841 ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
1842 if (ret == IB_RMPP_RESULT_CONSUMED)
1843 goto done;
1844 } else
1845 ret = IB_RMPP_RESULT_UNHANDLED;
1846
1756 if (mad_send_wc->status != IB_WC_SUCCESS && 1847 if (mad_send_wc->status != IB_WC_SUCCESS &&
1757 mad_send_wr->status == IB_WC_SUCCESS) { 1848 mad_send_wr->status == IB_WC_SUCCESS) {
1758 mad_send_wr->status = mad_send_wc->status; 1849 mad_send_wr->status = mad_send_wc->status;
@@ -1762,10 +1853,9 @@ static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
1762 if (--mad_send_wr->refcount > 0) { 1853 if (--mad_send_wr->refcount > 0) {
1763 if (mad_send_wr->refcount == 1 && mad_send_wr->timeout && 1854 if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
1764 mad_send_wr->status == IB_WC_SUCCESS) { 1855 mad_send_wr->status == IB_WC_SUCCESS) {
1765 wait_for_response(mad_agent_priv, mad_send_wr); 1856 wait_for_response(mad_send_wr);
1766 } 1857 }
1767 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 1858 goto done;
1768 return;
1769 } 1859 }
1770 1860
1771 /* Remove send from MAD agent and notify client of completion */ 1861 /* Remove send from MAD agent and notify client of completion */
@@ -1775,14 +1865,18 @@ static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
1775 1865
1776 if (mad_send_wr->status != IB_WC_SUCCESS ) 1866 if (mad_send_wr->status != IB_WC_SUCCESS )
1777 mad_send_wc->status = mad_send_wr->status; 1867 mad_send_wc->status = mad_send_wr->status;
1778 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, 1868 if (ret != IB_RMPP_RESULT_INTERNAL)
1779 mad_send_wc); 1869 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
1870 mad_send_wc);
1780 1871
1781 /* Release reference on agent taken when sending */ 1872 /* Release reference on agent taken when sending */
1782 if (atomic_dec_and_test(&mad_agent_priv->refcount)) 1873 if (atomic_dec_and_test(&mad_agent_priv->refcount))
1783 wake_up(&mad_agent_priv->wait); 1874 wake_up(&mad_agent_priv->wait);
1784 1875
1785 kfree(mad_send_wr); 1876 kfree(mad_send_wr);
1877 return;
1878done:
1879 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1786} 1880}
1787 1881
1788static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, 1882static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
@@ -1961,6 +2055,8 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
1961 2055
1962 /* Empty wait list to prevent receives from finding a request */ 2056 /* Empty wait list to prevent receives from finding a request */
1963 list_splice_init(&mad_agent_priv->wait_list, &cancel_list); 2057 list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
2058 /* Empty local completion list as well */
2059 list_splice_init(&mad_agent_priv->local_list, &cancel_list);
1964 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 2060 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1965 2061
1966 /* Report all cancelled requests */ 2062 /* Report all cancelled requests */
@@ -1980,8 +2076,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
1980} 2076}
1981 2077
1982static struct ib_mad_send_wr_private* 2078static struct ib_mad_send_wr_private*
1983find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, 2079find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, u64 wr_id)
1984 u64 wr_id)
1985{ 2080{
1986 struct ib_mad_send_wr_private *mad_send_wr; 2081 struct ib_mad_send_wr_private *mad_send_wr;
1987 2082
@@ -1993,79 +2088,50 @@ find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv,
1993 2088
1994 list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, 2089 list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
1995 agent_list) { 2090 agent_list) {
1996 if (mad_send_wr->wr_id == wr_id) 2091 if (is_data_mad(mad_agent_priv,
2092 mad_send_wr->send_wr.wr.ud.mad_hdr) &&
2093 mad_send_wr->wr_id == wr_id)
1997 return mad_send_wr; 2094 return mad_send_wr;
1998 } 2095 }
1999 return NULL; 2096 return NULL;
2000} 2097}
2001 2098
2002void cancel_sends(void *data) 2099int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms)
2003{
2004 struct ib_mad_agent_private *mad_agent_priv;
2005 struct ib_mad_send_wr_private *mad_send_wr;
2006 struct ib_mad_send_wc mad_send_wc;
2007 unsigned long flags;
2008
2009 mad_agent_priv = data;
2010
2011 mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
2012 mad_send_wc.vendor_err = 0;
2013
2014 spin_lock_irqsave(&mad_agent_priv->lock, flags);
2015 while (!list_empty(&mad_agent_priv->canceled_list)) {
2016 mad_send_wr = list_entry(mad_agent_priv->canceled_list.next,
2017 struct ib_mad_send_wr_private,
2018 agent_list);
2019
2020 list_del(&mad_send_wr->agent_list);
2021 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2022
2023 mad_send_wc.wr_id = mad_send_wr->wr_id;
2024 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2025 &mad_send_wc);
2026
2027 kfree(mad_send_wr);
2028 if (atomic_dec_and_test(&mad_agent_priv->refcount))
2029 wake_up(&mad_agent_priv->wait);
2030 spin_lock_irqsave(&mad_agent_priv->lock, flags);
2031 }
2032 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2033}
2034
2035void ib_cancel_mad(struct ib_mad_agent *mad_agent,
2036 u64 wr_id)
2037{ 2100{
2038 struct ib_mad_agent_private *mad_agent_priv; 2101 struct ib_mad_agent_private *mad_agent_priv;
2039 struct ib_mad_send_wr_private *mad_send_wr; 2102 struct ib_mad_send_wr_private *mad_send_wr;
2040 unsigned long flags; 2103 unsigned long flags;
2104 int active;
2041 2105
2042 mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, 2106 mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
2043 agent); 2107 agent);
2044 spin_lock_irqsave(&mad_agent_priv->lock, flags); 2108 spin_lock_irqsave(&mad_agent_priv->lock, flags);
2045 mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id); 2109 mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id);
2046 if (!mad_send_wr) { 2110 if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
2047 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 2111 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2048 goto out; 2112 return -EINVAL;
2049 } 2113 }
2050 2114
2051 if (mad_send_wr->status == IB_WC_SUCCESS) 2115 active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1);
2052 mad_send_wr->refcount -= (mad_send_wr->timeout > 0); 2116 if (!timeout_ms) {
2053
2054 if (mad_send_wr->refcount != 0) {
2055 mad_send_wr->status = IB_WC_WR_FLUSH_ERR; 2117 mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2056 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 2118 mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2057 goto out;
2058 } 2119 }
2059 2120
2060 list_del(&mad_send_wr->agent_list); 2121 mad_send_wr->send_wr.wr.ud.timeout_ms = timeout_ms;
2061 list_add_tail(&mad_send_wr->agent_list, &mad_agent_priv->canceled_list); 2122 if (active)
2062 adjust_timeout(mad_agent_priv); 2123 mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2124 else
2125 ib_reset_mad_timeout(mad_send_wr, timeout_ms);
2126
2063 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 2127 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2128 return 0;
2129}
2130EXPORT_SYMBOL(ib_modify_mad);
2064 2131
2065 queue_work(mad_agent_priv->qp_info->port_priv->wq, 2132void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id)
2066 &mad_agent_priv->canceled_work); 2133{
2067out: 2134 ib_modify_mad(mad_agent, wr_id, 0);
2068 return;
2069} 2135}
2070EXPORT_SYMBOL(ib_cancel_mad); 2136EXPORT_SYMBOL(ib_cancel_mad);
2071 2137
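
With this hunk, cancellation becomes a special case of modification: ib_modify_mad() adjusts the response timeout of a live send, and a timeout of zero flushes it, so the client's send_handler sees IB_WC_WR_FLUSH_ERR through the ordinary completion path instead of a separate canceled-list work item. A hedged client-side sketch (my_agent and my_wr_id are placeholders):

    /* Give an outstanding request five more seconds to complete... */
    if (ib_modify_mad(my_agent, my_wr_id, 5000))
            printk(KERN_WARNING "send already completed or errored\n");

    /* ...or abort it; the send_handler then reports IB_WC_WR_FLUSH_ERR. */
    ib_cancel_mad(my_agent, my_wr_id);
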
@@ -2075,6 +2141,7 @@ static void local_completions(void *data)
2075 struct ib_mad_local_private *local; 2141 struct ib_mad_local_private *local;
2076 struct ib_mad_agent_private *recv_mad_agent; 2142 struct ib_mad_agent_private *recv_mad_agent;
2077 unsigned long flags; 2143 unsigned long flags;
2144 int recv = 0;
2078 struct ib_wc wc; 2145 struct ib_wc wc;
2079 struct ib_mad_send_wc mad_send_wc; 2146 struct ib_mad_send_wc mad_send_wc;
2080 2147
@@ -2090,10 +2157,10 @@ static void local_completions(void *data)
2090 recv_mad_agent = local->recv_mad_agent; 2157 recv_mad_agent = local->recv_mad_agent;
2091 if (!recv_mad_agent) { 2158 if (!recv_mad_agent) {
2092 printk(KERN_ERR PFX "No receive MAD agent for local completion\n"); 2159 printk(KERN_ERR PFX "No receive MAD agent for local completion\n");
2093 kmem_cache_free(ib_mad_cache, local->mad_priv);
2094 goto local_send_completion; 2160 goto local_send_completion;
2095 } 2161 }
2096 2162
2163 recv = 1;
2097 /* 2164 /*
2098 * Defined behavior is to complete response 2165 * Defined behavior is to complete response
2099 * before request 2166 * before request
@@ -2105,7 +2172,9 @@ static void local_completions(void *data)
2105 local->mad_priv->header.recv_wc.wc = &wc; 2172 local->mad_priv->header.recv_wc.wc = &wc;
2106 local->mad_priv->header.recv_wc.mad_len = 2173 local->mad_priv->header.recv_wc.mad_len =
2107 sizeof(struct ib_mad); 2174 sizeof(struct ib_mad);
2108 INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.recv_buf.list); 2175 INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list);
2176 list_add(&local->mad_priv->header.recv_wc.recv_buf.list,
2177 &local->mad_priv->header.recv_wc.rmpp_list);
2109 local->mad_priv->header.recv_wc.recv_buf.grh = NULL; 2178 local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
2110 local->mad_priv->header.recv_wc.recv_buf.mad = 2179 local->mad_priv->header.recv_wc.recv_buf.mad =
2111 &local->mad_priv->mad.mad; 2180 &local->mad_priv->mad.mad;
@@ -2136,11 +2205,47 @@ local_send_completion:
2136 spin_lock_irqsave(&mad_agent_priv->lock, flags); 2205 spin_lock_irqsave(&mad_agent_priv->lock, flags);
2137 list_del(&local->completion_list); 2206 list_del(&local->completion_list);
2138 atomic_dec(&mad_agent_priv->refcount); 2207 atomic_dec(&mad_agent_priv->refcount);
2208 if (!recv)
2209 kmem_cache_free(ib_mad_cache, local->mad_priv);
2139 kfree(local); 2210 kfree(local);
2140 } 2211 }
2141 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 2212 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2142} 2213}
2143 2214
2215static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
2216{
2217 int ret;
2218
2219 if (!mad_send_wr->retries--)
2220 return -ETIMEDOUT;
2221
2222 mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_wr.
2223 wr.ud.timeout_ms);
2224
2225 if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
2226 ret = ib_retry_rmpp(mad_send_wr);
2227 switch (ret) {
2228 case IB_RMPP_RESULT_UNHANDLED:
2229 ret = ib_send_mad(mad_send_wr);
2230 break;
2231 case IB_RMPP_RESULT_CONSUMED:
2232 ret = 0;
2233 break;
2234 default:
2235 ret = -ECOMM;
2236 break;
2237 }
2238 } else
2239 ret = ib_send_mad(mad_send_wr);
2240
2241 if (!ret) {
2242 mad_send_wr->refcount++;
2243 list_add_tail(&mad_send_wr->agent_list,
2244 &mad_send_wr->mad_agent_priv->send_list);
2245 }
2246 return ret;
2247}
2248
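
retry_send() gives timed-out requests a second life: it burns one of the client-supplied retries, re-arms the timeout, and resends either directly or through ib_retry_rmpp(); only when the budget is exhausted does timeout_sends() below report IB_WC_RESP_TIMEOUT_ERR. The budget and per-try wait travel in the work request itself. A sketch of a sender filling them in (field names as used elsewhere in this patch; the values and surrounding identifiers are illustrative):

    struct ib_send_wr wr = {
            .wr_id      = my_wr_id,
            .sg_list    = &my_sge,
            .num_sge    = 1,
            .opcode     = IB_WR_SEND,
            .send_flags = IB_SEND_SIGNALED,
    };

    wr.wr.ud.mad_hdr     = &mad->mad_hdr;
    wr.wr.ud.ah          = my_ah;           /* address handle for the peer */
    wr.wr.ud.remote_qpn  = 1;               /* GSI */
    wr.wr.ud.remote_qkey = IB_QP1_QKEY;
    wr.wr.ud.timeout_ms  = 100;             /* wait per attempt */
    wr.wr.ud.retries     = 3;               /* extra attempts before -ETIMEDOUT */
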
2144static void timeout_sends(void *data) 2249static void timeout_sends(void *data)
2145{ 2250{
2146 struct ib_mad_agent_private *mad_agent_priv; 2251 struct ib_mad_agent_private *mad_agent_priv;
@@ -2149,8 +2254,6 @@ static void timeout_sends(void *data)
2149 unsigned long flags, delay; 2254 unsigned long flags, delay;
2150 2255
2151 mad_agent_priv = (struct ib_mad_agent_private *)data; 2256 mad_agent_priv = (struct ib_mad_agent_private *)data;
2152
2153 mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
2154 mad_send_wc.vendor_err = 0; 2257 mad_send_wc.vendor_err = 0;
2155 2258
2156 spin_lock_irqsave(&mad_agent_priv->lock, flags); 2259 spin_lock_irqsave(&mad_agent_priv->lock, flags);
@@ -2170,8 +2273,16 @@ static void timeout_sends(void *data)
2170 } 2273 }
2171 2274
2172 list_del(&mad_send_wr->agent_list); 2275 list_del(&mad_send_wr->agent_list);
2276 if (mad_send_wr->status == IB_WC_SUCCESS &&
2277 !retry_send(mad_send_wr))
2278 continue;
2279
2173 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 2280 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2174 2281
2282 if (mad_send_wr->status == IB_WC_SUCCESS)
2283 mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
2284 else
2285 mad_send_wc.status = mad_send_wr->status;
2175 mad_send_wc.wr_id = mad_send_wr->wr_id; 2286 mad_send_wc.wr_id = mad_send_wr->wr_id;
2176 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, 2287 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2177 &mad_send_wc); 2288 &mad_send_wc);
@@ -2447,14 +2558,6 @@ static int ib_mad_port_open(struct ib_device *device,
2447 unsigned long flags; 2558 unsigned long flags;
2448 char name[sizeof "ib_mad123"]; 2559 char name[sizeof "ib_mad123"];
2449 2560
2450 /* First, check if port already open at MAD layer */
2451 port_priv = ib_get_mad_port(device, port_num);
2452 if (port_priv) {
2453 printk(KERN_DEBUG PFX "%s port %d already open\n",
2454 device->name, port_num);
2455 return 0;
2456 }
2457
2458 /* Create new device info */ 2561 /* Create new device info */
2459 port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL); 2562 port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL);
2460 if (!port_priv) { 2563 if (!port_priv) {
@@ -2579,7 +2682,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
2579 2682
2580static void ib_mad_init_device(struct ib_device *device) 2683static void ib_mad_init_device(struct ib_device *device)
2581{ 2684{
2582 int ret, num_ports, cur_port, i, ret2; 2685 int num_ports, cur_port, i;
2583 2686
2584 if (device->node_type == IB_NODE_SWITCH) { 2687 if (device->node_type == IB_NODE_SWITCH) {
2585 num_ports = 1; 2688 num_ports = 1;
@@ -2589,47 +2692,37 @@ static void ib_mad_init_device(struct ib_device *device)
2589 cur_port = 1; 2692 cur_port = 1;
2590 } 2693 }
2591 for (i = 0; i < num_ports; i++, cur_port++) { 2694 for (i = 0; i < num_ports; i++, cur_port++) {
2592 ret = ib_mad_port_open(device, cur_port); 2695 if (ib_mad_port_open(device, cur_port)) {
2593 if (ret) {
2594 printk(KERN_ERR PFX "Couldn't open %s port %d\n", 2696 printk(KERN_ERR PFX "Couldn't open %s port %d\n",
2595 device->name, cur_port); 2697 device->name, cur_port);
2596 goto error_device_open; 2698 goto error_device_open;
2597 } 2699 }
2598 ret = ib_agent_port_open(device, cur_port); 2700 if (ib_agent_port_open(device, cur_port)) {
2599 if (ret) {
2600 printk(KERN_ERR PFX "Couldn't open %s port %d " 2701 printk(KERN_ERR PFX "Couldn't open %s port %d "
2601 "for agents\n", 2702 "for agents\n",
2602 device->name, cur_port); 2703 device->name, cur_port);
2603 goto error_device_open; 2704 goto error_device_open;
2604 } 2705 }
2605 } 2706 }
2606 2707 return;
2607 goto error_device_query;
2608 2708
2609error_device_open: 2709error_device_open:
2610 while (i > 0) { 2710 while (i > 0) {
2611 cur_port--; 2711 cur_port--;
2612 ret2 = ib_agent_port_close(device, cur_port); 2712 if (ib_agent_port_close(device, cur_port))
2613 if (ret2) {
2614 printk(KERN_ERR PFX "Couldn't close %s port %d " 2713 printk(KERN_ERR PFX "Couldn't close %s port %d "
2615 "for agents\n", 2714 "for agents\n",
2616 device->name, cur_port); 2715 device->name, cur_port);
2617 } 2716 if (ib_mad_port_close(device, cur_port))
2618 ret2 = ib_mad_port_close(device, cur_port);
2619 if (ret2) {
2620 printk(KERN_ERR PFX "Couldn't close %s port %d\n", 2717 printk(KERN_ERR PFX "Couldn't close %s port %d\n",
2621 device->name, cur_port); 2718 device->name, cur_port);
2622 }
2623 i--; 2719 i--;
2624 } 2720 }
2625
2626error_device_query:
2627 return;
2628} 2721}
2629 2722
2630static void ib_mad_remove_device(struct ib_device *device) 2723static void ib_mad_remove_device(struct ib_device *device)
2631{ 2724{
2632 int ret = 0, i, num_ports, cur_port, ret2; 2725 int i, num_ports, cur_port;
2633 2726
2634 if (device->node_type == IB_NODE_SWITCH) { 2727 if (device->node_type == IB_NODE_SWITCH) {
2635 num_ports = 1; 2728 num_ports = 1;
@@ -2639,21 +2732,13 @@ static void ib_mad_remove_device(struct ib_device *device)
2639 cur_port = 1; 2732 cur_port = 1;
2640 } 2733 }
2641 for (i = 0; i < num_ports; i++, cur_port++) { 2734 for (i = 0; i < num_ports; i++, cur_port++) {
2642 ret2 = ib_agent_port_close(device, cur_port); 2735 if (ib_agent_port_close(device, cur_port))
2643 if (ret2) {
2644 printk(KERN_ERR PFX "Couldn't close %s port %d " 2736 printk(KERN_ERR PFX "Couldn't close %s port %d "
2645 "for agents\n", 2737 "for agents\n",
2646 device->name, cur_port); 2738 device->name, cur_port);
2647 if (!ret) 2739 if (ib_mad_port_close(device, cur_port))
2648 ret = ret2;
2649 }
2650 ret2 = ib_mad_port_close(device, cur_port);
2651 if (ret2) {
2652 printk(KERN_ERR PFX "Couldn't close %s port %d\n", 2740 printk(KERN_ERR PFX "Couldn't close %s port %d\n",
2653 device->name, cur_port); 2741 device->name, cur_port);
2654 if (!ret)
2655 ret = ret2;
2656 }
2657 } 2742 }
2658} 2743}
2659 2744
@@ -2709,3 +2794,4 @@ static void __exit ib_mad_cleanup_module(void)
2709 2794
2710module_init(ib_mad_init_module); 2795module_init(ib_mad_init_module);
2711module_exit(ib_mad_cleanup_module); 2796module_exit(ib_mad_cleanup_module);
2797
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 008cbcb94b15..568da10b05ab 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -1,5 +1,7 @@
1/* 1/*
2 * Copyright (c) 2004, 2005, Voltaire, Inc. All rights reserved. 2 * Copyright (c) 2004, 2005, Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2005 Intel Corporation. All rights reserved.
4 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
3 * 5 *
4 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 7 * licenses. You may choose to be licensed under the terms of the GNU
@@ -29,7 +31,7 @@
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 32 * SOFTWARE.
31 * 33 *
32 * $Id: mad_priv.h 1389 2004-12-27 22:56:47Z roland $ 34 * $Id: mad_priv.h 2730 2005-06-28 16:43:03Z sean.hefty $
33 */ 35 */
34 36
35#ifndef __IB_MAD_PRIV_H__ 37#ifndef __IB_MAD_PRIV_H__
@@ -92,16 +94,15 @@ struct ib_mad_agent_private {
92 spinlock_t lock; 94 spinlock_t lock;
93 struct list_head send_list; 95 struct list_head send_list;
94 struct list_head wait_list; 96 struct list_head wait_list;
97 struct list_head done_list;
95 struct work_struct timed_work; 98 struct work_struct timed_work;
96 unsigned long timeout; 99 unsigned long timeout;
97 struct list_head local_list; 100 struct list_head local_list;
98 struct work_struct local_work; 101 struct work_struct local_work;
99 struct list_head canceled_list; 102 struct list_head rmpp_list;
100 struct work_struct canceled_work;
101 103
102 atomic_t refcount; 104 atomic_t refcount;
103 wait_queue_head_t wait; 105 wait_queue_head_t wait;
104 u8 rmpp_version;
105}; 106};
106 107
107struct ib_mad_snoop_private { 108struct ib_mad_snoop_private {
@@ -116,15 +117,24 @@ struct ib_mad_snoop_private {
116struct ib_mad_send_wr_private { 117struct ib_mad_send_wr_private {
117 struct ib_mad_list_head mad_list; 118 struct ib_mad_list_head mad_list;
118 struct list_head agent_list; 119 struct list_head agent_list;
119 struct ib_mad_agent *agent; 120 struct ib_mad_agent_private *mad_agent_priv;
120 struct ib_send_wr send_wr; 121 struct ib_send_wr send_wr;
121 struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; 122 struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
122 u64 wr_id; /* client WR ID */ 123 u64 wr_id; /* client WR ID */
123 u64 tid; 124 u64 tid;
124 unsigned long timeout; 125 unsigned long timeout;
126 int retries;
125 int retry; 127 int retry;
126 int refcount; 128 int refcount;
127 enum ib_wc_status status; 129 enum ib_wc_status status;
130
131 /* RMPP control */
132 int last_ack;
133 int seg_num;
134 int newwin;
135 int total_seg;
136 int data_offset;
137 int pad;
128}; 138};
129 139
130struct ib_mad_local_private { 140struct ib_mad_local_private {
@@ -197,4 +207,17 @@ struct ib_mad_port_private {
197 207
198extern kmem_cache_t *ib_mad_cache; 208extern kmem_cache_t *ib_mad_cache;
199 209
210int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr);
211
212struct ib_mad_send_wr_private *
213ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, u64 tid);
214
215void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
216 struct ib_mad_send_wc *mad_send_wc);
217
218void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr);
219
220void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
221 int timeout_ms);
222
200#endif /* __IB_MAD_PRIV_H__ */ 223#endif /* __IB_MAD_PRIV_H__ */
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
new file mode 100644
index 000000000000..8f1eb80e421f
--- /dev/null
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -0,0 +1,765 @@
1/*
2 * Copyright (c) 2005 Intel Inc. All rights reserved.
3 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $
34 */
35
36#include <linux/dma-mapping.h>
37
38#include "mad_priv.h"
39#include "mad_rmpp.h"
40
41enum rmpp_state {
42 RMPP_STATE_ACTIVE,
43 RMPP_STATE_TIMEOUT,
44 RMPP_STATE_COMPLETE
45};
46
47struct mad_rmpp_recv {
48 struct ib_mad_agent_private *agent;
49 struct list_head list;
50 struct work_struct timeout_work;
51 struct work_struct cleanup_work;
52 wait_queue_head_t wait;
53 enum rmpp_state state;
54 spinlock_t lock;
55 atomic_t refcount;
56
57 struct ib_ah *ah;
58 struct ib_mad_recv_wc *rmpp_wc;
59 struct ib_mad_recv_buf *cur_seg_buf;
60 int last_ack;
61 int seg_num;
62 int newwin;
63
64 u64 tid;
65 u32 src_qp;
66 u16 slid;
67 u8 mgmt_class;
68 u8 class_version;
69 u8 method;
70};
71
72static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
73{
74 atomic_dec(&rmpp_recv->refcount);
75 wait_event(rmpp_recv->wait, !atomic_read(&rmpp_recv->refcount));
76 ib_destroy_ah(rmpp_recv->ah);
77 kfree(rmpp_recv);
78}
79
80void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
81{
82 struct mad_rmpp_recv *rmpp_recv, *temp_rmpp_recv;
83 unsigned long flags;
84
85 spin_lock_irqsave(&agent->lock, flags);
86 list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
87 cancel_delayed_work(&rmpp_recv->timeout_work);
88 cancel_delayed_work(&rmpp_recv->cleanup_work);
89 }
90 spin_unlock_irqrestore(&agent->lock, flags);
91
92 flush_workqueue(agent->qp_info->port_priv->wq);
93
94 list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv,
95 &agent->rmpp_list, list) {
96 list_del(&rmpp_recv->list);
97 if (rmpp_recv->state != RMPP_STATE_COMPLETE)
98 ib_free_recv_mad(rmpp_recv->rmpp_wc);
99 destroy_rmpp_recv(rmpp_recv);
100 }
101}
102
103static void recv_timeout_handler(void *data)
104{
105 struct mad_rmpp_recv *rmpp_recv = data;
106 struct ib_mad_recv_wc *rmpp_wc;
107 unsigned long flags;
108
109 spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
110 if (rmpp_recv->state != RMPP_STATE_ACTIVE) {
111 spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
112 return;
113 }
114 rmpp_recv->state = RMPP_STATE_TIMEOUT;
115 list_del(&rmpp_recv->list);
116 spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
117
118 /* TODO: send abort. */
119 rmpp_wc = rmpp_recv->rmpp_wc;
120 destroy_rmpp_recv(rmpp_recv);
121 ib_free_recv_mad(rmpp_wc);
122}
123
124static void recv_cleanup_handler(void *data)
125{
126 struct mad_rmpp_recv *rmpp_recv = data;
127 unsigned long flags;
128
129 spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
130 list_del(&rmpp_recv->list);
131 spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
132 destroy_rmpp_recv(rmpp_recv);
133}
134
135static struct mad_rmpp_recv *
136create_rmpp_recv(struct ib_mad_agent_private *agent,
137 struct ib_mad_recv_wc *mad_recv_wc)
138{
139 struct mad_rmpp_recv *rmpp_recv;
140 struct ib_mad_hdr *mad_hdr;
141
142 rmpp_recv = kmalloc(sizeof *rmpp_recv, GFP_KERNEL);
143 if (!rmpp_recv)
144 return NULL;
145
146 rmpp_recv->ah = ib_create_ah_from_wc(agent->agent.qp->pd,
147 mad_recv_wc->wc,
148 mad_recv_wc->recv_buf.grh,
149 agent->agent.port_num);
150 if (IS_ERR(rmpp_recv->ah))
151 goto error;
152
153 rmpp_recv->agent = agent;
154 init_waitqueue_head(&rmpp_recv->wait);
155 INIT_WORK(&rmpp_recv->timeout_work, recv_timeout_handler, rmpp_recv);
156 INIT_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler, rmpp_recv);
157 spin_lock_init(&rmpp_recv->lock);
158 rmpp_recv->state = RMPP_STATE_ACTIVE;
159 atomic_set(&rmpp_recv->refcount, 1);
160
161 rmpp_recv->rmpp_wc = mad_recv_wc;
162 rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf;
163 rmpp_recv->newwin = 1;
164 rmpp_recv->seg_num = 1;
165 rmpp_recv->last_ack = 0;
166
167 mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;
168 rmpp_recv->tid = mad_hdr->tid;
169 rmpp_recv->src_qp = mad_recv_wc->wc->src_qp;
170 rmpp_recv->slid = mad_recv_wc->wc->slid;
171 rmpp_recv->mgmt_class = mad_hdr->mgmt_class;
172 rmpp_recv->class_version = mad_hdr->class_version;
173 rmpp_recv->method = mad_hdr->method;
174 return rmpp_recv;
175
176error: kfree(rmpp_recv);
177 return NULL;
178}
179
180static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
181{
182 if (atomic_dec_and_test(&rmpp_recv->refcount))
183 wake_up(&rmpp_recv->wait);
184}
185
186static struct mad_rmpp_recv *
187find_rmpp_recv(struct ib_mad_agent_private *agent,
188 struct ib_mad_recv_wc *mad_recv_wc)
189{
190 struct mad_rmpp_recv *rmpp_recv;
191 struct ib_mad_hdr *mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;
192
193 list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
194 if (rmpp_recv->tid == mad_hdr->tid &&
195 rmpp_recv->src_qp == mad_recv_wc->wc->src_qp &&
196 rmpp_recv->slid == mad_recv_wc->wc->slid &&
197 rmpp_recv->mgmt_class == mad_hdr->mgmt_class &&
198 rmpp_recv->class_version == mad_hdr->class_version &&
199 rmpp_recv->method == mad_hdr->method)
200 return rmpp_recv;
201 }
202 return NULL;
203}
204
205static struct mad_rmpp_recv *
206acquire_rmpp_recv(struct ib_mad_agent_private *agent,
207 struct ib_mad_recv_wc *mad_recv_wc)
208{
209 struct mad_rmpp_recv *rmpp_recv;
210 unsigned long flags;
211
212 spin_lock_irqsave(&agent->lock, flags);
213 rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
214 if (rmpp_recv)
215 atomic_inc(&rmpp_recv->refcount);
216 spin_unlock_irqrestore(&agent->lock, flags);
217 return rmpp_recv;
218}
219
220static struct mad_rmpp_recv *
221insert_rmpp_recv(struct ib_mad_agent_private *agent,
222 struct mad_rmpp_recv *rmpp_recv)
223{
224 struct mad_rmpp_recv *cur_rmpp_recv;
225
226 cur_rmpp_recv = find_rmpp_recv(agent, rmpp_recv->rmpp_wc);
227 if (!cur_rmpp_recv)
228 list_add_tail(&rmpp_recv->list, &agent->rmpp_list);
229
230 return cur_rmpp_recv;
231}
232
233static int data_offset(u8 mgmt_class)
234{
235 if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
236 return offsetof(struct ib_sa_mad, data);
237 else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
238 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
239 return offsetof(struct ib_vendor_mad, data);
240 else
241 return offsetof(struct ib_rmpp_mad, data);
242}
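
data_offset() pins down where RMPP payload begins for each management class. With the header layouts in this tree's include files that works out to 56 bytes for SA MADs (24-byte MAD header, 12-byte RMPP header, 20-byte SA header), 40 bytes for the vendor range-2 classes, and 36 bytes otherwise, leaving 200, 216, or 220 payload bytes in each 256-byte segment. The same arithmetic with the offsets spelled out as constants (derived values, so treat them as assumptions):

    /* Payload bytes carried by one 256-byte RMPP segment, by class. */
    enum { MAD_BLOCK_SIZE = 256 };

    static int seg_payload(unsigned char mgmt_class)
    {
            int offset;

            if (mgmt_class == 0x03)         /* IB_MGMT_CLASS_SUBN_ADM */
                    offset = 56;
            else if (mgmt_class >= 0x30 && mgmt_class <= 0x4f)
                    offset = 40;            /* vendor range 2 */
            else
                    offset = 36;
            return MAD_BLOCK_SIZE - offset; /* 200, 216 or 220 */
    }
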
243
244static void format_ack(struct ib_rmpp_mad *ack,
245 struct ib_rmpp_mad *data,
246 struct mad_rmpp_recv *rmpp_recv)
247{
248 unsigned long flags;
249
250 memcpy(&ack->mad_hdr, &data->mad_hdr,
251 data_offset(data->mad_hdr.mgmt_class));
252
253 ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
254 ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK;
255 ib_set_rmpp_flags(&ack->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
256
257 spin_lock_irqsave(&rmpp_recv->lock, flags);
258 rmpp_recv->last_ack = rmpp_recv->seg_num;
259 ack->rmpp_hdr.seg_num = cpu_to_be32(rmpp_recv->seg_num);
260 ack->rmpp_hdr.paylen_newwin = cpu_to_be32(rmpp_recv->newwin);
261 spin_unlock_irqrestore(&rmpp_recv->lock, flags);
262}
263
264static void ack_recv(struct mad_rmpp_recv *rmpp_recv,
265 struct ib_mad_recv_wc *recv_wc)
266{
267 struct ib_mad_send_buf *msg;
268 struct ib_send_wr *bad_send_wr;
269 int hdr_len, ret;
270
271 hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr);
272 msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp,
273 recv_wc->wc->pkey_index, rmpp_recv->ah, 1,
274 hdr_len, sizeof(struct ib_rmpp_mad) - hdr_len,
275 GFP_KERNEL);
276 if (!msg)
277 return;
278
279 format_ack((struct ib_rmpp_mad *) msg->mad,
280 (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv);
281 ret = ib_post_send_mad(&rmpp_recv->agent->agent, &msg->send_wr,
282 &bad_send_wr);
283 if (ret)
284 ib_free_send_mad(msg);
285}
286
287static inline int get_last_flag(struct ib_mad_recv_buf *seg)
288{
289 struct ib_rmpp_mad *rmpp_mad;
290
291 rmpp_mad = (struct ib_rmpp_mad *) seg->mad;
292 return ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_LAST;
293}
294
295static inline int get_seg_num(struct ib_mad_recv_buf *seg)
296{
297 struct ib_rmpp_mad *rmpp_mad;
298
299 rmpp_mad = (struct ib_rmpp_mad *) seg->mad;
300 return be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num);
301}
302
303static inline struct ib_mad_recv_buf * get_next_seg(struct list_head *rmpp_list,
304 struct ib_mad_recv_buf *seg)
305{
306 if (seg->list.next == rmpp_list)
307 return NULL;
308
309 return container_of(seg->list.next, struct ib_mad_recv_buf, list);
310}
311
312static inline int window_size(struct ib_mad_agent_private *agent)
313{
314 return max(agent->qp_info->recv_queue.max_active >> 3, 1);
315}
316
317static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list,
318 int seg_num)
319{
320 struct ib_mad_recv_buf *seg_buf;
321 int cur_seg_num;
322
323 list_for_each_entry_reverse(seg_buf, rmpp_list, list) {
324 cur_seg_num = get_seg_num(seg_buf);
325 if (seg_num > cur_seg_num)
326 return seg_buf;
327 if (seg_num == cur_seg_num)
328 break;
329 }
330 return NULL;
331}
332
333static void update_seg_num(struct mad_rmpp_recv *rmpp_recv,
334 struct ib_mad_recv_buf *new_buf)
335{
336 struct list_head *rmpp_list = &rmpp_recv->rmpp_wc->rmpp_list;
337
338 while (new_buf && (get_seg_num(new_buf) == rmpp_recv->seg_num + 1)) {
339 rmpp_recv->cur_seg_buf = new_buf;
340 rmpp_recv->seg_num++;
341 new_buf = get_next_seg(rmpp_list, new_buf);
342 }
343}
344
345static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv)
346{
347 struct ib_rmpp_mad *rmpp_mad;
348 int hdr_size, data_size, pad;
349
350 rmpp_mad = (struct ib_rmpp_mad *)rmpp_recv->cur_seg_buf->mad;
351
352 hdr_size = data_offset(rmpp_mad->mad_hdr.mgmt_class);
353 data_size = sizeof(struct ib_rmpp_mad) - hdr_size;
354 pad = data_size - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
355 if (pad > data_size || pad < 0)
356 pad = 0;
357
358 return hdr_size + rmpp_recv->seg_num * data_size - pad;
359}
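
get_mad_len() recovers the total transfer size from the final segment's paylen_newwin field. A worked instance for an SA-class transfer that arrived in three segments (all numbers illustrative):

    int hdr_size  = 56;                 /* SA data offset, per data_offset() */
    int data_size = 256 - hdr_size;     /* 200 payload bytes per segment */
    int pad       = data_size - 150;    /* final paylen_newwin of 150 gives 50 */
    int mad_len   = hdr_size + 3 * data_size - pad;   /* 56 + 600 - 50 = 606 */
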
360
361static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv)
362{
363 struct ib_mad_recv_wc *rmpp_wc;
364
365 ack_recv(rmpp_recv, rmpp_recv->rmpp_wc);
366 if (rmpp_recv->seg_num > 1)
367 cancel_delayed_work(&rmpp_recv->timeout_work);
368
369 rmpp_wc = rmpp_recv->rmpp_wc;
370 rmpp_wc->mad_len = get_mad_len(rmpp_recv);
371 /* 10 seconds until we can find the packet lifetime */
372 queue_delayed_work(rmpp_recv->agent->qp_info->port_priv->wq,
373 &rmpp_recv->cleanup_work, msecs_to_jiffies(10000));
374 return rmpp_wc;
375}
376
377void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf)
378{
379 struct ib_mad_recv_buf *seg_buf;
380 struct ib_rmpp_mad *rmpp_mad;
381 void *data;
382 int size, len, offset;
383 u8 flags;
384
385 len = mad_recv_wc->mad_len;
386 if (len <= sizeof(struct ib_mad)) {
387 memcpy(buf, mad_recv_wc->recv_buf.mad, len);
388 return;
389 }
390
391 offset = data_offset(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
392
393 list_for_each_entry(seg_buf, &mad_recv_wc->rmpp_list, list) {
394 rmpp_mad = (struct ib_rmpp_mad *)seg_buf->mad;
395 flags = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr);
396
397 if (flags & IB_MGMT_RMPP_FLAG_FIRST) {
398 data = rmpp_mad;
399 size = sizeof(*rmpp_mad);
400 } else {
401 data = (void *) rmpp_mad + offset;
402 if (flags & IB_MGMT_RMPP_FLAG_LAST)
403 size = len;
404 else
405 size = sizeof(*rmpp_mad) - offset;
406 }
407
408 memcpy(buf, data, size);
409 len -= size;
410 buf += size;
411 }
412}
413EXPORT_SYMBOL(ib_coalesce_recv_mad);
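
A reassembled transfer reaches the client as a chain of 256-byte buffers on rmpp_list; ib_coalesce_recv_mad() flattens it into one contiguous MAD. A hedged sketch of a client receive handler using it (my_recv_handler is hypothetical; GFP_KERNEL is assumed to be safe because handlers run from the port workqueue, not interrupt context):

    static void my_recv_handler(struct ib_mad_agent *agent,
                                struct ib_mad_recv_wc *mad_recv_wc)
    {
            void *buf = kmalloc(mad_recv_wc->mad_len, GFP_KERNEL);

            if (buf) {
                    ib_coalesce_recv_mad(mad_recv_wc, buf);
                    /* ... parse the contiguous MAD in buf ... */
                    kfree(buf);
            }
            ib_free_recv_mad(mad_recv_wc);
    }
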
414
415static struct ib_mad_recv_wc *
416continue_rmpp(struct ib_mad_agent_private *agent,
417 struct ib_mad_recv_wc *mad_recv_wc)
418{
419 struct mad_rmpp_recv *rmpp_recv;
420 struct ib_mad_recv_buf *prev_buf;
421 struct ib_mad_recv_wc *done_wc;
422 int seg_num;
423 unsigned long flags;
424
425 rmpp_recv = acquire_rmpp_recv(agent, mad_recv_wc);
426 if (!rmpp_recv)
427 goto drop1;
428
429 seg_num = get_seg_num(&mad_recv_wc->recv_buf);
430
431 spin_lock_irqsave(&rmpp_recv->lock, flags);
432 if ((rmpp_recv->state == RMPP_STATE_TIMEOUT) ||
433 (seg_num > rmpp_recv->newwin))
434 goto drop3;
435
436 if ((seg_num <= rmpp_recv->last_ack) ||
437 (rmpp_recv->state == RMPP_STATE_COMPLETE)) {
438 spin_unlock_irqrestore(&rmpp_recv->lock, flags);
439 ack_recv(rmpp_recv, mad_recv_wc);
440 goto drop2;
441 }
442
443 prev_buf = find_seg_location(&rmpp_recv->rmpp_wc->rmpp_list, seg_num);
444 if (!prev_buf)
445 goto drop3;
446
447 done_wc = NULL;
448 list_add(&mad_recv_wc->recv_buf.list, &prev_buf->list);
449 if (rmpp_recv->cur_seg_buf == prev_buf) {
450 update_seg_num(rmpp_recv, &mad_recv_wc->recv_buf);
451 if (get_last_flag(rmpp_recv->cur_seg_buf)) {
452 rmpp_recv->state = RMPP_STATE_COMPLETE;
453 spin_unlock_irqrestore(&rmpp_recv->lock, flags);
454 done_wc = complete_rmpp(rmpp_recv);
455 goto out;
456 } else if (rmpp_recv->seg_num == rmpp_recv->newwin) {
457 rmpp_recv->newwin += window_size(agent);
458 spin_unlock_irqrestore(&rmpp_recv->lock, flags);
459 ack_recv(rmpp_recv, mad_recv_wc);
460 goto out;
461 }
462 }
463 spin_unlock_irqrestore(&rmpp_recv->lock, flags);
464out:
465 deref_rmpp_recv(rmpp_recv);
466 return done_wc;
467
468drop3: spin_unlock_irqrestore(&rmpp_recv->lock, flags);
469drop2: deref_rmpp_recv(rmpp_recv);
470drop1: ib_free_recv_mad(mad_recv_wc);
471 return NULL;
472}
473
474static struct ib_mad_recv_wc *
475start_rmpp(struct ib_mad_agent_private *agent,
476 struct ib_mad_recv_wc *mad_recv_wc)
477{
478 struct mad_rmpp_recv *rmpp_recv;
479 unsigned long flags;
480
481 rmpp_recv = create_rmpp_recv(agent, mad_recv_wc);
482 if (!rmpp_recv) {
483 ib_free_recv_mad(mad_recv_wc);
484 return NULL;
485 }
486
487 spin_lock_irqsave(&agent->lock, flags);
488 if (insert_rmpp_recv(agent, rmpp_recv)) {
489 spin_unlock_irqrestore(&agent->lock, flags);
490 /* duplicate first MAD */
491 destroy_rmpp_recv(rmpp_recv);
492 return continue_rmpp(agent, mad_recv_wc);
493 }
494 atomic_inc(&rmpp_recv->refcount);
495
496 if (get_last_flag(&mad_recv_wc->recv_buf)) {
497 rmpp_recv->state = RMPP_STATE_COMPLETE;
498 spin_unlock_irqrestore(&agent->lock, flags);
499 complete_rmpp(rmpp_recv);
500 } else {
501 spin_unlock_irqrestore(&agent->lock, flags);
502 /* 40 seconds until we can find the packet lifetimes */
503 queue_delayed_work(agent->qp_info->port_priv->wq,
504 &rmpp_recv->timeout_work,
505 msecs_to_jiffies(40000));
506 rmpp_recv->newwin += window_size(agent);
507 ack_recv(rmpp_recv, mad_recv_wc);
508 mad_recv_wc = NULL;
509 }
510 deref_rmpp_recv(rmpp_recv);
511 return mad_recv_wc;
512}
513
514static inline u64 get_seg_addr(struct ib_mad_send_wr_private *mad_send_wr)
515{
516 return mad_send_wr->sg_list[0].addr + mad_send_wr->data_offset +
517 (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset) *
518 (mad_send_wr->seg_num - 1);
519}
520
521static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
522{
523 struct ib_rmpp_mad *rmpp_mad;
524 int timeout;
525
526 rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
527 ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
528 rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num);
529
530 if (mad_send_wr->seg_num == 1) {
531 rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST;
532 rmpp_mad->rmpp_hdr.paylen_newwin =
533 cpu_to_be32(mad_send_wr->total_seg *
534 (sizeof(struct ib_rmpp_mad) -
535 offsetof(struct ib_rmpp_mad, data)));
536 mad_send_wr->sg_list[0].length = sizeof(struct ib_rmpp_mad);
537 } else {
538 mad_send_wr->send_wr.num_sge = 2;
539 mad_send_wr->sg_list[0].length = mad_send_wr->data_offset;
540 mad_send_wr->sg_list[1].addr = get_seg_addr(mad_send_wr);
541 mad_send_wr->sg_list[1].length = sizeof(struct ib_rmpp_mad) -
542 mad_send_wr->data_offset;
543 mad_send_wr->sg_list[1].lkey = mad_send_wr->sg_list[0].lkey;
544 }
545
546 if (mad_send_wr->seg_num == mad_send_wr->total_seg) {
547 rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST;
548 rmpp_mad->rmpp_hdr.paylen_newwin =
549 cpu_to_be32(sizeof(struct ib_rmpp_mad) -
550 offsetof(struct ib_rmpp_mad, data) -
551 mad_send_wr->pad);
552 }
553
554 /* 2 seconds for an ACK until we can find the packet lifetime */
555 timeout = mad_send_wr->send_wr.wr.ud.timeout_ms;
556 if (!timeout || timeout > 2000)
557 mad_send_wr->timeout = msecs_to_jiffies(2000);
558 mad_send_wr->seg_num++;
559 return ib_send_mad(mad_send_wr);
560}
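
send_next_seg() resends from the client's one registered buffer: sg_list[0] carries the MAD and RMPP headers, and for segments past the first, sg_list[1] points at the next payload slice, whose address get_seg_addr() computes from the segment number. paylen_newwin is overloaded, advertising the total payload on the first segment and the residual bytes after padding on the last. The address arithmetic, restated with SA-class numbers (illustrative):

    /* Start of segment N's payload inside the sender's flat buffer. */
    static unsigned long seg_addr(unsigned long base, int data_offset, int seg_num)
    {
            int slice = 256 - data_offset;  /* payload bytes per segment */

            return base + data_offset + (unsigned long)slice * (seg_num - 1);
    }
    /* For SA (data_offset 56): seg_addr(base, 56, 3) == base + 456. */
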
561
562static void process_rmpp_ack(struct ib_mad_agent_private *agent,
563 struct ib_mad_recv_wc *mad_recv_wc)
564{
565 struct ib_mad_send_wr_private *mad_send_wr;
566 struct ib_rmpp_mad *rmpp_mad;
567 unsigned long flags;
568 int seg_num, newwin, ret;
569
570 rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
571 if (rmpp_mad->rmpp_hdr.rmpp_status)
572 return;
573
574 seg_num = be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num);
575 newwin = be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
576
577 spin_lock_irqsave(&agent->lock, flags);
578 mad_send_wr = ib_find_send_mad(agent, rmpp_mad->mad_hdr.tid);
579 if (!mad_send_wr)
580 goto out; /* Unmatched ACK */
581
582 if ((mad_send_wr->last_ack == mad_send_wr->total_seg) ||
583 (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
584 goto out; /* Send is already done */
585
586 if (seg_num > mad_send_wr->total_seg)
587 goto out; /* Bad ACK */
588
589 if (newwin < mad_send_wr->newwin || seg_num < mad_send_wr->last_ack)
590 goto out; /* Old ACK */
591
592 if (seg_num > mad_send_wr->last_ack) {
593 mad_send_wr->last_ack = seg_num;
594 mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries;
595 }
596 mad_send_wr->newwin = newwin;
597 if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
598 /* If no response is expected, the ACK completes the send */
599 if (!mad_send_wr->send_wr.wr.ud.timeout_ms) {
600 struct ib_mad_send_wc wc;
601
602 ib_mark_mad_done(mad_send_wr);
603 spin_unlock_irqrestore(&agent->lock, flags);
604
605 wc.status = IB_WC_SUCCESS;
606 wc.vendor_err = 0;
607 wc.wr_id = mad_send_wr->wr_id;
608 ib_mad_complete_send_wr(mad_send_wr, &wc);
609 return;
610 }
611 if (mad_send_wr->refcount == 1)
612 ib_reset_mad_timeout(mad_send_wr, mad_send_wr->
613 send_wr.wr.ud.timeout_ms);
614 } else if (mad_send_wr->refcount == 1 &&
615 mad_send_wr->seg_num < mad_send_wr->newwin &&
616 mad_send_wr->seg_num <= mad_send_wr->total_seg) {
617 /* Send failure will just result in a timeout/retry */
618 ret = send_next_seg(mad_send_wr);
619 if (ret)
620 goto out;
621
622 mad_send_wr->refcount++;
623 list_del(&mad_send_wr->agent_list);
624 list_add_tail(&mad_send_wr->agent_list,
625 &mad_send_wr->mad_agent_priv->send_list);
626 }
627out:
628 spin_unlock_irqrestore(&agent->lock, flags);
629}
630
631struct ib_mad_recv_wc *
632ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
633 struct ib_mad_recv_wc *mad_recv_wc)
634{
635 struct ib_rmpp_mad *rmpp_mad;
636
637 rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
638 if (!(rmpp_mad->rmpp_hdr.rmpp_rtime_flags & IB_MGMT_RMPP_FLAG_ACTIVE))
639 return mad_recv_wc;
640
641 if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION)
642 goto out;
643
644 switch (rmpp_mad->rmpp_hdr.rmpp_type) {
645 case IB_MGMT_RMPP_TYPE_DATA:
646 if (rmpp_mad->rmpp_hdr.seg_num == __constant_htonl(1))
647 return start_rmpp(agent, mad_recv_wc);
648 else
649 return continue_rmpp(agent, mad_recv_wc);
650 case IB_MGMT_RMPP_TYPE_ACK:
651 process_rmpp_ack(agent, mad_recv_wc);
652 break;
653 case IB_MGMT_RMPP_TYPE_STOP:
654 case IB_MGMT_RMPP_TYPE_ABORT:
655 /* TODO: process_rmpp_nack(agent, mad_recv_wc); */
656 break;
657 default:
658 break;
659 }
660out:
661 ib_free_recv_mad(mad_recv_wc);
662 return NULL;
663}
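
The return convention drives the core's receive path: a non-NULL work completion is ready to hand to the client, while NULL means the RMPP layer consumed the segment (queued it, acked it, or dropped it). A simplified sketch of the expected call site in ib_mad_complete_recv(), with the refcount bookkeeping elided:

    if (mad_agent_priv->agent.rmpp_version) {
            mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv, mad_recv_wc);
            if (!mad_recv_wc)
                    return;             /* segment consumed by the RMPP layer */
    }
    /* ...continue with TID matching and recv_handler delivery... */
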
664
665int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
666{
667 struct ib_rmpp_mad *rmpp_mad;
668 int i, total_len, ret;
669
670 rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
671 if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
672 IB_MGMT_RMPP_FLAG_ACTIVE))
673 return IB_RMPP_RESULT_UNHANDLED;
674
675 if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA)
676 return IB_RMPP_RESULT_INTERNAL;
677
678 if (mad_send_wr->send_wr.num_sge > 1)
679 return -EINVAL; /* TODO: support num_sge > 1 */
680
681 mad_send_wr->seg_num = 1;
682 mad_send_wr->newwin = 1;
683 mad_send_wr->data_offset = data_offset(rmpp_mad->mad_hdr.mgmt_class);
684
685 total_len = 0;
686 for (i = 0; i < mad_send_wr->send_wr.num_sge; i++)
687 total_len += mad_send_wr->send_wr.sg_list[i].length;
688
689 mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) /
690 (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset);
691 mad_send_wr->pad = total_len - offsetof(struct ib_rmpp_mad, data) -
692 be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
693
694 /* We need to wait for the final ACK even if there isn't a response */
695 mad_send_wr->refcount += (mad_send_wr->timeout == 0);
696 ret = send_next_seg(mad_send_wr);
697 if (!ret)
698 return IB_RMPP_RESULT_CONSUMED;
699 return ret;
700}
701
702int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
703 struct ib_mad_send_wc *mad_send_wc)
704{
705 struct ib_rmpp_mad *rmpp_mad;
706 struct ib_mad_send_buf *msg;
707 int ret;
708
709 rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
710 if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
711 IB_MGMT_RMPP_FLAG_ACTIVE))
712 return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
713
714 if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) {
715 msg = (struct ib_mad_send_buf *) (unsigned long)
716 mad_send_wc->wr_id;
717 ib_free_send_mad(msg);
718 return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */
719 }
720
721 if (mad_send_wc->status != IB_WC_SUCCESS ||
722 mad_send_wr->status != IB_WC_SUCCESS)
723 return IB_RMPP_RESULT_PROCESSED; /* Canceled or send error */
724
725 if (!mad_send_wr->timeout)
726 return IB_RMPP_RESULT_PROCESSED; /* Response received */
727
728 if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
729 mad_send_wr->timeout =
730 msecs_to_jiffies(mad_send_wr->send_wr.wr.ud.timeout_ms);
731 return IB_RMPP_RESULT_PROCESSED; /* Send done */
732 }
733
734 if (mad_send_wr->seg_num > mad_send_wr->newwin ||
735 mad_send_wr->seg_num > mad_send_wr->total_seg)
736 return IB_RMPP_RESULT_PROCESSED; /* Wait for ACK */
737
738 ret = send_next_seg(mad_send_wr);
739 if (ret) {
740 mad_send_wc->status = IB_WC_GENERAL_ERR;
741 return IB_RMPP_RESULT_PROCESSED;
742 }
743 return IB_RMPP_RESULT_CONSUMED;
744}
745
746int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr)
747{
748 struct ib_rmpp_mad *rmpp_mad;
749 int ret;
750
751 rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
752 if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
753 IB_MGMT_RMPP_FLAG_ACTIVE))
754 return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
755
756 if (mad_send_wr->last_ack == mad_send_wr->total_seg)
757 return IB_RMPP_RESULT_PROCESSED;
758
759 mad_send_wr->seg_num = mad_send_wr->last_ack + 1;
760 ret = send_next_seg(mad_send_wr);
761 if (ret)
762 return IB_RMPP_RESULT_PROCESSED;
763
764 return IB_RMPP_RESULT_CONSUMED;
765}
diff --git a/drivers/infiniband/core/mad_rmpp.h b/drivers/infiniband/core/mad_rmpp.h
new file mode 100644
index 000000000000..c4924dfb8e75
--- /dev/null
+++ b/drivers/infiniband/core/mad_rmpp.h
@@ -0,0 +1,58 @@
1/*
2 * Copyright (c) 2005 Intel Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mad_rmpp.h 1921 2005-02-25 22:58:44Z sean.hefty $
33 */
34
35#ifndef __MAD_RMPP_H__
36#define __MAD_RMPP_H__
37
38enum {
39 IB_RMPP_RESULT_PROCESSED,
40 IB_RMPP_RESULT_CONSUMED,
41 IB_RMPP_RESULT_INTERNAL,
42 IB_RMPP_RESULT_UNHANDLED
43};
44
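
These result codes tell mad.c how much of a completion the RMPP layer absorbed: UNHANDLED means the MAD is not RMPP and should be processed normally, CONSUMED means RMPP took ownership (for example, it sent the next segment), INTERNAL marks RMPP-generated traffic (ACK/STOP/ABORT) that must complete without invoking the client's handler, and PROCESSED means bookkeeping is done but normal completion should continue. Condensed, the dispatch in ib_mad_complete_send_wr() amounts to:

    ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
    if (ret == IB_RMPP_RESULT_CONSUMED)
            return;                     /* RMPP still owns this send */
    /* ...complete the send; skip the client callback when
     * ret == IB_RMPP_RESULT_INTERNAL... */
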
45int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr);
46
47struct ib_mad_recv_wc *
48ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
49 struct ib_mad_recv_wc *mad_recv_wc);
50
51int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
52 struct ib_mad_send_wc *mad_send_wc);
53
54void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent);
55
56int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr);
57
58#endif /* __MAD_RMPP_H__ */
diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c
index 5f15feffeae2..eb5ff54c10d7 100644
--- a/drivers/infiniband/core/packer.c
+++ b/drivers/infiniband/core/packer.c
@@ -96,7 +96,7 @@ void ib_pack(const struct ib_field *desc,
96 else 96 else
97 val = 0; 97 val = 0;
98 98
99 mask = cpu_to_be64(((1ull << desc[i].size_bits) - 1) << shift); 99 mask = cpu_to_be64((~0ull >> (64 - desc[i].size_bits)) << shift);
100 addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words); 100 addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words);
101 *addr = (*addr & ~mask) | (cpu_to_be64(val) & mask); 101 *addr = (*addr & ~mask) | (cpu_to_be64(val) & mask);
102 } else { 102 } else {
@@ -176,7 +176,7 @@ void ib_unpack(const struct ib_field *desc,
176 __be64 *addr; 176 __be64 *addr;
177 177
178 shift = 64 - desc[i].offset_bits - desc[i].size_bits; 178 shift = 64 - desc[i].offset_bits - desc[i].size_bits;
179 mask = ((1ull << desc[i].size_bits) - 1) << shift; 179 mask = (~0ull >> (64 - desc[i].size_bits)) << shift;
180 addr = (__be64 *) buf + desc[i].offset_words; 180 addr = (__be64 *) buf + desc[i].offset_words;
181 val = (be64_to_cpup(addr) & mask) >> shift; 181 val = (be64_to_cpup(addr) & mask) >> shift;
182 value_write(desc[i].struct_offset_bytes, 182 value_write(desc[i].struct_offset_bytes,
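
Both packer.c hunks replace the same expression for the same reason: the new service-record table includes fields that are exactly 64 bits wide, and for size_bits == 64 the old ((1ull << size_bits) - 1) shifts a 64-bit value by its full width, which C leaves undefined. ~0ull >> (64 - size_bits) is well defined for any width from 1 to 64. A standalone demonstration:

    #include <stdio.h>

    /* Well-defined all-ones mask of 'bits' low bits, 1 <= bits <= 64. */
    static unsigned long long field_mask(int bits)
    {
            return ~0ull >> (64 - bits);
    }

    int main(void)
    {
            printf("%016llx\n", field_mask(16));    /* 000000000000ffff */
            printf("%016llx\n", field_mask(64));    /* ffffffffffffffff */
            return 0;
    }
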
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 276e1a53010d..795184931c83 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -29,7 +30,7 @@
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 31 * SOFTWARE.
31 * 32 *
32 * $Id: sa_query.c 1389 2004-12-27 22:56:47Z roland $ 33 * $Id: sa_query.c 2811 2005-07-06 18:11:43Z halr $
33 */ 34 */
34 35
35#include <linux/module.h> 36#include <linux/module.h>
@@ -50,26 +51,6 @@ MODULE_AUTHOR("Roland Dreier");
50MODULE_DESCRIPTION("InfiniBand subnet administration query support"); 51MODULE_DESCRIPTION("InfiniBand subnet administration query support");
51MODULE_LICENSE("Dual BSD/GPL"); 52MODULE_LICENSE("Dual BSD/GPL");
52 53
53/*
54 * These two structures must be packed because they have 64-bit fields
55 * that are only 32-bit aligned. 64-bit architectures will lay them
56 * out wrong otherwise. (And unfortunately they are sent on the wire
57 * so we can't change the layout)
58 */
59struct ib_sa_hdr {
60 u64 sm_key;
61 u16 attr_offset;
62 u16 reserved;
63 ib_sa_comp_mask comp_mask;
64} __attribute__ ((packed));
65
66struct ib_sa_mad {
67 struct ib_mad_hdr mad_hdr;
68 struct ib_rmpp_hdr rmpp_hdr;
69 struct ib_sa_hdr sa_hdr;
70 u8 data[200];
71} __attribute__ ((packed));
72
73struct ib_sa_sm_ah { 54struct ib_sa_sm_ah {
74 struct ib_ah *ah; 55 struct ib_ah *ah;
75 struct kref ref; 56 struct kref ref;
@@ -77,7 +58,6 @@ struct ib_sa_sm_ah {
77 58
78struct ib_sa_port { 59struct ib_sa_port {
79 struct ib_mad_agent *agent; 60 struct ib_mad_agent *agent;
80 struct ib_mr *mr;
81 struct ib_sa_sm_ah *sm_ah; 61 struct ib_sa_sm_ah *sm_ah;
82 struct work_struct update_task; 62 struct work_struct update_task;
83 spinlock_t ah_lock; 63 spinlock_t ah_lock;
@@ -100,6 +80,12 @@ struct ib_sa_query {
100 int id; 80 int id;
101}; 81};
102 82
83struct ib_sa_service_query {
84 void (*callback)(int, struct ib_sa_service_rec *, void *);
85 void *context;
86 struct ib_sa_query sa_query;
87};
88
103struct ib_sa_path_query { 89struct ib_sa_path_query {
104 void (*callback)(int, struct ib_sa_path_rec *, void *); 90 void (*callback)(int, struct ib_sa_path_rec *, void *);
105 void *context; 91 void *context;
@@ -341,6 +327,54 @@ static const struct ib_field mcmember_rec_table[] = {
341 .size_bits = 23 }, 327 .size_bits = 23 },
342}; 328};
343 329
330#define SERVICE_REC_FIELD(field) \
331 .struct_offset_bytes = offsetof(struct ib_sa_service_rec, field), \
332 .struct_size_bytes = sizeof ((struct ib_sa_service_rec *) 0)->field, \
333 .field_name = "sa_service_rec:" #field
334
335static const struct ib_field service_rec_table[] = {
336 { SERVICE_REC_FIELD(id),
337 .offset_words = 0,
338 .offset_bits = 0,
339 .size_bits = 64 },
340 { SERVICE_REC_FIELD(gid),
341 .offset_words = 2,
342 .offset_bits = 0,
343 .size_bits = 128 },
344 { SERVICE_REC_FIELD(pkey),
345 .offset_words = 6,
346 .offset_bits = 0,
347 .size_bits = 16 },
348 { SERVICE_REC_FIELD(lease),
349 .offset_words = 7,
350 .offset_bits = 0,
351 .size_bits = 32 },
352 { SERVICE_REC_FIELD(key),
353 .offset_words = 8,
354 .offset_bits = 0,
355 .size_bits = 128 },
356 { SERVICE_REC_FIELD(name),
357 .offset_words = 12,
358 .offset_bits = 0,
359 .size_bits = 64*8 },
360 { SERVICE_REC_FIELD(data8),
361 .offset_words = 28,
362 .offset_bits = 0,
363 .size_bits = 16*8 },
364 { SERVICE_REC_FIELD(data16),
365 .offset_words = 32,
366 .offset_bits = 0,
367 .size_bits = 8*16 },
368 { SERVICE_REC_FIELD(data32),
369 .offset_words = 36,
370 .offset_bits = 0,
371 .size_bits = 4*32 },
372 { SERVICE_REC_FIELD(data64),
373 .offset_words = 40,
374 .offset_bits = 0,
375 .size_bits = 2*64 },
376};
377
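
Each table entry maps one ib_sa_service_rec member to its wire position (offset_words and offset_bits index into the 200-byte SA data area), so serialization reduces to a single call into the generic packer. A hedged usage sketch (the record contents and mad pointer are illustrative):

    struct ib_sa_service_rec rec;

    memset(&rec, 0, sizeof rec);
    rec.id = my_service_id;             /* placeholder value */

    /* Lay the record out in wire format inside the SA MAD payload... */
    ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
            &rec, mad->data);

    /* ...and recover it again on the receive side. */
    ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
              mad->data, &rec);
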
344static void free_sm_ah(struct kref *kref) 378static void free_sm_ah(struct kref *kref)
345{ 379{
346 struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); 380 struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
@@ -463,7 +497,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms)
463 .mad_hdr = &query->mad->mad_hdr, 497 .mad_hdr = &query->mad->mad_hdr,
464 .remote_qpn = 1, 498 .remote_qpn = 1,
465 .remote_qkey = IB_QP1_QKEY, 499 .remote_qkey = IB_QP1_QKEY,
466 .timeout_ms = timeout_ms 500 .timeout_ms = timeout_ms,
467 } 501 }
468 } 502 }
469 }; 503 };
@@ -492,7 +526,7 @@ retry:
492 sizeof (struct ib_sa_mad), 526 sizeof (struct ib_sa_mad),
493 DMA_TO_DEVICE); 527 DMA_TO_DEVICE);
494 gather_list.length = sizeof (struct ib_sa_mad); 528 gather_list.length = sizeof (struct ib_sa_mad);
495 gather_list.lkey = port->mr->lkey; 529 gather_list.lkey = port->agent->mr->lkey;
496 pci_unmap_addr_set(query, mapping, gather_list.addr); 530 pci_unmap_addr_set(query, mapping, gather_list.addr);
497 531
498 ret = ib_post_send_mad(port->agent, &wr, &bad_wr); 532 ret = ib_post_send_mad(port->agent, &wr, &bad_wr);
@@ -507,7 +541,13 @@ retry:
507 spin_unlock_irqrestore(&idr_lock, flags); 541 spin_unlock_irqrestore(&idr_lock, flags);
508 } 542 }
509 543
510 return ret; 544 /*
545 * It's not safe to dereference query any more, because the
546 * send may already have completed and freed the query in
547 * another context. So use wr.wr_id, which has a copy of the
548 * query's id.
549 */
550 return ret ? ret : wr.wr_id;
511} 551}
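
The tail of send_mad() deserves emphasis: wr.wr_id was initialized from query->id before the MAD was posted, and wr lives on this function's stack, so reading it back cannot race with a completion running on another CPU. A distilled before/after sketch of the hazard:

	/* Racy form: the completion handler may free query between
	 * ib_post_send_mad() returning and the dereference. */
	ret = ib_post_send_mad(port->agent, &wr, &bad_wr);
	return ret ? ret : query->id;

	/* Safe form (used above): wr is a private stack copy, so
	 * wr.wr_id can be read without touching query at all. */
	return ret ? ret : wr.wr_id;
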
512 552
513static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, 553static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
@@ -560,7 +600,7 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
560int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, 600int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
561 struct ib_sa_path_rec *rec, 601 struct ib_sa_path_rec *rec,
562 ib_sa_comp_mask comp_mask, 602 ib_sa_comp_mask comp_mask,
563 int timeout_ms, int gfp_mask, 603 int timeout_ms, unsigned int __nocast gfp_mask,
564 void (*callback)(int status, 604 void (*callback)(int status,
565 struct ib_sa_path_rec *resp, 605 struct ib_sa_path_rec *resp,
566 void *context), 606 void *context),
@@ -598,17 +638,126 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
598 rec, query->sa_query.mad->data); 638 rec, query->sa_query.mad->data);
599 639
600 *sa_query = &query->sa_query; 640 *sa_query = &query->sa_query;
641
601 ret = send_mad(&query->sa_query, timeout_ms); 642 ret = send_mad(&query->sa_query, timeout_ms);
602 if (ret) { 643 if (ret < 0) {
603 *sa_query = NULL; 644 *sa_query = NULL;
604 kfree(query->sa_query.mad); 645 kfree(query->sa_query.mad);
605 kfree(query); 646 kfree(query);
606 } 647 }
607 648
608 return ret ? ret : query->sa_query.id; 649 return ret;
609} 650}
610EXPORT_SYMBOL(ib_sa_path_rec_get); 651EXPORT_SYMBOL(ib_sa_path_rec_get);
611 652
653static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
654 int status,
655 struct ib_sa_mad *mad)
656{
657 struct ib_sa_service_query *query =
658 container_of(sa_query, struct ib_sa_service_query, sa_query);
659
660 if (mad) {
661 struct ib_sa_service_rec rec;
662
663 ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
664 mad->data, &rec);
665 query->callback(status, &rec, query->context);
666 } else
667 query->callback(status, NULL, query->context);
668}
669
670static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
671{
672 kfree(sa_query->mad);
673 kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
674}
675
676/**
677 * ib_sa_service_rec_query - Start Service Record operation
678 * @device:device to send request on
679 * @port_num: port number to send request on
680 * @method:SA method - should be get, set, or delete
681 * @rec:Service Record to send in request
682 * @comp_mask:component mask to send in request
683 * @timeout_ms:time to wait for response
684 * @gfp_mask:GFP mask to use for internal allocations
685 * @callback:function called when request completes, times out or is
686 * canceled
687 * @context:opaque user context passed to callback
688 * @sa_query:request context, used to cancel request
689 *
690 * Send a Service Record set/get/delete to the SA to register,
691 * unregister or query a service record.
692 * The callback function will be called when the request completes (or
693 * fails); status is 0 for a successful response, -EINTR if the query
 694 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
695 * occurred sending the query. The resp parameter of the callback is
696 * only valid if status is 0.
697 *
698 * If the return value of ib_sa_service_rec_query() is negative, it is an
699 * error code. Otherwise it is a request ID that can be used to cancel
700 * the query.
701 */
702int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
703 struct ib_sa_service_rec *rec,
704 ib_sa_comp_mask comp_mask,
705 int timeout_ms, unsigned int __nocast gfp_mask,
706 void (*callback)(int status,
707 struct ib_sa_service_rec *resp,
708 void *context),
709 void *context,
710 struct ib_sa_query **sa_query)
711{
712 struct ib_sa_service_query *query;
713 struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
714 struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port];
715 struct ib_mad_agent *agent = port->agent;
716 int ret;
717
718 if (method != IB_MGMT_METHOD_GET &&
719 method != IB_MGMT_METHOD_SET &&
720 method != IB_SA_METHOD_DELETE)
721 return -EINVAL;
722
723 query = kmalloc(sizeof *query, gfp_mask);
724 if (!query)
725 return -ENOMEM;
726 query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
727 if (!query->sa_query.mad) {
728 kfree(query);
729 return -ENOMEM;
730 }
731
732 query->callback = callback;
733 query->context = context;
734
735 init_mad(query->sa_query.mad, agent);
736
737 query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
738 query->sa_query.release = ib_sa_service_rec_release;
739 query->sa_query.port = port;
740 query->sa_query.mad->mad_hdr.method = method;
741 query->sa_query.mad->mad_hdr.attr_id =
742 cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
743 query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
744
745 ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
746 rec, query->sa_query.mad->data);
747
748 *sa_query = &query->sa_query;
749
750 ret = send_mad(&query->sa_query, timeout_ms);
751 if (ret < 0) {
752 *sa_query = NULL;
753 kfree(query->sa_query.mad);
754 kfree(query);
755 }
756
757 return ret;
758}
759EXPORT_SYMBOL(ib_sa_service_rec_query);
760
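
A hedged caller sketch for the new entry point. The callback runs from the MAD completion path, so it must not block; the component-mask constants, like everything else named my_*, are assumptions based on the ib_sa.h additions in this series rather than verified API:

	static void my_service_done(int status, struct ib_sa_service_rec *resp,
				    void *context)
	{
		if (status)
			printk(KERN_WARNING "service record op failed: %d\n",
			       status);
	}

	static int my_register_service(struct ib_device *device, u8 port_num,
				       struct ib_sa_service_rec *rec,
				       struct ib_sa_query **query)
	{
		int id;

		id = ib_sa_service_rec_query(device, port_num,
					     IB_MGMT_METHOD_SET, rec,
					     IB_SA_SERVICE_REC_SERVICE_ID |
					     IB_SA_SERVICE_REC_SERVICE_GID,
					     1000, GFP_KERNEL,
					     my_service_done, NULL, query);
		return id < 0 ? id : 0;
	}
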
612static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, 761static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
613 int status, 762 int status,
614 struct ib_sa_mad *mad) 763 struct ib_sa_mad *mad)
@@ -636,7 +785,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
636 u8 method, 785 u8 method,
637 struct ib_sa_mcmember_rec *rec, 786 struct ib_sa_mcmember_rec *rec,
638 ib_sa_comp_mask comp_mask, 787 ib_sa_comp_mask comp_mask,
639 int timeout_ms, int gfp_mask, 788 int timeout_ms, unsigned int __nocast gfp_mask,
640 void (*callback)(int status, 789 void (*callback)(int status,
641 struct ib_sa_mcmember_rec *resp, 790 struct ib_sa_mcmember_rec *resp,
642 void *context), 791 void *context),
@@ -674,14 +823,15 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
674 rec, query->sa_query.mad->data); 823 rec, query->sa_query.mad->data);
675 824
676 *sa_query = &query->sa_query; 825 *sa_query = &query->sa_query;
826
677 ret = send_mad(&query->sa_query, timeout_ms); 827 ret = send_mad(&query->sa_query, timeout_ms);
678 if (ret) { 828 if (ret < 0) {
679 *sa_query = NULL; 829 *sa_query = NULL;
680 kfree(query->sa_query.mad); 830 kfree(query->sa_query.mad);
681 kfree(query); 831 kfree(query);
682 } 832 }
683 833
684 return ret ? ret : query->sa_query.id; 834 return ret;
685} 835}
686EXPORT_SYMBOL(ib_sa_mcmember_rec_query); 836EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
687 837
@@ -772,7 +922,6 @@ static void ib_sa_add_one(struct ib_device *device)
772 sa_dev->end_port = e; 922 sa_dev->end_port = e;
773 923
774 for (i = 0; i <= e - s; ++i) { 924 for (i = 0; i <= e - s; ++i) {
775 sa_dev->port[i].mr = NULL;
776 sa_dev->port[i].sm_ah = NULL; 925 sa_dev->port[i].sm_ah = NULL;
777 sa_dev->port[i].port_num = i + s; 926 sa_dev->port[i].port_num = i + s;
778 spin_lock_init(&sa_dev->port[i].ah_lock); 927 spin_lock_init(&sa_dev->port[i].ah_lock);
@@ -784,13 +933,6 @@ static void ib_sa_add_one(struct ib_device *device)
784 if (IS_ERR(sa_dev->port[i].agent)) 933 if (IS_ERR(sa_dev->port[i].agent))
785 goto err; 934 goto err;
786 935
787 sa_dev->port[i].mr = ib_get_dma_mr(sa_dev->port[i].agent->qp->pd,
788 IB_ACCESS_LOCAL_WRITE);
789 if (IS_ERR(sa_dev->port[i].mr)) {
790 ib_unregister_mad_agent(sa_dev->port[i].agent);
791 goto err;
792 }
793
794 INIT_WORK(&sa_dev->port[i].update_task, 936 INIT_WORK(&sa_dev->port[i].update_task,
795 update_sm_ah, &sa_dev->port[i]); 937 update_sm_ah, &sa_dev->port[i]);
796 } 938 }
@@ -814,10 +956,8 @@ static void ib_sa_add_one(struct ib_device *device)
814 return; 956 return;
815 957
816err: 958err:
817 while (--i >= 0) { 959 while (--i >= 0)
818 ib_dereg_mr(sa_dev->port[i].mr);
819 ib_unregister_mad_agent(sa_dev->port[i].agent); 960 ib_unregister_mad_agent(sa_dev->port[i].agent);
820 }
821 961
822 kfree(sa_dev); 962 kfree(sa_dev);
823 963
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
new file mode 100644
index 000000000000..61d07c732f49
--- /dev/null
+++ b/drivers/infiniband/core/ucm.c
@@ -0,0 +1,1387 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ucm.c 2594 2005-06-13 19:46:02Z libor $
33 */
34#include <linux/init.h>
35#include <linux/fs.h>
36#include <linux/module.h>
37#include <linux/device.h>
38#include <linux/err.h>
39#include <linux/poll.h>
40#include <linux/file.h>
41#include <linux/mount.h>
42#include <linux/cdev.h>
43
44#include <asm/uaccess.h>
45
46#include "ucm.h"
47
48MODULE_AUTHOR("Libor Michalek");
49MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
50MODULE_LICENSE("Dual BSD/GPL");
51
52static int ucm_debug_level;
53
54module_param_named(debug_level, ucm_debug_level, int, 0644);
55MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
56
57enum {
58 IB_UCM_MAJOR = 231,
59 IB_UCM_MINOR = 255
60};
61
62#define IB_UCM_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_MINOR)
63
64#define PFX "UCM: "
65
66#define ucm_dbg(format, arg...) \
67 do { \
68 if (ucm_debug_level > 0) \
69 printk(KERN_DEBUG PFX format, ## arg); \
70 } while (0)
71
72static struct semaphore ctx_id_mutex;
73static struct idr ctx_id_table;
74static int ctx_id_rover = 0;
75
76static struct ib_ucm_context *ib_ucm_ctx_get(int id)
77{
78 struct ib_ucm_context *ctx;
79
80 down(&ctx_id_mutex);
81 ctx = idr_find(&ctx_id_table, id);
82 if (ctx)
83 ctx->ref++;
84 up(&ctx_id_mutex);
85
86 return ctx;
87}
88
89static void ib_ucm_ctx_put(struct ib_ucm_context *ctx)
90{
91 struct ib_ucm_event *uevent;
92
93 down(&ctx_id_mutex);
94
95 ctx->ref--;
96 if (!ctx->ref)
97 idr_remove(&ctx_id_table, ctx->id);
98
99 up(&ctx_id_mutex);
100
101 if (ctx->ref)
102 return;
103
104 down(&ctx->file->mutex);
105
106 list_del(&ctx->file_list);
107 while (!list_empty(&ctx->events)) {
108
109 uevent = list_entry(ctx->events.next,
110 struct ib_ucm_event, ctx_list);
111 list_del(&uevent->file_list);
112 list_del(&uevent->ctx_list);
113
114 /* clear incoming connections. */
115 if (uevent->cm_id)
116 ib_destroy_cm_id(uevent->cm_id);
117
118 kfree(uevent);
119 }
120
121 up(&ctx->file->mutex);
122
123 ucm_dbg("Destroyed CM ID <%d>\n", ctx->id);
124
125 ib_destroy_cm_id(ctx->cm_id);
126 kfree(ctx);
127}
128
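
ib_ucm_ctx_get()/ib_ucm_ctx_put() implement a hand-rolled reference count under ctx_id_mutex rather than a kref, because the id must leave ctx_id_table atomically with the count reaching zero; the teardown itself then runs outside the lock. The usage contract, as a hypothetical helper:

	/* Hypothetical: every successful get must be paired with a put. */
	static int example_use_ctx(int id)
	{
		struct ib_ucm_context *ctx;

		ctx = ib_ucm_ctx_get(id);	/* +1, NULL if id is stale */
		if (!ctx)
			return -ENOENT;

		/* ... operate on ctx->cm_id, typically under
		 * ctx->file->mutex ... */

		ib_ucm_ctx_put(ctx);		/* -1; last put frees ctx */
		return 0;
	}
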
129static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
130{
131 struct ib_ucm_context *ctx;
132 int result;
133
134 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
135 if (!ctx)
136 return NULL;
137
138 ctx->ref = 1; /* user reference */
139 ctx->file = file;
140
141 INIT_LIST_HEAD(&ctx->events);
142 init_MUTEX(&ctx->mutex);
143
144 list_add_tail(&ctx->file_list, &file->ctxs);
145
146 ctx_id_rover = (ctx_id_rover + 1) & INT_MAX;
147retry:
148 result = idr_pre_get(&ctx_id_table, GFP_KERNEL);
149 if (!result)
150 goto error;
151
152 down(&ctx_id_mutex);
153 result = idr_get_new_above(&ctx_id_table, ctx, ctx_id_rover, &ctx->id);
154 up(&ctx_id_mutex);
155
156 if (result == -EAGAIN)
157 goto retry;
158 if (result)
159 goto error;
160
161 ucm_dbg("Allocated CM ID <%d>\n", ctx->id);
162
163 return ctx;
164error:
165 list_del(&ctx->file_list);
166 kfree(ctx);
167
168 return NULL;
169}
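
The id allocation above follows the standard idr idiom of this era: idr_pre_get() preloads free nodes with GFP_KERNEL outside the lock, idr_get_new_above() allocates under the lock, and -EAGAIN means another thread consumed the preloaded node first, so the loop retries. The rover merely spreads ids out so stale handles are not reused immediately. The loop in isolation:

	/* Sketch of the allocation loop on its own. */
	static int example_alloc_id(struct ib_ucm_context *ctx)
	{
		int result;

	retry:
		if (!idr_pre_get(&ctx_id_table, GFP_KERNEL))
			return -ENOMEM;		/* preload failed */

		down(&ctx_id_mutex);
		result = idr_get_new_above(&ctx_id_table, ctx,
					   ctx_id_rover, &ctx->id);
		up(&ctx_id_mutex);

		if (result == -EAGAIN)		/* node stolen: try again */
			goto retry;
		return result;
	}
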
170/*
 171 * Event portion of the API: handle CM events
172 * and allow event polling.
173 */
174static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath,
175 struct ib_sa_path_rec *kpath)
176{
177 if (!kpath || !upath)
178 return;
179
180 memcpy(upath->dgid, kpath->dgid.raw, sizeof(union ib_gid));
181 memcpy(upath->sgid, kpath->sgid.raw, sizeof(union ib_gid));
182
183 upath->dlid = kpath->dlid;
184 upath->slid = kpath->slid;
185 upath->raw_traffic = kpath->raw_traffic;
186 upath->flow_label = kpath->flow_label;
187 upath->hop_limit = kpath->hop_limit;
188 upath->traffic_class = kpath->traffic_class;
189 upath->reversible = kpath->reversible;
190 upath->numb_path = kpath->numb_path;
191 upath->pkey = kpath->pkey;
192 upath->sl = kpath->sl;
193 upath->mtu_selector = kpath->mtu_selector;
194 upath->mtu = kpath->mtu;
195 upath->rate_selector = kpath->rate_selector;
196 upath->rate = kpath->rate;
197 upath->packet_life_time = kpath->packet_life_time;
198 upath->preference = kpath->preference;
199
200 upath->packet_life_time_selector =
201 kpath->packet_life_time_selector;
202}
203
204static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
205 struct ib_cm_req_event_param *kreq)
206{
207 ureq->listen_id = (long)kreq->listen_id->context;
208
209 ureq->remote_ca_guid = kreq->remote_ca_guid;
210 ureq->remote_qkey = kreq->remote_qkey;
211 ureq->remote_qpn = kreq->remote_qpn;
212 ureq->qp_type = kreq->qp_type;
213 ureq->starting_psn = kreq->starting_psn;
214 ureq->responder_resources = kreq->responder_resources;
215 ureq->initiator_depth = kreq->initiator_depth;
216 ureq->local_cm_response_timeout = kreq->local_cm_response_timeout;
217 ureq->flow_control = kreq->flow_control;
218 ureq->remote_cm_response_timeout = kreq->remote_cm_response_timeout;
219 ureq->retry_count = kreq->retry_count;
220 ureq->rnr_retry_count = kreq->rnr_retry_count;
221 ureq->srq = kreq->srq;
222
223 ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path);
224 ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path);
225}
226
227static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep,
228 struct ib_cm_rep_event_param *krep)
229{
230 urep->remote_ca_guid = krep->remote_ca_guid;
231 urep->remote_qkey = krep->remote_qkey;
232 urep->remote_qpn = krep->remote_qpn;
233 urep->starting_psn = krep->starting_psn;
234 urep->responder_resources = krep->responder_resources;
235 urep->initiator_depth = krep->initiator_depth;
236 urep->target_ack_delay = krep->target_ack_delay;
237 urep->failover_accepted = krep->failover_accepted;
238 urep->flow_control = krep->flow_control;
239 urep->rnr_retry_count = krep->rnr_retry_count;
240 urep->srq = krep->srq;
241}
242
243static void ib_ucm_event_rej_get(struct ib_ucm_rej_event_resp *urej,
244 struct ib_cm_rej_event_param *krej)
245{
246 urej->reason = krej->reason;
247}
248
249static void ib_ucm_event_mra_get(struct ib_ucm_mra_event_resp *umra,
250 struct ib_cm_mra_event_param *kmra)
251{
252 umra->timeout = kmra->service_timeout;
253}
254
255static void ib_ucm_event_lap_get(struct ib_ucm_lap_event_resp *ulap,
256 struct ib_cm_lap_event_param *klap)
257{
258 ib_ucm_event_path_get(&ulap->path, klap->alternate_path);
259}
260
261static void ib_ucm_event_apr_get(struct ib_ucm_apr_event_resp *uapr,
262 struct ib_cm_apr_event_param *kapr)
263{
264 uapr->status = kapr->ap_status;
265}
266
267static void ib_ucm_event_sidr_req_get(struct ib_ucm_sidr_req_event_resp *ureq,
268 struct ib_cm_sidr_req_event_param *kreq)
269{
270 ureq->listen_id = (long)kreq->listen_id->context;
271 ureq->pkey = kreq->pkey;
272}
273
274static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep,
275 struct ib_cm_sidr_rep_event_param *krep)
276{
277 urep->status = krep->status;
278 urep->qkey = krep->qkey;
279 urep->qpn = krep->qpn;
 280}
281
282static int ib_ucm_event_process(struct ib_cm_event *evt,
283 struct ib_ucm_event *uvt)
284{
285 void *info = NULL;
286 int result;
287
288 switch (evt->event) {
289 case IB_CM_REQ_RECEIVED:
290 ib_ucm_event_req_get(&uvt->resp.u.req_resp,
291 &evt->param.req_rcvd);
292 uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE;
293 uvt->resp.present |= (evt->param.req_rcvd.primary_path ?
294 IB_UCM_PRES_PRIMARY : 0);
295 uvt->resp.present |= (evt->param.req_rcvd.alternate_path ?
296 IB_UCM_PRES_ALTERNATE : 0);
297 break;
298 case IB_CM_REP_RECEIVED:
299 ib_ucm_event_rep_get(&uvt->resp.u.rep_resp,
300 &evt->param.rep_rcvd);
301 uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
302
303 break;
304 case IB_CM_RTU_RECEIVED:
305 uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE;
306 uvt->resp.u.send_status = evt->param.send_status;
307
308 break;
309 case IB_CM_DREQ_RECEIVED:
310 uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE;
311 uvt->resp.u.send_status = evt->param.send_status;
312
313 break;
314 case IB_CM_DREP_RECEIVED:
315 uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE;
316 uvt->resp.u.send_status = evt->param.send_status;
317
318 break;
319 case IB_CM_MRA_RECEIVED:
320 ib_ucm_event_mra_get(&uvt->resp.u.mra_resp,
321 &evt->param.mra_rcvd);
322 uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE;
323
324 break;
325 case IB_CM_REJ_RECEIVED:
326 ib_ucm_event_rej_get(&uvt->resp.u.rej_resp,
327 &evt->param.rej_rcvd);
328 uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
329 uvt->info_len = evt->param.rej_rcvd.ari_length;
330 info = evt->param.rej_rcvd.ari;
331
332 break;
333 case IB_CM_LAP_RECEIVED:
334 ib_ucm_event_lap_get(&uvt->resp.u.lap_resp,
335 &evt->param.lap_rcvd);
336 uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE;
337 uvt->resp.present |= (evt->param.lap_rcvd.alternate_path ?
338 IB_UCM_PRES_ALTERNATE : 0);
339 break;
340 case IB_CM_APR_RECEIVED:
341 ib_ucm_event_apr_get(&uvt->resp.u.apr_resp,
342 &evt->param.apr_rcvd);
343 uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE;
344 uvt->info_len = evt->param.apr_rcvd.info_len;
345 info = evt->param.apr_rcvd.apr_info;
346
347 break;
348 case IB_CM_SIDR_REQ_RECEIVED:
349 ib_ucm_event_sidr_req_get(&uvt->resp.u.sidr_req_resp,
350 &evt->param.sidr_req_rcvd);
351 uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
352
353 break;
354 case IB_CM_SIDR_REP_RECEIVED:
355 ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp,
356 &evt->param.sidr_rep_rcvd);
357 uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
358 uvt->info_len = evt->param.sidr_rep_rcvd.info_len;
359 info = evt->param.sidr_rep_rcvd.info;
360
361 break;
362 default:
363 uvt->resp.u.send_status = evt->param.send_status;
364
365 break;
366 }
367
368 if (uvt->data_len && evt->private_data) {
369
370 uvt->data = kmalloc(uvt->data_len, GFP_KERNEL);
371 if (!uvt->data) {
372 result = -ENOMEM;
373 goto error;
374 }
375
376 memcpy(uvt->data, evt->private_data, uvt->data_len);
377 uvt->resp.present |= IB_UCM_PRES_DATA;
378 }
379
380 if (uvt->info_len && info) {
381
382 uvt->info = kmalloc(uvt->info_len, GFP_KERNEL);
383 if (!uvt->info) {
384 result = -ENOMEM;
385 goto error;
386 }
387
388 memcpy(uvt->info, info, uvt->info_len);
389 uvt->resp.present |= IB_UCM_PRES_INFO;
390 }
391
392 return 0;
393error:
394 kfree(uvt->info);
395 kfree(uvt->data);
396 return result;
397}
398
399static int ib_ucm_event_handler(struct ib_cm_id *cm_id,
400 struct ib_cm_event *event)
401{
402 struct ib_ucm_event *uevent;
403 struct ib_ucm_context *ctx;
404 int result = 0;
405 int id;
406 /*
407 * lookup correct context based on event type.
408 */
409 switch (event->event) {
410 case IB_CM_REQ_RECEIVED:
411 id = (long)event->param.req_rcvd.listen_id->context;
412 break;
413 case IB_CM_SIDR_REQ_RECEIVED:
414 id = (long)event->param.sidr_req_rcvd.listen_id->context;
415 break;
416 default:
417 id = (long)cm_id->context;
418 break;
419 }
420
421 ucm_dbg("Event. CM ID <%d> event <%d>\n", id, event->event);
422
423 ctx = ib_ucm_ctx_get(id);
424 if (!ctx)
425 return -ENOENT;
426
427 if (event->event == IB_CM_REQ_RECEIVED ||
428 event->event == IB_CM_SIDR_REQ_RECEIVED)
429 id = IB_UCM_CM_ID_INVALID;
430
431 uevent = kmalloc(sizeof(*uevent), GFP_KERNEL);
432 if (!uevent) {
433 result = -ENOMEM;
434 goto done;
435 }
436
437 memset(uevent, 0, sizeof(*uevent));
438
439 uevent->resp.id = id;
440 uevent->resp.event = event->event;
441
442 result = ib_ucm_event_process(event, uevent);
443 if (result)
444 goto done;
445
446 uevent->ctx = ctx;
447 uevent->cm_id = ((event->event == IB_CM_REQ_RECEIVED ||
448 event->event == IB_CM_SIDR_REQ_RECEIVED ) ?
449 cm_id : NULL);
450
451 down(&ctx->file->mutex);
452
453 list_add_tail(&uevent->file_list, &ctx->file->events);
454 list_add_tail(&uevent->ctx_list, &ctx->events);
455
456 wake_up_interruptible(&ctx->file->poll_wait);
457
458 up(&ctx->file->mutex);
459done:
460 ctx->error = result;
461 ib_ucm_ctx_put(ctx); /* func reference */
462 return result;
463}
464
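
The demultiplexing above hinges on never storing a kernel pointer in cm_id->context: the small ucm id is cast through unsigned long instead, so a stale or hostile id can only miss in the idr lookup, never be dereferenced. REQ and SIDR_REQ events arrive on a freshly created cm_id whose context is still unset, which is why the listen id is used for those and the new cm_id is parked in the event until ib_ucm_event() hands it to userspace. The round trip, as two hypothetical helpers:

	static inline void example_store_id(struct ib_cm_id *cm_id, int id)
	{
		cm_id->context = (void *) (unsigned long) id;
	}

	static inline int example_load_id(struct ib_cm_id *cm_id)
	{
		return (int) (long) cm_id->context;
	}
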
465static ssize_t ib_ucm_event(struct ib_ucm_file *file,
466 const char __user *inbuf,
467 int in_len, int out_len)
468{
469 struct ib_ucm_context *ctx;
470 struct ib_ucm_event_get cmd;
471 struct ib_ucm_event *uevent = NULL;
472 int result = 0;
473 DEFINE_WAIT(wait);
474
475 if (out_len < sizeof(struct ib_ucm_event_resp))
476 return -ENOSPC;
477
478 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
479 return -EFAULT;
480 /*
 481 * wait for an event to arrive
482 */
483 down(&file->mutex);
484
485 while (list_empty(&file->events)) {
486
487 if (file->filp->f_flags & O_NONBLOCK) {
488 result = -EAGAIN;
489 break;
490 }
491
492 if (signal_pending(current)) {
493 result = -ERESTARTSYS;
494 break;
495 }
496
497 prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE);
498
499 up(&file->mutex);
500 schedule();
501 down(&file->mutex);
502
503 finish_wait(&file->poll_wait, &wait);
504 }
505
506 if (result)
507 goto done;
508
509 uevent = list_entry(file->events.next, struct ib_ucm_event, file_list);
510
511 if (!uevent->cm_id)
512 goto user;
513
514 ctx = ib_ucm_ctx_alloc(file);
515 if (!ctx) {
516 result = -ENOMEM;
517 goto done;
518 }
519
520 ctx->cm_id = uevent->cm_id;
521 ctx->cm_id->cm_handler = ib_ucm_event_handler;
522 ctx->cm_id->context = (void *)(unsigned long)ctx->id;
523
524 uevent->resp.id = ctx->id;
525
526user:
527 if (copy_to_user((void __user *)(unsigned long)cmd.response,
528 &uevent->resp, sizeof(uevent->resp))) {
529 result = -EFAULT;
530 goto done;
531 }
532
533 if (uevent->data) {
534
535 if (cmd.data_len < uevent->data_len) {
536 result = -ENOMEM;
537 goto done;
538 }
539
540 if (copy_to_user((void __user *)(unsigned long)cmd.data,
541 uevent->data, uevent->data_len)) {
542 result = -EFAULT;
543 goto done;
544 }
545 }
546
547 if (uevent->info) {
548
549 if (cmd.info_len < uevent->info_len) {
550 result = -ENOMEM;
551 goto done;
552 }
553
554 if (copy_to_user((void __user *)(unsigned long)cmd.info,
555 uevent->info, uevent->info_len)) {
556 result = -EFAULT;
557 goto done;
558 }
559 }
560
561 list_del(&uevent->file_list);
562 list_del(&uevent->ctx_list);
563
564 kfree(uevent->data);
565 kfree(uevent->info);
566 kfree(uevent);
567done:
568 up(&file->mutex);
569 return result;
570}
571
572
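
The wait loop in ib_ucm_event() is open-coded because file->mutex must be dropped across schedule(); wait_event_interruptible() would sleep holding it and stall the event producers. The pattern in isolation (O_NONBLOCK handling omitted):

	static int example_wait_for_event(struct ib_ucm_file *file)
	{
		DEFINE_WAIT(wait);
		int result = 0;

		down(&file->mutex);
		while (list_empty(&file->events)) {
			if (signal_pending(current)) {
				result = -ERESTARTSYS;
				break;
			}
			/* publish the waiter before dropping the lock so a
			 * wake-up between up() and schedule() is not lost */
			prepare_to_wait(&file->poll_wait, &wait,
					TASK_INTERRUPTIBLE);
			up(&file->mutex);
			schedule();
			down(&file->mutex);
			finish_wait(&file->poll_wait, &wait);
		}
		up(&file->mutex);
		return result;
	}
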
573static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
574 const char __user *inbuf,
575 int in_len, int out_len)
576{
577 struct ib_ucm_create_id cmd;
578 struct ib_ucm_create_id_resp resp;
579 struct ib_ucm_context *ctx;
580 int result;
581
582 if (out_len < sizeof(resp))
583 return -ENOSPC;
584
585 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
586 return -EFAULT;
587
588 ctx = ib_ucm_ctx_alloc(file);
589 if (!ctx)
590 return -ENOMEM;
591
592 ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler,
593 (void *)(unsigned long)ctx->id);
594 if (!ctx->cm_id) {
595 result = -ENOMEM;
596 goto err_cm;
597 }
598
599 resp.id = ctx->id;
600 if (copy_to_user((void __user *)(unsigned long)cmd.response,
601 &resp, sizeof(resp))) {
602 result = -EFAULT;
603 goto err_ret;
604 }
605
606 return 0;
607err_ret:
608 ib_destroy_cm_id(ctx->cm_id);
609err_cm:
610 ib_ucm_ctx_put(ctx); /* user reference */
611
612 return result;
613}
614
615static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file,
616 const char __user *inbuf,
617 int in_len, int out_len)
618{
619 struct ib_ucm_destroy_id cmd;
620 struct ib_ucm_context *ctx;
621
622 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
623 return -EFAULT;
624
625 ctx = ib_ucm_ctx_get(cmd.id);
626 if (!ctx)
627 return -ENOENT;
628
629 ib_ucm_ctx_put(ctx); /* user reference */
630 ib_ucm_ctx_put(ctx); /* func reference */
631
632 return 0;
633}
634
635static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file,
636 const char __user *inbuf,
637 int in_len, int out_len)
638{
639 struct ib_ucm_attr_id_resp resp;
640 struct ib_ucm_attr_id cmd;
641 struct ib_ucm_context *ctx;
642 int result = 0;
643
644 if (out_len < sizeof(resp))
645 return -ENOSPC;
646
647 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
648 return -EFAULT;
649
650 ctx = ib_ucm_ctx_get(cmd.id);
651 if (!ctx)
652 return -ENOENT;
653
654 down(&ctx->file->mutex);
655 if (ctx->file != file) {
656 result = -EINVAL;
657 goto done;
658 }
659
660 resp.service_id = ctx->cm_id->service_id;
661 resp.service_mask = ctx->cm_id->service_mask;
662 resp.local_id = ctx->cm_id->local_id;
663 resp.remote_id = ctx->cm_id->remote_id;
664
665 if (copy_to_user((void __user *)(unsigned long)cmd.response,
666 &resp, sizeof(resp)))
667 result = -EFAULT;
668
669done:
670 up(&ctx->file->mutex);
671 ib_ucm_ctx_put(ctx); /* func reference */
672 return result;
673}
674
675static ssize_t ib_ucm_listen(struct ib_ucm_file *file,
676 const char __user *inbuf,
677 int in_len, int out_len)
678{
679 struct ib_ucm_listen cmd;
680 struct ib_ucm_context *ctx;
681 int result;
682
683 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
684 return -EFAULT;
685
686 ctx = ib_ucm_ctx_get(cmd.id);
687 if (!ctx)
688 return -ENOENT;
689
690 down(&ctx->file->mutex);
691 if (ctx->file != file)
692 result = -EINVAL;
693 else
694 result = ib_cm_listen(ctx->cm_id, cmd.service_id,
695 cmd.service_mask);
696
697 up(&ctx->file->mutex);
698 ib_ucm_ctx_put(ctx); /* func reference */
699 return result;
700}
701
702static ssize_t ib_ucm_establish(struct ib_ucm_file *file,
703 const char __user *inbuf,
704 int in_len, int out_len)
705{
706 struct ib_ucm_establish cmd;
707 struct ib_ucm_context *ctx;
708 int result;
709
710 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
711 return -EFAULT;
712
713 ctx = ib_ucm_ctx_get(cmd.id);
714 if (!ctx)
715 return -ENOENT;
716
717 down(&ctx->file->mutex);
718 if (ctx->file != file)
719 result = -EINVAL;
720 else
721 result = ib_cm_establish(ctx->cm_id);
722
723 up(&ctx->file->mutex);
724 ib_ucm_ctx_put(ctx); /* func reference */
725 return result;
726}
727
728static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len)
729{
730 void *data;
731
732 *dest = NULL;
733
734 if (!len)
735 return 0;
736
737 data = kmalloc(len, GFP_KERNEL);
738 if (!data)
739 return -ENOMEM;
740
741 if (copy_from_user(data, (void __user *)(unsigned long)src, len)) {
742 kfree(data);
743 return -EFAULT;
744 }
745
746 *dest = data;
747 return 0;
748}
749
750static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src)
751{
752 struct ib_ucm_path_rec ucm_path;
753 struct ib_sa_path_rec *sa_path;
754
755 *path = NULL;
756
757 if (!src)
758 return 0;
759
760 sa_path = kmalloc(sizeof(*sa_path), GFP_KERNEL);
761 if (!sa_path)
762 return -ENOMEM;
763
764 if (copy_from_user(&ucm_path, (void __user *)(unsigned long)src,
765 sizeof(ucm_path))) {
766
767 kfree(sa_path);
768 return -EFAULT;
769 }
770
771 memcpy(sa_path->dgid.raw, ucm_path.dgid, sizeof(union ib_gid));
772 memcpy(sa_path->sgid.raw, ucm_path.sgid, sizeof(union ib_gid));
773
774 sa_path->dlid = ucm_path.dlid;
775 sa_path->slid = ucm_path.slid;
776 sa_path->raw_traffic = ucm_path.raw_traffic;
777 sa_path->flow_label = ucm_path.flow_label;
778 sa_path->hop_limit = ucm_path.hop_limit;
779 sa_path->traffic_class = ucm_path.traffic_class;
780 sa_path->reversible = ucm_path.reversible;
781 sa_path->numb_path = ucm_path.numb_path;
782 sa_path->pkey = ucm_path.pkey;
783 sa_path->sl = ucm_path.sl;
784 sa_path->mtu_selector = ucm_path.mtu_selector;
785 sa_path->mtu = ucm_path.mtu;
786 sa_path->rate_selector = ucm_path.rate_selector;
787 sa_path->rate = ucm_path.rate;
788 sa_path->packet_life_time = ucm_path.packet_life_time;
789 sa_path->preference = ucm_path.preference;
790
791 sa_path->packet_life_time_selector =
792 ucm_path.packet_life_time_selector;
793
794 *path = sa_path;
795 return 0;
796}
797
798static ssize_t ib_ucm_send_req(struct ib_ucm_file *file,
799 const char __user *inbuf,
800 int in_len, int out_len)
801{
802 struct ib_cm_req_param param;
803 struct ib_ucm_context *ctx;
804 struct ib_ucm_req cmd;
805 int result;
806
807 param.private_data = NULL;
808 param.primary_path = NULL;
809 param.alternate_path = NULL;
810
811 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
812 return -EFAULT;
813
814 result = ib_ucm_alloc_data(&param.private_data, cmd.data, cmd.len);
815 if (result)
816 goto done;
817
818 result = ib_ucm_path_get(&param.primary_path, cmd.primary_path);
819 if (result)
820 goto done;
821
822 result = ib_ucm_path_get(&param.alternate_path, cmd.alternate_path);
823 if (result)
824 goto done;
825
826 param.private_data_len = cmd.len;
827 param.service_id = cmd.sid;
828 param.qp_num = cmd.qpn;
829 param.qp_type = cmd.qp_type;
830 param.starting_psn = cmd.psn;
831 param.peer_to_peer = cmd.peer_to_peer;
832 param.responder_resources = cmd.responder_resources;
833 param.initiator_depth = cmd.initiator_depth;
834 param.remote_cm_response_timeout = cmd.remote_cm_response_timeout;
835 param.flow_control = cmd.flow_control;
836 param.local_cm_response_timeout = cmd.local_cm_response_timeout;
837 param.retry_count = cmd.retry_count;
838 param.rnr_retry_count = cmd.rnr_retry_count;
839 param.max_cm_retries = cmd.max_cm_retries;
840 param.srq = cmd.srq;
841
842 ctx = ib_ucm_ctx_get(cmd.id);
843 if (!ctx) {
844 result = -ENOENT;
845 goto done;
846 }
847
848 down(&ctx->file->mutex);
849 if (ctx->file != file)
850 result = -EINVAL;
851 else
852 result = ib_send_cm_req(ctx->cm_id, &param);
853
854 up(&ctx->file->mutex);
855 ib_ucm_ctx_put(ctx); /* func reference */
856done:
857 kfree(param.private_data);
858 kfree(param.primary_path);
859 kfree(param.alternate_path);
860
861 return result;
862}
863
864static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file,
865 const char __user *inbuf,
866 int in_len, int out_len)
867{
868 struct ib_cm_rep_param param;
869 struct ib_ucm_context *ctx;
870 struct ib_ucm_rep cmd;
871 int result;
872
873 param.private_data = NULL;
874
875 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
876 return -EFAULT;
877
878 result = ib_ucm_alloc_data(&param.private_data, cmd.data, cmd.len);
879 if (result)
880 return result;
881
882 param.qp_num = cmd.qpn;
883 param.starting_psn = cmd.psn;
884 param.private_data_len = cmd.len;
885 param.responder_resources = cmd.responder_resources;
886 param.initiator_depth = cmd.initiator_depth;
887 param.target_ack_delay = cmd.target_ack_delay;
888 param.failover_accepted = cmd.failover_accepted;
889 param.flow_control = cmd.flow_control;
890 param.rnr_retry_count = cmd.rnr_retry_count;
891 param.srq = cmd.srq;
892
893 ctx = ib_ucm_ctx_get(cmd.id);
894 if (!ctx) {
895 result = -ENOENT;
896 goto done;
897 }
898
899 down(&ctx->file->mutex);
900 if (ctx->file != file)
901 result = -EINVAL;
902 else
903 result = ib_send_cm_rep(ctx->cm_id, &param);
904
905 up(&ctx->file->mutex);
906 ib_ucm_ctx_put(ctx); /* func reference */
907done:
908 kfree(param.private_data);
909
910 return result;
911}
912
913static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file,
914 const char __user *inbuf, int in_len,
915 int (*func)(struct ib_cm_id *cm_id,
916 const void *private_data,
917 u8 private_data_len))
918{
919 struct ib_ucm_private_data cmd;
920 struct ib_ucm_context *ctx;
921 const void *private_data = NULL;
922 int result;
923
924 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
925 return -EFAULT;
926
927 result = ib_ucm_alloc_data(&private_data, cmd.data, cmd.len);
928 if (result)
929 return result;
930
931 ctx = ib_ucm_ctx_get(cmd.id);
932 if (!ctx) {
933 result = -ENOENT;
934 goto done;
935 }
936
937 down(&ctx->file->mutex);
938 if (ctx->file != file)
939 result = -EINVAL;
940 else
941 result = func(ctx->cm_id, private_data, cmd.len);
942
943 up(&ctx->file->mutex);
944 ib_ucm_ctx_put(ctx); /* func reference */
945done:
946 kfree(private_data);
947
948 return result;
949}
950
951static ssize_t ib_ucm_send_rtu(struct ib_ucm_file *file,
952 const char __user *inbuf,
953 int in_len, int out_len)
954{
955 return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_rtu);
956}
957
958static ssize_t ib_ucm_send_dreq(struct ib_ucm_file *file,
959 const char __user *inbuf,
960 int in_len, int out_len)
961{
962 return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_dreq);
963}
964
965static ssize_t ib_ucm_send_drep(struct ib_ucm_file *file,
966 const char __user *inbuf,
967 int in_len, int out_len)
968{
969 return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_drep);
970}
971
972static ssize_t ib_ucm_send_info(struct ib_ucm_file *file,
973 const char __user *inbuf, int in_len,
974 int (*func)(struct ib_cm_id *cm_id,
975 int status,
976 const void *info,
977 u8 info_len,
978 const void *data,
979 u8 data_len))
980{
981 struct ib_ucm_context *ctx;
982 struct ib_ucm_info cmd;
983 const void *data = NULL;
984 const void *info = NULL;
985 int result;
986
987 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
988 return -EFAULT;
989
990 result = ib_ucm_alloc_data(&data, cmd.data, cmd.data_len);
991 if (result)
992 goto done;
993
994 result = ib_ucm_alloc_data(&info, cmd.info, cmd.info_len);
995 if (result)
996 goto done;
997
998 ctx = ib_ucm_ctx_get(cmd.id);
999 if (!ctx) {
1000 result = -ENOENT;
1001 goto done;
1002 }
1003
1004 down(&ctx->file->mutex);
1005 if (ctx->file != file)
1006 result = -EINVAL;
1007 else
1008 result = func(ctx->cm_id, cmd.status,
1009 info, cmd.info_len,
1010 data, cmd.data_len);
1011
1012 up(&ctx->file->mutex);
1013 ib_ucm_ctx_put(ctx); /* func reference */
1014done:
1015 kfree(data);
1016 kfree(info);
1017
1018 return result;
1019}
1020
1021static ssize_t ib_ucm_send_rej(struct ib_ucm_file *file,
1022 const char __user *inbuf,
1023 int in_len, int out_len)
1024{
1025 return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_rej);
1026}
1027
1028static ssize_t ib_ucm_send_apr(struct ib_ucm_file *file,
1029 const char __user *inbuf,
1030 int in_len, int out_len)
1031{
1032 return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_apr);
1033}
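
RTU, DREQ and DREP share one body through ib_ucm_send_private_data() because ib_send_cm_rtu(), ib_send_cm_dreq() and ib_send_cm_drep() all take the same (cm_id, private_data, len) arguments, so a plain function pointer suffices. REJ and APR reuse ib_ucm_send_info() only via (void *) casts, which discards type checking: ib_send_cm_rej() takes an enum ib_cm_rej_reason where the pointer type says int. A cast-free alternative would be a shim per call, sketched here as an assumption rather than something the patch does:

	static int send_rej_shim(struct ib_cm_id *cm_id, int status,
				 const void *info, u8 info_len,
				 const void *data, u8 data_len)
	{
		/* the enum conversion is explicit and localized here
		 * instead of hidden in a function-pointer cast */
		return ib_send_cm_rej(cm_id,
				      (enum ib_cm_rej_reason) status,
				      (void *) info, info_len,
				      data, data_len);
	}
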
1034
1035static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file,
1036 const char __user *inbuf,
1037 int in_len, int out_len)
1038{
1039 struct ib_ucm_context *ctx;
1040 struct ib_ucm_mra cmd;
1041 const void *data = NULL;
1042 int result;
1043
1044 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1045 return -EFAULT;
1046
1047 result = ib_ucm_alloc_data(&data, cmd.data, cmd.len);
1048 if (result)
1049 return result;
1050
1051 ctx = ib_ucm_ctx_get(cmd.id);
1052 if (!ctx) {
1053 result = -ENOENT;
1054 goto done;
1055 }
1056
1057 down(&ctx->file->mutex);
1058 if (ctx->file != file)
1059 result = -EINVAL;
1060 else
1061 result = ib_send_cm_mra(ctx->cm_id, cmd.timeout,
1062 data, cmd.len);
1063
1064 up(&ctx->file->mutex);
1065 ib_ucm_ctx_put(ctx); /* func reference */
1066done:
1067 kfree(data);
1068
1069 return result;
1070}
1071
1072static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file,
1073 const char __user *inbuf,
1074 int in_len, int out_len)
1075{
1076 struct ib_ucm_context *ctx;
1077 struct ib_sa_path_rec *path = NULL;
1078 struct ib_ucm_lap cmd;
1079 const void *data = NULL;
1080 int result;
1081
1082 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1083 return -EFAULT;
1084
1085 result = ib_ucm_alloc_data(&data, cmd.data, cmd.len);
1086 if (result)
1087 goto done;
1088
1089 result = ib_ucm_path_get(&path, cmd.path);
1090 if (result)
1091 goto done;
1092
1093 ctx = ib_ucm_ctx_get(cmd.id);
1094 if (!ctx) {
1095 result = -ENOENT;
1096 goto done;
1097 }
1098
1099 down(&ctx->file->mutex);
1100 if (ctx->file != file)
1101 result = -EINVAL;
1102 else
1103 result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len);
1104
1105 up(&ctx->file->mutex);
1106 ib_ucm_ctx_put(ctx); /* func reference */
1107done:
1108 kfree(data);
1109 kfree(path);
1110
1111 return result;
1112}
1113
1114static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file,
1115 const char __user *inbuf,
1116 int in_len, int out_len)
1117{
1118 struct ib_cm_sidr_req_param param;
1119 struct ib_ucm_context *ctx;
1120 struct ib_ucm_sidr_req cmd;
1121 int result;
1122
1123 param.private_data = NULL;
1124 param.path = NULL;
1125
1126 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1127 return -EFAULT;
1128
1129 result = ib_ucm_alloc_data(&param.private_data, cmd.data, cmd.len);
1130 if (result)
1131 goto done;
1132
1133 result = ib_ucm_path_get(&param.path, cmd.path);
1134 if (result)
1135 goto done;
1136
1137 param.private_data_len = cmd.len;
1138 param.service_id = cmd.sid;
1139 param.timeout_ms = cmd.timeout;
1140 param.max_cm_retries = cmd.max_cm_retries;
1141 param.pkey = cmd.pkey;
1142
1143 ctx = ib_ucm_ctx_get(cmd.id);
1144 if (!ctx) {
1145 result = -ENOENT;
1146 goto done;
1147 }
1148
1149 down(&ctx->file->mutex);
1150 if (ctx->file != file)
1151 result = -EINVAL;
1152 else
1153 result = ib_send_cm_sidr_req(ctx->cm_id, &param);
1154
1155 up(&ctx->file->mutex);
1156 ib_ucm_ctx_put(ctx); /* func reference */
1157done:
1158 kfree(param.private_data);
1159 kfree(param.path);
1160
1161 return result;
1162}
1163
1164static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file,
1165 const char __user *inbuf,
1166 int in_len, int out_len)
1167{
1168 struct ib_cm_sidr_rep_param param;
1169 struct ib_ucm_sidr_rep cmd;
1170 struct ib_ucm_context *ctx;
1171 int result;
1172
1173 param.info = NULL;
1174
1175 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1176 return -EFAULT;
1177
1178 result = ib_ucm_alloc_data(&param.private_data,
1179 cmd.data, cmd.data_len);
1180 if (result)
1181 goto done;
1182
1183 result = ib_ucm_alloc_data(&param.info, cmd.info, cmd.info_len);
1184 if (result)
1185 goto done;
1186
1187 param.qp_num = cmd.qpn;
1188 param.qkey = cmd.qkey;
1189 param.status = cmd.status;
1190 param.info_length = cmd.info_len;
1191 param.private_data_len = cmd.data_len;
1192
1193 ctx = ib_ucm_ctx_get(cmd.id);
1194 if (!ctx) {
1195 result = -ENOENT;
1196 goto done;
1197 }
1198
1199 down(&ctx->file->mutex);
1200 if (ctx->file != file)
1201 result = -EINVAL;
1202 else
1203 result = ib_send_cm_sidr_rep(ctx->cm_id, &param);
1204
1205 up(&ctx->file->mutex);
1206 ib_ucm_ctx_put(ctx); /* func reference */
1207done:
1208 kfree(param.private_data);
1209 kfree(param.info);
1210
1211 return result;
1212}
1213
1214static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file,
1215 const char __user *inbuf,
1216 int in_len, int out_len) = {
1217 [IB_USER_CM_CMD_CREATE_ID] = ib_ucm_create_id,
1218 [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id,
1219 [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id,
1220 [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen,
1221 [IB_USER_CM_CMD_ESTABLISH] = ib_ucm_establish,
1222 [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req,
1223 [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep,
1224 [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu,
1225 [IB_USER_CM_CMD_SEND_DREQ] = ib_ucm_send_dreq,
1226 [IB_USER_CM_CMD_SEND_DREP] = ib_ucm_send_drep,
1227 [IB_USER_CM_CMD_SEND_REJ] = ib_ucm_send_rej,
1228 [IB_USER_CM_CMD_SEND_MRA] = ib_ucm_send_mra,
1229 [IB_USER_CM_CMD_SEND_LAP] = ib_ucm_send_lap,
1230 [IB_USER_CM_CMD_SEND_APR] = ib_ucm_send_apr,
1231 [IB_USER_CM_CMD_SEND_SIDR_REQ] = ib_ucm_send_sidr_req,
1232 [IB_USER_CM_CMD_SEND_SIDR_REP] = ib_ucm_send_sidr_rep,
1233 [IB_USER_CM_CMD_EVENT] = ib_ucm_event,
1234};
1235
1236static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
1237 size_t len, loff_t *pos)
1238{
1239 struct ib_ucm_file *file = filp->private_data;
1240 struct ib_ucm_cmd_hdr hdr;
1241 ssize_t result;
1242
1243 if (len < sizeof(hdr))
1244 return -EINVAL;
1245
1246 if (copy_from_user(&hdr, buf, sizeof(hdr)))
1247 return -EFAULT;
1248
1249 ucm_dbg("Write. cmd <%d> in <%d> out <%d> len <%Zu>\n",
1250 hdr.cmd, hdr.in, hdr.out, len);
1251
1252 if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
1253 return -EINVAL;
1254
1255 if (hdr.in + sizeof(hdr) > len)
1256 return -EINVAL;
1257
1258 result = ucm_cmd_table[hdr.cmd](file, buf + sizeof(hdr),
1259 hdr.in, hdr.out);
1260 if (!result)
1261 result = len;
1262
1263 return result;
1264}
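
Every write() to the ucm device carries a fixed ib_ucm_cmd_hdr naming the command and the sizes of its input and output payloads; the handler range-checks the command index against ucm_cmd_table and verifies that the declared input fits inside the write before dispatching, echoing back the full length on success. (Assuming the __u32 declaration of hdr.cmd in ib_user_cm.h, the hdr.cmd < 0 arm of the check is vacuous.) A hypothetical userspace invocation, with structure layouts assumed from ib_user_cm.h and the device path left to local udev policy:

	#include <string.h>
	#include <unistd.h>

	static int ucm_create_id(int fd, struct ib_ucm_create_id_resp *resp)
	{
		struct ib_ucm_cmd_hdr hdr;
		struct ib_ucm_create_id cmd;
		char buf[sizeof hdr + sizeof cmd];

		hdr.cmd = IB_USER_CM_CMD_CREATE_ID;
		hdr.in  = sizeof cmd;		/* payload after the header */
		hdr.out = sizeof *resp;
		cmd.response = (unsigned long) resp;	/* kernel fills this */

		memcpy(buf, &hdr, sizeof hdr);
		memcpy(buf + sizeof hdr, &cmd, sizeof cmd);

		/* success is signalled by echoing back the full length */
		return write(fd, buf, sizeof buf) == sizeof buf ? 0 : -1;
	}
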
1265
1266static unsigned int ib_ucm_poll(struct file *filp,
1267 struct poll_table_struct *wait)
1268{
1269 struct ib_ucm_file *file = filp->private_data;
1270 unsigned int mask = 0;
1271
1272 poll_wait(filp, &file->poll_wait, wait);
1273
1274 if (!list_empty(&file->events))
1275 mask = POLLIN | POLLRDNORM;
1276
1277 return mask;
1278}
1279
1280static int ib_ucm_open(struct inode *inode, struct file *filp)
1281{
1282 struct ib_ucm_file *file;
1283
1284 file = kmalloc(sizeof(*file), GFP_KERNEL);
1285 if (!file)
1286 return -ENOMEM;
1287
1288 INIT_LIST_HEAD(&file->events);
1289 INIT_LIST_HEAD(&file->ctxs);
1290 init_waitqueue_head(&file->poll_wait);
1291
1292 init_MUTEX(&file->mutex);
1293
1294 filp->private_data = file;
1295 file->filp = filp;
1296
1297 ucm_dbg("Created struct\n");
1298
1299 return 0;
1300}
1301
1302static int ib_ucm_close(struct inode *inode, struct file *filp)
1303{
1304 struct ib_ucm_file *file = filp->private_data;
1305 struct ib_ucm_context *ctx;
1306
1307 down(&file->mutex);
1308
1309 while (!list_empty(&file->ctxs)) {
1310
1311 ctx = list_entry(file->ctxs.next,
1312 struct ib_ucm_context, file_list);
1313
1314 up(&ctx->file->mutex);
1315 ib_ucm_ctx_put(ctx); /* user reference */
1316 down(&file->mutex);
1317 }
1318
1319 up(&file->mutex);
1320
1321 kfree(file);
1322
1323 ucm_dbg("Deleted struct\n");
1324 return 0;
1325}
1326
1327static struct file_operations ib_ucm_fops = {
1328 .owner = THIS_MODULE,
1329 .open = ib_ucm_open,
1330 .release = ib_ucm_close,
1331 .write = ib_ucm_write,
1332 .poll = ib_ucm_poll,
1333};
1334
1335
1336static struct class *ib_ucm_class;
1337static struct cdev ib_ucm_cdev;
1338
1339static int __init ib_ucm_init(void)
1340{
1341 int result;
1342
1343 result = register_chrdev_region(IB_UCM_DEV, 1, "infiniband_cm");
1344 if (result) {
1345 ucm_dbg("Error <%d> registering dev\n", result);
1346 goto err_chr;
1347 }
1348
1349 cdev_init(&ib_ucm_cdev, &ib_ucm_fops);
1350
1351 result = cdev_add(&ib_ucm_cdev, IB_UCM_DEV, 1);
1352 if (result) {
1353 ucm_dbg("Error <%d> adding cdev\n", result);
1354 goto err_cdev;
1355 }
1356
1357 ib_ucm_class = class_create(THIS_MODULE, "infiniband_cm");
1358 if (IS_ERR(ib_ucm_class)) {
1359 result = PTR_ERR(ib_ucm_class);
1360 ucm_dbg("Error <%d> creating class\n", result);
1361 goto err_class;
1362 }
1363
1364 class_device_create(ib_ucm_class, IB_UCM_DEV, NULL, "ucm");
1365
1366 idr_init(&ctx_id_table);
1367 init_MUTEX(&ctx_id_mutex);
1368
1369 return 0;
1370err_class:
1371 cdev_del(&ib_ucm_cdev);
1372err_cdev:
1373 unregister_chrdev_region(IB_UCM_DEV, 1);
1374err_chr:
1375 return result;
1376}
1377
1378static void __exit ib_ucm_cleanup(void)
1379{
1380 class_device_destroy(ib_ucm_class, IB_UCM_DEV);
1381 class_destroy(ib_ucm_class);
1382 cdev_del(&ib_ucm_cdev);
1383 unregister_chrdev_region(IB_UCM_DEV, 1);
1384}
1385
1386module_init(ib_ucm_init);
1387module_exit(ib_ucm_cleanup);
diff --git a/drivers/infiniband/core/ucm.h b/drivers/infiniband/core/ucm.h
new file mode 100644
index 000000000000..6d36606151b2
--- /dev/null
+++ b/drivers/infiniband/core/ucm.h
@@ -0,0 +1,89 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ucm.h 2208 2005-04-22 23:24:31Z libor $
33 */
34
35#ifndef UCM_H
36#define UCM_H
37
38#include <linux/fs.h>
39#include <linux/device.h>
40#include <linux/cdev.h>
41#include <linux/idr.h>
42
43#include <ib_cm.h>
44#include <ib_user_cm.h>
45
46#define IB_UCM_CM_ID_INVALID 0xffffffff
47
48struct ib_ucm_file {
49 struct semaphore mutex;
50 struct file *filp;
51 /*
 52 * lists of active connections and pending events
53 */
54 struct list_head ctxs; /* list of active connections */
55 struct list_head events; /* list of pending events */
56 wait_queue_head_t poll_wait;
57};
58
59struct ib_ucm_context {
60 int id;
61 int ref;
62 int error;
63
64 struct ib_ucm_file *file;
65 struct ib_cm_id *cm_id;
66 struct semaphore mutex;
67
68 struct list_head events; /* list of pending events. */
69 struct list_head file_list; /* member in file ctx list */
70};
71
72struct ib_ucm_event {
73 struct ib_ucm_context *ctx;
74 struct list_head file_list; /* member in file event list */
75 struct list_head ctx_list; /* member in ctx event list */
76
77 struct ib_ucm_event_resp resp;
78 void *data;
79 void *info;
80 int data_len;
81 int info_len;
82 /*
 83 * new connection identifiers need to be saved until
84 * userspace can get a handle on them.
85 */
86 struct ib_cm_id *cm_id;
87};
88
89#endif /* UCM_H */
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 9d912d6877ff..2e38792df533 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1,5 +1,7 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
4 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
3 * 5 *
4 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 7 * licenses. You may choose to be licensed under the terms of the GNU
@@ -29,7 +31,7 @@
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 32 * SOFTWARE.
31 * 33 *
32 * $Id: user_mad.c 1389 2004-12-27 22:56:47Z roland $ 34 * $Id: user_mad.c 2814 2005-07-06 19:14:09Z halr $
33 */ 35 */
34 36
35#include <linux/module.h> 37#include <linux/module.h>
@@ -94,10 +96,12 @@ struct ib_umad_file {
94}; 96};
95 97
96struct ib_umad_packet { 98struct ib_umad_packet {
97 struct ib_user_mad mad;
98 struct ib_ah *ah; 99 struct ib_ah *ah;
100 struct ib_mad_send_buf *msg;
99 struct list_head list; 101 struct list_head list;
102 int length;
100 DECLARE_PCI_UNMAP_ADDR(mapping) 103 DECLARE_PCI_UNMAP_ADDR(mapping)
104 struct ib_user_mad mad;
101}; 105};
102 106
103static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); 107static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
@@ -114,10 +118,10 @@ static int queue_packet(struct ib_umad_file *file,
114 int ret = 1; 118 int ret = 1;
115 119
116 down_read(&file->agent_mutex); 120 down_read(&file->agent_mutex);
117 for (packet->mad.id = 0; 121 for (packet->mad.hdr.id = 0;
118 packet->mad.id < IB_UMAD_MAX_AGENTS; 122 packet->mad.hdr.id < IB_UMAD_MAX_AGENTS;
119 packet->mad.id++) 123 packet->mad.hdr.id++)
120 if (agent == file->agent[packet->mad.id]) { 124 if (agent == file->agent[packet->mad.hdr.id]) {
121 spin_lock_irq(&file->recv_lock); 125 spin_lock_irq(&file->recv_lock);
122 list_add_tail(&packet->list, &file->recv_list); 126 list_add_tail(&packet->list, &file->recv_list);
123 spin_unlock_irq(&file->recv_lock); 127 spin_unlock_irq(&file->recv_lock);
@@ -135,22 +139,30 @@ static void send_handler(struct ib_mad_agent *agent,
135 struct ib_mad_send_wc *send_wc) 139 struct ib_mad_send_wc *send_wc)
136{ 140{
137 struct ib_umad_file *file = agent->context; 141 struct ib_umad_file *file = agent->context;
138 struct ib_umad_packet *packet = 142 struct ib_umad_packet *timeout, *packet =
139 (void *) (unsigned long) send_wc->wr_id; 143 (void *) (unsigned long) send_wc->wr_id;
140 144
141 dma_unmap_single(agent->device->dma_device, 145 ib_destroy_ah(packet->msg->send_wr.wr.ud.ah);
142 pci_unmap_addr(packet, mapping), 146 ib_free_send_mad(packet->msg);
143 sizeof packet->mad.data,
144 DMA_TO_DEVICE);
145 ib_destroy_ah(packet->ah);
146 147
147 if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { 148 if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
148 packet->mad.status = ETIMEDOUT; 149 timeout = kmalloc(sizeof *timeout + sizeof (struct ib_mad_hdr),
150 GFP_KERNEL);
151 if (!timeout)
152 goto out;
149 153
150 if (!queue_packet(file, agent, packet)) 154 memset(timeout, 0, sizeof *timeout + sizeof (struct ib_mad_hdr));
151 return;
152 }
153 155
156 timeout->length = sizeof (struct ib_mad_hdr);
157 timeout->mad.hdr.id = packet->mad.hdr.id;
158 timeout->mad.hdr.status = ETIMEDOUT;
159 memcpy(timeout->mad.data, packet->mad.data,
160 sizeof (struct ib_mad_hdr));
161
162 if (!queue_packet(file, agent, timeout))
163 return;
164 }
165out:
154 kfree(packet); 166 kfree(packet);
155} 167}
156 168
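
On IB_WC_RESP_TIMEOUT_ERR the original packet can no longer be recycled, since its payload now lives in the ib_mad_send_buf that was just freed; instead a minimal report is synthesized carrying only the MAD header (enough for userspace to match the transaction ID) with hdr.status set to ETIMEDOUT. A hypothetical userspace-side check:

	/* Assumption: a nonzero hdr.status marks a locally generated
	 * report; for ETIMEDOUT only the first sizeof(struct ib_mad_hdr)
	 * bytes of data are meaningful. */
	static int is_timeout_report(const struct ib_user_mad *umad)
	{
		return umad->hdr.status == ETIMEDOUT;
	}
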
@@ -159,30 +171,35 @@ static void recv_handler(struct ib_mad_agent *agent,
159{ 171{
160 struct ib_umad_file *file = agent->context; 172 struct ib_umad_file *file = agent->context;
161 struct ib_umad_packet *packet; 173 struct ib_umad_packet *packet;
174 int length;
162 175
163 if (mad_recv_wc->wc->status != IB_WC_SUCCESS) 176 if (mad_recv_wc->wc->status != IB_WC_SUCCESS)
164 goto out; 177 goto out;
165 178
166 packet = kmalloc(sizeof *packet, GFP_KERNEL); 179 length = mad_recv_wc->mad_len;
180 packet = kmalloc(sizeof *packet + length, GFP_KERNEL);
167 if (!packet) 181 if (!packet)
168 goto out; 182 goto out;
169 183
170 memset(packet, 0, sizeof *packet); 184 memset(packet, 0, sizeof *packet + length);
185 packet->length = length;
186
187 ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data);
171 188
172 memcpy(packet->mad.data, mad_recv_wc->recv_buf.mad, sizeof packet->mad.data); 189 packet->mad.hdr.status = 0;
173 packet->mad.status = 0; 190 packet->mad.hdr.length = length + sizeof (struct ib_user_mad);
174 packet->mad.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); 191 packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
175 packet->mad.lid = cpu_to_be16(mad_recv_wc->wc->slid); 192 packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid);
176 packet->mad.sl = mad_recv_wc->wc->sl; 193 packet->mad.hdr.sl = mad_recv_wc->wc->sl;
177 packet->mad.path_bits = mad_recv_wc->wc->dlid_path_bits; 194 packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
178 packet->mad.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); 195 packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
179 if (packet->mad.grh_present) { 196 if (packet->mad.hdr.grh_present) {
180 /* XXX parse GRH */ 197 /* XXX parse GRH */
181 packet->mad.gid_index = 0; 198 packet->mad.hdr.gid_index = 0;
182 packet->mad.hop_limit = 0; 199 packet->mad.hdr.hop_limit = 0;
183 packet->mad.traffic_class = 0; 200 packet->mad.hdr.traffic_class = 0;
184 memset(packet->mad.gid, 0, 16); 201 memset(packet->mad.hdr.gid, 0, 16);
185 packet->mad.flow_label = 0; 202 packet->mad.hdr.flow_label = 0;
186 } 203 }
187 204
188 if (queue_packet(file, agent, packet)) 205 if (queue_packet(file, agent, packet))
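
Moving the ib_user_mad to the end of struct ib_umad_packet is what makes this receive path work: the packet bookkeeping and a MAD of arbitrary coalesced length (RMPP transfers exceed the fixed 256-byte struct ib_mad) come from a single kmalloc, with mad.data simply trailing the declared structure. In isolation:

	/* Hypothetical helper: one allocation covers the fixed fields
	 * plus mad_len bytes of MAD data, legal only because `mad' is
	 * the final member of the struct. */
	static struct ib_umad_packet *example_alloc_packet(int mad_len)
	{
		struct ib_umad_packet *packet;

		packet = kmalloc(sizeof *packet + mad_len, GFP_KERNEL);
		if (packet)
			memset(packet, 0, sizeof *packet + mad_len);
		return packet;
	}
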
@@ -199,7 +216,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
199 struct ib_umad_packet *packet; 216 struct ib_umad_packet *packet;
200 ssize_t ret; 217 ssize_t ret;
201 218
202 if (count < sizeof (struct ib_user_mad)) 219 if (count < sizeof (struct ib_user_mad) + sizeof (struct ib_mad))
203 return -EINVAL; 220 return -EINVAL;
204 221
205 spin_lock_irq(&file->recv_lock); 222 spin_lock_irq(&file->recv_lock);
@@ -222,12 +239,25 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
222 239
223 spin_unlock_irq(&file->recv_lock); 240 spin_unlock_irq(&file->recv_lock);
224 241
225 if (copy_to_user(buf, &packet->mad, sizeof packet->mad)) 242 if (count < packet->length + sizeof (struct ib_user_mad)) {
243 /* Return length needed (and first RMPP segment) if too small */
244 if (copy_to_user(buf, &packet->mad,
245 sizeof (struct ib_user_mad) + sizeof (struct ib_mad)))
246 ret = -EFAULT;
247 else
248 ret = -ENOSPC;
249 } else if (copy_to_user(buf, &packet->mad,
250 packet->length + sizeof (struct ib_user_mad)))
226 ret = -EFAULT; 251 ret = -EFAULT;
227 else 252 else
228 ret = sizeof packet->mad; 253 ret = packet->length + sizeof (struct ib_user_mad);
229 254 if (ret < 0) {
230 kfree(packet); 255 /* Requeue packet */
256 spin_lock_irq(&file->recv_lock);
257 list_add(&packet->list, &file->recv_list);
258 spin_unlock_irq(&file->recv_lock);
259 } else
260 kfree(packet);
231 return ret; 261 return ret;
232} 262}
233 263
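
The read path now reports variable-length MADs: a buffer too small for the packet gets the first segment plus -ENOSPC, and on any error the packet is pushed back onto the head of the receive list, so a retry still sees it. Since hdr.length holds the total size needed, userspace can grow its buffer and read again; a hypothetical helper (the buffer must start at least sizeof(struct ib_user_mad) + sizeof(struct ib_mad) bytes long, per the -EINVAL check above):

	#include <errno.h>
	#include <stdlib.h>
	#include <unistd.h>

	static ssize_t example_read_mad(int fd, void **bufp, size_t *lenp)
	{
		struct ib_user_mad *umad;
		size_t need;
		void *nbuf;
		ssize_t n;

		n = read(fd, *bufp, *lenp);
		if (n >= 0 || errno != ENOSPC)
			return n;

		/* ENOSPC: the first segment was copied and the packet
		 * requeued; its header carries the total size needed */
		umad = *bufp;
		need = umad->hdr.length;
		nbuf = realloc(*bufp, need);
		if (!nbuf)
			return -1;
		*bufp = nbuf;
		*lenp = need;
		return read(fd, *bufp, *lenp);
	}
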
@@ -238,69 +268,57 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
238 struct ib_umad_packet *packet; 268 struct ib_umad_packet *packet;
239 struct ib_mad_agent *agent; 269 struct ib_mad_agent *agent;
240 struct ib_ah_attr ah_attr; 270 struct ib_ah_attr ah_attr;
241 struct ib_sge gather_list; 271 struct ib_send_wr *bad_wr;
242 struct ib_send_wr *bad_wr, wr = { 272 struct ib_rmpp_mad *rmpp_mad;
243 .opcode = IB_WR_SEND,
244 .sg_list = &gather_list,
245 .num_sge = 1,
246 .send_flags = IB_SEND_SIGNALED,
247 };
248 u8 method; 273 u8 method;
249 u64 *tid; 274 u64 *tid;
250 int ret; 275 int ret, length, hdr_len, data_len, rmpp_hdr_size;
276 int rmpp_active = 0;
251 277
252 if (count < sizeof (struct ib_user_mad)) 278 if (count < sizeof (struct ib_user_mad))
253 return -EINVAL; 279 return -EINVAL;
254 280
255 packet = kmalloc(sizeof *packet, GFP_KERNEL); 281 length = count - sizeof (struct ib_user_mad);
282 packet = kmalloc(sizeof *packet + sizeof(struct ib_mad_hdr) +
283 sizeof(struct ib_rmpp_hdr), GFP_KERNEL);
256 if (!packet) 284 if (!packet)
257 return -ENOMEM; 285 return -ENOMEM;
258 286
259 if (copy_from_user(&packet->mad, buf, sizeof packet->mad)) { 287 if (copy_from_user(&packet->mad, buf,
260 kfree(packet); 288 sizeof (struct ib_user_mad) +
261 return -EFAULT; 289 sizeof(struct ib_mad_hdr) +
290 sizeof(struct ib_rmpp_hdr))) {
291 ret = -EFAULT;
292 goto err;
262 } 293 }
263 294
264 if (packet->mad.id < 0 || packet->mad.id >= IB_UMAD_MAX_AGENTS) { 295 if (packet->mad.hdr.id < 0 ||
296 packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) {
265 ret = -EINVAL; 297 ret = -EINVAL;
266 goto err; 298 goto err;
267 } 299 }
268 300
301 packet->length = length;
302
269 down_read(&file->agent_mutex); 303 down_read(&file->agent_mutex);
270 304
271 agent = file->agent[packet->mad.id]; 305 agent = file->agent[packet->mad.hdr.id];
272 if (!agent) { 306 if (!agent) {
273 ret = -EINVAL; 307 ret = -EINVAL;
274 goto err_up; 308 goto err_up;
275 } 309 }
276 310
277 /*
278 * If userspace is generating a request that will generate a
279 * response, we need to make sure the high-order part of the
280 * transaction ID matches the agent being used to send the
281 * MAD.
282 */
283 method = ((struct ib_mad_hdr *) packet->mad.data)->method;
284
285 if (!(method & IB_MGMT_METHOD_RESP) &&
286 method != IB_MGMT_METHOD_TRAP_REPRESS &&
287 method != IB_MGMT_METHOD_SEND) {
288 tid = &((struct ib_mad_hdr *) packet->mad.data)->tid;
289 *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
290 (be64_to_cpup(tid) & 0xffffffff));
291 }
292
293 memset(&ah_attr, 0, sizeof ah_attr); 311 memset(&ah_attr, 0, sizeof ah_attr);
294 ah_attr.dlid = be16_to_cpu(packet->mad.lid); 312 ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid);
295 ah_attr.sl = packet->mad.sl; 313 ah_attr.sl = packet->mad.hdr.sl;
296 ah_attr.src_path_bits = packet->mad.path_bits; 314 ah_attr.src_path_bits = packet->mad.hdr.path_bits;
297 ah_attr.port_num = file->port->port_num; 315 ah_attr.port_num = file->port->port_num;
298 if (packet->mad.grh_present) { 316 if (packet->mad.hdr.grh_present) {
299 ah_attr.ah_flags = IB_AH_GRH; 317 ah_attr.ah_flags = IB_AH_GRH;
300 memcpy(ah_attr.grh.dgid.raw, packet->mad.gid, 16); 318 memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
301 ah_attr.grh.flow_label = packet->mad.flow_label; 319 ah_attr.grh.flow_label = packet->mad.hdr.flow_label;
302 ah_attr.grh.hop_limit = packet->mad.hop_limit; 320 ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
303 ah_attr.grh.traffic_class = packet->mad.traffic_class; 321 ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
304 } 322 }
305 323
306 packet->ah = ib_create_ah(agent->qp->pd, &ah_attr); 324 packet->ah = ib_create_ah(agent->qp->pd, &ah_attr);
@@ -309,34 +327,104 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
309 goto err_up; 327 goto err_up;
310 } 328 }
311 329
312 gather_list.addr = dma_map_single(agent->device->dma_device, 330 rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
313 packet->mad.data, 331 if (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE) {
314 sizeof packet->mad.data, 332 /* RMPP active */
315 DMA_TO_DEVICE); 333 if (!agent->rmpp_version) {
316 gather_list.length = sizeof packet->mad.data; 334 ret = -EINVAL;
317 gather_list.lkey = file->mr[packet->mad.id]->lkey; 335 goto err_ah;
318 pci_unmap_addr_set(packet, mapping, gather_list.addr); 336 }
337 /* Validate that management class can support RMPP */
338 if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) {
339 hdr_len = offsetof(struct ib_sa_mad, data);
340 data_len = length;
341 } else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
342 (rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) {
343 hdr_len = offsetof(struct ib_vendor_mad, data);
344 data_len = length - hdr_len;
345 } else {
346 ret = -EINVAL;
347 goto err_ah;
348 }
349 rmpp_active = 1;
350 } else {
351 if (length > sizeof(struct ib_mad)) {
352 ret = -EINVAL;
353 goto err_ah;
354 }
355 hdr_len = offsetof(struct ib_mad, data);
356 data_len = length - hdr_len;
357 }
358
359 packet->msg = ib_create_send_mad(agent,
360 be32_to_cpu(packet->mad.hdr.qpn),
361 0, packet->ah, rmpp_active,
362 hdr_len, data_len,
363 GFP_KERNEL);
364 if (IS_ERR(packet->msg)) {
365 ret = PTR_ERR(packet->msg);
366 goto err_ah;
367 }
319 368
320 wr.wr.ud.mad_hdr = (struct ib_mad_hdr *) packet->mad.data; 369 packet->msg->send_wr.wr.ud.timeout_ms = packet->mad.hdr.timeout_ms;
321 wr.wr.ud.ah = packet->ah; 370 packet->msg->send_wr.wr.ud.retries = packet->mad.hdr.retries;
322 wr.wr.ud.remote_qpn = be32_to_cpu(packet->mad.qpn);
323 wr.wr.ud.remote_qkey = be32_to_cpu(packet->mad.qkey);
324 wr.wr.ud.timeout_ms = packet->mad.timeout_ms;
325 371
326 wr.wr_id = (unsigned long) packet; 372 /* Override send WR WRID initialized in ib_create_send_mad */
373 packet->msg->send_wr.wr_id = (unsigned long) packet;
327 374
328 ret = ib_post_send_mad(agent, &wr, &bad_wr); 375 if (!rmpp_active) {
329 if (ret) { 376 /* Copy message from user into send buffer */
330 dma_unmap_single(agent->device->dma_device, 377 if (copy_from_user(packet->msg->mad,
331 pci_unmap_addr(packet, mapping), 378 buf + sizeof(struct ib_user_mad), length)) {
332 sizeof packet->mad.data, 379 ret = -EFAULT;
333 DMA_TO_DEVICE); 380 goto err_msg;
334 goto err_up; 381 }
382 } else {
383 rmpp_hdr_size = sizeof(struct ib_mad_hdr) +
384 sizeof(struct ib_rmpp_hdr);
385
386 /* Only copy MAD headers (RMPP header in place) */
387 memcpy(packet->msg->mad, packet->mad.data,
388 sizeof(struct ib_mad_hdr));
389
390 /* Now, copy rest of message from user into send buffer */
391 if (copy_from_user(((struct ib_rmpp_mad *) packet->msg->mad)->data,
392 buf + sizeof (struct ib_user_mad) + rmpp_hdr_size,
393 length - rmpp_hdr_size)) {
394 ret = -EFAULT;
395 goto err_msg;
396 }
397 }
398
399 /*
400 * If userspace is generating a request that will generate a
401 * response, we need to make sure the high-order part of the
402 * transaction ID matches the agent being used to send the
403 * MAD.
404 */
405 method = packet->msg->mad->mad_hdr.method;
406
407 if (!(method & IB_MGMT_METHOD_RESP) &&
408 method != IB_MGMT_METHOD_TRAP_REPRESS &&
409 method != IB_MGMT_METHOD_SEND) {
410 tid = &packet->msg->mad->mad_hdr.tid;
411 *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
412 (be64_to_cpup(tid) & 0xffffffff));
335 } 413 }
336 414
415 ret = ib_post_send_mad(agent, &packet->msg->send_wr, &bad_wr);
416 if (ret)
417 goto err_msg;
418
337 up_read(&file->agent_mutex); 419 up_read(&file->agent_mutex);
338 420
339 return sizeof packet->mad; 421 return sizeof (struct ib_user_mad_hdr) + packet->length;
422
423err_msg:
424 ib_free_send_mad(packet->msg);
425
426err_ah:
427 ib_destroy_ah(packet->ah);
340 428
341err_up: 429err_up:
342 up_read(&file->agent_mutex); 430 up_read(&file->agent_mutex);
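With the send work request now built inside ib_create_send_mad(), a plain (non-RMPP) send from userspace is just the ib_user_mad header followed by the MAD itself. A hedged sketch; agent_id, dlid and build_mad() are hypothetical stand-ins, and 256 again stands in for sizeof (struct ib_mad):

static int send_mad(int fd, __u32 agent_id, __u16 dlid)
{
        char buf[sizeof (struct ib_user_mad) + 256];
        struct ib_user_mad *umad = (struct ib_user_mad *) buf;

        memset(buf, 0, sizeof buf);
        umad->hdr.id  = agent_id;          /* from IB_USER_MAD_REGISTER_AGENT */
        umad->hdr.lid = htons(dlid);       /* kernel does be16_to_cpu() */
        umad->hdr.qpn = htonl(1);          /* GSI */
        build_mad((void *) umad->data);    /* hypothetical MAD constructor */

        return write(fd, buf, sizeof buf) == sizeof buf ? 0 : -1;
}

Note that for requests the kernel overwrites the high-order 32 bits of the TID with the agent's hi_tid, as the hunk above shows, so userspace only controls the low half.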
@@ -399,7 +487,8 @@ found:
399 agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, 487 agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
400 ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, 488 ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
401 ureq.mgmt_class ? &req : NULL, 489 ureq.mgmt_class ? &req : NULL,
402 0, send_handler, recv_handler, file); 490 ureq.rmpp_version,
491 send_handler, recv_handler, file);
403 if (IS_ERR(agent)) { 492 if (IS_ERR(agent)) {
404 ret = PTR_ERR(agent); 493 ret = PTR_ERR(agent);
405 goto out; 494 goto out;
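The new ureq.rmpp_version argument is how userspace opts in to kernel-side RMPP segmentation and reassembly; passing 0 preserves the old behaviour. A registration sketch for the SA class (struct and ioctl names as used in this file; the constant values are the standard IB ones):

        struct ib_user_mad_reg_req req;

        memset(&req, 0, sizeof req);
        req.qpn                = 1;    /* GSI */
        req.mgmt_class         = 0x03; /* IB_MGMT_CLASS_SUBN_ADM */
        req.mgmt_class_version = 2;
        req.rmpp_version       = 1;    /* 0 = no RMPP, as before */

        if (ioctl(fd, IB_USER_MAD_REGISTER_AGENT, &req) == 0)
                agent_id = req.id;     /* kernel fills in the agent slot */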
@@ -460,8 +549,8 @@ out:
460 return ret; 549 return ret;
461} 550}
462 551
463static long ib_umad_ioctl(struct file *filp, 552static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
464 unsigned int cmd, unsigned long arg) 553 unsigned long arg)
465{ 554{
466 switch (cmd) { 555 switch (cmd) {
467 case IB_USER_MAD_REGISTER_AGENT: 556 case IB_USER_MAD_REGISTER_AGENT:
@@ -517,14 +606,14 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
517} 606}
518 607
519static struct file_operations umad_fops = { 608static struct file_operations umad_fops = {
520 .owner = THIS_MODULE, 609 .owner = THIS_MODULE,
521 .read = ib_umad_read, 610 .read = ib_umad_read,
522 .write = ib_umad_write, 611 .write = ib_umad_write,
523 .poll = ib_umad_poll, 612 .poll = ib_umad_poll,
524 .unlocked_ioctl = ib_umad_ioctl, 613 .unlocked_ioctl = ib_umad_ioctl,
525 .compat_ioctl = ib_umad_ioctl, 614 .compat_ioctl = ib_umad_ioctl,
526 .open = ib_umad_open, 615 .open = ib_umad_open,
527 .release = ib_umad_close 616 .release = ib_umad_close
528}; 617};
529 618
530static int ib_umad_sm_open(struct inode *inode, struct file *filp) 619static int ib_umad_sm_open(struct inode *inode, struct file *filp)
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
new file mode 100644
index 000000000000..7696022f9a4e
--- /dev/null
+++ b/drivers/infiniband/core/uverbs.h
@@ -0,0 +1,133 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: uverbs.h 2559 2005-06-06 19:43:16Z roland $
34 */
35
36#ifndef UVERBS_H
37#define UVERBS_H
38
39/* Include device.h and fs.h until cdev.h is self-sufficient */
40#include <linux/fs.h>
41#include <linux/device.h>
42#include <linux/cdev.h>
43#include <linux/kref.h>
44#include <linux/idr.h>
45
46#include <ib_verbs.h>
47#include <ib_user_verbs.h>
48
49struct ib_uverbs_device {
50 int devnum;
51 struct cdev dev;
52 struct class_device class_dev;
53 struct ib_device *ib_dev;
54 int num_comp;
55};
56
57struct ib_uverbs_event_file {
58 struct kref ref;
59 struct ib_uverbs_file *uverbs_file;
60 spinlock_t lock;
61 int fd;
62 int is_async;
63 wait_queue_head_t poll_wait;
64 struct fasync_struct *async_queue;
65 struct list_head event_list;
66};
67
68struct ib_uverbs_file {
69 struct kref ref;
70 struct ib_uverbs_device *device;
71 struct ib_ucontext *ucontext;
72 struct ib_event_handler event_handler;
73 struct ib_uverbs_event_file async_file;
74 struct ib_uverbs_event_file comp_file[1];
75};
76
77struct ib_uverbs_async_event {
78 struct ib_uverbs_async_event_desc desc;
79 struct list_head list;
80};
81
82struct ib_uverbs_comp_event {
83 struct ib_uverbs_comp_event_desc desc;
84 struct list_head list;
85};
86
87struct ib_uobject_mr {
88 struct ib_uobject uobj;
89 struct page *page_list;
90 struct scatterlist *sg_list;
91};
92
93extern struct semaphore ib_uverbs_idr_mutex;
94extern struct idr ib_uverbs_pd_idr;
95extern struct idr ib_uverbs_mr_idr;
96extern struct idr ib_uverbs_mw_idr;
97extern struct idr ib_uverbs_ah_idr;
98extern struct idr ib_uverbs_cq_idr;
99extern struct idr ib_uverbs_qp_idr;
100
101void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
102void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
103void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
104
105int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
106 void *addr, size_t size, int write);
107void ib_umem_release(struct ib_device *dev, struct ib_umem *umem);
108void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
109
110#define IB_UVERBS_DECLARE_CMD(name) \
111 ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
112 const char __user *buf, int in_len, \
113 int out_len)
114
115IB_UVERBS_DECLARE_CMD(query_params);
116IB_UVERBS_DECLARE_CMD(get_context);
117IB_UVERBS_DECLARE_CMD(query_device);
118IB_UVERBS_DECLARE_CMD(query_port);
119IB_UVERBS_DECLARE_CMD(query_gid);
120IB_UVERBS_DECLARE_CMD(query_pkey);
121IB_UVERBS_DECLARE_CMD(alloc_pd);
122IB_UVERBS_DECLARE_CMD(dealloc_pd);
123IB_UVERBS_DECLARE_CMD(reg_mr);
124IB_UVERBS_DECLARE_CMD(dereg_mr);
125IB_UVERBS_DECLARE_CMD(create_cq);
126IB_UVERBS_DECLARE_CMD(destroy_cq);
127IB_UVERBS_DECLARE_CMD(create_qp);
128IB_UVERBS_DECLARE_CMD(modify_qp);
129IB_UVERBS_DECLARE_CMD(destroy_qp);
130IB_UVERBS_DECLARE_CMD(attach_mcast);
131IB_UVERBS_DECLARE_CMD(detach_mcast);
132
133#endif /* UVERBS_H */
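For reference, each IB_UVERBS_DECLARE_CMD() use above expands to a handler prototype; IB_UVERBS_DECLARE_CMD(alloc_pd), for instance, becomes exactly the signature stored in the uverbs_cmd_table dispatch array in uverbs_main.c:

ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
                           const char __user *buf, int in_len,
                           int out_len);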
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
new file mode 100644
index 000000000000..5f2bbcda4c73
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -0,0 +1,1006 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $
34 */
35
36#include <asm/uaccess.h>
37
38#include "uverbs.h"
39
40#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
41 do { \
42 (udata)->inbuf = (void __user *) (ibuf); \
43 (udata)->outbuf = (void __user *) (obuf); \
44 (udata)->inlen = (ilen); \
45 (udata)->outlen = (olen); \
46 } while (0)
47
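INIT_UDATA captures the marshalling convention used by every command in this file: driver-private bytes travel in-band after the fixed command struct in the write() buffer, and after the fixed response struct at cmd.response. Schematically, for alloc_pd:

/*
 * write() buffer:  [cmd_hdr][ib_uverbs_alloc_pd][driver in-data ...]
 *                            ^ buf               ^ udata.inbuf
 * cmd.response:    [ib_uverbs_alloc_pd_resp][driver out-data ...]
 *                                            ^ udata.outbuf
 *
 * The hardware driver consumes the extra bytes through the ib_udata
 * copy helpers that accompany this interface.
 */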
48ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file,
49 const char __user *buf,
50 int in_len, int out_len)
51{
52 struct ib_uverbs_query_params cmd;
53 struct ib_uverbs_query_params_resp resp;
54
55 if (out_len < sizeof resp)
56 return -ENOSPC;
57
58 if (copy_from_user(&cmd, buf, sizeof cmd))
59 return -EFAULT;
60
61 memset(&resp, 0, sizeof resp);
62
63 resp.num_cq_events = file->device->num_comp;
64
65 if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp))
66 return -EFAULT;
67
68 return in_len;
69}
70
71ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
72 const char __user *buf,
73 int in_len, int out_len)
74{
75 struct ib_uverbs_get_context cmd;
76 struct ib_uverbs_get_context_resp resp;
77 struct ib_udata udata;
78 struct ib_device *ibdev = file->device->ib_dev;
79 int i;
80 int ret = in_len;
81
82 if (out_len < sizeof resp)
83 return -ENOSPC;
84
85 if (copy_from_user(&cmd, buf, sizeof cmd))
86 return -EFAULT;
87
88 INIT_UDATA(&udata, buf + sizeof cmd,
89 (unsigned long) cmd.response + sizeof resp,
90 in_len - sizeof cmd, out_len - sizeof resp);
91
92 file->ucontext = ibdev->alloc_ucontext(ibdev, &udata);
93 if (IS_ERR(file->ucontext)) {
94 ret = PTR_ERR(file->ucontext);
95 file->ucontext = NULL;
96 return ret;
97 }
98
99 file->ucontext->device = ibdev;
100 INIT_LIST_HEAD(&file->ucontext->pd_list);
101 INIT_LIST_HEAD(&file->ucontext->mr_list);
102 INIT_LIST_HEAD(&file->ucontext->mw_list);
103 INIT_LIST_HEAD(&file->ucontext->cq_list);
104 INIT_LIST_HEAD(&file->ucontext->qp_list);
105 INIT_LIST_HEAD(&file->ucontext->srq_list);
106 INIT_LIST_HEAD(&file->ucontext->ah_list);
107 spin_lock_init(&file->ucontext->lock);
108
109 resp.async_fd = file->async_file.fd;
110 for (i = 0; i < file->device->num_comp; ++i)
111 if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab +
112 i * sizeof (__u32),
113 &file->comp_file[i].fd, sizeof (__u32)))
114 goto err;
115
116 if (copy_to_user((void __user *) (unsigned long) cmd.response,
117 &resp, sizeof resp))
118 goto err;
119
120 return in_len;
121
122err:
123 ibdev->dealloc_ucontext(file->ucontext);
124 file->ucontext = NULL;
125
126 return -EFAULT;
127}
128
129ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
130 const char __user *buf,
131 int in_len, int out_len)
132{
133 struct ib_uverbs_query_device cmd;
134 struct ib_uverbs_query_device_resp resp;
135 struct ib_device_attr attr;
136 int ret;
137
138 if (out_len < sizeof resp)
139 return -ENOSPC;
140
141 if (copy_from_user(&cmd, buf, sizeof cmd))
142 return -EFAULT;
143
144 ret = ib_query_device(file->device->ib_dev, &attr);
145 if (ret)
146 return ret;
147
148 memset(&resp, 0, sizeof resp);
149
150 resp.fw_ver = attr.fw_ver;
151 resp.node_guid = attr.node_guid;
152 resp.sys_image_guid = attr.sys_image_guid;
153 resp.max_mr_size = attr.max_mr_size;
154 resp.page_size_cap = attr.page_size_cap;
155 resp.vendor_id = attr.vendor_id;
156 resp.vendor_part_id = attr.vendor_part_id;
157 resp.hw_ver = attr.hw_ver;
158 resp.max_qp = attr.max_qp;
159 resp.max_qp_wr = attr.max_qp_wr;
160 resp.device_cap_flags = attr.device_cap_flags;
161 resp.max_sge = attr.max_sge;
162 resp.max_sge_rd = attr.max_sge_rd;
163 resp.max_cq = attr.max_cq;
164 resp.max_cqe = attr.max_cqe;
165 resp.max_mr = attr.max_mr;
166 resp.max_pd = attr.max_pd;
167 resp.max_qp_rd_atom = attr.max_qp_rd_atom;
168 resp.max_ee_rd_atom = attr.max_ee_rd_atom;
169 resp.max_res_rd_atom = attr.max_res_rd_atom;
170 resp.max_qp_init_rd_atom = attr.max_qp_init_rd_atom;
171 resp.max_ee_init_rd_atom = attr.max_ee_init_rd_atom;
172 resp.atomic_cap = attr.atomic_cap;
173 resp.max_ee = attr.max_ee;
174 resp.max_rdd = attr.max_rdd;
175 resp.max_mw = attr.max_mw;
176 resp.max_raw_ipv6_qp = attr.max_raw_ipv6_qp;
177 resp.max_raw_ethy_qp = attr.max_raw_ethy_qp;
178 resp.max_mcast_grp = attr.max_mcast_grp;
179 resp.max_mcast_qp_attach = attr.max_mcast_qp_attach;
180 resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach;
181 resp.max_ah = attr.max_ah;
182 resp.max_fmr = attr.max_fmr;
183 resp.max_map_per_fmr = attr.max_map_per_fmr;
184 resp.max_srq = attr.max_srq;
185 resp.max_srq_wr = attr.max_srq_wr;
186 resp.max_srq_sge = attr.max_srq_sge;
187 resp.max_pkeys = attr.max_pkeys;
188 resp.local_ca_ack_delay = attr.local_ca_ack_delay;
189 resp.phys_port_cnt = file->device->ib_dev->phys_port_cnt;
190
191 if (copy_to_user((void __user *) (unsigned long) cmd.response,
192 &resp, sizeof resp))
193 return -EFAULT;
194
195 return in_len;
196}
197
198ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
199 const char __user *buf,
200 int in_len, int out_len)
201{
202 struct ib_uverbs_query_port cmd;
203 struct ib_uverbs_query_port_resp resp;
204 struct ib_port_attr attr;
205 int ret;
206
207 if (out_len < sizeof resp)
208 return -ENOSPC;
209
210 if (copy_from_user(&cmd, buf, sizeof cmd))
211 return -EFAULT;
212
213 ret = ib_query_port(file->device->ib_dev, cmd.port_num, &attr);
214 if (ret)
215 return ret;
216
217 memset(&resp, 0, sizeof resp);
218
219 resp.state = attr.state;
220 resp.max_mtu = attr.max_mtu;
221 resp.active_mtu = attr.active_mtu;
222 resp.gid_tbl_len = attr.gid_tbl_len;
223 resp.port_cap_flags = attr.port_cap_flags;
224 resp.max_msg_sz = attr.max_msg_sz;
225 resp.bad_pkey_cntr = attr.bad_pkey_cntr;
226 resp.qkey_viol_cntr = attr.qkey_viol_cntr;
227 resp.pkey_tbl_len = attr.pkey_tbl_len;
228 resp.lid = attr.lid;
229 resp.sm_lid = attr.sm_lid;
230 resp.lmc = attr.lmc;
231 resp.max_vl_num = attr.max_vl_num;
232 resp.sm_sl = attr.sm_sl;
233 resp.subnet_timeout = attr.subnet_timeout;
234 resp.init_type_reply = attr.init_type_reply;
235 resp.active_width = attr.active_width;
236 resp.active_speed = attr.active_speed;
237 resp.phys_state = attr.phys_state;
238
239 if (copy_to_user((void __user *) (unsigned long) cmd.response,
240 &resp, sizeof resp))
241 return -EFAULT;
242
243 return in_len;
244}
245
246ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file,
247 const char __user *buf,
248 int in_len, int out_len)
249{
250 struct ib_uverbs_query_gid cmd;
251 struct ib_uverbs_query_gid_resp resp;
252 int ret;
253
254 if (out_len < sizeof resp)
255 return -ENOSPC;
256
257 if (copy_from_user(&cmd, buf, sizeof cmd))
258 return -EFAULT;
259
260 memset(&resp, 0, sizeof resp);
261
262 ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index,
263 (union ib_gid *) resp.gid);
264 if (ret)
265 return ret;
266
267 if (copy_to_user((void __user *) (unsigned long) cmd.response,
268 &resp, sizeof resp))
269 return -EFAULT;
270
271 return in_len;
272}
273
274ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file,
275 const char __user *buf,
276 int in_len, int out_len)
277{
278 struct ib_uverbs_query_pkey cmd;
279 struct ib_uverbs_query_pkey_resp resp;
280 int ret;
281
282 if (out_len < sizeof resp)
283 return -ENOSPC;
284
285 if (copy_from_user(&cmd, buf, sizeof cmd))
286 return -EFAULT;
287
288 memset(&resp, 0, sizeof resp);
289
290 ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index,
291 &resp.pkey);
292 if (ret)
293 return ret;
294
295 if (copy_to_user((void __user *) (unsigned long) cmd.response,
296 &resp, sizeof resp))
297 return -EFAULT;
298
299 return in_len;
300}
301
302ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
303 const char __user *buf,
304 int in_len, int out_len)
305{
306 struct ib_uverbs_alloc_pd cmd;
307 struct ib_uverbs_alloc_pd_resp resp;
308 struct ib_udata udata;
309 struct ib_uobject *uobj;
310 struct ib_pd *pd;
311 int ret;
312
313 if (out_len < sizeof resp)
314 return -ENOSPC;
315
316 if (copy_from_user(&cmd, buf, sizeof cmd))
317 return -EFAULT;
318
319 INIT_UDATA(&udata, buf + sizeof cmd,
320 (unsigned long) cmd.response + sizeof resp,
321 in_len - sizeof cmd, out_len - sizeof resp);
322
323 uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
324 if (!uobj)
325 return -ENOMEM;
326
327 uobj->context = file->ucontext;
328
329 pd = file->device->ib_dev->alloc_pd(file->device->ib_dev,
330 file->ucontext, &udata);
331 if (IS_ERR(pd)) {
332 ret = PTR_ERR(pd);
333 goto err;
334 }
335
336 pd->device = file->device->ib_dev;
337 pd->uobject = uobj;
338 atomic_set(&pd->usecnt, 0);
339
340retry:
341 if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) {
342 ret = -ENOMEM;
343 goto err_pd;
344 }
345
346 down(&ib_uverbs_idr_mutex);
347 ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id);
348 up(&ib_uverbs_idr_mutex);
349
350 if (ret == -EAGAIN)
351 goto retry;
352 if (ret)
353 goto err_pd;
354
355 spin_lock_irq(&file->ucontext->lock);
356 list_add_tail(&uobj->list, &file->ucontext->pd_list);
357 spin_unlock_irq(&file->ucontext->lock);
358
359 memset(&resp, 0, sizeof resp);
360 resp.pd_handle = uobj->id;
361
362 if (copy_to_user((void __user *) (unsigned long) cmd.response,
363 &resp, sizeof resp)) {
364 ret = -EFAULT;
365 goto err_list;
366 }
367
368 return in_len;
369
370err_list:
371 spin_lock_irq(&file->ucontext->lock);
372 list_del(&uobj->list);
373 spin_unlock_irq(&file->ucontext->lock);
374
375 down(&ib_uverbs_idr_mutex);
376 idr_remove(&ib_uverbs_pd_idr, uobj->id);
377 up(&ib_uverbs_idr_mutex);
378
379err_pd:
380 ib_dealloc_pd(pd);
381
382err:
383 kfree(uobj);
384 return ret;
385}
386
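The retry loop around idr_pre_get()/idr_get_new() above is the stock idiom for this era's idr API and is repeated for every object type in this file; stripped of the object-specific error paths it reads as follows (some_idr, ptr and id stand in for the specific names):

        int id, ret;

        do {
                if (!idr_pre_get(&some_idr, GFP_KERNEL))
                        return -ENOMEM;         /* preload; may sleep */
                down(&ib_uverbs_idr_mutex);
                ret = idr_get_new(&some_idr, ptr, &id);
                up(&ib_uverbs_idr_mutex);
        } while (ret == -EAGAIN);               /* raced; preload again */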
387ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
388 const char __user *buf,
389 int in_len, int out_len)
390{
391 struct ib_uverbs_dealloc_pd cmd;
392 struct ib_pd *pd;
393 struct ib_uobject *uobj;
394 int ret = -EINVAL;
395
396 if (copy_from_user(&cmd, buf, sizeof cmd))
397 return -EFAULT;
398
399 down(&ib_uverbs_idr_mutex);
400
401 pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
402 if (!pd || pd->uobject->context != file->ucontext)
403 goto out;
404
405 uobj = pd->uobject;
406
407 ret = ib_dealloc_pd(pd);
408 if (ret)
409 goto out;
410
411 idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle);
412
413 spin_lock_irq(&file->ucontext->lock);
414 list_del(&uobj->list);
415 spin_unlock_irq(&file->ucontext->lock);
416
417 kfree(uobj);
418
419out:
420 up(&ib_uverbs_idr_mutex);
421
422 return ret ? ret : in_len;
423}
424
425ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
426 const char __user *buf, int in_len,
427 int out_len)
428{
429 struct ib_uverbs_reg_mr cmd;
430 struct ib_uverbs_reg_mr_resp resp;
431 struct ib_udata udata;
432 struct ib_umem_object *obj;
433 struct ib_pd *pd;
434 struct ib_mr *mr;
435 int ret;
436
437 if (out_len < sizeof resp)
438 return -ENOSPC;
439
440 if (copy_from_user(&cmd, buf, sizeof cmd))
441 return -EFAULT;
442
443 INIT_UDATA(&udata, buf + sizeof cmd,
444 (unsigned long) cmd.response + sizeof resp,
445 in_len - sizeof cmd, out_len - sizeof resp);
446
447 if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
448 return -EINVAL;
449
450 obj = kmalloc(sizeof *obj, GFP_KERNEL);
451 if (!obj)
452 return -ENOMEM;
453
454 obj->uobject.context = file->ucontext;
455
456 /*
457 * We ask for writable memory if any access flags other than
458 * "remote read" are set. "Local write" and "remote write"
459 * obviously require write access. "Remote atomic" can do
460 * things like fetch and add, which will modify memory, and
461 * "MW bind" can change permissions by binding a window.
462 */
463 ret = ib_umem_get(file->device->ib_dev, &obj->umem,
464 (void *) (unsigned long) cmd.start, cmd.length,
465 !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ));
466 if (ret)
467 goto err_free;
468
469 obj->umem.virt_base = cmd.hca_va;
470
471 down(&ib_uverbs_idr_mutex);
472
473 pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
474 if (!pd || pd->uobject->context != file->ucontext) {
475 ret = -EINVAL;
476 goto err_up;
477 }
478
479 if (!pd->device->reg_user_mr) {
480 ret = -ENOSYS;
481 goto err_up;
482 }
483
484 mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata);
485 if (IS_ERR(mr)) {
486 ret = PTR_ERR(mr);
487 goto err_up;
488 }
489
490 mr->device = pd->device;
491 mr->pd = pd;
492 mr->uobject = &obj->uobject;
493 atomic_inc(&pd->usecnt);
494 atomic_set(&mr->usecnt, 0);
495
496 memset(&resp, 0, sizeof resp);
497 resp.lkey = mr->lkey;
498 resp.rkey = mr->rkey;
499
500retry:
501 if (!idr_pre_get(&ib_uverbs_mr_idr, GFP_KERNEL)) {
502 ret = -ENOMEM;
503 goto err_unreg;
504 }
505
506 ret = idr_get_new(&ib_uverbs_mr_idr, mr, &obj->uobject.id);
507
508 if (ret == -EAGAIN)
509 goto retry;
510 if (ret)
511 goto err_unreg;
512
513 resp.mr_handle = obj->uobject.id;
514
515 spin_lock_irq(&file->ucontext->lock);
516 list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
517 spin_unlock_irq(&file->ucontext->lock);
518
519 if (copy_to_user((void __user *) (unsigned long) cmd.response,
520 &resp, sizeof resp)) {
521 ret = -EFAULT;
522 goto err_list;
523 }
524
525 up(&ib_uverbs_idr_mutex);
526
527 return in_len;
528
529err_list:
530 spin_lock_irq(&file->ucontext->lock);
531 list_del(&obj->uobject.list);
532 spin_unlock_irq(&file->ucontext->lock);
533
534err_unreg:
535 ib_dereg_mr(mr);
536
537err_up:
538 up(&ib_uverbs_idr_mutex);
539
540 ib_umem_release(file->device->ib_dev, &obj->umem);
541
542err_free:
543 kfree(obj);
544 return ret;
545}
546
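Note the page-offset check near the top of ib_uverbs_reg_mr(): cmd.start and cmd.hca_va need not be page aligned themselves, they only have to be misaligned identically, since the HCA maps whole pages. Illustrative values, assuming 4 KB pages:

        cmd.start  = 0x2aaab5601234ULL;     /* offset 0x234 in its page */
        cmd.hca_va = 0x0000000010000234ULL; /* same offset: accepted    */
        cmd.hca_va = 0x0000000010000000ULL; /* offset 0x000: -EINVAL    */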
547ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
548 const char __user *buf, int in_len,
549 int out_len)
550{
551 struct ib_uverbs_dereg_mr cmd;
552 struct ib_mr *mr;
553 struct ib_umem_object *memobj;
554 int ret = -EINVAL;
555
556 if (copy_from_user(&cmd, buf, sizeof cmd))
557 return -EFAULT;
558
559 down(&ib_uverbs_idr_mutex);
560
561 mr = idr_find(&ib_uverbs_mr_idr, cmd.mr_handle);
562 if (!mr || mr->uobject->context != file->ucontext)
563 goto out;
564
565 memobj = container_of(mr->uobject, struct ib_umem_object, uobject);
566
567 ret = ib_dereg_mr(mr);
568 if (ret)
569 goto out;
570
571 idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle);
572
573 spin_lock_irq(&file->ucontext->lock);
574 list_del(&memobj->uobject.list);
575 spin_unlock_irq(&file->ucontext->lock);
576
577 ib_umem_release(file->device->ib_dev, &memobj->umem);
578 kfree(memobj);
579
580out:
581 up(&ib_uverbs_idr_mutex);
582
583 return ret ? ret : in_len;
584}
585
586ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
587 const char __user *buf, int in_len,
588 int out_len)
589{
590 struct ib_uverbs_create_cq cmd;
591 struct ib_uverbs_create_cq_resp resp;
592 struct ib_udata udata;
593 struct ib_uobject *uobj;
594 struct ib_cq *cq;
595 int ret;
596
597 if (out_len < sizeof resp)
598 return -ENOSPC;
599
600 if (copy_from_user(&cmd, buf, sizeof cmd))
601 return -EFAULT;
602
603 INIT_UDATA(&udata, buf + sizeof cmd,
604 (unsigned long) cmd.response + sizeof resp,
605 in_len - sizeof cmd, out_len - sizeof resp);
606
607 if (cmd.event_handler >= file->device->num_comp)
608 return -EINVAL;
609
610 uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
611 if (!uobj)
612 return -ENOMEM;
613
614 uobj->user_handle = cmd.user_handle;
615 uobj->context = file->ucontext;
616
617 cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe,
618 file->ucontext, &udata);
619 if (IS_ERR(cq)) {
620 ret = PTR_ERR(cq);
621 goto err;
622 }
623
624 cq->device = file->device->ib_dev;
625 cq->uobject = uobj;
626 cq->comp_handler = ib_uverbs_comp_handler;
627 cq->event_handler = ib_uverbs_cq_event_handler;
628 cq->cq_context = file;
629 atomic_set(&cq->usecnt, 0);
630
631retry:
632 if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) {
633 ret = -ENOMEM;
634 goto err_cq;
635 }
636
637 down(&ib_uverbs_idr_mutex);
638 ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->id);
639 up(&ib_uverbs_idr_mutex);
640
641 if (ret == -EAGAIN)
642 goto retry;
643 if (ret)
644 goto err_cq;
645
646 spin_lock_irq(&file->ucontext->lock);
647 list_add_tail(&uobj->list, &file->ucontext->cq_list);
648 spin_unlock_irq(&file->ucontext->lock);
649
650 memset(&resp, 0, sizeof resp);
651 resp.cq_handle = uobj->id;
652 resp.cqe = cq->cqe;
653
654 if (copy_to_user((void __user *) (unsigned long) cmd.response,
655 &resp, sizeof resp)) {
656 ret = -EFAULT;
657 goto err_list;
658 }
659
660 return in_len;
661
662err_list:
663 spin_lock_irq(&file->ucontext->lock);
664 list_del(&uobj->list);
665 spin_unlock_irq(&file->ucontext->lock);
666
667 down(&ib_uverbs_idr_mutex);
668 idr_remove(&ib_uverbs_cq_idr, uobj->id);
669 up(&ib_uverbs_idr_mutex);
670
671err_cq:
672 ib_destroy_cq(cq);
673
674err:
675 kfree(uobj);
676 return ret;
677}
678
679ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
680 const char __user *buf, int in_len,
681 int out_len)
682{
683 struct ib_uverbs_destroy_cq cmd;
684 struct ib_cq *cq;
685 struct ib_uobject *uobj;
686 int ret = -EINVAL;
687
688 if (copy_from_user(&cmd, buf, sizeof cmd))
689 return -EFAULT;
690
691 down(&ib_uverbs_idr_mutex);
692
693 cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
694 if (!cq || cq->uobject->context != file->ucontext)
695 goto out;
696
697 uobj = cq->uobject;
698
699 ret = ib_destroy_cq(cq);
700 if (ret)
701 goto out;
702
703 idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle);
704
705 spin_lock_irq(&file->ucontext->lock);
706 list_del(&uobj->list);
707 spin_unlock_irq(&file->ucontext->lock);
708
709 kfree(uobj);
710
711out:
712 up(&ib_uverbs_idr_mutex);
713
714 return ret ? ret : in_len;
715}
716
717ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
718 const char __user *buf, int in_len,
719 int out_len)
720{
721 struct ib_uverbs_create_qp cmd;
722 struct ib_uverbs_create_qp_resp resp;
723 struct ib_udata udata;
724 struct ib_uobject *uobj;
725 struct ib_pd *pd;
726 struct ib_cq *scq, *rcq;
727 struct ib_qp *qp;
728 struct ib_qp_init_attr attr;
729 int ret;
730
731 if (out_len < sizeof resp)
732 return -ENOSPC;
733
734 if (copy_from_user(&cmd, buf, sizeof cmd))
735 return -EFAULT;
736
737 INIT_UDATA(&udata, buf + sizeof cmd,
738 (unsigned long) cmd.response + sizeof resp,
739 in_len - sizeof cmd, out_len - sizeof resp);
740
741 uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
742 if (!uobj)
743 return -ENOMEM;
744
745 down(&ib_uverbs_idr_mutex);
746
747 pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
748 scq = idr_find(&ib_uverbs_cq_idr, cmd.send_cq_handle);
749 rcq = idr_find(&ib_uverbs_cq_idr, cmd.recv_cq_handle);
750
751 if (!pd || pd->uobject->context != file->ucontext ||
752 !scq || scq->uobject->context != file->ucontext ||
753 !rcq || rcq->uobject->context != file->ucontext) {
754 ret = -EINVAL;
755 goto err_up;
756 }
757
758 attr.event_handler = ib_uverbs_qp_event_handler;
759 attr.qp_context = file;
760 attr.send_cq = scq;
761 attr.recv_cq = rcq;
762 attr.srq = NULL;
763 attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
764 attr.qp_type = cmd.qp_type;
765
766 attr.cap.max_send_wr = cmd.max_send_wr;
767 attr.cap.max_recv_wr = cmd.max_recv_wr;
768 attr.cap.max_send_sge = cmd.max_send_sge;
769 attr.cap.max_recv_sge = cmd.max_recv_sge;
770 attr.cap.max_inline_data = cmd.max_inline_data;
771
772 uobj->user_handle = cmd.user_handle;
773 uobj->context = file->ucontext;
774
775 qp = pd->device->create_qp(pd, &attr, &udata);
776 if (IS_ERR(qp)) {
777 ret = PTR_ERR(qp);
778 goto err_up;
779 }
780
781 qp->device = pd->device;
782 qp->pd = pd;
783 qp->send_cq = attr.send_cq;
784 qp->recv_cq = attr.recv_cq;
785 qp->srq = attr.srq;
786 qp->uobject = uobj;
787 qp->event_handler = attr.event_handler;
788 qp->qp_context = attr.qp_context;
789 qp->qp_type = attr.qp_type;
790 atomic_inc(&pd->usecnt);
791 atomic_inc(&attr.send_cq->usecnt);
792 atomic_inc(&attr.recv_cq->usecnt);
793 if (attr.srq)
794 atomic_inc(&attr.srq->usecnt);
795
796 memset(&resp, 0, sizeof resp);
797 resp.qpn = qp->qp_num;
798
799retry:
800 if (!idr_pre_get(&ib_uverbs_qp_idr, GFP_KERNEL)) {
801 ret = -ENOMEM;
802 goto err_destroy;
803 }
804
805 ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->id);
806
807 if (ret == -EAGAIN)
808 goto retry;
809 if (ret)
810 goto err_destroy;
811
812 resp.qp_handle = uobj->id;
813
814 spin_lock_irq(&file->ucontext->lock);
815 list_add_tail(&uobj->list, &file->ucontext->qp_list);
816 spin_unlock_irq(&file->ucontext->lock);
817
818 if (copy_to_user((void __user *) (unsigned long) cmd.response,
819 &resp, sizeof resp)) {
820 ret = -EFAULT;
821 goto err_list;
822 }
823
824 up(&ib_uverbs_idr_mutex);
825
826 return in_len;
827
828err_list:
829 spin_lock_irq(&file->ucontext->lock);
830 list_del(&uobj->list);
831 spin_unlock_irq(&file->ucontext->lock);
832
833err_destroy:
834 ib_destroy_qp(qp);
835
836err_up:
837 up(&ib_uverbs_idr_mutex);
838
839 kfree(uobj);
840 return ret;
841}
842
843ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
844 const char __user *buf, int in_len,
845 int out_len)
846{
847 struct ib_uverbs_modify_qp cmd;
848 struct ib_qp *qp;
849 struct ib_qp_attr *attr;
850 int ret;
851
852 if (copy_from_user(&cmd, buf, sizeof cmd))
853 return -EFAULT;
854
855 attr = kmalloc(sizeof *attr, GFP_KERNEL);
856 if (!attr)
857 return -ENOMEM;
858
859 down(&ib_uverbs_idr_mutex);
860
861 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
862 if (!qp || qp->uobject->context != file->ucontext) {
863 ret = -EINVAL;
864 goto out;
865 }
866
867 attr->qp_state = cmd.qp_state;
868 attr->cur_qp_state = cmd.cur_qp_state;
869 attr->path_mtu = cmd.path_mtu;
870 attr->path_mig_state = cmd.path_mig_state;
871 attr->qkey = cmd.qkey;
872 attr->rq_psn = cmd.rq_psn;
873 attr->sq_psn = cmd.sq_psn;
874 attr->dest_qp_num = cmd.dest_qp_num;
875 attr->qp_access_flags = cmd.qp_access_flags;
876 attr->pkey_index = cmd.pkey_index;
877 attr->alt_pkey_index = cmd.pkey_index;
878 attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
879 attr->max_rd_atomic = cmd.max_rd_atomic;
880 attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic;
881 attr->min_rnr_timer = cmd.min_rnr_timer;
882 attr->port_num = cmd.port_num;
883 attr->timeout = cmd.timeout;
884 attr->retry_cnt = cmd.retry_cnt;
885 attr->rnr_retry = cmd.rnr_retry;
886 attr->alt_port_num = cmd.alt_port_num;
887 attr->alt_timeout = cmd.alt_timeout;
888
889 memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
890 attr->ah_attr.grh.flow_label = cmd.dest.flow_label;
891 attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index;
892 attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit;
893 attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class;
894 attr->ah_attr.dlid = cmd.dest.dlid;
895 attr->ah_attr.sl = cmd.dest.sl;
896 attr->ah_attr.src_path_bits = cmd.dest.src_path_bits;
897 attr->ah_attr.static_rate = cmd.dest.static_rate;
898 attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0;
899 attr->ah_attr.port_num = cmd.dest.port_num;
900
901 memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
902 attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label;
903 attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index;
904 attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit;
905 attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
906 attr->alt_ah_attr.dlid = cmd.alt_dest.dlid;
907 attr->alt_ah_attr.sl = cmd.alt_dest.sl;
908 attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits;
909 attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate;
910 attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
911 attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
912
913 ret = ib_modify_qp(qp, attr, cmd.attr_mask);
914 if (ret)
915 goto out;
916
917 ret = in_len;
918
919out:
920 up(&ib_uverbs_idr_mutex);
921 kfree(attr);
922
923 return ret;
924}
925
926ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
927 const char __user *buf, int in_len,
928 int out_len)
929{
930 struct ib_uverbs_destroy_qp cmd;
931 struct ib_qp *qp;
932 struct ib_uobject *uobj;
933 int ret = -EINVAL;
934
935 if (copy_from_user(&cmd, buf, sizeof cmd))
936 return -EFAULT;
937
938 down(&ib_uverbs_idr_mutex);
939
940 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
941 if (!qp || qp->uobject->context != file->ucontext)
942 goto out;
943
944 uobj = qp->uobject;
945
946 ret = ib_destroy_qp(qp);
947 if (ret)
948 goto out;
949
950 idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle);
951
952 spin_lock_irq(&file->ucontext->lock);
953 list_del(&uobj->list);
954 spin_unlock_irq(&file->ucontext->lock);
955
956 kfree(uobj);
957
958out:
959 up(&ib_uverbs_idr_mutex);
960
961 return ret ? ret : in_len;
962}
963
964ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
965 const char __user *buf, int in_len,
966 int out_len)
967{
968 struct ib_uverbs_attach_mcast cmd;
969 struct ib_qp *qp;
970 int ret = -EINVAL;
971
972 if (copy_from_user(&cmd, buf, sizeof cmd))
973 return -EFAULT;
974
975 down(&ib_uverbs_idr_mutex);
976
977 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
978 if (qp && qp->uobject->context == file->ucontext)
979 ret = ib_attach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
980
981 up(&ib_uverbs_idr_mutex);
982
983 return ret ? ret : in_len;
984}
985
986ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
987 const char __user *buf, int in_len,
988 int out_len)
989{
990 struct ib_uverbs_detach_mcast cmd;
991 struct ib_qp *qp;
992 int ret = -EINVAL;
993
994 if (copy_from_user(&cmd, buf, sizeof cmd))
995 return -EFAULT;
996
997 down(&ib_uverbs_idr_mutex);
998
999 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
1000 if (qp && qp->uobject->context == file->ucontext)
1001 ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
1002
1003 up(&ib_uverbs_idr_mutex);
1004
1005 return ret ? ret : in_len;
1006}
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
new file mode 100644
index 000000000000..eb99e693dec2
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -0,0 +1,710 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: uverbs_main.c 2733 2005-06-28 19:14:34Z roland $
34 */
35
36#include <linux/module.h>
37#include <linux/init.h>
38#include <linux/device.h>
39#include <linux/err.h>
40#include <linux/fs.h>
41#include <linux/poll.h>
42#include <linux/file.h>
43#include <linux/mount.h>
44
45#include <asm/uaccess.h>
46
47#include "uverbs.h"
48
49MODULE_AUTHOR("Roland Dreier");
50MODULE_DESCRIPTION("InfiniBand userspace verbs access");
51MODULE_LICENSE("Dual BSD/GPL");
52
53#define INFINIBANDEVENTFS_MAGIC 0x49426576 /* "IBev" */
54
55enum {
56 IB_UVERBS_MAJOR = 231,
57 IB_UVERBS_BASE_MINOR = 192,
58 IB_UVERBS_MAX_DEVICES = 32
59};
60
61#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
62
63DECLARE_MUTEX(ib_uverbs_idr_mutex);
64DEFINE_IDR(ib_uverbs_pd_idr);
65DEFINE_IDR(ib_uverbs_mr_idr);
66DEFINE_IDR(ib_uverbs_mw_idr);
67DEFINE_IDR(ib_uverbs_ah_idr);
68DEFINE_IDR(ib_uverbs_cq_idr);
69DEFINE_IDR(ib_uverbs_qp_idr);
70
71static spinlock_t map_lock;
72static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
73
74static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
75 const char __user *buf, int in_len,
76 int out_len) = {
77 [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params,
78 [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
79 [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
80 [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
81 [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid,
82 [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey,
83 [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
84 [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
85 [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
86 [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
87 [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
88 [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
89 [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
90 [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
91 [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
92 [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
93 [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
94};
95
96static struct vfsmount *uverbs_event_mnt;
97
98static void ib_uverbs_add_one(struct ib_device *device);
99static void ib_uverbs_remove_one(struct ib_device *device);
100
101static int ib_dealloc_ucontext(struct ib_ucontext *context)
102{
103 struct ib_uobject *uobj, *tmp;
104
105 if (!context)
106 return 0;
107
108 down(&ib_uverbs_idr_mutex);
109
110 /* XXX Free AHs */
111
112 list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
113 struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id);
114 idr_remove(&ib_uverbs_qp_idr, uobj->id);
115 ib_destroy_qp(qp);
116 list_del(&uobj->list);
117 kfree(uobj);
118 }
119
120 list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
121 struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id);
122 idr_remove(&ib_uverbs_cq_idr, uobj->id);
123 ib_destroy_cq(cq);
124 list_del(&uobj->list);
125 kfree(uobj);
126 }
127
128 /* XXX Free SRQs */
129 /* XXX Free MWs */
130
131 list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
132 struct ib_mr *mr = idr_find(&ib_uverbs_mr_idr, uobj->id);
133 struct ib_umem_object *memobj;
134
135 idr_remove(&ib_uverbs_mr_idr, uobj->id);
136 ib_dereg_mr(mr);
137
138 memobj = container_of(uobj, struct ib_umem_object, uobject);
139 ib_umem_release_on_close(mr->device, &memobj->umem);
140
141 list_del(&uobj->list);
142 kfree(memobj);
143 }
144
145 list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
146 struct ib_pd *pd = idr_find(&ib_uverbs_pd_idr, uobj->id);
147 idr_remove(&ib_uverbs_pd_idr, uobj->id);
148 ib_dealloc_pd(pd);
149 list_del(&uobj->list);
150 kfree(uobj);
151 }
152
153 up(&ib_uverbs_idr_mutex);
154
155 return context->device->dealloc_ucontext(context);
156}
157
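ib_dealloc_ucontext() above walks the object lists in reverse dependency order, mirroring the usecnt references taken in uverbs_cmd.c:

/*
 * QPs go first (each holds a reference on its send/recv CQs and PD),
 * then CQs, then MRs (each pins its PD and the user memory backing it),
 * and PDs last, once their usecnt has fallen back to zero.
 */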
158static void ib_uverbs_release_file(struct kref *ref)
159{
160 struct ib_uverbs_file *file =
161 container_of(ref, struct ib_uverbs_file, ref);
162
163 module_put(file->device->ib_dev->owner);
164 kfree(file);
165}
166
167static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
168 size_t count, loff_t *pos)
169{
170 struct ib_uverbs_event_file *file = filp->private_data;
171 void *event;
172 int eventsz;
173 int ret = 0;
174
175 spin_lock_irq(&file->lock);
176
177 while (list_empty(&file->event_list) && file->fd >= 0) {
178 spin_unlock_irq(&file->lock);
179
180 if (filp->f_flags & O_NONBLOCK)
181 return -EAGAIN;
182
183 if (wait_event_interruptible(file->poll_wait,
184 !list_empty(&file->event_list) ||
185 file->fd < 0))
186 return -ERESTARTSYS;
187
188 spin_lock_irq(&file->lock);
189 }
190
191 if (file->fd < 0) {
192 spin_unlock_irq(&file->lock);
193 return -ENODEV;
194 }
195
196 if (file->is_async) {
197 event = list_entry(file->event_list.next,
198 struct ib_uverbs_async_event, list);
199 eventsz = sizeof (struct ib_uverbs_async_event_desc);
200 } else {
201 event = list_entry(file->event_list.next,
202 struct ib_uverbs_comp_event, list);
203 eventsz = sizeof (struct ib_uverbs_comp_event_desc);
204 }
205
206 if (eventsz > count) {
207 ret = -EINVAL;
208 event = NULL;
209 } else
210 list_del(file->event_list.next);
211
212 spin_unlock_irq(&file->lock);
213
214 if (event) {
215 if (copy_to_user(buf, event, eventsz))
216 ret = -EFAULT;
217 else
218 ret = eventsz;
219 }
220
221 kfree(event);
222
223 return ret;
224}
225
226static unsigned int ib_uverbs_event_poll(struct file *filp,
227 struct poll_table_struct *wait)
228{
229 unsigned int pollflags = 0;
230 struct ib_uverbs_event_file *file = filp->private_data;
231
232 poll_wait(filp, &file->poll_wait, wait);
233
234 spin_lock_irq(&file->lock);
235 if (file->fd < 0)
236 pollflags = POLLERR;
237 else if (!list_empty(&file->event_list))
238 pollflags = POLLIN | POLLRDNORM;
239 spin_unlock_irq(&file->lock);
240
241 return pollflags;
242}
243
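Userspace drains these queues with an ordinary poll()/read() loop on the event fds that GET_CONTEXT returned (async_fd and the cq_fd_tab entries). A sketch for a completion channel; comp_fd and handle_cq() are hypothetical:

        struct ib_uverbs_comp_event_desc desc;
        struct pollfd pfd = { .fd = comp_fd, .events = POLLIN };

        if (poll(&pfd, 1, -1) > 0 &&
            read(comp_fd, &desc, sizeof desc) == sizeof desc)
                handle_cq(desc.cq_handle);  /* the user_handle from create_cq */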
244static void ib_uverbs_event_release(struct ib_uverbs_event_file *file)
245{
246 struct list_head *entry, *tmp;
247
248 spin_lock_irq(&file->lock);
249 if (file->fd != -1) {
250 file->fd = -1;
251 list_for_each_safe(entry, tmp, &file->event_list)
252 if (file->is_async)
253 kfree(list_entry(entry, struct ib_uverbs_async_event, list));
254 else
255 kfree(list_entry(entry, struct ib_uverbs_comp_event, list));
256 }
257 spin_unlock_irq(&file->lock);
258}
259
260static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
261{
262 struct ib_uverbs_event_file *file = filp->private_data;
263
264 return fasync_helper(fd, filp, on, &file->async_queue);
265}
266
267static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
268{
269 struct ib_uverbs_event_file *file = filp->private_data;
270
271 ib_uverbs_event_release(file);
272 ib_uverbs_event_fasync(-1, filp, 0);
273 kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
274
275 return 0;
276}
277
278static struct file_operations uverbs_event_fops = {
279 /*
280 * No .owner field since we artificially create event files,
281 * so there is no increment to the module reference count in
282 * the open path. All event files come from a uverbs command
283 * file, which already takes a module reference, so this is OK.
284 */
285 .read = ib_uverbs_event_read,
286 .poll = ib_uverbs_event_poll,
287 .release = ib_uverbs_event_close,
288 .fasync = ib_uverbs_event_fasync
289};
290
291void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
292{
293 struct ib_uverbs_file *file = cq_context;
294 struct ib_uverbs_comp_event *entry;
295 unsigned long flags;
296
297 entry = kmalloc(sizeof *entry, GFP_ATOMIC);
298 if (!entry)
299 return;
300
301 entry->desc.cq_handle = cq->uobject->user_handle;
302
303 spin_lock_irqsave(&file->comp_file[0].lock, flags);
304 list_add_tail(&entry->list, &file->comp_file[0].event_list);
305 spin_unlock_irqrestore(&file->comp_file[0].lock, flags);
306
307 wake_up_interruptible(&file->comp_file[0].poll_wait);
308 kill_fasync(&file->comp_file[0].async_queue, SIGIO, POLL_IN);
309}
310
311static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
312 __u64 element, __u64 event)
313{
314 struct ib_uverbs_async_event *entry;
315 unsigned long flags;
316
317 entry = kmalloc(sizeof *entry, GFP_ATOMIC);
318 if (!entry)
319 return;
320
321 entry->desc.element = element;
322 entry->desc.event_type = event;
323
324 spin_lock_irqsave(&file->async_file.lock, flags);
325 list_add_tail(&entry->list, &file->async_file.event_list);
326 spin_unlock_irqrestore(&file->async_file.lock, flags);
327
328 wake_up_interruptible(&file->async_file.poll_wait);
329 kill_fasync(&file->async_file.async_queue, SIGIO, POLL_IN);
330}
331
332void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
333{
334 ib_uverbs_async_handler(context_ptr,
335 event->element.cq->uobject->user_handle,
336 event->event);
337}
338
339void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
340{
341 ib_uverbs_async_handler(context_ptr,
342 event->element.qp->uobject->user_handle,
343 event->event);
344}
345
346static void ib_uverbs_event_handler(struct ib_event_handler *handler,
347 struct ib_event *event)
348{
349 struct ib_uverbs_file *file =
350 container_of(handler, struct ib_uverbs_file, event_handler);
351
352 ib_uverbs_async_handler(file, event->element.port_num, event->event);
353}
354
355static int ib_uverbs_event_init(struct ib_uverbs_event_file *file,
356 struct ib_uverbs_file *uverbs_file)
357{
358 struct file *filp;
359
360 spin_lock_init(&file->lock);
361 INIT_LIST_HEAD(&file->event_list);
362 init_waitqueue_head(&file->poll_wait);
363 file->uverbs_file = uverbs_file;
364 file->async_queue = NULL;
365
366 file->fd = get_unused_fd();
367 if (file->fd < 0)
368 return file->fd;
369
370 filp = get_empty_filp();
371 if (!filp) {
372 put_unused_fd(file->fd);
373 return -ENFILE;
374 }
375
376 filp->f_op = &uverbs_event_fops;
377 filp->f_vfsmnt = mntget(uverbs_event_mnt);
378 filp->f_dentry = dget(uverbs_event_mnt->mnt_root);
379 filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
380 filp->f_flags = O_RDONLY;
381 filp->f_mode = FMODE_READ;
382 filp->private_data = file;
383
384 fd_install(file->fd, filp);
385
386 return 0;
387}
388
389static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
390 size_t count, loff_t *pos)
391{
392 struct ib_uverbs_file *file = filp->private_data;
393 struct ib_uverbs_cmd_hdr hdr;
394
395 if (count < sizeof hdr)
396 return -EINVAL;
397
398 if (copy_from_user(&hdr, buf, sizeof hdr))
399 return -EFAULT;
400
401 if (hdr.in_words * 4 != count)
402 return -EINVAL;
403
404 if (hdr.command < 0 || hdr.command >= ARRAY_SIZE(uverbs_cmd_table))
405 return -EINVAL;
406
407 if (!file->ucontext &&
408 hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS &&
409 hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
410 return -EINVAL;
411
412 return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
413 hdr.in_words * 4, hdr.out_words * 4);
414}
415
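All verbs funnel through this one write() entry point: a word-counted header, the fixed command struct, and a user pointer at which the kernel writes the response. A userspace sketch issuing QUERY_DEVICE, assuming GET_CONTEXT has already succeeded (error handling omitted):

        struct {
                struct ib_uverbs_cmd_hdr      hdr;
                struct ib_uverbs_query_device cmd;
        } req;
        struct ib_uverbs_query_device_resp resp;

        memset(&req, 0, sizeof req);
        req.hdr.command   = IB_USER_VERBS_CMD_QUERY_DEVICE;
        req.hdr.in_words  = sizeof req / 4;     /* checked against count */
        req.hdr.out_words = sizeof resp / 4;
        req.cmd.response  = (uintptr_t) &resp;  /* kernel copies the reply here */

        if (write(fd, &req, sizeof req) != sizeof req)
                perror("QUERY_DEVICE");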
416static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
417{
418 struct ib_uverbs_file *file = filp->private_data;
419
420 if (!file->ucontext)
421 return -ENODEV;
422 else
423 return file->device->ib_dev->mmap(file->ucontext, vma);
424}
425
426static int ib_uverbs_open(struct inode *inode, struct file *filp)
427{
428 struct ib_uverbs_device *dev =
429 container_of(inode->i_cdev, struct ib_uverbs_device, dev);
430 struct ib_uverbs_file *file;
431 int i = 0;
432 int ret;
433
434 if (!try_module_get(dev->ib_dev->owner))
435 return -ENODEV;
436
437 file = kmalloc(sizeof *file +
438 (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file),
439 GFP_KERNEL);
440 if (!file)
441 return -ENOMEM;
442
443 file->device = dev;
444 kref_init(&file->ref);
445
446 file->ucontext = NULL;
447
448 ret = ib_uverbs_event_init(&file->async_file, file);
449 if (ret)
450 goto err;
451
452 file->async_file.is_async = 1;
453
454 kref_get(&file->ref);
455
456 for (i = 0; i < dev->num_comp; ++i) {
457 ret = ib_uverbs_event_init(&file->comp_file[i], file);
458 if (ret)
459 goto err_async;
460 kref_get(&file->ref);
461 file->comp_file[i].is_async = 0;
462 }
463
464
465 filp->private_data = file;
466
467 INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev,
468 ib_uverbs_event_handler);
469 if (ib_register_event_handler(&file->event_handler))
470 goto err_async;
471
472 return 0;
473
474err_async:
475 while (i--)
476 ib_uverbs_event_release(&file->comp_file[i]);
477
478 ib_uverbs_event_release(&file->async_file);
479
480err:
481 kref_put(&file->ref, ib_uverbs_release_file);
482
483 return ret;
484}
485
486static int ib_uverbs_close(struct inode *inode, struct file *filp)
487{
488 struct ib_uverbs_file *file = filp->private_data;
489 int i;
490
491 ib_unregister_event_handler(&file->event_handler);
492 ib_uverbs_event_release(&file->async_file);
493 ib_dealloc_ucontext(file->ucontext);
494
495 for (i = 0; i < file->device->num_comp; ++i)
496 ib_uverbs_event_release(&file->comp_file[i]);
497
498 kref_put(&file->ref, ib_uverbs_release_file);
499
500 return 0;
501}
502
503static struct file_operations uverbs_fops = {
504 .owner = THIS_MODULE,
505 .write = ib_uverbs_write,
506 .open = ib_uverbs_open,
507 .release = ib_uverbs_close
508};
509
510static struct file_operations uverbs_mmap_fops = {
511 .owner = THIS_MODULE,
512 .write = ib_uverbs_write,
513 .mmap = ib_uverbs_mmap,
514 .open = ib_uverbs_open,
515 .release = ib_uverbs_close
516};
517
518static struct ib_client uverbs_client = {
519 .name = "uverbs",
520 .add = ib_uverbs_add_one,
521 .remove = ib_uverbs_remove_one
522};
523
524static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
525{
526 struct ib_uverbs_device *dev =
527 container_of(class_dev, struct ib_uverbs_device, class_dev);
528
529 return sprintf(buf, "%s\n", dev->ib_dev->name);
530}
531static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
532
533static void ib_uverbs_release_class_dev(struct class_device *class_dev)
534{
535 struct ib_uverbs_device *dev =
536 container_of(class_dev, struct ib_uverbs_device, class_dev);
537
538 cdev_del(&dev->dev);
539 clear_bit(dev->devnum, dev_map);
540 kfree(dev);
541}
542
543static struct class uverbs_class = {
544 .name = "infiniband_verbs",
545 .release = ib_uverbs_release_class_dev
546};
547
548static ssize_t show_abi_version(struct class *class, char *buf)
549{
550 return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION);
551}
552static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
553
554static void ib_uverbs_add_one(struct ib_device *device)
555{
556 struct ib_uverbs_device *uverbs_dev;
557
558 if (!device->alloc_ucontext)
559 return;
560
561 uverbs_dev = kmalloc(sizeof *uverbs_dev, GFP_KERNEL);
562 if (!uverbs_dev)
563 return;
564
565 memset(uverbs_dev, 0, sizeof *uverbs_dev);
566
567 spin_lock(&map_lock);
568 uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
569 if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) {
570 spin_unlock(&map_lock);
571 goto err;
572 }
573 set_bit(uverbs_dev->devnum, dev_map);
574 spin_unlock(&map_lock);
575
576 uverbs_dev->ib_dev = device;
577 uverbs_dev->num_comp = 1;
578
579 if (device->mmap)
580 cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops);
581 else
582 cdev_init(&uverbs_dev->dev, &uverbs_fops);
583 uverbs_dev->dev.owner = THIS_MODULE;
584 kobject_set_name(&uverbs_dev->dev.kobj, "uverbs%d", uverbs_dev->devnum);
585 if (cdev_add(&uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
586 goto err;
587
588 uverbs_dev->class_dev.class = &uverbs_class;
589 uverbs_dev->class_dev.dev = device->dma_device;
590 uverbs_dev->class_dev.devt = uverbs_dev->dev.dev;
591 snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum);
592 if (class_device_register(&uverbs_dev->class_dev))
593 goto err_cdev;
594
595 if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev))
596 goto err_class;
597
598 ib_set_client_data(device, &uverbs_client, uverbs_dev);
599
600 return;
601
602err_class:
603 class_device_unregister(&uverbs_dev->class_dev);
604
605err_cdev:
606 cdev_del(&uverbs_dev->dev);
607 clear_bit(uverbs_dev->devnum, dev_map);
608
609err:
610 kfree(uverbs_dev);
611 return;
612}
613
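Minor numbers come from a small bitmap: allocation is a find-and-set under map_lock, so two concurrently added devices cannot claim the same slot, while the release path (ib_uverbs_release_class_dev above) frees with a bare atomic clear_bit(). The same allocator in isolation, as a sketch with a hypothetical 32-slot table:

	#include <linux/bitops.h>
	#include <linux/spinlock.h>
	#include <linux/types.h>

	#define MAX_SLOTS 32
	static DECLARE_BITMAP(slot_map, MAX_SLOTS);
	static DEFINE_SPINLOCK(slot_lock);

	static int slot_alloc(void)
	{
		int slot;

		spin_lock(&slot_lock);
		slot = find_first_zero_bit(slot_map, MAX_SLOTS);
		if (slot < MAX_SLOTS)
			set_bit(slot, slot_map);
		spin_unlock(&slot_lock);
		return slot < MAX_SLOTS ? slot : -1;
	}

	static void slot_free(int slot)
	{
		clear_bit(slot, slot_map);	/* atomic; no lock needed */
	}
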
614static void ib_uverbs_remove_one(struct ib_device *device)
615{
616 struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);
617
618 if (!uverbs_dev)
619 return;
620
621 class_device_unregister(&uverbs_dev->class_dev);
622}
623
624static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
625 const char *dev_name, void *data)
626{
627 return get_sb_pseudo(fs_type, "infinibandevent:", NULL,
628 INFINIBANDEVENTFS_MAGIC);
629}
630
631static struct file_system_type uverbs_event_fs = {
632 /* No owner field so module can be unloaded */
633 .name = "infinibandeventfs",
634 .get_sb = uverbs_event_get_sb,
635 .kill_sb = kill_litter_super
636};
637
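infinibandeventfs exists only to give the module a private vfsmount to hang event-stream files off: it is registered so the superblock has an identity, then immediately kern_mount()ed by the init code below, and userspace never mounts it. A sketch of that internal-only filesystem idiom under the 2.6.12-era API (get_sb still returns a super_block; all demo_* names and the magic number are invented):

	#include <linux/fs.h>
	#include <linux/mount.h>
	#include <linux/err.h>

	static struct super_block *demo_get_sb(struct file_system_type *fs_type,
					       int flags, const char *dev_name,
					       void *data)
	{
		return get_sb_pseudo(fs_type, "demo:", NULL, 0x64656d6f);
	}

	static struct file_system_type demo_fs = {
		.name    = "demofs",
		.get_sb  = demo_get_sb,
		.kill_sb = kill_litter_super
	};

	static struct vfsmount *demo_mnt;

	static int demo_fs_init(void)
	{
		int ret = register_filesystem(&demo_fs);

		if (ret)
			return ret;
		demo_mnt = kern_mount(&demo_fs);
		if (IS_ERR(demo_mnt)) {
			unregister_filesystem(&demo_fs);
			return PTR_ERR(demo_mnt);
		}
		return 0;
	}
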
638static int __init ib_uverbs_init(void)
639{
640 int ret;
641
642 spin_lock_init(&map_lock);
643
644 ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
645 "infiniband_verbs");
646 if (ret) {
647 printk(KERN_ERR "user_verbs: couldn't register device number\n");
648 goto out;
649 }
650
651 ret = class_register(&uverbs_class);
652 if (ret) {
653 printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
654 goto out_chrdev;
655 }
656
657 ret = class_create_file(&uverbs_class, &class_attr_abi_version);
658 if (ret) {
659 printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
660 goto out_class;
661 }
662
663 ret = register_filesystem(&uverbs_event_fs);
664 if (ret) {
665 printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n");
666 goto out_class;
667 }
668
669 uverbs_event_mnt = kern_mount(&uverbs_event_fs);
670 if (IS_ERR(uverbs_event_mnt)) {
671 ret = PTR_ERR(uverbs_event_mnt);
672 printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n");
673 goto out_fs;
674 }
675
676 ret = ib_register_client(&uverbs_client);
677 if (ret) {
678 printk(KERN_ERR "user_verbs: couldn't register client\n");
679 goto out_mnt;
680 }
681
682 return 0;
683
684out_mnt:
685 mntput(uverbs_event_mnt);
686
687out_fs:
688 unregister_filesystem(&uverbs_event_fs);
689
690out_class:
691 class_unregister(&uverbs_class);
692
693out_chrdev:
694 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
695
696out:
697 return ret;
698}
699
700static void __exit ib_uverbs_cleanup(void)
701{
702 ib_unregister_client(&uverbs_client);
703 mntput(uverbs_event_mnt);
704 unregister_filesystem(&uverbs_event_fs);
705 class_unregister(&uverbs_class);
706 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
707}
708
709module_init(ib_uverbs_init);
710module_exit(ib_uverbs_cleanup);
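
Note the symmetry: ib_uverbs_cleanup() is ib_uverbs_init() run backwards, and the goto labels in init unwind the same steps in the same reverse order. The skeleton of that discipline, sketched with hypothetical step/undo pairs:

	#include <linux/module.h>

	static int step_a(void) { return 0; }	/* hypothetical setup steps */
	static int step_b(void) { return 0; }
	static void undo_a(void) { }
	static void undo_b(void) { }

	static int __init demo_init(void)
	{
		int ret;

		ret = step_a();
		if (ret)
			goto out;
		ret = step_b();
		if (ret)
			goto out_a;
		return 0;

	out_a:
		undo_a();
	out:
		return ret;
	}

	static void __exit demo_exit(void)
	{
		undo_b();	/* strict reverse of demo_init() */
		undo_a();
	}

	module_init(demo_init);
	module_exit(demo_exit);
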
diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/uverbs_mem.c
new file mode 100644
index 000000000000..ed550f6595bd
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_mem.c
@@ -0,0 +1,221 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $
34 */
35
36#include <linux/mm.h>
37#include <linux/dma-mapping.h>
38
39#include "uverbs.h"
40
41struct ib_umem_account_work {
42 struct work_struct work;
43 struct mm_struct *mm;
44 unsigned long diff;
45};
46
47
48static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
49{
50 struct ib_umem_chunk *chunk, *tmp;
51 int i;
52
53 list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
54 dma_unmap_sg(dev->dma_device, chunk->page_list,
55 chunk->nents, DMA_BIDIRECTIONAL);
56 for (i = 0; i < chunk->nents; ++i) {
57 if (umem->writable && dirty)
58 set_page_dirty_lock(chunk->page_list[i].page);
59 put_page(chunk->page_list[i].page);
60 }
61
62 kfree(chunk);
63 }
64}
65
66int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
67 void *addr, size_t size, int write)
68{
69 struct page **page_list;
70 struct ib_umem_chunk *chunk;
71 unsigned long locked;
72 unsigned long lock_limit;
73 unsigned long cur_base;
74 unsigned long npages;
75 int ret = 0;
76 int off;
77 int i;
78
79 if (!can_do_mlock())
80 return -EPERM;
81
82 page_list = (struct page **) __get_free_page(GFP_KERNEL);
83 if (!page_list)
84 return -ENOMEM;
85
86 mem->user_base = (unsigned long) addr;
87 mem->length = size;
88 mem->offset = (unsigned long) addr & ~PAGE_MASK;
89 mem->page_size = PAGE_SIZE;
90 mem->writable = write;
91
92 INIT_LIST_HEAD(&mem->chunk_list);
93
94 npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT;
95
96 down_write(&current->mm->mmap_sem);
97
98 locked = npages + current->mm->locked_vm;
99 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
100
101 if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
102 ret = -ENOMEM;
103 goto out;
104 }
105
106 cur_base = (unsigned long) addr & PAGE_MASK;
107
108 while (npages) {
109 ret = get_user_pages(current, current->mm, cur_base,
110 min_t(int, npages,
111 PAGE_SIZE / sizeof (struct page *)),
112 1, !write, page_list, NULL);
113
114 if (ret < 0)
115 goto out;
116
117 cur_base += ret * PAGE_SIZE;
118 npages -= ret;
119
120 off = 0;
121
122 while (ret) {
123 chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
124 min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
125 GFP_KERNEL);
126 if (!chunk) {
127 ret = -ENOMEM;
128 goto out;
129 }
130
131 chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
132 for (i = 0; i < chunk->nents; ++i) {
133 chunk->page_list[i].page = page_list[i + off];
134 chunk->page_list[i].offset = 0;
135 chunk->page_list[i].length = PAGE_SIZE;
136 }
137
138 chunk->nmap = dma_map_sg(dev->dma_device,
139 &chunk->page_list[0],
140 chunk->nents,
141 DMA_BIDIRECTIONAL);
142 if (chunk->nmap <= 0) {
143 for (i = 0; i < chunk->nents; ++i)
144 put_page(chunk->page_list[i].page);
145 kfree(chunk);
146
147 ret = -ENOMEM;
148 goto out;
149 }
150
151 ret -= chunk->nents;
152 off += chunk->nents;
153 list_add_tail(&chunk->list, &mem->chunk_list);
154 }
155
156 ret = 0;
157 }
158
159out:
160 if (ret < 0)
161 __ib_umem_release(dev, mem, 0);
162 else
163 current->mm->locked_vm = locked;
164
165 up_write(&current->mm->mmap_sem);
166 free_page((unsigned long) page_list);
167
168 return ret;
169}
170
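A minimal caller sketch: pin a user buffer with ib_umem_get(), walk the resulting DMA scatter/gather entries, then unpin. use_dma_addr() is hypothetical; everything else is the API defined in this file and uverbs.h:

	#include "uverbs.h"

	static void use_dma_addr(dma_addr_t addr, unsigned int len);	/* hypothetical */

	static int pin_and_walk(struct ib_device *dev, void *uaddr, size_t len)
	{
		struct ib_umem umem;
		struct ib_umem_chunk *chunk;
		int i, ret;

		ret = ib_umem_get(dev, &umem, uaddr, len, 1 /* writable */);
		if (ret)
			return ret;

		list_for_each_entry(chunk, &umem.chunk_list, list)
			for (i = 0; i < chunk->nmap; ++i)
				use_dma_addr(sg_dma_address(&chunk->page_list[i]),
					     sg_dma_len(&chunk->page_list[i]));

		ib_umem_release(dev, &umem);	/* unpins and un-charges locked_vm */
		return 0;
	}
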
171void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
172{
173 __ib_umem_release(dev, umem, 1);
174
175 down_write(&current->mm->mmap_sem);
176 current->mm->locked_vm -=
177 PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
178 up_write(&current->mm->mmap_sem);
179}
180
181static void ib_umem_account(void *work_ptr)
182{
183 struct ib_umem_account_work *work = work_ptr;
184
185 down_write(&work->mm->mmap_sem);
186 work->mm->locked_vm -= work->diff;
187 up_write(&work->mm->mmap_sem);
188 mmput(work->mm);
189 kfree(work);
190}
191
192void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem)
193{
194 struct ib_umem_account_work *work;
195 struct mm_struct *mm;
196
197 __ib_umem_release(dev, umem, 1);
198
199 mm = get_task_mm(current);
200 if (!mm)
201 return;
202
203 /*
204 * We may be called with the mm's mmap_sem already held. This
205 * can happen when a userspace munmap() is the call that drops
206 * the last reference to our file and calls our release
207 * method. If there are memory regions to destroy, we'll end
208 * up here and not be able to take the mmap_sem. Therefore we
209 * defer the locked_vm accounting to the system workqueue.
210 */
211
212 work = kmalloc(sizeof *work, GFP_KERNEL);
213 if (!work)
214 return;
215
216 INIT_WORK(&work->work, ib_umem_account, work);
217 work->mm = mm;
218 work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
219
220 schedule_work(&work->work);
221}
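The comment in ib_umem_release_on_close() captures the general rule: when the current context may already hold the lock an update needs, hand the update to a workqueue instead of deadlocking. The same deferral in isolation, a sketch using the 2.6.12-era three-argument INIT_WORK seen above (the semaphore and counter names are hypothetical):

	#include <linux/workqueue.h>
	#include <linux/rwsem.h>
	#include <linux/slab.h>

	static DECLARE_RWSEM(some_sem);		/* hypothetical lock */

	struct deferred_sub {
		struct work_struct work;
		unsigned long *counter;		/* protected by some_sem */
		unsigned long diff;
	};

	static void deferred_sub_fn(void *ptr)
	{
		struct deferred_sub *d = ptr;

		down_write(&some_sem);		/* safe: workqueue context */
		*d->counter -= d->diff;
		up_write(&some_sem);
		kfree(d);
	}

	static void sub_later(unsigned long *counter, unsigned long diff)
	{
		struct deferred_sub *d = kmalloc(sizeof *d, GFP_KERNEL);

		if (!d)
			return;			/* accounting leak, as above */
		d->counter = counter;
		d->diff = diff;
		INIT_WORK(&d->work, deferred_sub_fn, d);
		schedule_work(&d->work);
	}
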
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 7c08ed0cd7dd..506fdf1f2a26 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -4,6 +4,7 @@
4 * Copyright (c) 2004 Intel Corporation. All rights reserved. 4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved. 6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 * Copyright (c) 2005 Cisco Systems. All rights reserved.
7 * 8 *
8 * This software is available to you under a choice of one of two 9 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU 10 * licenses. You may choose to be licensed under the terms of the GNU
@@ -40,6 +41,7 @@
40#include <linux/err.h> 41#include <linux/err.h>
41 42
42#include <ib_verbs.h> 43#include <ib_verbs.h>
44#include <ib_cache.h>
43 45
44/* Protection domains */ 46/* Protection domains */
45 47
@@ -47,10 +49,11 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device)
47{ 49{
48 struct ib_pd *pd; 50 struct ib_pd *pd;
49 51
50 pd = device->alloc_pd(device); 52 pd = device->alloc_pd(device, NULL, NULL);
51 53
52 if (!IS_ERR(pd)) { 54 if (!IS_ERR(pd)) {
53 pd->device = device; 55 pd->device = device;
56 pd->uobject = NULL;
54 atomic_set(&pd->usecnt, 0); 57 atomic_set(&pd->usecnt, 0);
55 } 58 }
56 59
@@ -76,8 +79,9 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
76 ah = pd->device->create_ah(pd, ah_attr); 79 ah = pd->device->create_ah(pd, ah_attr);
77 80
78 if (!IS_ERR(ah)) { 81 if (!IS_ERR(ah)) {
79 ah->device = pd->device; 82 ah->device = pd->device;
80 ah->pd = pd; 83 ah->pd = pd;
84 ah->uobject = NULL;
81 atomic_inc(&pd->usecnt); 85 atomic_inc(&pd->usecnt);
82 } 86 }
83 87
@@ -85,6 +89,40 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
85} 89}
86EXPORT_SYMBOL(ib_create_ah); 90EXPORT_SYMBOL(ib_create_ah);
87 91
92struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
93 struct ib_grh *grh, u8 port_num)
94{
95 struct ib_ah_attr ah_attr;
96 u32 flow_class;
97 u16 gid_index;
98 int ret;
99
100 memset(&ah_attr, 0, sizeof ah_attr);
101 ah_attr.dlid = wc->slid;
102 ah_attr.sl = wc->sl;
103 ah_attr.src_path_bits = wc->dlid_path_bits;
104 ah_attr.port_num = port_num;
105
106 if (wc->wc_flags & IB_WC_GRH) {
107 ah_attr.ah_flags = IB_AH_GRH;
108 ah_attr.grh.dgid = grh->dgid;
109
110 ret = ib_find_cached_gid(pd->device, &grh->sgid, &port_num,
111 &gid_index);
112 if (ret)
113 return ERR_PTR(ret);
114
115 ah_attr.grh.sgid_index = (u8) gid_index;
116 flow_class = be32_to_cpu(grh->version_tclass_flow);
117 ah_attr.grh.flow_label = flow_class & 0xFFFFF;
118 ah_attr.grh.traffic_class = (flow_class >> 20) & 0xFF;
119 ah_attr.grh.hop_limit = grh->hop_limit;
120 }
121
122 return ib_create_ah(pd, &ah_attr);
123}
124EXPORT_SYMBOL(ib_create_ah_from_wc);
125
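ib_create_ah_from_wc() turns a receive completion into a reply address handle: the remote's slid becomes our dlid, and when a GRH was received it is reversed too, with flow label and traffic class unpacked from version_tclass_flow. A minimal responder sketch, assuming (as the MAD layer arranges) that the GRH, when present, sits at the head of the receive buffer:

	static struct ib_ah *make_reply_ah(struct ib_pd *pd, struct ib_wc *wc,
					   void *recv_buf, u8 port_num)
	{
		/* grh may be NULL; it is only dereferenced if IB_WC_GRH is set */
		struct ib_grh *grh = (wc->wc_flags & IB_WC_GRH) ? recv_buf : NULL;

		return ib_create_ah_from_wc(pd, wc, grh, port_num);
	}

	/* Destroy with ib_destroy_ah() once the reply send completes. */
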
88int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) 126int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
89{ 127{
90 return ah->device->modify_ah ? 128 return ah->device->modify_ah ?
@@ -122,7 +160,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
122{ 160{
123 struct ib_qp *qp; 161 struct ib_qp *qp;
124 162
125 qp = pd->device->create_qp(pd, qp_init_attr); 163 qp = pd->device->create_qp(pd, qp_init_attr, NULL);
126 164
127 if (!IS_ERR(qp)) { 165 if (!IS_ERR(qp)) {
128 qp->device = pd->device; 166 qp->device = pd->device;
@@ -130,6 +168,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
130 qp->send_cq = qp_init_attr->send_cq; 168 qp->send_cq = qp_init_attr->send_cq;
131 qp->recv_cq = qp_init_attr->recv_cq; 169 qp->recv_cq = qp_init_attr->recv_cq;
132 qp->srq = qp_init_attr->srq; 170 qp->srq = qp_init_attr->srq;
171 qp->uobject = NULL;
133 qp->event_handler = qp_init_attr->event_handler; 172 qp->event_handler = qp_init_attr->event_handler;
134 qp->qp_context = qp_init_attr->qp_context; 173 qp->qp_context = qp_init_attr->qp_context;
135 qp->qp_type = qp_init_attr->qp_type; 174 qp->qp_type = qp_init_attr->qp_type;
@@ -197,10 +236,11 @@ struct ib_cq *ib_create_cq(struct ib_device *device,
197{ 236{
198 struct ib_cq *cq; 237 struct ib_cq *cq;
199 238
200 cq = device->create_cq(device, cqe); 239 cq = device->create_cq(device, cqe, NULL, NULL);
201 240
202 if (!IS_ERR(cq)) { 241 if (!IS_ERR(cq)) {
203 cq->device = device; 242 cq->device = device;
243 cq->uobject = NULL;
204 cq->comp_handler = comp_handler; 244 cq->comp_handler = comp_handler;
205 cq->event_handler = event_handler; 245 cq->event_handler = event_handler;
206 cq->cq_context = cq_context; 246 cq->cq_context = cq_context;
@@ -245,8 +285,9 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
245 mr = pd->device->get_dma_mr(pd, mr_access_flags); 285 mr = pd->device->get_dma_mr(pd, mr_access_flags);
246 286
247 if (!IS_ERR(mr)) { 287 if (!IS_ERR(mr)) {
248 mr->device = pd->device; 288 mr->device = pd->device;
249 mr->pd = pd; 289 mr->pd = pd;
290 mr->uobject = NULL;
250 atomic_inc(&pd->usecnt); 291 atomic_inc(&pd->usecnt);
251 atomic_set(&mr->usecnt, 0); 292 atomic_set(&mr->usecnt, 0);
252 } 293 }
@@ -267,8 +308,9 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
267 mr_access_flags, iova_start); 308 mr_access_flags, iova_start);
268 309
269 if (!IS_ERR(mr)) { 310 if (!IS_ERR(mr)) {
270 mr->device = pd->device; 311 mr->device = pd->device;
271 mr->pd = pd; 312 mr->pd = pd;
313 mr->uobject = NULL;
272 atomic_inc(&pd->usecnt); 314 atomic_inc(&pd->usecnt);
273 atomic_set(&mr->usecnt, 0); 315 atomic_set(&mr->usecnt, 0);
274 } 316 }
@@ -344,8 +386,9 @@ struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
344 386
345 mw = pd->device->alloc_mw(pd); 387 mw = pd->device->alloc_mw(pd);
346 if (!IS_ERR(mw)) { 388 if (!IS_ERR(mw)) {
347 mw->device = pd->device; 389 mw->device = pd->device;
348 mw->pd = pd; 390 mw->pd = pd;
391 mw->uobject = NULL;
349 atomic_inc(&pd->usecnt); 392 atomic_inc(&pd->usecnt);
350 } 393 }
351 394
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index 085baf393ca4..d58dcbe66488 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index cd9ed958d92f..1557a522d831 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -431,6 +431,36 @@ static int mthca_cmd_imm(struct mthca_dev *dev,
431 timeout, status); 431 timeout, status);
432} 432}
433 433
434int mthca_cmd_init(struct mthca_dev *dev)
435{
436 sema_init(&dev->cmd.hcr_sem, 1);
437 sema_init(&dev->cmd.poll_sem, 1);
438 dev->cmd.use_events = 0;
439
440 dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE,
441 MTHCA_HCR_SIZE);
442 if (!dev->hcr) {
443 mthca_err(dev, "Couldn't map command register.\n");
444 return -ENOMEM;
445 }
446
447 dev->cmd.pool = pci_pool_create("mthca_cmd", dev->pdev,
448 MTHCA_MAILBOX_SIZE,
449 MTHCA_MAILBOX_SIZE, 0);
450 if (!dev->cmd.pool) {
451 iounmap(dev->hcr);
452 return -ENOMEM;
453 }
454
455 return 0;
456}
457
458void mthca_cmd_cleanup(struct mthca_dev *dev)
459{
460 pci_pool_destroy(dev->cmd.pool);
461 iounmap(dev->hcr);
462}
463
434/* 464/*
435 * Switch to using events to issue FW commands (should be called after 465 * Switch to using events to issue FW commands (should be called after
436 * event queue to command events has been initialized). 466 * event queue to command events has been initialized).
@@ -489,6 +519,33 @@ void mthca_cmd_use_polling(struct mthca_dev *dev)
489 up(&dev->cmd.poll_sem); 519 up(&dev->cmd.poll_sem);
490} 520}
491 521
522struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
523 unsigned int gfp_mask)
524{
525 struct mthca_mailbox *mailbox;
526
527 mailbox = kmalloc(sizeof *mailbox, gfp_mask);
528 if (!mailbox)
529 return ERR_PTR(-ENOMEM);
530
531 mailbox->buf = pci_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma);
532 if (!mailbox->buf) {
533 kfree(mailbox);
534 return ERR_PTR(-ENOMEM);
535 }
536
537 return mailbox;
538}
539
540void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox)
541{
542 if (!mailbox)
543 return;
544
545 pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma);
546 kfree(mailbox);
547}
548
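From here on, every firmware command takes one of these fixed-size, pci_pool-backed mailboxes instead of doing its own pci_map_single(). A typical round trip, sketched against the new API (the mailbox-taking mthca_SW2HW_CQ appears later in this patch; ctx_size is assumed to fit in MTHCA_MAILBOX_SIZE):

	static int write_cq_context(struct mthca_dev *dev, int cq_num,
				    void *ctx, int ctx_size, u8 *status)
	{
		struct mthca_mailbox *mailbox;
		int err;

		mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
		if (IS_ERR(mailbox))
			return PTR_ERR(mailbox);

		memcpy(mailbox->buf, ctx, ctx_size);	/* CPU side of the DMA buffer */
		err = mthca_SW2HW_CQ(dev, mailbox, cq_num, status);

		mthca_free_mailbox(dev, mailbox);
		return err;
	}
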
492int mthca_SYS_EN(struct mthca_dev *dev, u8 *status) 549int mthca_SYS_EN(struct mthca_dev *dev, u8 *status)
493{ 550{
494 u64 out; 551 u64 out;
@@ -513,20 +570,20 @@ int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status)
513static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, 570static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
514 u64 virt, u8 *status) 571 u64 virt, u8 *status)
515{ 572{
516 u32 *inbox; 573 struct mthca_mailbox *mailbox;
517 dma_addr_t indma;
518 struct mthca_icm_iter iter; 574 struct mthca_icm_iter iter;
575 __be64 *pages;
519 int lg; 576 int lg;
520 int nent = 0; 577 int nent = 0;
521 int i; 578 int i;
522 int err = 0; 579 int err = 0;
523 int ts = 0, tc = 0; 580 int ts = 0, tc = 0;
524 581
525 inbox = pci_alloc_consistent(dev->pdev, PAGE_SIZE, &indma); 582 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
526 if (!inbox) 583 if (IS_ERR(mailbox))
527 return -ENOMEM; 584 return PTR_ERR(mailbox);
528 585 memset(mailbox->buf, 0, MTHCA_MAILBOX_SIZE);
529 memset(inbox, 0, PAGE_SIZE); 586 pages = mailbox->buf;
530 587
531 for (mthca_icm_first(icm, &iter); 588 for (mthca_icm_first(icm, &iter);
532 !mthca_icm_last(&iter); 589 !mthca_icm_last(&iter);
@@ -546,19 +603,17 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
546 } 603 }
547 for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i, ++nent) { 604 for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i, ++nent) {
548 if (virt != -1) { 605 if (virt != -1) {
549 *((__be64 *) (inbox + nent * 4)) = 606 pages[nent * 2] = cpu_to_be64(virt);
550 cpu_to_be64(virt);
551 virt += 1 << lg; 607 virt += 1 << lg;
552 } 608 }
553 609
554 *((__be64 *) (inbox + nent * 4 + 2)) = 610 pages[nent * 2 + 1] = cpu_to_be64((mthca_icm_addr(&iter) +
555 cpu_to_be64((mthca_icm_addr(&iter) + 611 (i << lg)) | (lg - 12));
556 (i << lg)) | (lg - 12));
557 ts += 1 << (lg - 10); 612 ts += 1 << (lg - 10);
558 ++tc; 613 ++tc;
559 614
560 if (nent == PAGE_SIZE / 16) { 615 if (nent == MTHCA_MAILBOX_SIZE / 16) {
561 err = mthca_cmd(dev, indma, nent, 0, op, 616 err = mthca_cmd(dev, mailbox->dma, nent, 0, op,
562 CMD_TIME_CLASS_B, status); 617 CMD_TIME_CLASS_B, status);
563 if (err || *status) 618 if (err || *status)
564 goto out; 619 goto out;
@@ -568,7 +623,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
568 } 623 }
569 624
570 if (nent) 625 if (nent)
571 err = mthca_cmd(dev, indma, nent, 0, op, 626 err = mthca_cmd(dev, mailbox->dma, nent, 0, op,
572 CMD_TIME_CLASS_B, status); 627 CMD_TIME_CLASS_B, status);
573 628
574 switch (op) { 629 switch (op) {
@@ -585,7 +640,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
585 } 640 }
586 641
587out: 642out:
588 pci_free_consistent(dev->pdev, PAGE_SIZE, inbox, indma); 643 mthca_free_mailbox(dev, mailbox);
589 return err; 644 return err;
590} 645}
591 646
@@ -606,8 +661,8 @@ int mthca_RUN_FW(struct mthca_dev *dev, u8 *status)
606 661
607int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) 662int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
608{ 663{
664 struct mthca_mailbox *mailbox;
609 u32 *outbox; 665 u32 *outbox;
610 dma_addr_t outdma;
611 int err = 0; 666 int err = 0;
612 u8 lg; 667 u8 lg;
613 668
@@ -625,12 +680,12 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
625#define QUERY_FW_EQ_ARM_BASE_OFFSET 0x40 680#define QUERY_FW_EQ_ARM_BASE_OFFSET 0x40
626#define QUERY_FW_EQ_SET_CI_BASE_OFFSET 0x48 681#define QUERY_FW_EQ_SET_CI_BASE_OFFSET 0x48
627 682
628 outbox = pci_alloc_consistent(dev->pdev, QUERY_FW_OUT_SIZE, &outdma); 683 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
629 if (!outbox) { 684 if (IS_ERR(mailbox))
630 return -ENOMEM; 685 return PTR_ERR(mailbox);
631 } 686 outbox = mailbox->buf;
632 687
633 err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_FW, 688 err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_FW,
634 CMD_TIME_CLASS_A, status); 689 CMD_TIME_CLASS_A, status);
635 690
636 if (err) 691 if (err)
@@ -681,15 +736,15 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
681 } 736 }
682 737
683out: 738out:
684 pci_free_consistent(dev->pdev, QUERY_FW_OUT_SIZE, outbox, outdma); 739 mthca_free_mailbox(dev, mailbox);
685 return err; 740 return err;
686} 741}
687 742
688int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status) 743int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
689{ 744{
745 struct mthca_mailbox *mailbox;
690 u8 info; 746 u8 info;
691 u32 *outbox; 747 u32 *outbox;
692 dma_addr_t outdma;
693 int err = 0; 748 int err = 0;
694 749
695#define ENABLE_LAM_OUT_SIZE 0x100 750#define ENABLE_LAM_OUT_SIZE 0x100
@@ -700,11 +755,12 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
700#define ENABLE_LAM_INFO_HIDDEN_FLAG (1 << 4) 755#define ENABLE_LAM_INFO_HIDDEN_FLAG (1 << 4)
701#define ENABLE_LAM_INFO_ECC_MASK 0x3 756#define ENABLE_LAM_INFO_ECC_MASK 0x3
702 757
703 outbox = pci_alloc_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, &outdma); 758 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
704 if (!outbox) 759 if (IS_ERR(mailbox))
705 return -ENOMEM; 760 return PTR_ERR(mailbox);
761 outbox = mailbox->buf;
706 762
707 err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_ENABLE_LAM, 763 err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_ENABLE_LAM,
708 CMD_TIME_CLASS_C, status); 764 CMD_TIME_CLASS_C, status);
709 765
710 if (err) 766 if (err)
@@ -733,7 +789,7 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
733 (unsigned long long) dev->ddr_end); 789 (unsigned long long) dev->ddr_end);
734 790
735out: 791out:
736 pci_free_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, outbox, outdma); 792 mthca_free_mailbox(dev, mailbox);
737 return err; 793 return err;
738} 794}
739 795
@@ -744,9 +800,9 @@ int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status)
744 800
745int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status) 801int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
746{ 802{
803 struct mthca_mailbox *mailbox;
747 u8 info; 804 u8 info;
748 u32 *outbox; 805 u32 *outbox;
749 dma_addr_t outdma;
750 int err = 0; 806 int err = 0;
751 807
752#define QUERY_DDR_OUT_SIZE 0x100 808#define QUERY_DDR_OUT_SIZE 0x100
@@ -757,11 +813,12 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
757#define QUERY_DDR_INFO_HIDDEN_FLAG (1 << 4) 813#define QUERY_DDR_INFO_HIDDEN_FLAG (1 << 4)
758#define QUERY_DDR_INFO_ECC_MASK 0x3 814#define QUERY_DDR_INFO_ECC_MASK 0x3
759 815
760 outbox = pci_alloc_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, &outdma); 816 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
761 if (!outbox) 817 if (IS_ERR(mailbox))
762 return -ENOMEM; 818 return PTR_ERR(mailbox);
819 outbox = mailbox->buf;
763 820
764 err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DDR, 821 err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DDR,
765 CMD_TIME_CLASS_A, status); 822 CMD_TIME_CLASS_A, status);
766 823
767 if (err) 824 if (err)
@@ -787,15 +844,15 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
787 (unsigned long long) dev->ddr_end); 844 (unsigned long long) dev->ddr_end);
788 845
789out: 846out:
790 pci_free_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, outbox, outdma); 847 mthca_free_mailbox(dev, mailbox);
791 return err; 848 return err;
792} 849}
793 850
794int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, 851int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
795 struct mthca_dev_lim *dev_lim, u8 *status) 852 struct mthca_dev_lim *dev_lim, u8 *status)
796{ 853{
854 struct mthca_mailbox *mailbox;
797 u32 *outbox; 855 u32 *outbox;
798 dma_addr_t outdma;
799 u8 field; 856 u8 field;
800 u16 size; 857 u16 size;
801 int err; 858 int err;
@@ -860,11 +917,12 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
860#define QUERY_DEV_LIM_LAMR_OFFSET 0x9f 917#define QUERY_DEV_LIM_LAMR_OFFSET 0x9f
861#define QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET 0xa0 918#define QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET 0xa0
862 919
863 outbox = pci_alloc_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, &outdma); 920 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
864 if (!outbox) 921 if (IS_ERR(mailbox))
865 return -ENOMEM; 922 return PTR_ERR(mailbox);
923 outbox = mailbox->buf;
866 924
867 err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DEV_LIM, 925 err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DEV_LIM,
868 CMD_TIME_CLASS_A, status); 926 CMD_TIME_CLASS_A, status);
869 927
870 if (err) 928 if (err)
@@ -1020,15 +1078,15 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
1020 } 1078 }
1021 1079
1022out: 1080out:
1023 pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma); 1081 mthca_free_mailbox(dev, mailbox);
1024 return err; 1082 return err;
1025} 1083}
1026 1084
1027int mthca_QUERY_ADAPTER(struct mthca_dev *dev, 1085int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
1028 struct mthca_adapter *adapter, u8 *status) 1086 struct mthca_adapter *adapter, u8 *status)
1029{ 1087{
1088 struct mthca_mailbox *mailbox;
1030 u32 *outbox; 1089 u32 *outbox;
1031 dma_addr_t outdma;
1032 int err; 1090 int err;
1033 1091
1034#define QUERY_ADAPTER_OUT_SIZE 0x100 1092#define QUERY_ADAPTER_OUT_SIZE 0x100
@@ -1037,23 +1095,24 @@ int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
1037#define QUERY_ADAPTER_REVISION_ID_OFFSET 0x08 1095#define QUERY_ADAPTER_REVISION_ID_OFFSET 0x08
1038#define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10 1096#define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10
1039 1097
1040 outbox = pci_alloc_consistent(dev->pdev, QUERY_ADAPTER_OUT_SIZE, &outdma); 1098 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
1041 if (!outbox) 1099 if (IS_ERR(mailbox))
1042 return -ENOMEM; 1100 return PTR_ERR(mailbox);
1101 outbox = mailbox->buf;
1043 1102
1044 err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_ADAPTER, 1103 err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_ADAPTER,
1045 CMD_TIME_CLASS_A, status); 1104 CMD_TIME_CLASS_A, status);
1046 1105
1047 if (err) 1106 if (err)
1048 goto out; 1107 goto out;
1049 1108
1050 MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET); 1109 MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET);
1051 MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET); 1110 MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET);
1052 MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET); 1111 MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET);
1053 MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); 1112 MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET);
1054 1113
1055out: 1114out:
1056 pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma); 1115 mthca_free_mailbox(dev, mailbox);
1057 return err; 1116 return err;
1058} 1117}
1059 1118
@@ -1061,8 +1120,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
1061 struct mthca_init_hca_param *param, 1120 struct mthca_init_hca_param *param,
1062 u8 *status) 1121 u8 *status)
1063{ 1122{
1123 struct mthca_mailbox *mailbox;
1064 u32 *inbox; 1124 u32 *inbox;
1065 dma_addr_t indma;
1066 int err; 1125 int err;
1067 1126
1068#define INIT_HCA_IN_SIZE 0x200 1127#define INIT_HCA_IN_SIZE 0x200
@@ -1102,9 +1161,10 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
1102#define INIT_HCA_UAR_SCATCH_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x10) 1161#define INIT_HCA_UAR_SCATCH_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x10)
1103#define INIT_HCA_UAR_CTX_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x18) 1162#define INIT_HCA_UAR_CTX_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x18)
1104 1163
1105 inbox = pci_alloc_consistent(dev->pdev, INIT_HCA_IN_SIZE, &indma); 1164 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
1106 if (!inbox) 1165 if (IS_ERR(mailbox))
1107 return -ENOMEM; 1166 return PTR_ERR(mailbox);
1167 inbox = mailbox->buf;
1108 1168
1109 memset(inbox, 0, INIT_HCA_IN_SIZE); 1169 memset(inbox, 0, INIT_HCA_IN_SIZE);
1110 1170
@@ -1167,10 +1227,9 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
1167 MTHCA_PUT(inbox, param->uarc_base, INIT_HCA_UAR_CTX_BASE_OFFSET); 1227 MTHCA_PUT(inbox, param->uarc_base, INIT_HCA_UAR_CTX_BASE_OFFSET);
1168 } 1228 }
1169 1229
1170 err = mthca_cmd(dev, indma, 0, 0, CMD_INIT_HCA, 1230 err = mthca_cmd(dev, mailbox->dma, 0, 0, CMD_INIT_HCA, HZ, status);
1171 HZ, status);
1172 1231
1173 pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma); 1232 mthca_free_mailbox(dev, mailbox);
1174 return err; 1233 return err;
1175} 1234}
1176 1235
@@ -1178,8 +1237,8 @@ int mthca_INIT_IB(struct mthca_dev *dev,
1178 struct mthca_init_ib_param *param, 1237 struct mthca_init_ib_param *param,
1179 int port, u8 *status) 1238 int port, u8 *status)
1180{ 1239{
1240 struct mthca_mailbox *mailbox;
1181 u32 *inbox; 1241 u32 *inbox;
1182 dma_addr_t indma;
1183 int err; 1242 int err;
1184 u32 flags; 1243 u32 flags;
1185 1244
@@ -1199,9 +1258,10 @@ int mthca_INIT_IB(struct mthca_dev *dev,
1199#define INIT_IB_NODE_GUID_OFFSET 0x18 1258#define INIT_IB_NODE_GUID_OFFSET 0x18
1200#define INIT_IB_SI_GUID_OFFSET 0x20 1259#define INIT_IB_SI_GUID_OFFSET 0x20
1201 1260
1202 inbox = pci_alloc_consistent(dev->pdev, INIT_IB_IN_SIZE, &indma); 1261 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
1203 if (!inbox) 1262 if (IS_ERR(mailbox))
1204 return -ENOMEM; 1263 return PTR_ERR(mailbox);
1264 inbox = mailbox->buf;
1205 1265
1206 memset(inbox, 0, INIT_IB_IN_SIZE); 1266 memset(inbox, 0, INIT_IB_IN_SIZE);
1207 1267
@@ -1221,10 +1281,10 @@ int mthca_INIT_IB(struct mthca_dev *dev,
1221 MTHCA_PUT(inbox, param->node_guid, INIT_IB_NODE_GUID_OFFSET); 1281 MTHCA_PUT(inbox, param->node_guid, INIT_IB_NODE_GUID_OFFSET);
1222 MTHCA_PUT(inbox, param->si_guid, INIT_IB_SI_GUID_OFFSET); 1282 MTHCA_PUT(inbox, param->si_guid, INIT_IB_SI_GUID_OFFSET);
1223 1283
1224 err = mthca_cmd(dev, indma, port, 0, CMD_INIT_IB, 1284 err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_INIT_IB,
1225 CMD_TIME_CLASS_A, status); 1285 CMD_TIME_CLASS_A, status);
1226 1286
1227 pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma); 1287 mthca_free_mailbox(dev, mailbox);
1228 return err; 1288 return err;
1229} 1289}
1230 1290
@@ -1241,8 +1301,8 @@ int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status)
1241int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, 1301int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
1242 int port, u8 *status) 1302 int port, u8 *status)
1243{ 1303{
1304 struct mthca_mailbox *mailbox;
1244 u32 *inbox; 1305 u32 *inbox;
1245 dma_addr_t indma;
1246 int err; 1306 int err;
1247 u32 flags = 0; 1307 u32 flags = 0;
1248 1308
@@ -1253,9 +1313,10 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
1253#define SET_IB_CAP_MASK_OFFSET 0x04 1313#define SET_IB_CAP_MASK_OFFSET 0x04
1254#define SET_IB_SI_GUID_OFFSET 0x08 1314#define SET_IB_SI_GUID_OFFSET 0x08
1255 1315
1256 inbox = pci_alloc_consistent(dev->pdev, SET_IB_IN_SIZE, &indma); 1316 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
1257 if (!inbox) 1317 if (IS_ERR(mailbox))
1258 return -ENOMEM; 1318 return PTR_ERR(mailbox);
1319 inbox = mailbox->buf;
1259 1320
1260 memset(inbox, 0, SET_IB_IN_SIZE); 1321 memset(inbox, 0, SET_IB_IN_SIZE);
1261 1322
@@ -1266,10 +1327,10 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
1266 MTHCA_PUT(inbox, param->cap_mask, SET_IB_CAP_MASK_OFFSET); 1327 MTHCA_PUT(inbox, param->cap_mask, SET_IB_CAP_MASK_OFFSET);
1267 MTHCA_PUT(inbox, param->si_guid, SET_IB_SI_GUID_OFFSET); 1328 MTHCA_PUT(inbox, param->si_guid, SET_IB_SI_GUID_OFFSET);
1268 1329
1269 err = mthca_cmd(dev, indma, port, 0, CMD_SET_IB, 1330 err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_SET_IB,
1270 CMD_TIME_CLASS_B, status); 1331 CMD_TIME_CLASS_B, status);
1271 1332
1272 pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma); 1333 mthca_free_mailbox(dev, mailbox);
1273 return err; 1334 return err;
1274} 1335}
1275 1336
@@ -1280,20 +1341,22 @@ int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *st
1280 1341
1281int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status) 1342int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status)
1282{ 1343{
1344 struct mthca_mailbox *mailbox;
1283 u64 *inbox; 1345 u64 *inbox;
1284 dma_addr_t indma;
1285 int err; 1346 int err;
1286 1347
1287 inbox = pci_alloc_consistent(dev->pdev, 16, &indma); 1348 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
1288 if (!inbox) 1349 if (IS_ERR(mailbox))
1289 return -ENOMEM; 1350 return PTR_ERR(mailbox);
1351 inbox = mailbox->buf;
1290 1352
1291 inbox[0] = cpu_to_be64(virt); 1353 inbox[0] = cpu_to_be64(virt);
1292 inbox[1] = cpu_to_be64(dma_addr); 1354 inbox[1] = cpu_to_be64(dma_addr);
1293 1355
1294 err = mthca_cmd(dev, indma, 1, 0, CMD_MAP_ICM, CMD_TIME_CLASS_B, status); 1356 err = mthca_cmd(dev, mailbox->dma, 1, 0, CMD_MAP_ICM,
1357 CMD_TIME_CLASS_B, status);
1295 1358
1296 pci_free_consistent(dev->pdev, 16, inbox, indma); 1359 mthca_free_mailbox(dev, mailbox);
1297 1360
1298 if (!err) 1361 if (!err)
1299 mthca_dbg(dev, "Mapped page at %llx to %llx for ICM.\n", 1362 mthca_dbg(dev, "Mapped page at %llx to %llx for ICM.\n",
@@ -1338,69 +1401,26 @@ int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
1338 return 0; 1401 return 0;
1339} 1402}
1340 1403
1341int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry, 1404int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
1342 int mpt_index, u8 *status) 1405 int mpt_index, u8 *status)
1343{ 1406{
1344 dma_addr_t indma; 1407 return mthca_cmd(dev, mailbox->dma, mpt_index, 0, CMD_SW2HW_MPT,
1345 int err; 1408 CMD_TIME_CLASS_B, status);
1346
1347 indma = pci_map_single(dev->pdev, mpt_entry,
1348 MTHCA_MPT_ENTRY_SIZE,
1349 PCI_DMA_TODEVICE);
1350 if (pci_dma_mapping_error(indma))
1351 return -ENOMEM;
1352
1353 err = mthca_cmd(dev, indma, mpt_index, 0, CMD_SW2HW_MPT,
1354 CMD_TIME_CLASS_B, status);
1355
1356 pci_unmap_single(dev->pdev, indma,
1357 MTHCA_MPT_ENTRY_SIZE, PCI_DMA_TODEVICE);
1358 return err;
1359} 1409}
1360 1410
1361int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry, 1411int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
1362 int mpt_index, u8 *status) 1412 int mpt_index, u8 *status)
1363{ 1413{
1364 dma_addr_t outdma = 0; 1414 return mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
1365 int err; 1415 !mailbox, CMD_HW2SW_MPT,
1366 1416 CMD_TIME_CLASS_B, status);
1367 if (mpt_entry) {
1368 outdma = pci_map_single(dev->pdev, mpt_entry,
1369 MTHCA_MPT_ENTRY_SIZE,
1370 PCI_DMA_FROMDEVICE);
1371 if (pci_dma_mapping_error(outdma))
1372 return -ENOMEM;
1373 }
1374
1375 err = mthca_cmd_box(dev, 0, outdma, mpt_index, !mpt_entry,
1376 CMD_HW2SW_MPT,
1377 CMD_TIME_CLASS_B, status);
1378
1379 if (mpt_entry)
1380 pci_unmap_single(dev->pdev, outdma,
1381 MTHCA_MPT_ENTRY_SIZE,
1382 PCI_DMA_FROMDEVICE);
1383 return err;
1384} 1417}
1385 1418
1386int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry, 1419int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
1387 int num_mtt, u8 *status) 1420 int num_mtt, u8 *status)
1388{ 1421{
1389 dma_addr_t indma; 1422 return mthca_cmd(dev, mailbox->dma, num_mtt, 0, CMD_WRITE_MTT,
1390 int err; 1423 CMD_TIME_CLASS_B, status);
1391
1392 indma = pci_map_single(dev->pdev, mtt_entry,
1393 (num_mtt + 2) * 8,
1394 PCI_DMA_TODEVICE);
1395 if (pci_dma_mapping_error(indma))
1396 return -ENOMEM;
1397
1398 err = mthca_cmd(dev, indma, num_mtt, 0, CMD_WRITE_MTT,
1399 CMD_TIME_CLASS_B, status);
1400
1401 pci_unmap_single(dev->pdev, indma,
1402 (num_mtt + 2) * 8, PCI_DMA_TODEVICE);
1403 return err;
1404} 1424}
1405 1425
1406int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status) 1426int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status)
@@ -1418,92 +1438,38 @@ int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
1418 0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status); 1438 0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status);
1419} 1439}
1420 1440
1421int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context, 1441int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
1422 int eq_num, u8 *status) 1442 int eq_num, u8 *status)
1423{ 1443{
1424 dma_addr_t indma; 1444 return mthca_cmd(dev, mailbox->dma, eq_num, 0, CMD_SW2HW_EQ,
1425 int err; 1445 CMD_TIME_CLASS_A, status);
1426
1427 indma = pci_map_single(dev->pdev, eq_context,
1428 MTHCA_EQ_CONTEXT_SIZE,
1429 PCI_DMA_TODEVICE);
1430 if (pci_dma_mapping_error(indma))
1431 return -ENOMEM;
1432
1433 err = mthca_cmd(dev, indma, eq_num, 0, CMD_SW2HW_EQ,
1434 CMD_TIME_CLASS_A, status);
1435
1436 pci_unmap_single(dev->pdev, indma,
1437 MTHCA_EQ_CONTEXT_SIZE, PCI_DMA_TODEVICE);
1438 return err;
1439} 1446}
1440 1447
1441int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context, 1448int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
1442 int eq_num, u8 *status) 1449 int eq_num, u8 *status)
1443{ 1450{
1444 dma_addr_t outdma = 0; 1451 return mthca_cmd_box(dev, 0, mailbox->dma, eq_num, 0,
1445 int err; 1452 CMD_HW2SW_EQ,
1446 1453 CMD_TIME_CLASS_A, status);
1447 outdma = pci_map_single(dev->pdev, eq_context,
1448 MTHCA_EQ_CONTEXT_SIZE,
1449 PCI_DMA_FROMDEVICE);
1450 if (pci_dma_mapping_error(outdma))
1451 return -ENOMEM;
1452
1453 err = mthca_cmd_box(dev, 0, outdma, eq_num, 0,
1454 CMD_HW2SW_EQ,
1455 CMD_TIME_CLASS_A, status);
1456
1457 pci_unmap_single(dev->pdev, outdma,
1458 MTHCA_EQ_CONTEXT_SIZE,
1459 PCI_DMA_FROMDEVICE);
1460 return err;
1461} 1454}
1462 1455
1463int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context, 1456int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
1464 int cq_num, u8 *status) 1457 int cq_num, u8 *status)
1465{ 1458{
1466 dma_addr_t indma; 1459 return mthca_cmd(dev, mailbox->dma, cq_num, 0, CMD_SW2HW_CQ,
1467 int err;
1468
1469 indma = pci_map_single(dev->pdev, cq_context,
1470 MTHCA_CQ_CONTEXT_SIZE,
1471 PCI_DMA_TODEVICE);
1472 if (pci_dma_mapping_error(indma))
1473 return -ENOMEM;
1474
1475 err = mthca_cmd(dev, indma, cq_num, 0, CMD_SW2HW_CQ,
1476 CMD_TIME_CLASS_A, status); 1460 CMD_TIME_CLASS_A, status);
1477
1478 pci_unmap_single(dev->pdev, indma,
1479 MTHCA_CQ_CONTEXT_SIZE, PCI_DMA_TODEVICE);
1480 return err;
1481} 1461}
1482 1462
1483int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context, 1463int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
1484 int cq_num, u8 *status) 1464 int cq_num, u8 *status)
1485{ 1465{
1486 dma_addr_t outdma = 0; 1466 return mthca_cmd_box(dev, 0, mailbox->dma, cq_num, 0,
1487 int err; 1467 CMD_HW2SW_CQ,
1488 1468 CMD_TIME_CLASS_A, status);
1489 outdma = pci_map_single(dev->pdev, cq_context,
1490 MTHCA_CQ_CONTEXT_SIZE,
1491 PCI_DMA_FROMDEVICE);
1492 if (pci_dma_mapping_error(outdma))
1493 return -ENOMEM;
1494
1495 err = mthca_cmd_box(dev, 0, outdma, cq_num, 0,
1496 CMD_HW2SW_CQ,
1497 CMD_TIME_CLASS_A, status);
1498
1499 pci_unmap_single(dev->pdev, outdma,
1500 MTHCA_CQ_CONTEXT_SIZE,
1501 PCI_DMA_FROMDEVICE);
1502 return err;
1503} 1469}
1504 1470
1505int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, 1471int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
1506 int is_ee, void *qp_context, u32 optmask, 1472 int is_ee, struct mthca_mailbox *mailbox, u32 optmask,
1507 u8 *status) 1473 u8 *status)
1508{ 1474{
1509 static const u16 op[] = { 1475 static const u16 op[] = {
@@ -1520,36 +1486,34 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
1520 [MTHCA_TRANS_ANY2RST] = CMD_ERR2RST_QPEE 1486 [MTHCA_TRANS_ANY2RST] = CMD_ERR2RST_QPEE
1521 }; 1487 };
1522 u8 op_mod = 0; 1488 u8 op_mod = 0;
1523 1489 int my_mailbox = 0;
1524 dma_addr_t indma;
1525 int err; 1490 int err;
1526 1491
1527 if (trans < 0 || trans >= ARRAY_SIZE(op)) 1492 if (trans < 0 || trans >= ARRAY_SIZE(op))
1528 return -EINVAL; 1493 return -EINVAL;
1529 1494
1530 if (trans == MTHCA_TRANS_ANY2RST) { 1495 if (trans == MTHCA_TRANS_ANY2RST) {
1531 indma = 0;
1532 op_mod = 3; /* don't write outbox, any->reset */ 1496 op_mod = 3; /* don't write outbox, any->reset */
1533 1497
1534 /* For debugging */ 1498 /* For debugging */
1535 qp_context = pci_alloc_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE, 1499 if (!mailbox) {
1536 &indma); 1500 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
1537 op_mod = 2; /* write outbox, any->reset */ 1501 if (!IS_ERR(mailbox)) {
1502 my_mailbox = 1;
1503 op_mod = 2; /* write outbox, any->reset */
1504 } else
1505 mailbox = NULL;
1506 }
1538 } else { 1507 } else {
1539 indma = pci_map_single(dev->pdev, qp_context,
1540 MTHCA_QP_CONTEXT_SIZE,
1541 PCI_DMA_TODEVICE);
1542 if (pci_dma_mapping_error(indma))
1543 return -ENOMEM;
1544
1545 if (0) { 1508 if (0) {
1546 int i; 1509 int i;
1547 mthca_dbg(dev, "Dumping QP context:\n"); 1510 mthca_dbg(dev, "Dumping QP context:\n");
1548 printk(" opt param mask: %08x\n", be32_to_cpup(qp_context)); 1511 printk(" opt param mask: %08x\n", be32_to_cpup(mailbox->buf));
1549 for (i = 0; i < 0x100 / 4; ++i) { 1512 for (i = 0; i < 0x100 / 4; ++i) {
1550 if (i % 8 == 0) 1513 if (i % 8 == 0)
1551 printk(" [%02x] ", i * 4); 1514 printk(" [%02x] ", i * 4);
1552 printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2])); 1515 printk(" %08x",
1516 be32_to_cpu(((u32 *) mailbox->buf)[i + 2]));
1553 if ((i + 1) % 8 == 0) 1517 if ((i + 1) % 8 == 0)
1554 printk("\n"); 1518 printk("\n");
1555 } 1519 }
@@ -1557,55 +1521,39 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
1557 } 1521 }
1558 1522
1559 if (trans == MTHCA_TRANS_ANY2RST) { 1523 if (trans == MTHCA_TRANS_ANY2RST) {
1560 err = mthca_cmd_box(dev, 0, indma, (!!is_ee << 24) | num, 1524 err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
1561 op_mod, op[trans], CMD_TIME_CLASS_C, status); 1525 (!!is_ee << 24) | num, op_mod,
1526 op[trans], CMD_TIME_CLASS_C, status);
1562 1527
1563 if (0) { 1528 if (0 && mailbox) {
1564 int i; 1529 int i;
1565 mthca_dbg(dev, "Dumping QP context:\n"); 1530 mthca_dbg(dev, "Dumping QP context:\n");
1566 printk(" %08x\n", be32_to_cpup(qp_context)); 1531 printk(" %08x\n", be32_to_cpup(mailbox->buf));
1567 for (i = 0; i < 0x100 / 4; ++i) { 1532 for (i = 0; i < 0x100 / 4; ++i) {
1568 if (i % 8 == 0) 1533 if (i % 8 == 0)
1569 printk("[%02x] ", i * 4); 1534 printk("[%02x] ", i * 4);
1570 printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2])); 1535 printk(" %08x",
1536 be32_to_cpu(((u32 *) mailbox->buf)[i + 2]));
1571 if ((i + 1) % 8 == 0) 1537 if ((i + 1) % 8 == 0)
1572 printk("\n"); 1538 printk("\n");
1573 } 1539 }
1574 } 1540 }
1575 1541
1576 } else 1542 } else
1577 err = mthca_cmd(dev, indma, (!!is_ee << 24) | num, 1543 err = mthca_cmd(dev, mailbox->dma, (!!is_ee << 24) | num,
1578 op_mod, op[trans], CMD_TIME_CLASS_C, status); 1544 op_mod, op[trans], CMD_TIME_CLASS_C, status);
1579 1545
1580 if (trans != MTHCA_TRANS_ANY2RST) 1546 if (my_mailbox)
1581 pci_unmap_single(dev->pdev, indma, 1547 mthca_free_mailbox(dev, mailbox);
1582 MTHCA_QP_CONTEXT_SIZE, PCI_DMA_TODEVICE); 1548
1583 else
1584 pci_free_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE,
1585 qp_context, indma);
1586 return err; 1549 return err;
1587} 1550}
1588 1551
1589int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, 1552int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
1590 void *qp_context, u8 *status) 1553 struct mthca_mailbox *mailbox, u8 *status)
1591{ 1554{
1592 dma_addr_t outdma = 0; 1555 return mthca_cmd_box(dev, 0, mailbox->dma, (!!is_ee << 24) | num, 0,
1593 int err; 1556 CMD_QUERY_QPEE, CMD_TIME_CLASS_A, status);
1594
1595 outdma = pci_map_single(dev->pdev, qp_context,
1596 MTHCA_QP_CONTEXT_SIZE,
1597 PCI_DMA_FROMDEVICE);
1598 if (pci_dma_mapping_error(outdma))
1599 return -ENOMEM;
1600
1601 err = mthca_cmd_box(dev, 0, outdma, (!!is_ee << 24) | num, 0,
1602 CMD_QUERY_QPEE,
1603 CMD_TIME_CLASS_A, status);
1604
1605 pci_unmap_single(dev->pdev, outdma,
1606 MTHCA_QP_CONTEXT_SIZE,
1607 PCI_DMA_FROMDEVICE);
1608 return err;
1609} 1557}
1610 1558
1611int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, 1559int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
@@ -1635,11 +1583,11 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
1635} 1583}
1636 1584
1637int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, 1585int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
1638 int port, struct ib_wc* in_wc, struct ib_grh* in_grh, 1586 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
1639 void *in_mad, void *response_mad, u8 *status) 1587 void *in_mad, void *response_mad, u8 *status)
1640{ 1588{
1641 void *box; 1589 struct mthca_mailbox *inmailbox, *outmailbox;
1642 dma_addr_t dma; 1590 void *inbox;
1643 int err; 1591 int err;
1644 u32 in_modifier = port; 1592 u32 in_modifier = port;
1645 u8 op_modifier = 0; 1593 u8 op_modifier = 0;
@@ -1653,11 +1601,18 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
1653#define MAD_IFC_PKEY_OFFSET 0x10e 1601#define MAD_IFC_PKEY_OFFSET 0x10e
1654#define MAD_IFC_GRH_OFFSET 0x140 1602#define MAD_IFC_GRH_OFFSET 0x140
1655 1603
1656 box = pci_alloc_consistent(dev->pdev, MAD_IFC_BOX_SIZE, &dma); 1604 inmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
1657 if (!box) 1605 if (IS_ERR(inmailbox))
1658 return -ENOMEM; 1606 return PTR_ERR(inmailbox);
1607 inbox = inmailbox->buf;
1659 1608
1660 memcpy(box, in_mad, 256); 1609 outmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
1610 if (IS_ERR(outmailbox)) {
1611 mthca_free_mailbox(dev, inmailbox);
1612 return PTR_ERR(outmailbox);
1613 }
1614
1615 memcpy(inbox, in_mad, 256);
1661 1616
1662 /* 1617 /*
1663 * Key check traps can't be generated unless we have in_wc to 1618 * Key check traps can't be generated unless we have in_wc to
@@ -1671,97 +1626,65 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
1671 if (in_wc) { 1626 if (in_wc) {
1672 u8 val; 1627 u8 val;
1673 1628
1674 memset(box + 256, 0, 256); 1629 memset(inbox + 256, 0, 256);
1675 1630
1676 MTHCA_PUT(box, in_wc->qp_num, MAD_IFC_MY_QPN_OFFSET); 1631 MTHCA_PUT(inbox, in_wc->qp_num, MAD_IFC_MY_QPN_OFFSET);
1677 MTHCA_PUT(box, in_wc->src_qp, MAD_IFC_RQPN_OFFSET); 1632 MTHCA_PUT(inbox, in_wc->src_qp, MAD_IFC_RQPN_OFFSET);
1678 1633
1679 val = in_wc->sl << 4; 1634 val = in_wc->sl << 4;
1680 MTHCA_PUT(box, val, MAD_IFC_SL_OFFSET); 1635 MTHCA_PUT(inbox, val, MAD_IFC_SL_OFFSET);
1681 1636
1682 val = in_wc->dlid_path_bits | 1637 val = in_wc->dlid_path_bits |
1683 (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0); 1638 (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
1684 MTHCA_PUT(box, val, MAD_IFC_GRH_OFFSET); 1639 MTHCA_PUT(inbox, val, MAD_IFC_GRH_OFFSET);
1685 1640
1686 MTHCA_PUT(box, in_wc->slid, MAD_IFC_RLID_OFFSET); 1641 MTHCA_PUT(inbox, in_wc->slid, MAD_IFC_RLID_OFFSET);
1687 MTHCA_PUT(box, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET); 1642 MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET);
1688 1643
1689 if (in_grh) 1644 if (in_grh)
1690 memcpy((u8 *) box + MAD_IFC_GRH_OFFSET, in_grh, 40); 1645 memcpy(inbox + MAD_IFC_GRH_OFFSET, in_grh, 40);
1691 1646
1692 op_modifier |= 0x10; 1647 op_modifier |= 0x10;
1693 1648
1694 in_modifier |= in_wc->slid << 16; 1649 in_modifier |= in_wc->slid << 16;
1695 } 1650 }
1696 1651
1697 err = mthca_cmd_box(dev, dma, dma + 512, in_modifier, op_modifier, 1652 err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma,
1653 in_modifier, op_modifier,
1698 CMD_MAD_IFC, CMD_TIME_CLASS_C, status); 1654 CMD_MAD_IFC, CMD_TIME_CLASS_C, status);
1699 1655
1700 if (!err && !*status) 1656 if (!err && !*status)
1701 memcpy(response_mad, box + 512, 256); 1657 memcpy(response_mad, outmailbox->buf, 256);
1702 1658
1703 pci_free_consistent(dev->pdev, MAD_IFC_BOX_SIZE, box, dma); 1659 mthca_free_mailbox(dev, inmailbox);
1660 mthca_free_mailbox(dev, outmailbox);
1704 return err; 1661 return err;
1705} 1662}
1706 1663
1707int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm, 1664int mthca_READ_MGM(struct mthca_dev *dev, int index,
1708 u8 *status) 1665 struct mthca_mailbox *mailbox, u8 *status)
1709{ 1666{
1710 dma_addr_t outdma = 0; 1667 return mthca_cmd_box(dev, 0, mailbox->dma, index, 0,
1711 int err; 1668 CMD_READ_MGM, CMD_TIME_CLASS_A, status);
1712
1713 outdma = pci_map_single(dev->pdev, mgm,
1714 MTHCA_MGM_ENTRY_SIZE,
1715 PCI_DMA_FROMDEVICE);
1716 if (pci_dma_mapping_error(outdma))
1717 return -ENOMEM;
1718
1719 err = mthca_cmd_box(dev, 0, outdma, index, 0,
1720 CMD_READ_MGM,
1721 CMD_TIME_CLASS_A, status);
1722
1723 pci_unmap_single(dev->pdev, outdma,
1724 MTHCA_MGM_ENTRY_SIZE,
1725 PCI_DMA_FROMDEVICE);
1726 return err;
1727} 1669}
1728 1670
1729int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm, 1671int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
1730 u8 *status) 1672 struct mthca_mailbox *mailbox, u8 *status)
1731{ 1673{
1732 dma_addr_t indma; 1674 return mthca_cmd(dev, mailbox->dma, index, 0, CMD_WRITE_MGM,
1733 int err; 1675 CMD_TIME_CLASS_A, status);
1734
1735 indma = pci_map_single(dev->pdev, mgm,
1736 MTHCA_MGM_ENTRY_SIZE,
1737 PCI_DMA_TODEVICE);
1738 if (pci_dma_mapping_error(indma))
1739 return -ENOMEM;
1740
1741 err = mthca_cmd(dev, indma, index, 0, CMD_WRITE_MGM,
1742 CMD_TIME_CLASS_A, status);
1743
1744 pci_unmap_single(dev->pdev, indma,
1745 MTHCA_MGM_ENTRY_SIZE, PCI_DMA_TODEVICE);
1746 return err;
1747} 1676}
1748 1677
1749int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash, 1678int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
1750 u8 *status) 1679 u16 *hash, u8 *status)
1751{ 1680{
1752 dma_addr_t indma;
1753 u64 imm; 1681 u64 imm;
1754 int err; 1682 int err;
1755 1683
1756 indma = pci_map_single(dev->pdev, gid, 16, PCI_DMA_TODEVICE); 1684 err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH,
1757 if (pci_dma_mapping_error(indma))
1758 return -ENOMEM;
1759
1760 err = mthca_cmd_imm(dev, indma, &imm, 0, 0, CMD_MGID_HASH,
1761 CMD_TIME_CLASS_A, status); 1685 CMD_TIME_CLASS_A, status);
1762 *hash = imm;
1763 1686
1764 pci_unmap_single(dev->pdev, indma, 16, PCI_DMA_TODEVICE); 1687 *hash = imm;
1765 return err; 1688 return err;
1766} 1689}
1767 1690
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
index adf039b3c540..ed517f175dd6 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.h
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -37,8 +37,7 @@
37 37
38#include <ib_verbs.h> 38#include <ib_verbs.h>
39 39
40#define MTHCA_CMD_MAILBOX_ALIGN 16UL 40#define MTHCA_MAILBOX_SIZE 4096
41#define MTHCA_CMD_MAILBOX_EXTRA (MTHCA_CMD_MAILBOX_ALIGN - 1)
42 41
43enum { 42enum {
44 /* command completed successfully: */ 43 /* command completed successfully: */
@@ -112,6 +111,11 @@ enum {
112 DEV_LIM_FLAG_UD_MULTI = 1 << 21, 111 DEV_LIM_FLAG_UD_MULTI = 1 << 21,
113}; 112};
114 113
114struct mthca_mailbox {
115 dma_addr_t dma;
116 void *buf;
117};
118
115struct mthca_dev_lim { 119struct mthca_dev_lim {
116 int max_srq_sz; 120 int max_srq_sz;
117 int max_qp_sz; 121 int max_qp_sz;
@@ -235,11 +239,17 @@ struct mthca_set_ib_param {
235 u32 cap_mask; 239 u32 cap_mask;
236}; 240};
237 241
242int mthca_cmd_init(struct mthca_dev *dev);
243void mthca_cmd_cleanup(struct mthca_dev *dev);
238int mthca_cmd_use_events(struct mthca_dev *dev); 244int mthca_cmd_use_events(struct mthca_dev *dev);
239void mthca_cmd_use_polling(struct mthca_dev *dev); 245void mthca_cmd_use_polling(struct mthca_dev *dev);
240void mthca_cmd_event(struct mthca_dev *dev, u16 token, 246void mthca_cmd_event(struct mthca_dev *dev, u16 token,
241 u8 status, u64 out_param); 247 u8 status, u64 out_param);
242 248
249struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
250 unsigned int gfp_mask);
251void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox);
252
243int mthca_SYS_EN(struct mthca_dev *dev, u8 *status); 253int mthca_SYS_EN(struct mthca_dev *dev, u8 *status);
244int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status); 254int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status);
245int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status); 255int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
@@ -270,41 +280,39 @@ int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
270int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status); 280int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status);
271int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages, 281int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
272 u8 *status); 282 u8 *status);
273int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry, 283int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
274 int mpt_index, u8 *status); 284 int mpt_index, u8 *status);
275int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry, 285int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
276 int mpt_index, u8 *status); 286 int mpt_index, u8 *status);
277int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry, 287int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
278 int num_mtt, u8 *status); 288 int num_mtt, u8 *status);
279int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status); 289int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status);
280int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap, 290int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
281 int eq_num, u8 *status); 291 int eq_num, u8 *status);
282int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context, 292int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
283 int eq_num, u8 *status); 293 int eq_num, u8 *status);
284int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context, 294int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
285 int eq_num, u8 *status); 295 int eq_num, u8 *status);
286int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context, 296int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
287 int cq_num, u8 *status); 297 int cq_num, u8 *status);
288int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context, 298int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
289 int cq_num, u8 *status); 299 int cq_num, u8 *status);
290int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, 300int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
291 int is_ee, void *qp_context, u32 optmask, 301 int is_ee, struct mthca_mailbox *mailbox, u32 optmask,
292 u8 *status); 302 u8 *status);
293int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, 303int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
294 void *qp_context, u8 *status); 304 struct mthca_mailbox *mailbox, u8 *status);
295int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, 305int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
296 u8 *status); 306 u8 *status);
297int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, 307int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
298 int port, struct ib_wc* in_wc, struct ib_grh* in_grh, 308 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
299 void *in_mad, void *response_mad, u8 *status); 309 void *in_mad, void *response_mad, u8 *status);
300int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm, 310int mthca_READ_MGM(struct mthca_dev *dev, int index,
301 u8 *status); 311 struct mthca_mailbox *mailbox, u8 *status);
302int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm, 312int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
303 u8 *status); 313 struct mthca_mailbox *mailbox, u8 *status);
304int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash, 314int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
305 u8 *status); 315 u16 *hash, u8 *status);
306int mthca_NOP(struct mthca_dev *dev, u8 *status); 316int mthca_NOP(struct mthca_dev *dev, u8 *status);
307 317
308#define MAILBOX_ALIGN(x) ((void *) ALIGN((unsigned long) (x), MTHCA_CMD_MAILBOX_ALIGN))
309
310#endif /* MTHCA_CMD_H */ 318#endif /* MTHCA_CMD_H */
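[editor's note: note the error convention that goes with the new allocator: mthca_alloc_mailbox() reports failure through the pointer itself, so every caller converted in this patch tests with IS_ERR() and recovers the errno with PTR_ERR() instead of checking for NULL:

    mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
    if (IS_ERR(mailbox))
            return PTR_ERR(mailbox);

    /* ... build the command payload in mailbox->buf,
     *     pass the mailbox to a firmware command ... */

    mthca_free_mailbox(dev, mailbox);
]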
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 2bf347b84c31..5687c3014522 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -1,5 +1,7 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4 * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
3 * 5 *
4 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 7 * licenses. You may choose to be licensed under the terms of the GNU
@@ -171,6 +173,17 @@ static inline void set_cqe_hw(struct mthca_cqe *cqe)
171 cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW; 173 cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW;
172} 174}
173 175
176static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr)
177{
178 __be32 *cqe = cqe_ptr;
179
180 (void) cqe; /* avoid warning if mthca_dbg compiled away... */
181 mthca_dbg(dev, "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
182 be32_to_cpu(cqe[0]), be32_to_cpu(cqe[1]), be32_to_cpu(cqe[2]),
183 be32_to_cpu(cqe[3]), be32_to_cpu(cqe[4]), be32_to_cpu(cqe[5]),
184 be32_to_cpu(cqe[6]), be32_to_cpu(cqe[7]));
185}
186
174/* 187/*
175 * incr is ignored in native Arbel (mem-free) mode, so cq->cons_index 188 * incr is ignored in native Arbel (mem-free) mode, so cq->cons_index
176 * should be correct before calling update_cons_index(). 189 * should be correct before calling update_cons_index().
@@ -280,16 +293,12 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
280 int dbd; 293 int dbd;
281 u32 new_wqe; 294 u32 new_wqe;
282 295
283 if (1 && cqe->syndrome != SYNDROME_WR_FLUSH_ERR) { 296 if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) {
284 int j; 297 mthca_dbg(dev, "local QP operation err "
285 298 "(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n",
286 mthca_dbg(dev, "%x/%d: error CQE -> QPN %06x, WQE @ %08x\n", 299 be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe),
287 cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn), 300 cq->cqn, cq->cons_index);
288 be32_to_cpu(cqe->wqe)); 301 dump_cqe(dev, cqe);
289
290 for (j = 0; j < 8; ++j)
291 printk(KERN_DEBUG " [%2x] %08x\n",
292 j * 4, be32_to_cpu(((u32 *) cqe)[j]));
293 } 302 }
294 303
295 /* 304 /*
@@ -364,8 +373,12 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
364 * If we're at the end of the WQE chain, or we've used up our 373 * If we're at the end of the WQE chain, or we've used up our
365 * doorbell count, free the CQE. Otherwise just update it for 374 * doorbell count, free the CQE. Otherwise just update it for
366 * the next poll operation. 375 * the next poll operation.
376 *
377 * This does not apply to mem-free HCAs: they don't use the
378 * doorbell count field, and so we should always free the CQE.
367 */ 379 */
368 if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd)) 380 if (mthca_is_memfree(dev) ||
381 !(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd))
369 return 0; 382 return 0;
370 383
371 cqe->db_cnt = cpu_to_be16(be16_to_cpu(cqe->db_cnt) - dbd); 384 cqe->db_cnt = cpu_to_be16(be16_to_cpu(cqe->db_cnt) - dbd);
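[editor's note: the added comment states the rule the condition implements. Restated as a predicate (illustrative only, not part of the patch): an error CQE is freed rather than recycled when the HCA is mem-free (no doorbell count field), when the failed WQE was the last of its chain, or when the doorbell count is exhausted.

    static inline int free_error_cqe(struct mthca_dev *dev,
                                     struct mthca_cqe *cqe,
                                     u32 new_wqe, int dbd)
    {
            return mthca_is_memfree(dev) ||           /* db_cnt unused on mem-free */
                   !(new_wqe & cpu_to_be32(0x3f)) ||  /* end of WQE chain          */
                   (!cqe->db_cnt && dbd);             /* doorbell count used up    */
    }
]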
@@ -377,15 +390,6 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
377 return 0; 390 return 0;
378} 391}
379 392
380static void dump_cqe(struct mthca_cqe *cqe)
381{
382 int j;
383
384 for (j = 0; j < 8; ++j)
385 printk(KERN_DEBUG " [%2x] %08x\n",
386 j * 4, be32_to_cpu(((u32 *) cqe)[j]));
387}
388
389static inline int mthca_poll_one(struct mthca_dev *dev, 393static inline int mthca_poll_one(struct mthca_dev *dev,
390 struct mthca_cq *cq, 394 struct mthca_cq *cq,
391 struct mthca_qp **cur_qp, 395 struct mthca_qp **cur_qp,
@@ -414,8 +418,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
414 mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n", 418 mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n",
415 cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn), 419 cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn),
416 be32_to_cpu(cqe->wqe)); 420 be32_to_cpu(cqe->wqe));
417 421 dump_cqe(dev, cqe);
418 dump_cqe(cqe);
419 } 422 }
420 423
421 is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) == 424 is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
@@ -638,19 +641,19 @@ static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq)
638 int size; 641 int size;
639 642
640 if (cq->is_direct) 643 if (cq->is_direct)
641 pci_free_consistent(dev->pdev, 644 dma_free_coherent(&dev->pdev->dev,
642 (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE, 645 (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE,
643 cq->queue.direct.buf, 646 cq->queue.direct.buf,
644 pci_unmap_addr(&cq->queue.direct, 647 pci_unmap_addr(&cq->queue.direct,
645 mapping)); 648 mapping));
646 else { 649 else {
647 size = (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE; 650 size = (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE;
648 for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i) 651 for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
649 if (cq->queue.page_list[i].buf) 652 if (cq->queue.page_list[i].buf)
650 pci_free_consistent(dev->pdev, PAGE_SIZE, 653 dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
651 cq->queue.page_list[i].buf, 654 cq->queue.page_list[i].buf,
652 pci_unmap_addr(&cq->queue.page_list[i], 655 pci_unmap_addr(&cq->queue.page_list[i],
653 mapping)); 656 mapping));
654 657
655 kfree(cq->queue.page_list); 658 kfree(cq->queue.page_list);
656 } 659 }
@@ -670,8 +673,8 @@ static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size,
670 npages = 1; 673 npages = 1;
671 shift = get_order(size) + PAGE_SHIFT; 674 shift = get_order(size) + PAGE_SHIFT;
672 675
673 cq->queue.direct.buf = pci_alloc_consistent(dev->pdev, 676 cq->queue.direct.buf = dma_alloc_coherent(&dev->pdev->dev,
674 size, &t); 677 size, &t, GFP_KERNEL);
675 if (!cq->queue.direct.buf) 678 if (!cq->queue.direct.buf)
676 return -ENOMEM; 679 return -ENOMEM;
677 680
@@ -709,7 +712,8 @@ static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size,
709 712
710 for (i = 0; i < npages; ++i) { 713 for (i = 0; i < npages; ++i) {
711 cq->queue.page_list[i].buf = 714 cq->queue.page_list[i].buf =
712 pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t); 715 dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
716 &t, GFP_KERNEL);
713 if (!cq->queue.page_list[i].buf) 717 if (!cq->queue.page_list[i].buf)
714 goto err_free; 718 goto err_free;
715 719
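[editor's note: both CQ buffer paths move from the PCI wrappers to the generic DMA API. The calls are equivalent except that dma_alloc_coherent() takes the generic struct device and an explicit gfp flag, whereas pci_alloc_consistent() hardwired GFP_ATOMIC; these paths may sleep (mthca_init_cq() starts with might_sleep()), so GFP_KERNEL is the right flag here:

    /* before: implicitly GFP_ATOMIC */
    buf = pci_alloc_consistent(dev->pdev, size, &dma_handle);

    /* after: same mapping, explicit device and allocation flags */
    buf = dma_alloc_coherent(&dev->pdev->dev, size, &dma_handle, GFP_KERNEL);
]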
@@ -743,10 +747,11 @@ err_out:
743} 747}
744 748
745int mthca_init_cq(struct mthca_dev *dev, int nent, 749int mthca_init_cq(struct mthca_dev *dev, int nent,
750 struct mthca_ucontext *ctx, u32 pdn,
746 struct mthca_cq *cq) 751 struct mthca_cq *cq)
747{ 752{
748 int size = nent * MTHCA_CQ_ENTRY_SIZE; 753 int size = nent * MTHCA_CQ_ENTRY_SIZE;
749 void *mailbox = NULL; 754 struct mthca_mailbox *mailbox;
750 struct mthca_cq_context *cq_context; 755 struct mthca_cq_context *cq_context;
751 int err = -ENOMEM; 756 int err = -ENOMEM;
752 u8 status; 757 u8 status;
@@ -754,45 +759,49 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
754 759
755 might_sleep(); 760 might_sleep();
756 761
757 cq->ibcq.cqe = nent - 1; 762 cq->ibcq.cqe = nent - 1;
763 cq->is_kernel = !ctx;
758 764
759 cq->cqn = mthca_alloc(&dev->cq_table.alloc); 765 cq->cqn = mthca_alloc(&dev->cq_table.alloc);
760 if (cq->cqn == -1) 766 if (cq->cqn == -1)
761 return -ENOMEM; 767 return -ENOMEM;
762 768
763 if (mthca_is_memfree(dev)) { 769 if (mthca_is_memfree(dev)) {
764 cq->arm_sn = 1;
765
766 err = mthca_table_get(dev, dev->cq_table.table, cq->cqn); 770 err = mthca_table_get(dev, dev->cq_table.table, cq->cqn);
767 if (err) 771 if (err)
768 goto err_out; 772 goto err_out;
769 773
770 err = -ENOMEM; 774 if (cq->is_kernel) {
775 cq->arm_sn = 1;
771 776
772 cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, 777 err = -ENOMEM;
773 cq->cqn, &cq->set_ci_db);
774 if (cq->set_ci_db_index < 0)
775 goto err_out_icm;
776 778
777 cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM, 779 cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI,
778 cq->cqn, &cq->arm_db); 780 cq->cqn, &cq->set_ci_db);
779 if (cq->arm_db_index < 0) 781 if (cq->set_ci_db_index < 0)
780 goto err_out_ci; 782 goto err_out_icm;
783
784 cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
785 cq->cqn, &cq->arm_db);
786 if (cq->arm_db_index < 0)
787 goto err_out_ci;
788 }
781 } 789 }
782 790
783 mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA, 791 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
784 GFP_KERNEL); 792 if (IS_ERR(mailbox))
785 if (!mailbox) 793 goto err_out_arm;
786 goto err_out_mailbox;
787 794
788 cq_context = MAILBOX_ALIGN(mailbox); 795 cq_context = mailbox->buf;
789 796
790 err = mthca_alloc_cq_buf(dev, size, cq); 797 if (cq->is_kernel) {
791 if (err) 798 err = mthca_alloc_cq_buf(dev, size, cq);
792 goto err_out_mailbox; 799 if (err)
800 goto err_out_mailbox;
793 801
794 for (i = 0; i < nent; ++i) 802 for (i = 0; i < nent; ++i)
795 set_cqe_hw(get_cqe(cq, i)); 803 set_cqe_hw(get_cqe(cq, i));
804 }
796 805
797 spin_lock_init(&cq->lock); 806 spin_lock_init(&cq->lock);
798 atomic_set(&cq->refcount, 1); 807 atomic_set(&cq->refcount, 1);
@@ -803,11 +812,14 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
803 MTHCA_CQ_STATE_DISARMED | 812 MTHCA_CQ_STATE_DISARMED |
804 MTHCA_CQ_FLAG_TR); 813 MTHCA_CQ_FLAG_TR);
805 cq_context->start = cpu_to_be64(0); 814 cq_context->start = cpu_to_be64(0);
806 cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 | 815 cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
807 dev->driver_uar.index); 816 if (ctx)
817 cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index);
818 else
819 cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
808 cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); 820 cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
809 cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn); 821 cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
810 cq_context->pd = cpu_to_be32(dev->driver_pd.pd_num); 822 cq_context->pd = cpu_to_be32(pdn);
811 cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey); 823 cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey);
812 cq_context->cqn = cpu_to_be32(cq->cqn); 824 cq_context->cqn = cpu_to_be32(cq->cqn);
813 825
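[editor's note: for a userspace CQ the context now carries the owning ucontext's UAR index, and the PD number comes from the caller instead of always being the driver PD. The logsize_usrpage word packs log2 of the CQE count into bits 31:24 (nent is a power of two, so ffs(nent) - 1 is exactly log2(nent)) and the UAR index into the low 24 bits. For example, a 256-entry CQ on UAR index 1:

    /* ffs(256) - 1 = 8, so the word is (8 << 24) | 1 = 0x08000001 */
    cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 | 1);
]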
@@ -816,7 +828,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
816 cq_context->state_db = cpu_to_be32(cq->arm_db_index); 828 cq_context->state_db = cpu_to_be32(cq->arm_db_index);
817 } 829 }
818 830
819 err = mthca_SW2HW_CQ(dev, cq_context, cq->cqn, &status); 831 err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn, &status);
820 if (err) { 832 if (err) {
821 mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err); 833 mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err);
822 goto err_out_free_mr; 834 goto err_out_free_mr;
@@ -840,22 +852,25 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
840 852
841 cq->cons_index = 0; 853 cq->cons_index = 0;
842 854
843 kfree(mailbox); 855 mthca_free_mailbox(dev, mailbox);
844 856
845 return 0; 857 return 0;
846 858
847err_out_free_mr: 859err_out_free_mr:
848 mthca_free_mr(dev, &cq->mr); 860 if (cq->is_kernel) {
849 mthca_free_cq_buf(dev, cq); 861 mthca_free_mr(dev, &cq->mr);
862 mthca_free_cq_buf(dev, cq);
863 }
850 864
851err_out_mailbox: 865err_out_mailbox:
852 kfree(mailbox); 866 mthca_free_mailbox(dev, mailbox);
853 867
854 if (mthca_is_memfree(dev)) 868err_out_arm:
869 if (cq->is_kernel && mthca_is_memfree(dev))
855 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); 870 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
856 871
857err_out_ci: 872err_out_ci:
858 if (mthca_is_memfree(dev)) 873 if (cq->is_kernel && mthca_is_memfree(dev))
859 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); 874 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
860 875
861err_out_icm: 876err_out_icm:
@@ -870,32 +885,31 @@ err_out:
870void mthca_free_cq(struct mthca_dev *dev, 885void mthca_free_cq(struct mthca_dev *dev,
871 struct mthca_cq *cq) 886 struct mthca_cq *cq)
872{ 887{
873 void *mailbox; 888 struct mthca_mailbox *mailbox;
874 int err; 889 int err;
875 u8 status; 890 u8 status;
876 891
877 might_sleep(); 892 might_sleep();
878 893
879 mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA, 894 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
880 GFP_KERNEL); 895 if (IS_ERR(mailbox)) {
881 if (!mailbox) {
882 mthca_warn(dev, "No memory for mailbox to free CQ.\n"); 896 mthca_warn(dev, "No memory for mailbox to free CQ.\n");
883 return; 897 return;
884 } 898 }
885 899
886 err = mthca_HW2SW_CQ(dev, MAILBOX_ALIGN(mailbox), cq->cqn, &status); 900 err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn, &status);
887 if (err) 901 if (err)
888 mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err); 902 mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err);
889 else if (status) 903 else if (status)
890 mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", 904 mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", status);
891 status);
892 905
893 if (0) { 906 if (0) {
894 u32 *ctx = MAILBOX_ALIGN(mailbox); 907 u32 *ctx = mailbox->buf;
895 int j; 908 int j;
896 909
897 printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n", 910 printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n",
898 cq->cqn, cq->cons_index, !!next_cqe_sw(cq)); 911 cq->cqn, cq->cons_index,
912 cq->is_kernel ? !!next_cqe_sw(cq) : 0);
899 for (j = 0; j < 16; ++j) 913 for (j = 0; j < 16; ++j)
900 printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j])); 914 printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j]));
901 } 915 }
@@ -913,17 +927,18 @@ void mthca_free_cq(struct mthca_dev *dev,
913 atomic_dec(&cq->refcount); 927 atomic_dec(&cq->refcount);
914 wait_event(cq->wait, !atomic_read(&cq->refcount)); 928 wait_event(cq->wait, !atomic_read(&cq->refcount));
915 929
916 mthca_free_mr(dev, &cq->mr); 930 if (cq->is_kernel) {
917 mthca_free_cq_buf(dev, cq); 931 mthca_free_mr(dev, &cq->mr);
918 932 mthca_free_cq_buf(dev, cq);
919 if (mthca_is_memfree(dev)) { 933 if (mthca_is_memfree(dev)) {
920 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); 934 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
921 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); 935 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
922 mthca_table_put(dev, dev->cq_table.table, cq->cqn); 936 }
923 } 937 }
924 938
939 mthca_table_put(dev, dev->cq_table.table, cq->cqn);
925 mthca_free(&dev->cq_table.alloc, cq->cqn); 940 mthca_free(&dev->cq_table.alloc, cq->cqn);
926 kfree(mailbox); 941 mthca_free_mailbox(dev, mailbox);
927} 942}
928 943
929int __devinit mthca_init_cq_table(struct mthca_dev *dev) 944int __devinit mthca_init_cq_table(struct mthca_dev *dev)
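[editor's note: teardown keeps the driver's usual quiesce idiom: mthca_free_cq() drops the initial reference taken in mthca_init_cq() and then sleeps until every in-flight completion path has dropped its own. A sketch of the two sides of that pattern, assuming the event path takes a reference around its use of the CQ as elsewhere in this driver:

    /* completion/event side */
    atomic_inc(&cq->refcount);
    /* ... look at CQEs, call the consumer's handler ... */
    if (atomic_dec_and_test(&cq->refcount))
            wake_up(&cq->wait);

    /* teardown side (as in mthca_free_cq() above) */
    atomic_dec(&cq->refcount);      /* drop mthca_init_cq()'s reference */
    wait_event(cq->wait, !atomic_read(&cq->refcount));
]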
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index e3d79e267dc9..5ecdd2eeeb0f 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -1,5 +1,7 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 * 5 *
4 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 7 * licenses. You may choose to be licensed under the terms of the GNU
@@ -46,8 +48,8 @@
46 48
47#define DRV_NAME "ib_mthca" 49#define DRV_NAME "ib_mthca"
48#define PFX DRV_NAME ": " 50#define PFX DRV_NAME ": "
49#define DRV_VERSION "0.06-pre" 51#define DRV_VERSION "0.06"
50#define DRV_RELDATE "November 8, 2004" 52#define DRV_RELDATE "June 23, 2005"
51 53
52enum { 54enum {
53 MTHCA_FLAG_DDR_HIDDEN = 1 << 1, 55 MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
@@ -98,6 +100,7 @@ enum {
98}; 100};
99 101
100struct mthca_cmd { 102struct mthca_cmd {
103 struct pci_pool *pool;
101 int use_events; 104 int use_events;
102 struct semaphore hcr_sem; 105 struct semaphore hcr_sem;
103 struct semaphore poll_sem; 106 struct semaphore poll_sem;
@@ -376,9 +379,15 @@ void mthca_unregister_device(struct mthca_dev *dev);
376int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); 379int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
377void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); 380void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
378 381
379int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd); 382int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd);
380void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd); 383void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
381 384
385struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size);
386void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt);
387int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
388 int start_index, u64 *buffer_list, int list_len);
389int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
390 u64 iova, u64 total_size, u32 access, struct mthca_mr *mr);
382int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, 391int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
383 u32 access, struct mthca_mr *mr); 392 u32 access, struct mthca_mr *mr);
384int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, 393int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
@@ -405,6 +414,7 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
405int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); 414int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
406int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); 415int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
407int mthca_init_cq(struct mthca_dev *dev, int nent, 416int mthca_init_cq(struct mthca_dev *dev, int nent,
417 struct mthca_ucontext *ctx, u32 pdn,
408 struct mthca_cq *cq); 418 struct mthca_cq *cq);
409void mthca_free_cq(struct mthca_dev *dev, 419void mthca_free_cq(struct mthca_dev *dev,
410 struct mthca_cq *cq); 420 struct mthca_cq *cq);
@@ -430,12 +440,14 @@ int mthca_alloc_qp(struct mthca_dev *dev,
430 struct mthca_cq *recv_cq, 440 struct mthca_cq *recv_cq,
431 enum ib_qp_type type, 441 enum ib_qp_type type,
432 enum ib_sig_type send_policy, 442 enum ib_sig_type send_policy,
443 struct ib_qp_cap *cap,
433 struct mthca_qp *qp); 444 struct mthca_qp *qp);
434int mthca_alloc_sqp(struct mthca_dev *dev, 445int mthca_alloc_sqp(struct mthca_dev *dev,
435 struct mthca_pd *pd, 446 struct mthca_pd *pd,
436 struct mthca_cq *send_cq, 447 struct mthca_cq *send_cq,
437 struct mthca_cq *recv_cq, 448 struct mthca_cq *recv_cq,
438 enum ib_sig_type send_policy, 449 enum ib_sig_type send_policy,
450 struct ib_qp_cap *cap,
439 int qpn, 451 int qpn,
440 int port, 452 int port,
441 struct mthca_sqp *sqp); 453 struct mthca_sqp *sqp);
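[editor's note: the new MR declarations split registration into two objects: a struct mthca_mtt holding the translation entries, and the MPT entry created by mthca_mr_alloc() that points at it. The calling sequence these prototypes suggest (treating mr->mtt as the link from the region to its translation table, which is an assumption about struct mthca_mr):

    mr->mtt = mthca_alloc_mtt(dev, npages);
    if (IS_ERR(mr->mtt))
            return PTR_ERR(mr->mtt);

    err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, npages);
    if (!err)
            err = mthca_mr_alloc(dev, pd_num, page_shift, iova,
                                 total_size, access_flags, mr);
    if (err)
            mthca_free_mtt(dev, mr->mtt);
]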
diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h
index 821039a49049..535fad7710fb 100644
--- a/drivers/infiniband/hw/mthca/mthca_doorbell.h
+++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index f46d615d396f..cbcf2b4722e4 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -469,7 +469,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
469 PAGE_SIZE; 469 PAGE_SIZE;
470 u64 *dma_list = NULL; 470 u64 *dma_list = NULL;
471 dma_addr_t t; 471 dma_addr_t t;
472 void *mailbox = NULL; 472 struct mthca_mailbox *mailbox;
473 struct mthca_eq_context *eq_context; 473 struct mthca_eq_context *eq_context;
474 int err = -ENOMEM; 474 int err = -ENOMEM;
475 int i; 475 int i;
@@ -494,17 +494,16 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
494 if (!dma_list) 494 if (!dma_list)
495 goto err_out_free; 495 goto err_out_free;
496 496
497 mailbox = kmalloc(sizeof *eq_context + MTHCA_CMD_MAILBOX_EXTRA, 497 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
498 GFP_KERNEL); 498 if (IS_ERR(mailbox))
499 if (!mailbox)
500 goto err_out_free; 499 goto err_out_free;
501 eq_context = MAILBOX_ALIGN(mailbox); 500 eq_context = mailbox->buf;
502 501
503 for (i = 0; i < npages; ++i) { 502 for (i = 0; i < npages; ++i) {
504 eq->page_list[i].buf = pci_alloc_consistent(dev->pdev, 503 eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
505 PAGE_SIZE, &t); 504 PAGE_SIZE, &t, GFP_KERNEL);
506 if (!eq->page_list[i].buf) 505 if (!eq->page_list[i].buf)
507 goto err_out_free; 506 goto err_out_free_pages;
508 507
509 dma_list[i] = t; 508 dma_list[i] = t;
510 pci_unmap_addr_set(&eq->page_list[i], mapping, t); 509 pci_unmap_addr_set(&eq->page_list[i], mapping, t);
@@ -517,7 +516,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
517 516
518 eq->eqn = mthca_alloc(&dev->eq_table.alloc); 517 eq->eqn = mthca_alloc(&dev->eq_table.alloc);
519 if (eq->eqn == -1) 518 if (eq->eqn == -1)
520 goto err_out_free; 519 goto err_out_free_pages;
521 520
522 err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num, 521 err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
523 dma_list, PAGE_SHIFT, npages, 522 dma_list, PAGE_SHIFT, npages,
@@ -548,7 +547,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
548 eq_context->intr = intr; 547 eq_context->intr = intr;
549 eq_context->lkey = cpu_to_be32(eq->mr.ibmr.lkey); 548 eq_context->lkey = cpu_to_be32(eq->mr.ibmr.lkey);
550 549
551 err = mthca_SW2HW_EQ(dev, eq_context, eq->eqn, &status); 550 err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status);
552 if (err) { 551 if (err) {
553 mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err); 552 mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err);
554 goto err_out_free_mr; 553 goto err_out_free_mr;
@@ -561,7 +560,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
561 } 560 }
562 561
563 kfree(dma_list); 562 kfree(dma_list);
564 kfree(mailbox); 563 mthca_free_mailbox(dev, mailbox);
565 564
566 eq->eqn_mask = swab32(1 << eq->eqn); 565 eq->eqn_mask = swab32(1 << eq->eqn);
567 eq->cons_index = 0; 566 eq->cons_index = 0;
@@ -579,17 +578,19 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
579 err_out_free_eq: 578 err_out_free_eq:
580 mthca_free(&dev->eq_table.alloc, eq->eqn); 579 mthca_free(&dev->eq_table.alloc, eq->eqn);
581 580
582 err_out_free: 581 err_out_free_pages:
583 for (i = 0; i < npages; ++i) 582 for (i = 0; i < npages; ++i)
584 if (eq->page_list[i].buf) 583 if (eq->page_list[i].buf)
585 pci_free_consistent(dev->pdev, PAGE_SIZE, 584 dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
586 eq->page_list[i].buf, 585 eq->page_list[i].buf,
587 pci_unmap_addr(&eq->page_list[i], 586 pci_unmap_addr(&eq->page_list[i],
588 mapping)); 587 mapping));
588
589 mthca_free_mailbox(dev, mailbox);
589 590
591 err_out_free:
590 kfree(eq->page_list); 592 kfree(eq->page_list);
591 kfree(dma_list); 593 kfree(dma_list);
592 kfree(mailbox);
593 594
594 err_out: 595 err_out:
595 return err; 596 return err;
@@ -598,25 +599,22 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
598static void mthca_free_eq(struct mthca_dev *dev, 599static void mthca_free_eq(struct mthca_dev *dev,
599 struct mthca_eq *eq) 600 struct mthca_eq *eq)
600{ 601{
601 void *mailbox = NULL; 602 struct mthca_mailbox *mailbox;
602 int err; 603 int err;
603 u8 status; 604 u8 status;
604 int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) / 605 int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
605 PAGE_SIZE; 606 PAGE_SIZE;
606 int i; 607 int i;
607 608
608 mailbox = kmalloc(sizeof (struct mthca_eq_context) + MTHCA_CMD_MAILBOX_EXTRA, 609 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
609 GFP_KERNEL); 610 if (IS_ERR(mailbox))
610 if (!mailbox)
611 return; 611 return;
612 612
613 err = mthca_HW2SW_EQ(dev, MAILBOX_ALIGN(mailbox), 613 err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status);
614 eq->eqn, &status);
615 if (err) 614 if (err)
616 mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err); 615 mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err);
617 if (status) 616 if (status)
618 mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", 617 mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", status);
619 status);
620 618
621 dev->eq_table.arm_mask &= ~eq->eqn_mask; 619 dev->eq_table.arm_mask &= ~eq->eqn_mask;
622 620
@@ -625,7 +623,7 @@ static void mthca_free_eq(struct mthca_dev *dev,
625 for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) { 623 for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) {
626 if (i % 4 == 0) 624 if (i % 4 == 0)
627 printk("[%02x] ", i * 4); 625 printk("[%02x] ", i * 4);
628 printk(" %08x", be32_to_cpup(MAILBOX_ALIGN(mailbox) + i * 4)); 626 printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
629 if ((i + 1) % 4 == 0) 627 if ((i + 1) % 4 == 0)
630 printk("\n"); 628 printk("\n");
631 } 629 }
@@ -638,7 +636,7 @@ static void mthca_free_eq(struct mthca_dev *dev,
638 pci_unmap_addr(&eq->page_list[i], mapping)); 636 pci_unmap_addr(&eq->page_list[i], mapping));
639 637
640 kfree(eq->page_list); 638 kfree(eq->page_list);
641 kfree(mailbox); 639 mthca_free_mailbox(dev, mailbox);
642} 640}
643 641
644static void mthca_free_irqs(struct mthca_dev *dev) 642static void mthca_free_irqs(struct mthca_dev *dev)
@@ -709,8 +707,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
709 if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) & 707 if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
710 dev->fw.arbel.eq_arm_base) + 4, 4, 708 dev->fw.arbel.eq_arm_base) + 4, 4,
711 &dev->eq_regs.arbel.eq_arm)) { 709 &dev->eq_regs.arbel.eq_arm)) {
712 mthca_err(dev, "Couldn't map interrupt clear register, " 710 mthca_err(dev, "Couldn't map EQ arm register, aborting.\n");
713 "aborting.\n");
714 mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & 711 mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
715 dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, 712 dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
716 dev->clr_base); 713 dev->clr_base);
@@ -721,8 +718,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
721 dev->fw.arbel.eq_set_ci_base, 718 dev->fw.arbel.eq_set_ci_base,
722 MTHCA_EQ_SET_CI_SIZE, 719 MTHCA_EQ_SET_CI_SIZE,
723 &dev->eq_regs.arbel.eq_set_ci_base)) { 720 &dev->eq_regs.arbel.eq_set_ci_base)) {
724 mthca_err(dev, "Couldn't map interrupt clear register, " 721 mthca_err(dev, "Couldn't map EQ CI register, aborting.\n");
725 "aborting.\n");
726 mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) & 722 mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
727 dev->fw.arbel.eq_arm_base) + 4, 4, 723 dev->fw.arbel.eq_arm_base) + 4, 4,
728 dev->eq_regs.arbel.eq_arm); 724 dev->eq_regs.arbel.eq_arm);
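[editor's note: splitting off err_out_free_pages is not cosmetic: with the mailbox now allocated before the page loop, a page allocation failure must free the pages and the mailbox, while a failure before mthca_alloc_mailbox() succeeds must not touch it. The ladder follows the standard one-label-per-resource unwind, jumped to in reverse order of acquisition; schematically:

    a = alloc_a();                  /* e.g. dma_list     */
    if (!a)
            goto err_out;
    b = alloc_b();                  /* e.g. the mailbox  */
    if (IS_ERR(b))
            goto err_free_a;
    c = alloc_c();                  /* e.g. the EQ pages */
    if (!c)
            goto err_free_b;
    ...
    err_free_b:
            free_b(b);
    err_free_a:
            free_a(a);
    err_out:
            return err;
]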
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index d40590356df8..2ef916859e17 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -69,7 +70,7 @@ MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero");
69#endif /* CONFIG_PCI_MSI */ 70#endif /* CONFIG_PCI_MSI */
70 71
71static const char mthca_version[] __devinitdata = 72static const char mthca_version[] __devinitdata =
72 "ib_mthca: Mellanox InfiniBand HCA driver v" 73 DRV_NAME ": Mellanox InfiniBand HCA driver v"
73 DRV_VERSION " (" DRV_RELDATE ")\n"; 74 DRV_VERSION " (" DRV_RELDATE ")\n";
74 75
75static struct mthca_profile default_profile = { 76static struct mthca_profile default_profile = {
@@ -664,7 +665,7 @@ static int __devinit mthca_setup_hca(struct mthca_dev *dev)
664 goto err_pd_table_free; 665 goto err_pd_table_free;
665 } 666 }
666 667
667 err = mthca_pd_alloc(dev, &dev->driver_pd); 668 err = mthca_pd_alloc(dev, 1, &dev->driver_pd);
668 if (err) { 669 if (err) {
669 mthca_err(dev, "Failed to create driver PD, " 670 mthca_err(dev, "Failed to create driver PD, "
670 "aborting.\n"); 671 "aborting.\n");
@@ -927,13 +928,13 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
927 */ 928 */
928 if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) || 929 if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
929 pci_resource_len(pdev, 0) != 1 << 20) { 930 pci_resource_len(pdev, 0) != 1 << 20) {
930 dev_err(&pdev->dev, "Missing DCS, aborting."); 931 dev_err(&pdev->dev, "Missing DCS, aborting.\n");
931 err = -ENODEV; 932 err = -ENODEV;
932 goto err_disable_pdev; 933 goto err_disable_pdev;
933 } 934 }
934 if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM) || 935 if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM) ||
935 pci_resource_len(pdev, 2) != 1 << 23) { 936 pci_resource_len(pdev, 2) != 1 << 23) {
936 dev_err(&pdev->dev, "Missing UAR, aborting."); 937 dev_err(&pdev->dev, "Missing UAR, aborting.\n");
937 err = -ENODEV; 938 err = -ENODEV;
938 goto err_disable_pdev; 939 goto err_disable_pdev;
939 } 940 }
@@ -1004,25 +1005,18 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
1004 !pci_enable_msi(pdev)) 1005 !pci_enable_msi(pdev))
1005 mdev->mthca_flags |= MTHCA_FLAG_MSI; 1006 mdev->mthca_flags |= MTHCA_FLAG_MSI;
1006 1007
1007 sema_init(&mdev->cmd.hcr_sem, 1); 1008 if (mthca_cmd_init(mdev)) {
1008 sema_init(&mdev->cmd.poll_sem, 1); 1009 mthca_err(mdev, "Failed to init command interface, aborting.\n");
1009 mdev->cmd.use_events = 0;
1010
1011 mdev->hcr = ioremap(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, MTHCA_HCR_SIZE);
1012 if (!mdev->hcr) {
1013 mthca_err(mdev, "Couldn't map command register, "
1014 "aborting.\n");
1015 err = -ENOMEM;
1016 goto err_free_dev; 1010 goto err_free_dev;
1017 } 1011 }
1018 1012
1019 err = mthca_tune_pci(mdev); 1013 err = mthca_tune_pci(mdev);
1020 if (err) 1014 if (err)
1021 goto err_iounmap; 1015 goto err_cmd;
1022 1016
1023 err = mthca_init_hca(mdev); 1017 err = mthca_init_hca(mdev);
1024 if (err) 1018 if (err)
1025 goto err_iounmap; 1019 goto err_cmd;
1026 1020
1027 if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) { 1021 if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) {
1028 mthca_warn(mdev, "HCA FW version %x.%x.%x is old (%x.%x.%x is current).\n", 1022 mthca_warn(mdev, "HCA FW version %x.%x.%x is old (%x.%x.%x is current).\n",
@@ -1070,8 +1064,8 @@ err_cleanup:
1070err_close: 1064err_close:
1071 mthca_close_hca(mdev); 1065 mthca_close_hca(mdev);
1072 1066
1073err_iounmap: 1067err_cmd:
1074 iounmap(mdev->hcr); 1068 mthca_cmd_cleanup(mdev);
1075 1069
1076err_free_dev: 1070err_free_dev:
1077 if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) 1071 if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
@@ -1118,10 +1112,8 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev)
1118 iounmap(mdev->kar); 1112 iounmap(mdev->kar);
1119 mthca_uar_free(mdev, &mdev->driver_uar); 1113 mthca_uar_free(mdev, &mdev->driver_uar);
1120 mthca_cleanup_uar_table(mdev); 1114 mthca_cleanup_uar_table(mdev);
1121
1122 mthca_close_hca(mdev); 1115 mthca_close_hca(mdev);
1123 1116 mthca_cmd_cleanup(mdev);
1124 iounmap(mdev->hcr);
1125 1117
1126 if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) 1118 if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
1127 pci_disable_msix(pdev); 1119 pci_disable_msix(pdev);
@@ -1163,7 +1155,7 @@ static struct pci_device_id mthca_pci_table[] = {
1163MODULE_DEVICE_TABLE(pci, mthca_pci_table); 1155MODULE_DEVICE_TABLE(pci, mthca_pci_table);
1164 1156
1165static struct pci_driver mthca_driver = { 1157static struct pci_driver mthca_driver = {
1166 .name = "ib_mthca", 1158 .name = DRV_NAME,
1167 .id_table = mthca_pci_table, 1159 .id_table = mthca_pci_table,
1168 .probe = mthca_init_one, 1160 .probe = mthca_init_one,
1169 .remove = __devexit_p(mthca_remove_one) 1161 .remove = __devexit_p(mthca_remove_one)
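[editor's note: the open-coded semaphore setup and HCR ioremap disappear into mthca_cmd_init(), which is also the natural home for creating the pci_pool that backs the new mailboxes. A plausible implementation, consistent with the err_cmd unwind above (the pool name and the use of MTHCA_MAILBOX_SIZE for both object size and alignment are assumptions):

    int mthca_cmd_init(struct mthca_dev *dev)
    {
            sema_init(&dev->cmd.hcr_sem,  1);
            sema_init(&dev->cmd.poll_sem, 1);
            dev->cmd.use_events = 0;

            dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE,
                               MTHCA_HCR_SIZE);
            if (!dev->hcr) {
                    mthca_err(dev, "Couldn't map command register, aborting.\n");
                    return -ENOMEM;
            }

            dev->cmd.pool = pci_pool_create("mthca_cmd", dev->pdev,
                                            MTHCA_MAILBOX_SIZE,
                                            MTHCA_MAILBOX_SIZE, 0);
            if (!dev->cmd.pool) {
                    iounmap(dev->hcr);
                    return -ENOMEM;
            }

            return 0;
    }

    void mthca_cmd_cleanup(struct mthca_dev *dev)
    {
            pci_pool_destroy(dev->cmd.pool);
            iounmap(dev->hcr);
    }
]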
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
index 70a6553a588e..5be7d949dbf6 100644
--- a/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -66,22 +66,23 @@ static const u8 zero_gid[16]; /* automatically initialized to 0 */
66 * entry in hash chain and *mgm holds end of hash chain. 66 * entry in hash chain and *mgm holds end of hash chain.
67 */ 67 */
68static int find_mgm(struct mthca_dev *dev, 68static int find_mgm(struct mthca_dev *dev,
69 u8 *gid, struct mthca_mgm *mgm, 69 u8 *gid, struct mthca_mailbox *mgm_mailbox,
70 u16 *hash, int *prev, int *index) 70 u16 *hash, int *prev, int *index)
71{ 71{
72 void *mailbox; 72 struct mthca_mailbox *mailbox;
73 struct mthca_mgm *mgm = mgm_mailbox->buf;
73 u8 *mgid; 74 u8 *mgid;
74 int err; 75 int err;
75 u8 status; 76 u8 status;
76 77
77 mailbox = kmalloc(16 + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); 78 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
78 if (!mailbox) 79 if (IS_ERR(mailbox))
79 return -ENOMEM; 80 return -ENOMEM;
80 mgid = MAILBOX_ALIGN(mailbox); 81 mgid = mailbox->buf;
81 82
82 memcpy(mgid, gid, 16); 83 memcpy(mgid, gid, 16);
83 84
84 err = mthca_MGID_HASH(dev, mgid, hash, &status); 85 err = mthca_MGID_HASH(dev, mailbox, hash, &status);
85 if (err) 86 if (err)
86 goto out; 87 goto out;
87 if (status) { 88 if (status) {
@@ -103,7 +104,7 @@ static int find_mgm(struct mthca_dev *dev,
103 *prev = -1; 104 *prev = -1;
104 105
105 do { 106 do {
106 err = mthca_READ_MGM(dev, *index, mgm, &status); 107 err = mthca_READ_MGM(dev, *index, mgm_mailbox, &status);
107 if (err) 108 if (err)
108 goto out; 109 goto out;
109 if (status) { 110 if (status) {
@@ -129,14 +130,14 @@ static int find_mgm(struct mthca_dev *dev,
129 *index = -1; 130 *index = -1;
130 131
131 out: 132 out:
132 kfree(mailbox); 133 mthca_free_mailbox(dev, mailbox);
133 return err; 134 return err;
134} 135}
135 136
136int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 137int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
137{ 138{
138 struct mthca_dev *dev = to_mdev(ibqp->device); 139 struct mthca_dev *dev = to_mdev(ibqp->device);
139 void *mailbox; 140 struct mthca_mailbox *mailbox;
140 struct mthca_mgm *mgm; 141 struct mthca_mgm *mgm;
141 u16 hash; 142 u16 hash;
142 int index, prev; 143 int index, prev;
@@ -145,15 +146,15 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
145 int err; 146 int err;
146 u8 status; 147 u8 status;
147 148
148 mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); 149 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
149 if (!mailbox) 150 if (IS_ERR(mailbox))
150 return -ENOMEM; 151 return PTR_ERR(mailbox);
151 mgm = MAILBOX_ALIGN(mailbox); 152 mgm = mailbox->buf;
152 153
153 if (down_interruptible(&dev->mcg_table.sem)) 154 if (down_interruptible(&dev->mcg_table.sem))
154 return -EINTR; 155 return -EINTR;
155 156
156 err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index); 157 err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
157 if (err) 158 if (err)
158 goto out; 159 goto out;
159 160
@@ -170,7 +171,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
170 goto out; 171 goto out;
171 } 172 }
172 173
173 err = mthca_READ_MGM(dev, index, mgm, &status); 174 err = mthca_READ_MGM(dev, index, mailbox, &status);
174 if (err) 175 if (err)
175 goto out; 176 goto out;
176 if (status) { 177 if (status) {
@@ -195,7 +196,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
195 goto out; 196 goto out;
196 } 197 }
197 198
198 err = mthca_WRITE_MGM(dev, index, mgm, &status); 199 err = mthca_WRITE_MGM(dev, index, mailbox, &status);
199 if (err) 200 if (err)
200 goto out; 201 goto out;
201 if (status) { 202 if (status) {
@@ -206,7 +207,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
206 if (!link) 207 if (!link)
207 goto out; 208 goto out;
208 209
209 err = mthca_READ_MGM(dev, prev, mgm, &status); 210 err = mthca_READ_MGM(dev, prev, mailbox, &status);
210 if (err) 211 if (err)
211 goto out; 212 goto out;
212 if (status) { 213 if (status) {
@@ -217,7 +218,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
217 218
218 mgm->next_gid_index = cpu_to_be32(index << 5); 219 mgm->next_gid_index = cpu_to_be32(index << 5);
219 220
220 err = mthca_WRITE_MGM(dev, prev, mgm, &status); 221 err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
221 if (err) 222 if (err)
222 goto out; 223 goto out;
223 if (status) { 224 if (status) {
@@ -227,14 +228,14 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
227 228
228 out: 229 out:
229 up(&dev->mcg_table.sem); 230 up(&dev->mcg_table.sem);
230 kfree(mailbox); 231 mthca_free_mailbox(dev, mailbox);
231 return err; 232 return err;
232} 233}
233 234
234int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 235int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
235{ 236{
236 struct mthca_dev *dev = to_mdev(ibqp->device); 237 struct mthca_dev *dev = to_mdev(ibqp->device);
237 void *mailbox; 238 struct mthca_mailbox *mailbox;
238 struct mthca_mgm *mgm; 239 struct mthca_mgm *mgm;
239 u16 hash; 240 u16 hash;
240 int prev, index; 241 int prev, index;
@@ -242,15 +243,15 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
242 int err; 243 int err;
243 u8 status; 244 u8 status;
244 245
245 mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); 246 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
246 if (!mailbox) 247 if (IS_ERR(mailbox))
247 return -ENOMEM; 248 return PTR_ERR(mailbox);
248 mgm = MAILBOX_ALIGN(mailbox); 249 mgm = mailbox->buf;
249 250
250 if (down_interruptible(&dev->mcg_table.sem)) 251 if (down_interruptible(&dev->mcg_table.sem))
251 return -EINTR; 252 return -EINTR;
252 253
253 err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index); 254 err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
254 if (err) 255 if (err)
255 goto out; 256 goto out;
256 257
@@ -285,7 +286,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
285 mgm->qp[loc] = mgm->qp[i - 1]; 286 mgm->qp[loc] = mgm->qp[i - 1];
286 mgm->qp[i - 1] = 0; 287 mgm->qp[i - 1] = 0;
287 288
288 err = mthca_WRITE_MGM(dev, index, mgm, &status); 289 err = mthca_WRITE_MGM(dev, index, mailbox, &status);
289 if (err) 290 if (err)
290 goto out; 291 goto out;
291 if (status) { 292 if (status) {
@@ -304,7 +305,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
304 if (be32_to_cpu(mgm->next_gid_index) >> 5) { 305 if (be32_to_cpu(mgm->next_gid_index) >> 5) {
305 err = mthca_READ_MGM(dev, 306 err = mthca_READ_MGM(dev,
306 be32_to_cpu(mgm->next_gid_index) >> 5, 307 be32_to_cpu(mgm->next_gid_index) >> 5,
307 mgm, &status); 308 mailbox, &status);
308 if (err) 309 if (err)
309 goto out; 310 goto out;
310 if (status) { 311 if (status) {
@@ -316,7 +317,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
316 } else 317 } else
317 memset(mgm->gid, 0, 16); 318 memset(mgm->gid, 0, 16);
318 319
319 err = mthca_WRITE_MGM(dev, index, mgm, &status); 320 err = mthca_WRITE_MGM(dev, index, mailbox, &status);
320 if (err) 321 if (err)
321 goto out; 322 goto out;
322 if (status) { 323 if (status) {
@@ -327,7 +328,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
327 } else { 328 } else {
328 /* Remove entry from AMGM */ 329 /* Remove entry from AMGM */
329 index = be32_to_cpu(mgm->next_gid_index) >> 5; 330 index = be32_to_cpu(mgm->next_gid_index) >> 5;
330 err = mthca_READ_MGM(dev, prev, mgm, &status); 331 err = mthca_READ_MGM(dev, prev, mailbox, &status);
331 if (err) 332 if (err)
332 goto out; 333 goto out;
333 if (status) { 334 if (status) {
@@ -338,7 +339,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
338 339
339 mgm->next_gid_index = cpu_to_be32(index << 5); 340 mgm->next_gid_index = cpu_to_be32(index << 5);
340 341
341 err = mthca_WRITE_MGM(dev, prev, mgm, &status); 342 err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
342 if (err) 343 if (err)
343 goto out; 344 goto out;
344 if (status) { 345 if (status) {
@@ -350,7 +351,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
350 351
351 out: 352 out:
352 up(&dev->mcg_table.sem); 353 up(&dev->mcg_table.sem);
353 kfree(mailbox); 354 mthca_free_mailbox(dev, mailbox);
354 return err; 355 return err;
355} 356}
356 357
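[editor's note: every multicast-group command now takes the mailbox itself rather than a raw buffer. The point of the change is that a mailbox knows its own bus address, so the command layer can let the firmware DMA straight into mailbox->buf instead of bounce-copying; a wrapper presumably reduces to something like the following (mthca_cmd_box() and its exact argument order are assumptions about the command layer, which this hunk does not show):

    int mthca_READ_MGM(struct mthca_dev *dev, int index,
                       struct mthca_mailbox *mailbox, u8 *status)
    {
            /* firmware writes the MGM entry directly at mailbox->dma */
            return mthca_cmd_box(dev, 0, mailbox->dma, index, 0,
                                 CMD_READ_MGM, CMD_TIME_CLASS_A, status);
    }
]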
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 637b30e35592..2a8646150355 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -47,6 +48,15 @@ enum {
47 MTHCA_TABLE_CHUNK_SIZE = 1 << 18 48 MTHCA_TABLE_CHUNK_SIZE = 1 << 18
48}; 49};
49 50
51struct mthca_user_db_table {
52 struct semaphore mutex;
53 struct {
54 u64 uvirt;
55 struct scatterlist mem;
56 int refcount;
57 } page[0];
58};
59
50void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm) 60void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
51{ 61{
52 struct mthca_icm_chunk *chunk, *tmp; 62 struct mthca_icm_chunk *chunk, *tmp;
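[editor's note: page[0] is the old-style flexible array member: the header and the per-page slots are allocated in one shot, sized at run time, as mthca_init_user_db_tab() below does with:

    npages = dev->uar_table.uarc_size / 4096;
    db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page,
                     GFP_KERNEL);
]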
@@ -179,9 +189,14 @@ out:
179 189
180void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj) 190void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
181{ 191{
182 int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE; 192 int i;
183 u8 status; 193 u8 status;
184 194
195 if (!mthca_is_memfree(dev))
196 return;
197
198 i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
199
185 down(&table->mutex); 200 down(&table->mutex);
186 201
187 if (--table->icm[i]->refcount == 0) { 202 if (--table->icm[i]->refcount == 0) {
@@ -256,6 +271,9 @@ void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
256{ 271{
257 int i; 272 int i;
258 273
274 if (!mthca_is_memfree(dev))
275 return;
276
259 for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size) 277 for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size)
260 mthca_table_put(dev, table, i); 278 mthca_table_put(dev, table, i);
261} 279}
@@ -336,13 +354,133 @@ void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
336 kfree(table); 354 kfree(table);
337} 355}
338 356
339static u64 mthca_uarc_virt(struct mthca_dev *dev, int page) 357static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page)
340{ 358{
341 return dev->uar_table.uarc_base + 359 return dev->uar_table.uarc_base +
342 dev->driver_uar.index * dev->uar_table.uarc_size + 360 uar->index * dev->uar_table.uarc_size +
343 page * 4096; 361 page * 4096;
344} 362}
345 363
364int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
365 struct mthca_user_db_table *db_tab, int index, u64 uaddr)
366{
367 int ret = 0;
368 u8 status;
369 int i;
370
371 if (!mthca_is_memfree(dev))
372 return 0;
373
374 if (index < 0 || index > dev->uar_table.uarc_size / 8)
375 return -EINVAL;
376
377 down(&db_tab->mutex);
378
379 i = index / MTHCA_DB_REC_PER_PAGE;
380
381 if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) ||
382 (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) ||
383 (uaddr & 4095)) {
384 ret = -EINVAL;
385 goto out;
386 }
387
388 if (db_tab->page[i].refcount) {
389 ++db_tab->page[i].refcount;
390 goto out;
391 }
392
393 ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,
394 &db_tab->page[i].mem.page, NULL);
395 if (ret < 0)
396 goto out;
397
398 db_tab->page[i].mem.length = 4096;
399 db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK;
400
401 ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
402 if (ret < 0) {
403 put_page(db_tab->page[i].mem.page);
404 goto out;
405 }
406
407 ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
408 mthca_uarc_virt(dev, uar, i), &status);
409 if (!ret && status)
410 ret = -EINVAL;
411 if (ret) {
412 pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
413 put_page(db_tab->page[i].mem.page);
414 goto out;
415 }
416
417 db_tab->page[i].uvirt = uaddr;
418 db_tab->page[i].refcount = 1;
419
420out:
421 up(&db_tab->mutex);
422 return ret;
423}
424
425void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
426 struct mthca_user_db_table *db_tab, int index)
427{
428 if (!mthca_is_memfree(dev))
429 return;
430
431 /*
432 * To make our bookkeeping simpler, we don't unmap DB
433 * pages until we clean up the whole db table.
434 */
435
436 down(&db_tab->mutex);
437
438 --db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount;
439
440 up(&db_tab->mutex);
441}
442
443struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)
444{
445 struct mthca_user_db_table *db_tab;
446 int npages;
447 int i;
448
449 if (!mthca_is_memfree(dev))
450 return NULL;
451
452 npages = dev->uar_table.uarc_size / 4096;
453 db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL);
454 if (!db_tab)
455 return ERR_PTR(-ENOMEM);
456
457 init_MUTEX(&db_tab->mutex);
458 for (i = 0; i < npages; ++i) {
459 db_tab->page[i].refcount = 0;
460 db_tab->page[i].uvirt = 0;
461 }
462
463 return db_tab;
464}
465
466void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
467 struct mthca_user_db_table *db_tab)
468{
469 int i;
470 u8 status;
471
472 if (!mthca_is_memfree(dev))
473 return;
474
475 for (i = 0; i < dev->uar_table.uarc_size / 4096; ++i) {
476 if (db_tab->page[i].uvirt) {
477 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);
478 pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
479 put_page(db_tab->page[i].mem.page);
480 }
481 }
482}
483
346int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db) 484int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db)
347{ 485{
348 int group; 486 int group;
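[editor's note: mthca_map_user_db() is the standard pin-and-map sequence for exposing one page of user memory to the HCA: pin it with get_user_pages(), describe it with a single-entry scatterlist, map it for DMA with pci_map_sg(), then point the UAR context memory at the bus address via MAP_ICM_page. Doorbell records are 8 bytes, so with 4 KB pages MTHCA_DB_REC_PER_PAGE is 512 and the index-to-page math is simply:

    i = index / MTHCA_DB_REC_PER_PAGE;      /* 512 records per user DB page */

Later doorbells that land on an already-mapped page only bump page[i].refcount, and mthca_unmap_user_db() only drops it; as the comment in the code says, the actual unpinning is deferred to mthca_cleanup_user_db_tab().]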
@@ -399,7 +537,8 @@ int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db)
399 } 537 }
400 memset(page->db_rec, 0, 4096); 538 memset(page->db_rec, 0, 4096);
401 539
402 ret = mthca_MAP_ICM_page(dev, page->mapping, mthca_uarc_virt(dev, i), &status); 540 ret = mthca_MAP_ICM_page(dev, page->mapping,
541 mthca_uarc_virt(dev, &dev->driver_uar, i), &status);
403 if (!ret && status) 542 if (!ret && status)
404 ret = -EINVAL; 543 ret = -EINVAL;
405 if (ret) { 544 if (ret) {
@@ -453,7 +592,7 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
453 592
454 if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) && 593 if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
455 i >= dev->db_tab->max_group1 - 1) { 594 i >= dev->db_tab->max_group1 - 1) {
456 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status); 595 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
457 596
458 dma_free_coherent(&dev->pdev->dev, 4096, 597 dma_free_coherent(&dev->pdev->dev, 4096,
459 page->db_rec, page->mapping); 598 page->db_rec, page->mapping);
@@ -522,7 +661,7 @@ void mthca_cleanup_db_tab(struct mthca_dev *dev)
522 if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE)) 661 if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
523 mthca_warn(dev, "Kernel UARC page %d not empty\n", i); 662 mthca_warn(dev, "Kernel UARC page %d not empty\n", i);
524 663
525 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status); 664 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
526 665
527 dma_free_coherent(&dev->pdev->dev, 4096, 666 dma_free_coherent(&dev->pdev->dev, 4096,
528 dev->db_tab->page[i].db_rec, 667 dev->db_tab->page[i].db_rec,
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index fe7be2a6bc4a..4761d844cb5f 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -148,7 +149,7 @@ struct mthca_db_table {
148 struct semaphore mutex; 149 struct semaphore mutex;
149}; 150};
150 151
151enum { 152enum mthca_db_type {
152 MTHCA_DB_TYPE_INVALID = 0x0, 153 MTHCA_DB_TYPE_INVALID = 0x0,
153 MTHCA_DB_TYPE_CQ_SET_CI = 0x1, 154 MTHCA_DB_TYPE_CQ_SET_CI = 0x1,
154 MTHCA_DB_TYPE_CQ_ARM = 0x2, 155 MTHCA_DB_TYPE_CQ_ARM = 0x2,
@@ -158,6 +159,17 @@ enum {
158 MTHCA_DB_TYPE_GROUP_SEP = 0x7 159 MTHCA_DB_TYPE_GROUP_SEP = 0x7
159}; 160};
160 161
162struct mthca_user_db_table;
163struct mthca_uar;
164
165int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
166 struct mthca_user_db_table *db_tab, int index, u64 uaddr);
167void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
168 struct mthca_user_db_table *db_tab, int index);
169struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev);
170void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
171 struct mthca_user_db_table *db_tab);
172
161int mthca_init_db_tab(struct mthca_dev *dev); 173int mthca_init_db_tab(struct mthca_dev *dev);
162void mthca_cleanup_db_tab(struct mthca_dev *dev); 174void mthca_cleanup_db_tab(struct mthca_dev *dev);
163int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db); 175int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db);
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 8960fc2306be..cbe50feaf680 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -40,6 +40,12 @@
40#include "mthca_cmd.h" 40#include "mthca_cmd.h"
41#include "mthca_memfree.h" 41#include "mthca_memfree.h"
42 42
43struct mthca_mtt {
44 struct mthca_buddy *buddy;
45 int order;
46 u32 first_seg;
47};
48
43/* 49/*
44 * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. 50 * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
45 */ 51 */
@@ -173,8 +179,8 @@ static void __devexit mthca_buddy_cleanup(struct mthca_buddy *buddy)
173 kfree(buddy->bits); 179 kfree(buddy->bits);
174} 180}
175 181
176static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order, 182static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
177 struct mthca_buddy *buddy) 183 struct mthca_buddy *buddy)
178{ 184{
179 u32 seg = mthca_buddy_alloc(buddy, order); 185 u32 seg = mthca_buddy_alloc(buddy, order);
180 186
@@ -191,14 +197,102 @@ static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order,
191 return seg; 197 return seg;
192} 198}
193 199
194static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order, 200static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
195 struct mthca_buddy* buddy) 201 struct mthca_buddy *buddy)
196{ 202{
197 mthca_buddy_free(buddy, seg, order); 203 struct mthca_mtt *mtt;
204 int i;
198 205
199 if (mthca_is_memfree(dev)) 206 if (size <= 0)
200 mthca_table_put_range(dev, dev->mr_table.mtt_table, seg, 207 return ERR_PTR(-EINVAL);
201 seg + (1 << order) - 1); 208
209 mtt = kmalloc(sizeof *mtt, GFP_KERNEL);
210 if (!mtt)
211 return ERR_PTR(-ENOMEM);
212
213 mtt->buddy = buddy;
214 mtt->order = 0;
215 for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1)
216 ++mtt->order;
217
218 mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
219 if (mtt->first_seg == -1) {
220 kfree(mtt);
221 return ERR_PTR(-ENOMEM);
222 }
223
224 return mtt;
225}
226
227struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size)
228{
229 return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy);
230}
231
232void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt)
233{
234 if (!mtt)
235 return;
236
237 mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order);
238
239 mthca_table_put_range(dev, dev->mr_table.mtt_table,
240 mtt->first_seg,
241 mtt->first_seg + (1 << mtt->order) - 1);
242
243 kfree(mtt);
244}
245
246int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
247 int start_index, u64 *buffer_list, int list_len)
248{
249 struct mthca_mailbox *mailbox;
250 u64 *mtt_entry;
251 int err = 0;
252 u8 status;
253 int i;
254
255 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
256 if (IS_ERR(mailbox))
257 return PTR_ERR(mailbox);
258 mtt_entry = mailbox->buf;
259
260 while (list_len > 0) {
261 mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
262 mtt->first_seg * MTHCA_MTT_SEG_SIZE +
263 start_index * 8);
264 mtt_entry[1] = 0;
265 for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
266 mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
267 MTHCA_MTT_FLAG_PRESENT);
268
269 /*
270 * If we have an odd number of entries to write, add
271 * one more dummy entry for firmware efficiency.
272 */
273 if (i & 1)
274 mtt_entry[i + 2] = 0;
275
276 err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status);
277 if (err) {
278 mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
279 goto out;
280 }
281 if (status) {
282 mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
283 status);
284 err = -EINVAL;
285 goto out;
286 }
287
288 list_len -= i;
289 start_index += i;
290 buffer_list += i;
291 }
292
293out:
294 mthca_free_mailbox(dev, mailbox);
295 return err;
202} 296}
203 297
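mthca_write_mtt() above is the other half of the new interface: it walks the caller's buffer list in mailbox-sized chunks, keeping the first two 64-bit slots of each mailbox for the target MTT address and a reserved field, and padding each chunk to an even entry count. A minimal model of the chunking, arithmetic only, assuming MTHCA_MAILBOX_SIZE is 4096:

#include <stdio.h>

#define MAILBOX_QWORDS (4096 / 8)       /* assumed MTHCA_MAILBOX_SIZE / 8 */

static void write_mtt_chunks(int list_len)
{
        int start_index = 0;

        while (list_len > 0) {
                int i = list_len < MAILBOX_QWORDS - 2 ? list_len
                                                      : MAILBOX_QWORDS - 2;

                /* (i + 1) & ~1 pads an odd count with one dummy entry */
                printf("WRITE_MTT at index %d: %d entries (%d sent)\n",
                       start_index, i, (i + 1) & ~1);
                list_len -= i;
                start_index += i;
        }
}

int main(void)
{
        write_mtt_chunks(1000);         /* two commands: 510 + 490 entries */
        return 0;
}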
204static inline u32 tavor_hw_index_to_key(u32 ind) 298static inline u32 tavor_hw_index_to_key(u32 ind)
@@ -237,91 +331,18 @@ static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
237 return tavor_key_to_hw_index(key); 331 return tavor_key_to_hw_index(key);
238} 332}
239 333
240int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, 334int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
241 u32 access, struct mthca_mr *mr) 335 u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
242{ 336{
243 void *mailbox = NULL; 337 struct mthca_mailbox *mailbox;
244 struct mthca_mpt_entry *mpt_entry; 338 struct mthca_mpt_entry *mpt_entry;
245 u32 key; 339 u32 key;
340 int i;
246 int err; 341 int err;
247 u8 status; 342 u8 status;
248 343
249 might_sleep(); 344 might_sleep();
250 345
251 mr->order = -1;
252 key = mthca_alloc(&dev->mr_table.mpt_alloc);
253 if (key == -1)
254 return -ENOMEM;
255 mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
256
257 if (mthca_is_memfree(dev)) {
258 err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
259 if (err)
260 goto err_out_mpt_free;
261 }
262
263 mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
264 GFP_KERNEL);
265 if (!mailbox) {
266 err = -ENOMEM;
267 goto err_out_table;
268 }
269 mpt_entry = MAILBOX_ALIGN(mailbox);
270
271 mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
272 MTHCA_MPT_FLAG_MIO |
273 MTHCA_MPT_FLAG_PHYSICAL |
274 MTHCA_MPT_FLAG_REGION |
275 access);
276 mpt_entry->page_size = 0;
277 mpt_entry->key = cpu_to_be32(key);
278 mpt_entry->pd = cpu_to_be32(pd);
279 mpt_entry->start = 0;
280 mpt_entry->length = ~0ULL;
281
282 memset(&mpt_entry->lkey, 0,
283 sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
284
285 err = mthca_SW2HW_MPT(dev, mpt_entry,
286 key & (dev->limits.num_mpts - 1),
287 &status);
288 if (err) {
289 mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
290 goto err_out_table;
291 } else if (status) {
292 mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
293 status);
294 err = -EINVAL;
295 goto err_out_table;
296 }
297
298 kfree(mailbox);
299 return err;
300
301err_out_table:
302 if (mthca_is_memfree(dev))
303 mthca_table_put(dev, dev->mr_table.mpt_table, key);
304
305err_out_mpt_free:
306 mthca_free(&dev->mr_table.mpt_alloc, key);
307 kfree(mailbox);
308 return err;
309}
310
311int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
312 u64 *buffer_list, int buffer_size_shift,
313 int list_len, u64 iova, u64 total_size,
314 u32 access, struct mthca_mr *mr)
315{
316 void *mailbox;
317 u64 *mtt_entry;
318 struct mthca_mpt_entry *mpt_entry;
319 u32 key;
320 int err = -ENOMEM;
321 u8 status;
322 int i;
323
324 might_sleep();
325 WARN_ON(buffer_size_shift >= 32); 346 WARN_ON(buffer_size_shift >= 32);
326 347
327 key = mthca_alloc(&dev->mr_table.mpt_alloc); 348 key = mthca_alloc(&dev->mr_table.mpt_alloc);
@@ -335,75 +356,33 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
335 goto err_out_mpt_free; 356 goto err_out_mpt_free;
336 } 357 }
337 358
338 for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0; 359 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
339 i < list_len; 360 if (IS_ERR(mailbox)) {
340 i <<= 1, ++mr->order) 361 err = PTR_ERR(mailbox);
341 ; /* nothing */
342
343 mr->first_seg = mthca_alloc_mtt(dev, mr->order,
344 &dev->mr_table.mtt_buddy);
345 if (mr->first_seg == -1)
346 goto err_out_table; 362 goto err_out_table;
347
348 /*
349 * If list_len is odd, we add one more dummy entry for
350 * firmware efficiency.
351 */
352 mailbox = kmalloc(max(sizeof *mpt_entry,
353 (size_t) 8 * (list_len + (list_len & 1) + 2)) +
354 MTHCA_CMD_MAILBOX_EXTRA,
355 GFP_KERNEL);
356 if (!mailbox)
357 goto err_out_free_mtt;
358
359 mtt_entry = MAILBOX_ALIGN(mailbox);
360
361 mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
362 mr->first_seg * MTHCA_MTT_SEG_SIZE);
363 mtt_entry[1] = 0;
364 for (i = 0; i < list_len; ++i)
365 mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
366 MTHCA_MTT_FLAG_PRESENT);
367 if (list_len & 1) {
368 mtt_entry[i + 2] = 0;
369 ++list_len;
370 }
371
372 if (0) {
373 mthca_dbg(dev, "Dumping MPT entry\n");
374 for (i = 0; i < list_len + 2; ++i)
375 printk(KERN_ERR "[%2d] %016llx\n",
376 i, (unsigned long long) be64_to_cpu(mtt_entry[i]));
377 }
378
379 err = mthca_WRITE_MTT(dev, mtt_entry, list_len, &status);
380 if (err) {
381 mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
382 goto err_out_mailbox_free;
383 }
384 if (status) {
385 mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
386 status);
387 err = -EINVAL;
388 goto err_out_mailbox_free;
389 } 363 }
390 364 mpt_entry = mailbox->buf;
391 mpt_entry = MAILBOX_ALIGN(mailbox);
392 365
393 mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | 366 mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
394 MTHCA_MPT_FLAG_MIO | 367 MTHCA_MPT_FLAG_MIO |
395 MTHCA_MPT_FLAG_REGION | 368 MTHCA_MPT_FLAG_REGION |
396 access); 369 access);
370 if (!mr->mtt)
371 mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL);
397 372
398 mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12); 373 mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
399 mpt_entry->key = cpu_to_be32(key); 374 mpt_entry->key = cpu_to_be32(key);
400 mpt_entry->pd = cpu_to_be32(pd); 375 mpt_entry->pd = cpu_to_be32(pd);
401 mpt_entry->start = cpu_to_be64(iova); 376 mpt_entry->start = cpu_to_be64(iova);
402 mpt_entry->length = cpu_to_be64(total_size); 377 mpt_entry->length = cpu_to_be64(total_size);
378
403 memset(&mpt_entry->lkey, 0, 379 memset(&mpt_entry->lkey, 0,
404 sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey)); 380 sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
405 mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + 381
406 mr->first_seg * MTHCA_MTT_SEG_SIZE); 382 if (mr->mtt)
383 mpt_entry->mtt_seg =
384 cpu_to_be64(dev->mr_table.mtt_base +
385 mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE);
407 386
408 if (0) { 387 if (0) {
409 mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); 388 mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
@@ -416,45 +395,70 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
416 } 395 }
417 } 396 }
418 397
419 err = mthca_SW2HW_MPT(dev, mpt_entry, 398 err = mthca_SW2HW_MPT(dev, mailbox,
420 key & (dev->limits.num_mpts - 1), 399 key & (dev->limits.num_mpts - 1),
421 &status); 400 &status);
422 if (err) 401 if (err) {
423 mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); 402 mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
424 else if (status) { 403 goto err_out_mailbox;
404 } else if (status) {
425 mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n", 405 mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
426 status); 406 status);
427 err = -EINVAL; 407 err = -EINVAL;
408 goto err_out_mailbox;
428 } 409 }
429 410
430 kfree(mailbox); 411 mthca_free_mailbox(dev, mailbox);
431 return err; 412 return err;
432 413
433err_out_mailbox_free: 414err_out_mailbox:
434 kfree(mailbox); 415 mthca_free_mailbox(dev, mailbox);
435
436err_out_free_mtt:
437 mthca_free_mtt(dev, mr->first_seg, mr->order, &dev->mr_table.mtt_buddy);
438 416
439err_out_table: 417err_out_table:
440 if (mthca_is_memfree(dev)) 418 mthca_table_put(dev, dev->mr_table.mpt_table, key);
441 mthca_table_put(dev, dev->mr_table.mpt_table, key);
442 419
443err_out_mpt_free: 420err_out_mpt_free:
444 mthca_free(&dev->mr_table.mpt_alloc, key); 421 mthca_free(&dev->mr_table.mpt_alloc, key);
445 return err; 422 return err;
446} 423}
447 424
448/* Free mr or fmr */ 425int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
449static void mthca_free_region(struct mthca_dev *dev, u32 lkey, int order, 426 u32 access, struct mthca_mr *mr)
450 u32 first_seg, struct mthca_buddy *buddy)
451{ 427{
452 if (order >= 0) 428 mr->mtt = NULL;
453 mthca_free_mtt(dev, first_seg, order, buddy); 429 return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr);
430}
454 431
455 if (mthca_is_memfree(dev)) 432int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
456 mthca_table_put(dev, dev->mr_table.mpt_table, 433 u64 *buffer_list, int buffer_size_shift,
457 arbel_key_to_hw_index(lkey)); 434 int list_len, u64 iova, u64 total_size,
435 u32 access, struct mthca_mr *mr)
436{
437 int err;
438
439 mr->mtt = mthca_alloc_mtt(dev, list_len);
440 if (IS_ERR(mr->mtt))
441 return PTR_ERR(mr->mtt);
442
443 err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len);
444 if (err) {
445 mthca_free_mtt(dev, mr->mtt);
446 return err;
447 }
448
449 err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova,
450 total_size, access, mr);
451 if (err)
452 mthca_free_mtt(dev, mr->mtt);
453
454 return err;
455}
456
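With the refactoring above, registering a physical region becomes: allocate an MTT, write the translations, then write the MPT. A caller-side sketch in kernel style (hypothetical helper; dev, pd_num, the DMA addresses and the mr storage are assumed to come from surrounding driver code):

/* Illustrative only: register two 4K pages through the new path. */
static int example_reg_two_pages(struct mthca_dev *dev, u32 pd_num,
                                 u64 dma0, u64 dma1, struct mthca_mr *mr)
{
        u64 pages[2] = { dma0, dma1 };

        /* buffer_size_shift 12 -> 4K pages; iova 0, 8K total */
        return mthca_mr_alloc_phys(dev, pd_num, pages, 12, 2, 0, 2 * 4096,
                                   MTHCA_MPT_FLAG_LOCAL_READ |
                                   MTHCA_MPT_FLAG_LOCAL_WRITE, mr);
}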
457/* Free mr or fmr */
458static void mthca_free_region(struct mthca_dev *dev, u32 lkey)
459{
460 mthca_table_put(dev, dev->mr_table.mpt_table,
461 arbel_key_to_hw_index(lkey));
458 462
459 mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey)); 463 mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
460} 464}
@@ -476,15 +480,15 @@ void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
476 mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n", 480 mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
477 status); 481 status);
478 482
479 mthca_free_region(dev, mr->ibmr.lkey, mr->order, mr->first_seg, 483 mthca_free_region(dev, mr->ibmr.lkey);
480 &dev->mr_table.mtt_buddy); 484 mthca_free_mtt(dev, mr->mtt);
481} 485}
482 486
483int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, 487int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
484 u32 access, struct mthca_fmr *mr) 488 u32 access, struct mthca_fmr *mr)
485{ 489{
486 struct mthca_mpt_entry *mpt_entry; 490 struct mthca_mpt_entry *mpt_entry;
487 void *mailbox; 491 struct mthca_mailbox *mailbox;
488 u64 mtt_seg; 492 u64 mtt_seg;
489 u32 key, idx; 493 u32 key, idx;
490 u8 status; 494 u8 status;
@@ -522,31 +526,24 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
522 mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base + 526 mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
523 sizeof *(mr->mem.tavor.mpt) * idx; 527 sizeof *(mr->mem.tavor.mpt) * idx;
524 528
525 for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0; 529 mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
526 i < list_len; 530 if (IS_ERR(mr->mtt))
527 i <<= 1, ++mr->order)
528 ; /* nothing */
529
530 mr->first_seg = mthca_alloc_mtt(dev, mr->order,
531 dev->mr_table.fmr_mtt_buddy);
532 if (mr->first_seg == -1)
533 goto err_out_table; 531 goto err_out_table;
534 532
535 mtt_seg = mr->first_seg * MTHCA_MTT_SEG_SIZE; 533 mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE;
536 534
537 if (mthca_is_memfree(dev)) { 535 if (mthca_is_memfree(dev)) {
538 mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table, 536 mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
539 mr->first_seg); 537 mr->mtt->first_seg);
540 BUG_ON(!mr->mem.arbel.mtts); 538 BUG_ON(!mr->mem.arbel.mtts);
541 } else 539 } else
542 mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg; 540 mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
543 541
544 mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA, 542 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
545 GFP_KERNEL); 543 if (IS_ERR(mailbox))
546 if (!mailbox)
547 goto err_out_free_mtt; 544 goto err_out_free_mtt;
548 545
549 mpt_entry = MAILBOX_ALIGN(mailbox); 546 mpt_entry = mailbox->buf;
550 547
551 mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | 548 mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
552 MTHCA_MPT_FLAG_MIO | 549 MTHCA_MPT_FLAG_MIO |
@@ -571,7 +568,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
571 } 568 }
572 } 569 }
573 570
574 err = mthca_SW2HW_MPT(dev, mpt_entry, 571 err = mthca_SW2HW_MPT(dev, mailbox,
575 key & (dev->limits.num_mpts - 1), 572 key & (dev->limits.num_mpts - 1),
576 &status); 573 &status);
577 if (err) { 574 if (err) {
@@ -585,19 +582,17 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
585 goto err_out_mailbox_free; 582 goto err_out_mailbox_free;
586 } 583 }
587 584
588 kfree(mailbox); 585 mthca_free_mailbox(dev, mailbox);
589 return 0; 586 return 0;
590 587
591err_out_mailbox_free: 588err_out_mailbox_free:
592 kfree(mailbox); 589 mthca_free_mailbox(dev, mailbox);
593 590
594err_out_free_mtt: 591err_out_free_mtt:
595 mthca_free_mtt(dev, mr->first_seg, mr->order, 592 mthca_free_mtt(dev, mr->mtt);
596 dev->mr_table.fmr_mtt_buddy);
597 593
598err_out_table: 594err_out_table:
599 if (mthca_is_memfree(dev)) 595 mthca_table_put(dev, dev->mr_table.mpt_table, key);
600 mthca_table_put(dev, dev->mr_table.mpt_table, key);
601 596
602err_out_mpt_free: 597err_out_mpt_free:
603 mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey); 598 mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
@@ -609,8 +604,9 @@ int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
609 if (fmr->maps) 604 if (fmr->maps)
610 return -EBUSY; 605 return -EBUSY;
611 606
612 mthca_free_region(dev, fmr->ibmr.lkey, fmr->order, fmr->first_seg, 607 mthca_free_region(dev, fmr->ibmr.lkey);
613 dev->mr_table.fmr_mtt_buddy); 608 mthca_free_mtt(dev, fmr->mtt);
609
614 return 0; 610 return 0;
615} 611}
616 612
@@ -826,7 +822,8 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
826 if (dev->limits.reserved_mtts) { 822 if (dev->limits.reserved_mtts) {
827 i = fls(dev->limits.reserved_mtts - 1); 823 i = fls(dev->limits.reserved_mtts - 1);
828 824
829 if (mthca_alloc_mtt(dev, i, dev->mr_table.fmr_mtt_buddy) == -1) { 825 if (mthca_alloc_mtt_range(dev, i,
826 dev->mr_table.fmr_mtt_buddy) == -1) {
830 mthca_warn(dev, "MTT table of order %d is too small.\n", 827 mthca_warn(dev, "MTT table of order %d is too small.\n",
831 dev->mr_table.fmr_mtt_buddy->max_order); 828 dev->mr_table.fmr_mtt_buddy->max_order);
832 err = -ENOMEM; 829 err = -ENOMEM;
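The fls() call in the hunk above picks the smallest buddy order that covers all reserved MTT segments. A standalone check of that identity, with fls reimplemented since it is a kernel helper:

#include <stdio.h>

/* Stand-in for the kernel's fls(): 1-based index of the highest set bit. */
static int fls_(unsigned int x)
{
        int r = 0;

        while (x) {
                ++r;
                x >>= 1;
        }
        return r;
}

int main(void)
{
        int reserved = 100;
        int order = fls_(reserved - 1); /* smallest order with 1 << order >= n */

        printf("order %d (%d segments) covers %d reserved MTTs\n",
               order, 1 << order, reserved);
        return 0;
}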
diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c
index ea66847e4ea3..c2c899844e98 100644
--- a/drivers/infiniband/hw/mthca/mthca_pd.c
+++ b/drivers/infiniband/hw/mthca/mthca_pd.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -37,23 +38,27 @@
37 38
38#include "mthca_dev.h" 39#include "mthca_dev.h"
39 40
40int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd) 41int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd)
41{ 42{
42 int err; 43 int err = 0;
43 44
44 might_sleep(); 45 might_sleep();
45 46
47 pd->privileged = privileged;
48
46 atomic_set(&pd->sqp_count, 0); 49 atomic_set(&pd->sqp_count, 0);
47 pd->pd_num = mthca_alloc(&dev->pd_table.alloc); 50 pd->pd_num = mthca_alloc(&dev->pd_table.alloc);
48 if (pd->pd_num == -1) 51 if (pd->pd_num == -1)
49 return -ENOMEM; 52 return -ENOMEM;
50 53
51 err = mthca_mr_alloc_notrans(dev, pd->pd_num, 54 if (privileged) {
52 MTHCA_MPT_FLAG_LOCAL_READ | 55 err = mthca_mr_alloc_notrans(dev, pd->pd_num,
53 MTHCA_MPT_FLAG_LOCAL_WRITE, 56 MTHCA_MPT_FLAG_LOCAL_READ |
54 &pd->ntmr); 57 MTHCA_MPT_FLAG_LOCAL_WRITE,
55 if (err) 58 &pd->ntmr);
56 mthca_free(&dev->pd_table.alloc, pd->pd_num); 59 if (err)
60 mthca_free(&dev->pd_table.alloc, pd->pd_num);
61 }
57 62
58 return err; 63 return err;
59} 64}
@@ -61,7 +66,8 @@ int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd)
61void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd) 66void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd)
62{ 67{
63 might_sleep(); 68 might_sleep();
64 mthca_free_mr(dev, &pd->ntmr); 69 if (pd->privileged)
70 mthca_free_mr(dev, &pd->ntmr);
65 mthca_free(&dev->pd_table.alloc, pd->pd_num); 71 mthca_free(&dev->pd_table.alloc, pd->pd_num);
66} 72}
67 73
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 159f4e6c312d..81919a7b4935 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1,5 +1,7 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 * 5 *
4 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 7 * licenses. You may choose to be licensed under the terms of the GNU
@@ -33,9 +35,12 @@
33 */ 35 */
34 36
35#include <ib_smi.h> 37#include <ib_smi.h>
38#include <linux/mm.h>
36 39
37#include "mthca_dev.h" 40#include "mthca_dev.h"
38#include "mthca_cmd.h" 41#include "mthca_cmd.h"
42#include "mthca_user.h"
43#include "mthca_memfree.h"
39 44
40static int mthca_query_device(struct ib_device *ibdev, 45static int mthca_query_device(struct ib_device *ibdev,
41 struct ib_device_attr *props) 46 struct ib_device_attr *props)
@@ -52,7 +57,7 @@ static int mthca_query_device(struct ib_device *ibdev,
52 if (!in_mad || !out_mad) 57 if (!in_mad || !out_mad)
53 goto out; 58 goto out;
54 59
55 memset(props, 0, sizeof props); 60 memset(props, 0, sizeof *props);
56 61
57 props->fw_ver = mdev->fw_ver; 62 props->fw_ver = mdev->fw_ver;
58 63
@@ -283,7 +288,78 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port,
283 return err; 288 return err;
284} 289}
285 290
286static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev) 291static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
292 struct ib_udata *udata)
293{
294 struct mthca_alloc_ucontext_resp uresp;
295 struct mthca_ucontext *context;
296 int err;
297
298 memset(&uresp, 0, sizeof uresp);
299
300 uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
301 if (mthca_is_memfree(to_mdev(ibdev)))
302 uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
303 else
304 uresp.uarc_size = 0;
305
306 context = kmalloc(sizeof *context, GFP_KERNEL);
307 if (!context)
308 return ERR_PTR(-ENOMEM);
309
310 err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
311 if (err) {
312 kfree(context);
313 return ERR_PTR(err);
314 }
315
316 context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
317 if (IS_ERR(context->db_tab)) {
318 err = PTR_ERR(context->db_tab);
319 mthca_uar_free(to_mdev(ibdev), &context->uar);
320 kfree(context);
321 return ERR_PTR(err);
322 }
323
324 if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
325 mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
326 mthca_uar_free(to_mdev(ibdev), &context->uar);
327 kfree(context);
328 return ERR_PTR(-EFAULT);
329 }
330
331 return &context->ibucontext;
332}
333
334static int mthca_dealloc_ucontext(struct ib_ucontext *context)
335{
336 mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
337 to_mucontext(context)->db_tab);
338 mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
339 kfree(to_mucontext(context));
340
341 return 0;
342}
343
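For reference, the response copied back by mthca_alloc_ucontext() carries just two values. The authoritative definition is struct mthca_alloc_ucontext_resp in mthca_user.h, so treat the layout below as an assumption for illustration:

struct mthca_alloc_ucontext_resp_sketch {
        __u32 qp_tab_size;      /* limits.num_qps, for sizing userspace QP tables */
        __u32 uarc_size;        /* UAR context bytes on mem-free HCAs, else 0 */
};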
344static int mthca_mmap_uar(struct ib_ucontext *context,
345 struct vm_area_struct *vma)
346{
347 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
348 return -EINVAL;
349
350 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
351
352 if (io_remap_pfn_range(vma, vma->vm_start,
353 to_mucontext(context)->uar.pfn,
354 PAGE_SIZE, vma->vm_page_prot))
355 return -EAGAIN;
356
357 return 0;
358}
359
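mthca_mmap_uar() exports exactly one noncached page at offset 0 of the verbs device file. The matching userspace call would look roughly like this (the fd is assumed to be an open uverbs context; this is a sketch, not the libmthca source):

#include <sys/mman.h>
#include <unistd.h>

static void *map_uar(int fd)
{
        long page_size = sysconf(_SC_PAGESIZE);

        /* Anything other than exactly one page gets -EINVAL from the kernel */
        return mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd, 0);
}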
360static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
361 struct ib_ucontext *context,
362 struct ib_udata *udata)
287{ 363{
288 struct mthca_pd *pd; 364 struct mthca_pd *pd;
289 int err; 365 int err;
@@ -292,12 +368,20 @@ static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev)
292 if (!pd) 368 if (!pd)
293 return ERR_PTR(-ENOMEM); 369 return ERR_PTR(-ENOMEM);
294 370
295 err = mthca_pd_alloc(to_mdev(ibdev), pd); 371 err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
296 if (err) { 372 if (err) {
297 kfree(pd); 373 kfree(pd);
298 return ERR_PTR(err); 374 return ERR_PTR(err);
299 } 375 }
300 376
377 if (context) {
378 if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
379 mthca_pd_free(to_mdev(ibdev), pd);
380 kfree(pd);
381 return ERR_PTR(-EFAULT);
382 }
383 }
384
301 return &pd->ibpd; 385 return &pd->ibpd;
302} 386}
303 387
@@ -337,8 +421,10 @@ static int mthca_ah_destroy(struct ib_ah *ah)
337} 421}
338 422
339static struct ib_qp *mthca_create_qp(struct ib_pd *pd, 423static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
340 struct ib_qp_init_attr *init_attr) 424 struct ib_qp_init_attr *init_attr,
425 struct ib_udata *udata)
341{ 426{
427 struct mthca_create_qp ucmd;
342 struct mthca_qp *qp; 428 struct mthca_qp *qp;
343 int err; 429 int err;
344 430
@@ -347,41 +433,82 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
347 case IB_QPT_UC: 433 case IB_QPT_UC:
348 case IB_QPT_UD: 434 case IB_QPT_UD:
349 { 435 {
436 struct mthca_ucontext *context;
437
350 qp = kmalloc(sizeof *qp, GFP_KERNEL); 438 qp = kmalloc(sizeof *qp, GFP_KERNEL);
351 if (!qp) 439 if (!qp)
352 return ERR_PTR(-ENOMEM); 440 return ERR_PTR(-ENOMEM);
353 441
354 qp->sq.max = init_attr->cap.max_send_wr; 442 if (pd->uobject) {
355 qp->rq.max = init_attr->cap.max_recv_wr; 443 context = to_mucontext(pd->uobject->context);
356 qp->sq.max_gs = init_attr->cap.max_send_sge; 444
357 qp->rq.max_gs = init_attr->cap.max_recv_sge; 445 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
446 return ERR_PTR(-EFAULT);
447
448 err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
449 context->db_tab,
450 ucmd.sq_db_index, ucmd.sq_db_page);
451 if (err) {
452 kfree(qp);
453 return ERR_PTR(err);
454 }
455
456 err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
457 context->db_tab,
458 ucmd.rq_db_index, ucmd.rq_db_page);
459 if (err) {
460 mthca_unmap_user_db(to_mdev(pd->device),
461 &context->uar,
462 context->db_tab,
463 ucmd.sq_db_index);
464 kfree(qp);
465 return ERR_PTR(err);
466 }
467
468 qp->mr.ibmr.lkey = ucmd.lkey;
469 qp->sq.db_index = ucmd.sq_db_index;
470 qp->rq.db_index = ucmd.rq_db_index;
471 }
358 472
359 err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd), 473 err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
360 to_mcq(init_attr->send_cq), 474 to_mcq(init_attr->send_cq),
361 to_mcq(init_attr->recv_cq), 475 to_mcq(init_attr->recv_cq),
362 init_attr->qp_type, init_attr->sq_sig_type, 476 init_attr->qp_type, init_attr->sq_sig_type,
363 qp); 477 &init_attr->cap, qp);
478
479 if (err && pd->uobject) {
480 context = to_mucontext(pd->uobject->context);
481
482 mthca_unmap_user_db(to_mdev(pd->device),
483 &context->uar,
484 context->db_tab,
485 ucmd.sq_db_index);
486 mthca_unmap_user_db(to_mdev(pd->device),
487 &context->uar,
488 context->db_tab,
489 ucmd.rq_db_index);
490 }
491
364 qp->ibqp.qp_num = qp->qpn; 492 qp->ibqp.qp_num = qp->qpn;
365 break; 493 break;
366 } 494 }
367 case IB_QPT_SMI: 495 case IB_QPT_SMI:
368 case IB_QPT_GSI: 496 case IB_QPT_GSI:
369 { 497 {
498 /* Don't allow userspace to create special QPs */
499 if (pd->uobject)
500 return ERR_PTR(-EINVAL);
501
370 qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); 502 qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
371 if (!qp) 503 if (!qp)
372 return ERR_PTR(-ENOMEM); 504 return ERR_PTR(-ENOMEM);
373 505
374 qp->sq.max = init_attr->cap.max_send_wr;
375 qp->rq.max = init_attr->cap.max_recv_wr;
376 qp->sq.max_gs = init_attr->cap.max_send_sge;
377 qp->rq.max_gs = init_attr->cap.max_recv_sge;
378
379 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; 506 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
380 507
381 err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd), 508 err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
382 to_mcq(init_attr->send_cq), 509 to_mcq(init_attr->send_cq),
383 to_mcq(init_attr->recv_cq), 510 to_mcq(init_attr->recv_cq),
384 init_attr->sq_sig_type, 511 init_attr->sq_sig_type, &init_attr->cap,
385 qp->ibqp.qp_num, init_attr->port_num, 512 qp->ibqp.qp_num, init_attr->port_num,
386 to_msqp(qp)); 513 to_msqp(qp));
387 break; 514 break;
@@ -396,42 +523,115 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
396 return ERR_PTR(err); 523 return ERR_PTR(err);
397 } 524 }
398 525
399 init_attr->cap.max_inline_data = 0; 526 init_attr->cap.max_inline_data = 0;
527 init_attr->cap.max_send_wr = qp->sq.max;
528 init_attr->cap.max_recv_wr = qp->rq.max;
529 init_attr->cap.max_send_sge = qp->sq.max_gs;
530 init_attr->cap.max_recv_sge = qp->rq.max_gs;
400 531
401 return &qp->ibqp; 532 return &qp->ibqp;
402} 533}
403 534
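The ucmd fields read by mthca_create_qp() above arrive through ib_copy_from_udata(). The real ABI struct lives in mthca_user.h; the sketch below only names the fields the code references, and its ordering and padding are an assumption:

struct mthca_create_qp_sketch {
        __u32 lkey;             /* MR covering the userspace WQE buffer */
        __u32 reserved;
        __u64 sq_db_page;       /* user VA of the send doorbell page */
        __u64 rq_db_page;       /* user VA of the receive doorbell page */
        __u32 sq_db_index;
        __u32 rq_db_index;
};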
404static int mthca_destroy_qp(struct ib_qp *qp) 535static int mthca_destroy_qp(struct ib_qp *qp)
405{ 536{
537 if (qp->uobject) {
538 mthca_unmap_user_db(to_mdev(qp->device),
539 &to_mucontext(qp->uobject->context)->uar,
540 to_mucontext(qp->uobject->context)->db_tab,
541 to_mqp(qp)->sq.db_index);
542 mthca_unmap_user_db(to_mdev(qp->device),
543 &to_mucontext(qp->uobject->context)->uar,
544 to_mucontext(qp->uobject->context)->db_tab,
545 to_mqp(qp)->rq.db_index);
546 }
406 mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); 547 mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
407 kfree(qp); 548 kfree(qp);
408 return 0; 549 return 0;
409} 550}
410 551
411static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries) 552static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
553 struct ib_ucontext *context,
554 struct ib_udata *udata)
412{ 555{
556 struct mthca_create_cq ucmd;
413 struct mthca_cq *cq; 557 struct mthca_cq *cq;
414 int nent; 558 int nent;
415 int err; 559 int err;
416 560
561 if (context) {
562 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
563 return ERR_PTR(-EFAULT);
564
565 err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
566 to_mucontext(context)->db_tab,
567 ucmd.set_db_index, ucmd.set_db_page);
568 if (err)
569 return ERR_PTR(err);
570
571 err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
572 to_mucontext(context)->db_tab,
573 ucmd.arm_db_index, ucmd.arm_db_page);
574 if (err)
575 goto err_unmap_set;
576 }
577
417 cq = kmalloc(sizeof *cq, GFP_KERNEL); 578 cq = kmalloc(sizeof *cq, GFP_KERNEL);
418 if (!cq) 579 if (!cq) {
419 return ERR_PTR(-ENOMEM); 580 err = -ENOMEM;
581 goto err_unmap_arm;
582 }
583
584 if (context) {
585 cq->mr.ibmr.lkey = ucmd.lkey;
586 cq->set_ci_db_index = ucmd.set_db_index;
587 cq->arm_db_index = ucmd.arm_db_index;
588 }
420 589
421 for (nent = 1; nent <= entries; nent <<= 1) 590 for (nent = 1; nent <= entries; nent <<= 1)
422 ; /* nothing */ 591 ; /* nothing */
423 592
424 err = mthca_init_cq(to_mdev(ibdev), nent, cq); 593 err = mthca_init_cq(to_mdev(ibdev), nent,
425 if (err) { 594 context ? to_mucontext(context) : NULL,
426 kfree(cq); 595 context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
427 cq = ERR_PTR(err); 596 cq);
597 if (err)
598 goto err_free;
599
600 if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
601 mthca_free_cq(to_mdev(ibdev), cq);
602 goto err_free;
428 } 603 }
429 604
430 return &cq->ibcq; 605 return &cq->ibcq;
606
607err_free:
608 kfree(cq);
609
610err_unmap_arm:
611 if (context)
612 mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
613 to_mucontext(context)->db_tab, ucmd.arm_db_index);
614
615err_unmap_set:
616 if (context)
617 mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
618 to_mucontext(context)->db_tab, ucmd.set_db_index);
619
620 return ERR_PTR(err);
431} 621}
432 622
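Note the CQ sizing loop above rounds to the next power of two strictly greater than the requested count: a request for 8 entries yields a 16-entry CQ. (One caveat visible in the hunk: on the ib_copy_to_udata() failure path, err was last set by a successful mthca_init_cq(), so it is still 0 when ERR_PTR(err) is returned.) The sizing loop in isolation:

#include <stdio.h>

int main(void)
{
        int entries = 8;
        int nent;

        for (nent = 1; nent <= entries; nent <<= 1)
                ; /* nothing */
        printf("%d requested -> CQ of %d entries\n", entries, nent);
        return 0;
}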
433static int mthca_destroy_cq(struct ib_cq *cq) 623static int mthca_destroy_cq(struct ib_cq *cq)
434{ 624{
625 if (cq->uobject) {
626 mthca_unmap_user_db(to_mdev(cq->device),
627 &to_mucontext(cq->uobject->context)->uar,
628 to_mucontext(cq->uobject->context)->db_tab,
629 to_mcq(cq)->arm_db_index);
630 mthca_unmap_user_db(to_mdev(cq->device),
631 &to_mucontext(cq->uobject->context)->uar,
632 to_mucontext(cq->uobject->context)->db_tab,
633 to_mcq(cq)->set_ci_db_index);
634 }
435 mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); 635 mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
436 kfree(cq); 636 kfree(cq);
437 637
@@ -558,6 +758,7 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
558 convert_access(acc), mr); 758 convert_access(acc), mr);
559 759
560 if (err) { 760 if (err) {
761 kfree(page_list);
561 kfree(mr); 762 kfree(mr);
562 return ERR_PTR(err); 763 return ERR_PTR(err);
563 } 764 }
@@ -566,6 +767,87 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
566 return &mr->ibmr; 767 return &mr->ibmr;
567} 768}
568 769
770static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
771 int acc, struct ib_udata *udata)
772{
773 struct mthca_dev *dev = to_mdev(pd->device);
774 struct ib_umem_chunk *chunk;
775 struct mthca_mr *mr;
776 u64 *pages;
777 int shift, n, len;
778 int i, j, k;
779 int err = 0;
780
781 shift = ffs(region->page_size) - 1;
782
783 mr = kmalloc(sizeof *mr, GFP_KERNEL);
784 if (!mr)
785 return ERR_PTR(-ENOMEM);
786
787 n = 0;
788 list_for_each_entry(chunk, &region->chunk_list, list)
789 n += chunk->nents;
790
791 mr->mtt = mthca_alloc_mtt(dev, n);
792 if (IS_ERR(mr->mtt)) {
793 err = PTR_ERR(mr->mtt);
794 goto err;
795 }
796
797 pages = (u64 *) __get_free_page(GFP_KERNEL);
798 if (!pages) {
799 err = -ENOMEM;
800 goto err_mtt;
801 }
802
803 i = n = 0;
804
805 list_for_each_entry(chunk, &region->chunk_list, list)
806 for (j = 0; j < chunk->nmap; ++j) {
807 len = sg_dma_len(&chunk->page_list[j]) >> shift;
808 for (k = 0; k < len; ++k) {
809 pages[i++] = sg_dma_address(&chunk->page_list[j]) +
810 region->page_size * k;
811 /*
812 * Be friendly to the WRITE_MTT command
813 * and leave two empty slots for the
814 * index and reserved fields of the
815 * mailbox.
816 */
817 if (i == PAGE_SIZE / sizeof (u64) - 2) {
818 err = mthca_write_mtt(dev, mr->mtt,
819 n, pages, i);
820 if (err)
821 goto mtt_done;
822 n += i;
823 i = 0;
824 }
825 }
826 }
827
828 if (i)
829 err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
830mtt_done:
831 free_page((unsigned long) pages);
832 if (err)
833 goto err_mtt;
834
835 err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base,
836 region->length, convert_access(acc), mr);
837
838 if (err)
839 goto err_mtt;
840
841 return &mr->ibmr;
842
843err_mtt:
844 mthca_free_mtt(dev, mr->mtt);
845
846err:
847 kfree(mr);
848 return ERR_PTR(err);
849}
850
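mthca_reg_user_mr() above batches translations into a single free page and flushes two slots short of full, matching the mailbox layout mthca_write_mtt() expects. Modeling just the batch count, with a 4K page size assumed:

#include <stdio.h>

int main(void)
{
        int total_pages = 1200, i = 0, flushes = 0, k;
        int limit = 4096 / 8 - 2;       /* 510: index + reserved slots kept free */

        for (k = 0; k < total_pages; ++k)
                if (++i == limit) {
                        ++flushes;      /* one mthca_write_mtt() flush */
                        i = 0;
                }
        if (i)
                ++flushes;              /* final partial batch */
        printf("%d pages -> %d WRITE_MTT batches\n", total_pages, flushes);
        return 0;
}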
569static int mthca_dereg_mr(struct ib_mr *mr) 851static int mthca_dereg_mr(struct ib_mr *mr)
570{ 852{
571 struct mthca_mr *mmr = to_mmr(mr); 853 struct mthca_mr *mmr = to_mmr(mr);
@@ -690,6 +972,8 @@ int mthca_register_device(struct mthca_dev *dev)
690 int i; 972 int i;
691 973
692 strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); 974 strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
975 dev->ib_dev.owner = THIS_MODULE;
976
693 dev->ib_dev.node_type = IB_NODE_CA; 977 dev->ib_dev.node_type = IB_NODE_CA;
694 dev->ib_dev.phys_port_cnt = dev->limits.num_ports; 978 dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
695 dev->ib_dev.dma_device = &dev->pdev->dev; 979 dev->ib_dev.dma_device = &dev->pdev->dev;
@@ -699,6 +983,9 @@ int mthca_register_device(struct mthca_dev *dev)
699 dev->ib_dev.modify_port = mthca_modify_port; 983 dev->ib_dev.modify_port = mthca_modify_port;
700 dev->ib_dev.query_pkey = mthca_query_pkey; 984 dev->ib_dev.query_pkey = mthca_query_pkey;
701 dev->ib_dev.query_gid = mthca_query_gid; 985 dev->ib_dev.query_gid = mthca_query_gid;
986 dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext;
987 dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext;
988 dev->ib_dev.mmap = mthca_mmap_uar;
702 dev->ib_dev.alloc_pd = mthca_alloc_pd; 989 dev->ib_dev.alloc_pd = mthca_alloc_pd;
703 dev->ib_dev.dealloc_pd = mthca_dealloc_pd; 990 dev->ib_dev.dealloc_pd = mthca_dealloc_pd;
704 dev->ib_dev.create_ah = mthca_ah_create; 991 dev->ib_dev.create_ah = mthca_ah_create;
@@ -711,6 +998,7 @@ int mthca_register_device(struct mthca_dev *dev)
711 dev->ib_dev.poll_cq = mthca_poll_cq; 998 dev->ib_dev.poll_cq = mthca_poll_cq;
712 dev->ib_dev.get_dma_mr = mthca_get_dma_mr; 999 dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
713 dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr; 1000 dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
1001 dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
714 dev->ib_dev.dereg_mr = mthca_dereg_mr; 1002 dev->ib_dev.dereg_mr = mthca_dereg_mr;
715 1003
716 if (dev->mthca_flags & MTHCA_FLAG_FMR) { 1004 if (dev->mthca_flags & MTHCA_FLAG_FMR) {
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 619710f95a87..1d032791cc8b 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -54,18 +55,26 @@ struct mthca_uar {
54 int index; 55 int index;
55}; 56};
56 57
58struct mthca_user_db_table;
59
60struct mthca_ucontext {
61 struct ib_ucontext ibucontext;
62 struct mthca_uar uar;
63 struct mthca_user_db_table *db_tab;
64};
65
66struct mthca_mtt;
67
57struct mthca_mr { 68struct mthca_mr {
58 struct ib_mr ibmr; 69 struct ib_mr ibmr;
59 int order; 70 struct mthca_mtt *mtt;
60 u32 first_seg;
61}; 71};
62 72
63struct mthca_fmr { 73struct mthca_fmr {
64 struct ib_fmr ibmr; 74 struct ib_fmr ibmr;
65 struct ib_fmr_attr attr; 75 struct ib_fmr_attr attr;
66 int order; 76 struct mthca_mtt *mtt;
67 u32 first_seg; 77 int maps;
68 int maps;
69 union { 78 union {
70 struct { 79 struct {
71 struct mthca_mpt_entry __iomem *mpt; 80 struct mthca_mpt_entry __iomem *mpt;
@@ -83,6 +92,7 @@ struct mthca_pd {
83 u32 pd_num; 92 u32 pd_num;
84 atomic_t sqp_count; 93 atomic_t sqp_count;
85 struct mthca_mr ntmr; 94 struct mthca_mr ntmr;
95 int privileged;
86}; 96};
87 97
88struct mthca_eq { 98struct mthca_eq {
@@ -167,6 +177,7 @@ struct mthca_cq {
167 int cqn; 177 int cqn;
168 u32 cons_index; 178 u32 cons_index;
169 int is_direct; 179 int is_direct;
180 int is_kernel;
170 181
171 /* Next fields are Arbel only */ 182 /* Next fields are Arbel only */
172 int set_ci_db_index; 183 int set_ci_db_index;
@@ -236,6 +247,11 @@ struct mthca_sqp {
236 dma_addr_t header_dma; 247 dma_addr_t header_dma;
237}; 248};
238 249
250static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
251{
252 return container_of(ibucontext, struct mthca_ucontext, ibucontext);
253}
254
239static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr) 255static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr)
240{ 256{
241 return container_of(ibmr, struct mthca_fmr, ibmr); 257 return container_of(ibmr, struct mthca_fmr, ibmr);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index ca73bab11a02..f7126b14d5ae 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -46,7 +47,9 @@ enum {
46 MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE, 47 MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
47 MTHCA_ACK_REQ_FREQ = 10, 48 MTHCA_ACK_REQ_FREQ = 10,
48 MTHCA_FLIGHT_LIMIT = 9, 49 MTHCA_FLIGHT_LIMIT = 9,
49 MTHCA_UD_HEADER_SIZE = 72 /* largest UD header possible */ 50 MTHCA_UD_HEADER_SIZE = 72, /* largest UD header possible */
51 MTHCA_INLINE_HEADER_SIZE = 4, /* data segment overhead for inline */
52 MTHCA_INLINE_CHUNK_SIZE = 16 /* inline data segment chunk */
50}; 53};
51 54
52enum { 55enum {
@@ -357,6 +360,9 @@ static const struct {
357 [UD] = (IB_QP_PKEY_INDEX | 360 [UD] = (IB_QP_PKEY_INDEX |
358 IB_QP_PORT | 361 IB_QP_PORT |
359 IB_QP_QKEY), 362 IB_QP_QKEY),
363 [UC] = (IB_QP_PKEY_INDEX |
364 IB_QP_PORT |
365 IB_QP_ACCESS_FLAGS),
360 [RC] = (IB_QP_PKEY_INDEX | 366 [RC] = (IB_QP_PKEY_INDEX |
361 IB_QP_PORT | 367 IB_QP_PORT |
362 IB_QP_ACCESS_FLAGS), 368 IB_QP_ACCESS_FLAGS),
@@ -378,6 +384,9 @@ static const struct {
378 [UD] = (IB_QP_PKEY_INDEX | 384 [UD] = (IB_QP_PKEY_INDEX |
379 IB_QP_PORT | 385 IB_QP_PORT |
380 IB_QP_QKEY), 386 IB_QP_QKEY),
387 [UC] = (IB_QP_PKEY_INDEX |
388 IB_QP_PORT |
389 IB_QP_ACCESS_FLAGS),
381 [RC] = (IB_QP_PKEY_INDEX | 390 [RC] = (IB_QP_PKEY_INDEX |
382 IB_QP_PORT | 391 IB_QP_PORT |
383 IB_QP_ACCESS_FLAGS), 392 IB_QP_ACCESS_FLAGS),
@@ -388,6 +397,11 @@ static const struct {
388 [IB_QPS_RTR] = { 397 [IB_QPS_RTR] = {
389 .trans = MTHCA_TRANS_INIT2RTR, 398 .trans = MTHCA_TRANS_INIT2RTR,
390 .req_param = { 399 .req_param = {
400 [UC] = (IB_QP_AV |
401 IB_QP_PATH_MTU |
402 IB_QP_DEST_QPN |
403 IB_QP_RQ_PSN |
404 IB_QP_MAX_DEST_RD_ATOMIC),
391 [RC] = (IB_QP_AV | 405 [RC] = (IB_QP_AV |
392 IB_QP_PATH_MTU | 406 IB_QP_PATH_MTU |
393 IB_QP_DEST_QPN | 407 IB_QP_DEST_QPN |
@@ -398,6 +412,9 @@ static const struct {
398 .opt_param = { 412 .opt_param = {
399 [UD] = (IB_QP_PKEY_INDEX | 413 [UD] = (IB_QP_PKEY_INDEX |
400 IB_QP_QKEY), 414 IB_QP_QKEY),
415 [UC] = (IB_QP_ALT_PATH |
416 IB_QP_ACCESS_FLAGS |
417 IB_QP_PKEY_INDEX),
401 [RC] = (IB_QP_ALT_PATH | 418 [RC] = (IB_QP_ALT_PATH |
402 IB_QP_ACCESS_FLAGS | 419 IB_QP_ACCESS_FLAGS |
403 IB_QP_PKEY_INDEX), 420 IB_QP_PKEY_INDEX),
@@ -413,6 +430,8 @@ static const struct {
413 .trans = MTHCA_TRANS_RTR2RTS, 430 .trans = MTHCA_TRANS_RTR2RTS,
414 .req_param = { 431 .req_param = {
415 [UD] = IB_QP_SQ_PSN, 432 [UD] = IB_QP_SQ_PSN,
433 [UC] = (IB_QP_SQ_PSN |
434 IB_QP_MAX_QP_RD_ATOMIC),
416 [RC] = (IB_QP_TIMEOUT | 435 [RC] = (IB_QP_TIMEOUT |
417 IB_QP_RETRY_CNT | 436 IB_QP_RETRY_CNT |
418 IB_QP_RNR_RETRY | 437 IB_QP_RNR_RETRY |
@@ -423,6 +442,11 @@ static const struct {
423 .opt_param = { 442 .opt_param = {
424 [UD] = (IB_QP_CUR_STATE | 443 [UD] = (IB_QP_CUR_STATE |
425 IB_QP_QKEY), 444 IB_QP_QKEY),
445 [UC] = (IB_QP_CUR_STATE |
446 IB_QP_ALT_PATH |
447 IB_QP_ACCESS_FLAGS |
448 IB_QP_PKEY_INDEX |
449 IB_QP_PATH_MIG_STATE),
426 [RC] = (IB_QP_CUR_STATE | 450 [RC] = (IB_QP_CUR_STATE |
427 IB_QP_ALT_PATH | 451 IB_QP_ALT_PATH |
428 IB_QP_ACCESS_FLAGS | 452 IB_QP_ACCESS_FLAGS |
@@ -442,6 +466,9 @@ static const struct {
442 .opt_param = { 466 .opt_param = {
443 [UD] = (IB_QP_CUR_STATE | 467 [UD] = (IB_QP_CUR_STATE |
444 IB_QP_QKEY), 468 IB_QP_QKEY),
469 [UC] = (IB_QP_ACCESS_FLAGS |
470 IB_QP_ALT_PATH |
471 IB_QP_PATH_MIG_STATE),
445 [RC] = (IB_QP_ACCESS_FLAGS | 472 [RC] = (IB_QP_ACCESS_FLAGS |
446 IB_QP_ALT_PATH | 473 IB_QP_ALT_PATH |
447 IB_QP_PATH_MIG_STATE | 474 IB_QP_PATH_MIG_STATE |
@@ -462,6 +489,10 @@ static const struct {
462 .opt_param = { 489 .opt_param = {
463 [UD] = (IB_QP_CUR_STATE | 490 [UD] = (IB_QP_CUR_STATE |
464 IB_QP_QKEY), 491 IB_QP_QKEY),
492 [UC] = (IB_QP_CUR_STATE |
493 IB_QP_ALT_PATH |
494 IB_QP_ACCESS_FLAGS |
495 IB_QP_PATH_MIG_STATE),
465 [RC] = (IB_QP_CUR_STATE | 496 [RC] = (IB_QP_CUR_STATE |
466 IB_QP_ALT_PATH | 497 IB_QP_ALT_PATH |
467 IB_QP_ACCESS_FLAGS | 498 IB_QP_ACCESS_FLAGS |
@@ -476,6 +507,14 @@ static const struct {
476 .opt_param = { 507 .opt_param = {
477 [UD] = (IB_QP_PKEY_INDEX | 508 [UD] = (IB_QP_PKEY_INDEX |
478 IB_QP_QKEY), 509 IB_QP_QKEY),
510 [UC] = (IB_QP_AV |
511 IB_QP_MAX_QP_RD_ATOMIC |
512 IB_QP_MAX_DEST_RD_ATOMIC |
513 IB_QP_CUR_STATE |
514 IB_QP_ALT_PATH |
515 IB_QP_ACCESS_FLAGS |
516 IB_QP_PKEY_INDEX |
517 IB_QP_PATH_MIG_STATE),
479 [RC] = (IB_QP_AV | 518 [RC] = (IB_QP_AV |
480 IB_QP_TIMEOUT | 519 IB_QP_TIMEOUT |
481 IB_QP_RETRY_CNT | 520 IB_QP_RETRY_CNT |
@@ -501,6 +540,7 @@ static const struct {
501 .opt_param = { 540 .opt_param = {
502 [UD] = (IB_QP_CUR_STATE | 541 [UD] = (IB_QP_CUR_STATE |
503 IB_QP_QKEY), 542 IB_QP_QKEY),
543 [UC] = (IB_QP_CUR_STATE),
504 [RC] = (IB_QP_CUR_STATE | 544 [RC] = (IB_QP_CUR_STATE |
505 IB_QP_MIN_RNR_TIMER), 545 IB_QP_MIN_RNR_TIMER),
506 [MLX] = (IB_QP_CUR_STATE | 546 [MLX] = (IB_QP_CUR_STATE |
@@ -552,7 +592,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
552 struct mthca_dev *dev = to_mdev(ibqp->device); 592 struct mthca_dev *dev = to_mdev(ibqp->device);
553 struct mthca_qp *qp = to_mqp(ibqp); 593 struct mthca_qp *qp = to_mqp(ibqp);
554 enum ib_qp_state cur_state, new_state; 594 enum ib_qp_state cur_state, new_state;
555 void *mailbox = NULL; 595 struct mthca_mailbox *mailbox;
556 struct mthca_qp_param *qp_param; 596 struct mthca_qp_param *qp_param;
557 struct mthca_qp_context *qp_context; 597 struct mthca_qp_context *qp_context;
558 u32 req_param, opt_param; 598 u32 req_param, opt_param;
@@ -609,10 +649,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
609 return -EINVAL; 649 return -EINVAL;
610 } 650 }
611 651
612 mailbox = kmalloc(sizeof (*qp_param) + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); 652 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
613 if (!mailbox) 653 if (IS_ERR(mailbox))
614 return -ENOMEM; 654 return PTR_ERR(mailbox);
615 qp_param = MAILBOX_ALIGN(mailbox); 655 qp_param = mailbox->buf;
616 qp_context = &qp_param->context; 656 qp_context = &qp_param->context;
617 memset(qp_param, 0, sizeof *qp_param); 657 memset(qp_param, 0, sizeof *qp_param);
618 658
@@ -652,7 +692,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
652 692
653 /* leave arbel_sched_queue as 0 */ 693 /* leave arbel_sched_queue as 0 */
654 694
655 qp_context->usr_page = cpu_to_be32(dev->driver_uar.index); 695 if (qp->ibqp.uobject)
696 qp_context->usr_page =
697 cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index);
698 else
699 qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
656 qp_context->local_qpn = cpu_to_be32(qp->qpn); 700 qp_context->local_qpn = cpu_to_be32(qp->qpn);
657 if (attr_mask & IB_QP_DEST_QPN) { 701 if (attr_mask & IB_QP_DEST_QPN) {
658 qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num); 702 qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
@@ -683,7 +727,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
683 if (attr_mask & IB_QP_AV) { 727 if (attr_mask & IB_QP_AV) {
684 qp_context->pri_path.g_mylmc = attr->ah_attr.src_path_bits & 0x7f; 728 qp_context->pri_path.g_mylmc = attr->ah_attr.src_path_bits & 0x7f;
685 qp_context->pri_path.rlid = cpu_to_be16(attr->ah_attr.dlid); 729 qp_context->pri_path.rlid = cpu_to_be16(attr->ah_attr.dlid);
686 qp_context->pri_path.static_rate = (!!attr->ah_attr.static_rate) << 3; 730 qp_context->pri_path.static_rate = !!attr->ah_attr.static_rate;
687 if (attr->ah_attr.ah_flags & IB_AH_GRH) { 731 if (attr->ah_attr.ah_flags & IB_AH_GRH) {
688 qp_context->pri_path.g_mylmc |= 1 << 7; 732 qp_context->pri_path.g_mylmc |= 1 << 7;
689 qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index; 733 qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index;
@@ -724,9 +768,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
724 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT); 768 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT);
725 } 769 }
726 770
727 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { 771 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
728 qp_context->params1 |= cpu_to_be32(min(attr->max_dest_rd_atomic ? 772 qp_context->params1 |= cpu_to_be32(min(attr->max_rd_atomic ?
729 ffs(attr->max_dest_rd_atomic) - 1 : 0, 773 ffs(attr->max_rd_atomic) - 1 : 0,
730 7) << 21); 774 7) << 21);
731 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX); 775 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
732 } 776 }
@@ -764,10 +808,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
764 qp->atomic_rd_en = attr->qp_access_flags; 808 qp->atomic_rd_en = attr->qp_access_flags;
765 } 809 }
766 810
767 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { 811 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
768 u8 rra_max; 812 u8 rra_max;
769 813
770 if (qp->resp_depth && !attr->max_rd_atomic) { 814 if (qp->resp_depth && !attr->max_dest_rd_atomic) {
771 /* 815 /*
772 * Lowering our responder resources to zero. 816 * Lowering our responder resources to zero.
773 * Turn off RDMA/atomics as responder. 817 * Turn off RDMA/atomics as responder.
@@ -778,7 +822,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
778 MTHCA_QP_OPTPAR_RAE); 822 MTHCA_QP_OPTPAR_RAE);
779 } 823 }
780 824
781 if (!qp->resp_depth && attr->max_rd_atomic) { 825 if (!qp->resp_depth && attr->max_dest_rd_atomic) {
782 /* 826 /*
783 * Increasing our responder resources from 827 * Increasing our responder resources from
784 * zero. Turn on RDMA/atomics as appropriate. 828 * zero. Turn on RDMA/atomics as appropriate.
@@ -799,7 +843,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
799 } 843 }
800 844
801 for (rra_max = 0; 845 for (rra_max = 0;
802 1 << rra_max < attr->max_rd_atomic && 846 1 << rra_max < attr->max_dest_rd_atomic &&
803 rra_max < dev->qp_table.rdb_shift; 847 rra_max < dev->qp_table.rdb_shift;
804 ++rra_max) 848 ++rra_max)
805 ; /* nothing */ 849 ; /* nothing */
@@ -807,7 +851,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
807 qp_context->params2 |= cpu_to_be32(rra_max << 21); 851 qp_context->params2 |= cpu_to_be32(rra_max << 21);
808 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX); 852 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);
809 853
810 qp->resp_depth = attr->max_rd_atomic; 854 qp->resp_depth = attr->max_dest_rd_atomic;
811 } 855 }
812 856
813 qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC); 857 qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);
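The two hunks above fix a swap: IB_QP_MAX_QP_RD_ATOMIC (initiator depth) now feeds SRA_MAX in params1, and IB_QP_MAX_DEST_RD_ATOMIC (responder resources) feeds RRA_MAX in params2. Both are log2-encoded; a standalone check of the encodings, with the min(,7) and rdb_shift caps omitted:

#include <stdio.h>

int main(void)
{
        int max_rd_atomic = 4;          /* initiator -> SRA_MAX */
        int max_dest_rd_atomic = 4;     /* responder -> RRA_MAX */
        int sra = max_rd_atomic ? __builtin_ffs(max_rd_atomic) - 1 : 0;
        int rra;

        for (rra = 0; 1 << rra < max_dest_rd_atomic; ++rra)
                ; /* nothing */
        printf("SRA %d, RRA %d (both shifted left 21 in the QP context)\n",
               sra, rra);
        return 0;
}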
@@ -835,7 +879,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
835 } 879 }
836 880
837 err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans, 881 err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans,
838 qp->qpn, 0, qp_param, 0, &status); 882 qp->qpn, 0, mailbox, 0, &status);
839 if (status) { 883 if (status) {
840 mthca_warn(dev, "modify QP %d returned status %02x.\n", 884 mthca_warn(dev, "modify QP %d returned status %02x.\n",
841 state_table[cur_state][new_state].trans, status); 885 state_table[cur_state][new_state].trans, status);
@@ -845,7 +889,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
845 if (!err) 889 if (!err)
846 qp->state = new_state; 890 qp->state = new_state;
847 891
848 kfree(mailbox); 892 mthca_free_mailbox(dev, mailbox);
849 893
850 if (is_sqp(dev, qp)) 894 if (is_sqp(dev, qp))
851 store_attrs(to_msqp(qp), attr, attr_mask); 895 store_attrs(to_msqp(qp), attr, attr_mask);
@@ -917,6 +961,15 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
917 961
918 qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift, 962 qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
919 1 << qp->sq.wqe_shift); 963 1 << qp->sq.wqe_shift);
964
965 /*
966 * If this is a userspace QP, we don't actually have to
967 * allocate anything. All we need is to calculate the WQE
968 * sizes and the send_wqe_offset, so we're done now.
969 */
970 if (pd->ibpd.uobject)
971 return 0;
972
920 size = PAGE_ALIGN(qp->send_wqe_offset + 973 size = PAGE_ALIGN(qp->send_wqe_offset +
921 (qp->sq.max << qp->sq.wqe_shift)); 974 (qp->sq.max << qp->sq.wqe_shift));
922 975
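send_wqe_offset places the send queue right after the receive queue, aligned up to one send WQE; the computation runs for userspace QPs too, which is why the early return above comes after it. In isolation, with ALIGN reimplemented and power-of-two alignment assumed:

#include <stdio.h>

#define ALIGN_(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        int rq_max = 256, rq_wqe_shift = 6, sq_wqe_shift = 7;
        int off = ALIGN_(rq_max << rq_wqe_shift, 1 << sq_wqe_shift);

        printf("send_wqe_offset = %d\n", off);  /* 16384 */
        return 0;
}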
@@ -934,7 +987,8 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
934 mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n", 987 mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n",
935 size, shift); 988 size, shift);
936 989
937 qp->queue.direct.buf = pci_alloc_consistent(dev->pdev, size, &t); 990 qp->queue.direct.buf = dma_alloc_coherent(&dev->pdev->dev, size,
991 &t, GFP_KERNEL);
938 if (!qp->queue.direct.buf) 992 if (!qp->queue.direct.buf)
939 goto err_out; 993 goto err_out;
940 994
@@ -973,7 +1027,8 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
973 1027
974 for (i = 0; i < npages; ++i) { 1028 for (i = 0; i < npages; ++i) {
975 qp->queue.page_list[i].buf = 1029 qp->queue.page_list[i].buf =
976 pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t); 1030 dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
1031 &t, GFP_KERNEL);
977 if (!qp->queue.page_list[i].buf) 1032 if (!qp->queue.page_list[i].buf)
978 goto err_out_free; 1033 goto err_out_free;
979 1034
@@ -996,16 +1051,15 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
996 1051
997 err_out_free: 1052 err_out_free:
998 if (qp->is_direct) { 1053 if (qp->is_direct) {
999 pci_free_consistent(dev->pdev, size, 1054 dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf,
1000 qp->queue.direct.buf, 1055 pci_unmap_addr(&qp->queue.direct, mapping));
1001 pci_unmap_addr(&qp->queue.direct, mapping));
1002 } else 1056 } else
1003 for (i = 0; i < npages; ++i) { 1057 for (i = 0; i < npages; ++i) {
1004 if (qp->queue.page_list[i].buf) 1058 if (qp->queue.page_list[i].buf)
1005 pci_free_consistent(dev->pdev, PAGE_SIZE, 1059 dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
1006 qp->queue.page_list[i].buf, 1060 qp->queue.page_list[i].buf,
1007 pci_unmap_addr(&qp->queue.page_list[i], 1061 pci_unmap_addr(&qp->queue.page_list[i],
1008 mapping)); 1062 mapping));
1009 1063
1010 } 1064 }
1011 1065
@@ -1015,10 +1069,32 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
1015 return err; 1069 return err;
1016} 1070}
1017 1071
1018static int mthca_alloc_memfree(struct mthca_dev *dev, 1072static void mthca_free_wqe_buf(struct mthca_dev *dev,
1019 struct mthca_qp *qp) 1073 struct mthca_qp *qp)
1020{ 1074{
1021 int ret = 0; 1075 int i;
1076 int size = PAGE_ALIGN(qp->send_wqe_offset +
1077 (qp->sq.max << qp->sq.wqe_shift));
1078
1079 if (qp->is_direct) {
1080 dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf,
1081 pci_unmap_addr(&qp->queue.direct, mapping));
1082 } else {
1083 for (i = 0; i < size / PAGE_SIZE; ++i) {
1084 dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
1085 qp->queue.page_list[i].buf,
1086 pci_unmap_addr(&qp->queue.page_list[i],
1087 mapping));
1088 }
1089 }
1090
1091 kfree(qp->wrid);
1092}
1093
1094static int mthca_map_memfree(struct mthca_dev *dev,
1095 struct mthca_qp *qp)
1096{
1097 int ret;
1022 1098
1023 if (mthca_is_memfree(dev)) { 1099 if (mthca_is_memfree(dev)) {
1024 ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn); 1100 ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
@@ -1029,35 +1105,15 @@ static int mthca_alloc_memfree(struct mthca_dev *dev,
1029 if (ret) 1105 if (ret)
1030 goto err_qpc; 1106 goto err_qpc;
1031 1107
1032 ret = mthca_table_get(dev, dev->qp_table.rdb_table, 1108 ret = mthca_table_get(dev, dev->qp_table.rdb_table,
1033 qp->qpn << dev->qp_table.rdb_shift); 1109 qp->qpn << dev->qp_table.rdb_shift);
1034 if (ret) 1110 if (ret)
1035 goto err_eqpc; 1111 goto err_eqpc;
1036
1037 qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
1038 qp->qpn, &qp->rq.db);
1039 if (qp->rq.db_index < 0) {
1040 ret = -ENOMEM;
1041 goto err_rdb;
1042 }
1043 1112
1044 qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
1045 qp->qpn, &qp->sq.db);
1046 if (qp->sq.db_index < 0) {
1047 ret = -ENOMEM;
1048 goto err_rq_db;
1049 }
1050 } 1113 }
1051 1114
1052 return 0; 1115 return 0;
1053 1116
1054err_rq_db:
1055 mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
1056
1057err_rdb:
1058 mthca_table_put(dev, dev->qp_table.rdb_table,
1059 qp->qpn << dev->qp_table.rdb_shift);
1060
1061err_eqpc: 1117err_eqpc:
1062 mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); 1118 mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
1063 1119
@@ -1067,16 +1123,41 @@ err_qpc:
1067 return ret; 1123 return ret;
1068} 1124}
1069 1125
1126static void mthca_unmap_memfree(struct mthca_dev *dev,
1127 struct mthca_qp *qp)
1128{
1129 mthca_table_put(dev, dev->qp_table.rdb_table,
1130 qp->qpn << dev->qp_table.rdb_shift);
1131 mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
1132 mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
1133}
1134
1135static int mthca_alloc_memfree(struct mthca_dev *dev,
1136 struct mthca_qp *qp)
1137{
1138 int ret = 0;
1139
1140 if (mthca_is_memfree(dev)) {
1141 qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
1142 qp->qpn, &qp->rq.db);
1143 if (qp->rq.db_index < 0)
1144 return ret;
1145
1146 qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
1147 qp->qpn, &qp->sq.db);
1148 if (qp->sq.db_index < 0)
1149 mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
1150 }
1151
1152 return ret;
1153}
1154
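One thing to watch in the new mthca_alloc_memfree(): ret is initialized to 0 and never set, so both doorbell-allocation failure paths fall through to reporting success. A corrected sketch (hypothetical, not part of this diff) would propagate -ENOMEM:

static int mthca_alloc_memfree_fixed(struct mthca_dev *dev,
                                     struct mthca_qp *qp)
{
        if (!mthca_is_memfree(dev))
                return 0;

        qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
                                         qp->qpn, &qp->rq.db);
        if (qp->rq.db_index < 0)
                return -ENOMEM;

        qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
                                         qp->qpn, &qp->sq.db);
        if (qp->sq.db_index < 0) {
                mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
                return -ENOMEM;
        }

        return 0;
}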
1070static void mthca_free_memfree(struct mthca_dev *dev, 1155static void mthca_free_memfree(struct mthca_dev *dev,
1071 struct mthca_qp *qp) 1156 struct mthca_qp *qp)
1072{ 1157{
1073 if (mthca_is_memfree(dev)) { 1158 if (mthca_is_memfree(dev)) {
1074 mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index); 1159 mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
1075 mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); 1160 mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
1076 mthca_table_put(dev, dev->qp_table.rdb_table,
1077 qp->qpn << dev->qp_table.rdb_shift);
1078 mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
1079 mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
1080 } 1161 }
1081} 1162}
1082 1163
@@ -1108,13 +1189,28 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
1108 mthca_wq_init(&qp->sq); 1189 mthca_wq_init(&qp->sq);
1109 mthca_wq_init(&qp->rq); 1190 mthca_wq_init(&qp->rq);
1110 1191
1111 ret = mthca_alloc_memfree(dev, qp); 1192 ret = mthca_map_memfree(dev, qp);
1112 if (ret) 1193 if (ret)
1113 return ret; 1194 return ret;
1114 1195
1115 ret = mthca_alloc_wqe_buf(dev, pd, qp); 1196 ret = mthca_alloc_wqe_buf(dev, pd, qp);
1116 if (ret) { 1197 if (ret) {
1117 mthca_free_memfree(dev, qp); 1198 mthca_unmap_memfree(dev, qp);
1199 return ret;
1200 }
1201
1202 /*
1203 * If this is a userspace QP, we're done now. The doorbells
1204 * will be allocated and buffers will be initialized in
1205 * userspace.
1206 */
1207 if (pd->ibpd.uobject)
1208 return 0;
1209
1210 ret = mthca_alloc_memfree(dev, qp);
1211 if (ret) {
1212 mthca_free_wqe_buf(dev, qp);
1213 mthca_unmap_memfree(dev, qp);
1118 return ret; 1214 return ret;
1119 } 1215 }
1120 1216
@@ -1147,22 +1243,39 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
1147 return 0; 1243 return 0;
1148} 1244}
1149 1245
1150static void mthca_align_qp_size(struct mthca_dev *dev, struct mthca_qp *qp) 1246static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
1247 struct mthca_qp *qp)
1151{ 1248{
1152 int i; 1249 /* Sanity check QP size before proceeding */
1153 1250 if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 ||
1154 if (!mthca_is_memfree(dev)) 1251 cap->max_send_sge > 64 || cap->max_recv_sge > 64)
1155 return; 1252 return -EINVAL;
1156 1253
1157 for (i = 0; 1 << i < qp->rq.max; ++i) 1254 if (mthca_is_memfree(dev)) {
1158 ; /* nothing */ 1255 qp->rq.max = cap->max_recv_wr ?
1256 roundup_pow_of_two(cap->max_recv_wr) : 0;
1257 qp->sq.max = cap->max_send_wr ?
1258 roundup_pow_of_two(cap->max_send_wr) : 0;
1259 } else {
1260 qp->rq.max = cap->max_recv_wr;
1261 qp->sq.max = cap->max_send_wr;
1262 }
1159 1263
1160 qp->rq.max = 1 << i; 1264 qp->rq.max_gs = cap->max_recv_sge;
1265 qp->sq.max_gs = max_t(int, cap->max_send_sge,
1266 ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE,
1267 MTHCA_INLINE_CHUNK_SIZE) /
1268 sizeof (struct mthca_data_seg));
1161 1269
1162 for (i = 0; 1 << i < qp->sq.max; ++i) 1270 /*
1163 ; /* nothing */ 1271 * For MLX transport we need 2 extra S/G entries:
1272 * one for the header and one for the checksum at the end
1273 */
1274 if ((qp->transport == MLX && qp->sq.max_gs + 2 > dev->limits.max_sg) ||
1275 qp->sq.max_gs > dev->limits.max_sg || qp->rq.max_gs > dev->limits.max_sg)
1276 return -EINVAL;
1164 1277
1165 qp->sq.max = 1 << i; 1278 return 0;
1166} 1279}
1167 1280
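
The sizing rule in mthca_set_qp_size() above is easiest to see with concrete numbers. Below is a standalone, illustrative sketch of the same arithmetic, assuming a stand-in for the kernel's roundup_pow_of_two() and made-up values for the MTHCA_INLINE_* constants and the mthca_data_seg size; it is not the driver's code, only the math it performs on mem-free HCAs.

/*
 * Illustrative arithmetic only -- NOT driver code. The inline header,
 * chunk, and data segment sizes below are assumptions.
 */
#include <stdio.h>

static unsigned long pow2_roundup(unsigned long n)  /* stand-in for roundup_pow_of_two() */
{
	unsigned long p = 1;
	while (p < n)
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned long max_recv_wr = 100, max_send_sge = 4, max_inline_data = 64;
	unsigned long inline_hdr = 4, chunk = 16, data_seg = 16;	/* assumed sizes */

	/* mem-free HCAs round queue depths up to a power of two */
	printf("rq.max    = %lu\n", pow2_roundup(max_recv_wr));		/* 128 */

	/* sq.max_gs must cover the larger of the S/G list or the inline data */
	unsigned long aligned = (max_inline_data + inline_hdr + chunk - 1) / chunk * chunk;
	unsigned long inline_segs = aligned / data_seg;			/* 80 / 16 = 5 */
	printf("sq.max_gs = %lu\n", max_send_sge > inline_segs ? max_send_sge : inline_segs);
	return 0;
}
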
1168int mthca_alloc_qp(struct mthca_dev *dev, 1281int mthca_alloc_qp(struct mthca_dev *dev,
@@ -1171,11 +1284,14 @@ int mthca_alloc_qp(struct mthca_dev *dev,
1171 struct mthca_cq *recv_cq, 1284 struct mthca_cq *recv_cq,
1172 enum ib_qp_type type, 1285 enum ib_qp_type type,
1173 enum ib_sig_type send_policy, 1286 enum ib_sig_type send_policy,
1287 struct ib_qp_cap *cap,
1174 struct mthca_qp *qp) 1288 struct mthca_qp *qp)
1175{ 1289{
1176 int err; 1290 int err;
1177 1291
1178 mthca_align_qp_size(dev, qp); 1292 err = mthca_set_qp_size(dev, cap, qp);
1293 if (err)
1294 return err;
1179 1295
1180 switch (type) { 1296 switch (type) {
1181 case IB_QPT_RC: qp->transport = RC; break; 1297 case IB_QPT_RC: qp->transport = RC; break;
@@ -1208,14 +1324,17 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
1208 struct mthca_cq *send_cq, 1324 struct mthca_cq *send_cq,
1209 struct mthca_cq *recv_cq, 1325 struct mthca_cq *recv_cq,
1210 enum ib_sig_type send_policy, 1326 enum ib_sig_type send_policy,
1327 struct ib_qp_cap *cap,
1211 int qpn, 1328 int qpn,
1212 int port, 1329 int port,
1213 struct mthca_sqp *sqp) 1330 struct mthca_sqp *sqp)
1214{ 1331{
1215 int err = 0;
1216 u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; 1332 u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
1333 int err;
1217 1334
1218 mthca_align_qp_size(dev, &sqp->qp); 1335 err = mthca_set_qp_size(dev, cap, &sqp->qp);
1336 if (err)
1337 return err;
1219 1338
1220 sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE; 1339 sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
1221 sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size, 1340 sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
@@ -1274,8 +1393,6 @@ void mthca_free_qp(struct mthca_dev *dev,
1274 struct mthca_qp *qp) 1393 struct mthca_qp *qp)
1275{ 1394{
1276 u8 status; 1395 u8 status;
1277 int size;
1278 int i;
1279 struct mthca_cq *send_cq; 1396 struct mthca_cq *send_cq;
1280 struct mthca_cq *recv_cq; 1397 struct mthca_cq *recv_cq;
1281 1398
@@ -1305,31 +1422,22 @@ void mthca_free_qp(struct mthca_dev *dev,
1305 if (qp->state != IB_QPS_RESET) 1422 if (qp->state != IB_QPS_RESET)
1306 mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status); 1423 mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status);
1307 1424
1308 mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn); 1425 /*
1309 if (qp->ibqp.send_cq != qp->ibqp.recv_cq) 1426 * If this is a userspace QP, the buffers, MR, CQs and so on
1310 mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn); 1427 * will be cleaned up in userspace, so all we have to do is
1311 1428 * unref the mem-free tables and free the QPN in our table.
1312 mthca_free_mr(dev, &qp->mr); 1429 */
1313 1430 if (!qp->ibqp.uobject) {
1314 size = PAGE_ALIGN(qp->send_wqe_offset + 1431 mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn);
1315 (qp->sq.max << qp->sq.wqe_shift)); 1432 if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
1433 mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn);
1316 1434
1317 if (qp->is_direct) { 1435 mthca_free_mr(dev, &qp->mr);
1318 pci_free_consistent(dev->pdev, size, 1436 mthca_free_memfree(dev, qp);
1319 qp->queue.direct.buf, 1437 mthca_free_wqe_buf(dev, qp);
1320 pci_unmap_addr(&qp->queue.direct, mapping));
1321 } else {
1322 for (i = 0; i < size / PAGE_SIZE; ++i) {
1323 pci_free_consistent(dev->pdev, PAGE_SIZE,
1324 qp->queue.page_list[i].buf,
1325 pci_unmap_addr(&qp->queue.page_list[i],
1326 mapping));
1327 }
1328 } 1438 }
1329 1439
1330 kfree(qp->wrid); 1440 mthca_unmap_memfree(dev, qp);
1331
1332 mthca_free_memfree(dev, qp);
1333 1441
1334 if (is_sqp(dev, qp)) { 1442 if (is_sqp(dev, qp)) {
1335 atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count)); 1443 atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
@@ -1529,6 +1637,26 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1529 1637
1530 break; 1638 break;
1531 1639
1640 case UC:
1641 switch (wr->opcode) {
1642 case IB_WR_RDMA_WRITE:
1643 case IB_WR_RDMA_WRITE_WITH_IMM:
1644 ((struct mthca_raddr_seg *) wqe)->raddr =
1645 cpu_to_be64(wr->wr.rdma.remote_addr);
1646 ((struct mthca_raddr_seg *) wqe)->rkey =
1647 cpu_to_be32(wr->wr.rdma.rkey);
1648 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1649 wqe += sizeof (struct mthca_raddr_seg);
1650 size += sizeof (struct mthca_raddr_seg) / 16;
1651 break;
1652
1653 default:
1654 /* No extra segments required for sends */
1655 break;
1656 }
1657
1658 break;
1659
1532 case UD: 1660 case UD:
1533 ((struct mthca_tavor_ud_seg *) wqe)->lkey = 1661 ((struct mthca_tavor_ud_seg *) wqe)->lkey =
1534 cpu_to_be32(to_mah(wr->wr.ud.ah)->key); 1662 cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
@@ -1814,9 +1942,29 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1814 sizeof (struct mthca_atomic_seg); 1942 sizeof (struct mthca_atomic_seg);
1815 break; 1943 break;
1816 1944
1945 case IB_WR_RDMA_READ:
1946 case IB_WR_RDMA_WRITE:
1947 case IB_WR_RDMA_WRITE_WITH_IMM:
1948 ((struct mthca_raddr_seg *) wqe)->raddr =
1949 cpu_to_be64(wr->wr.rdma.remote_addr);
1950 ((struct mthca_raddr_seg *) wqe)->rkey =
1951 cpu_to_be32(wr->wr.rdma.rkey);
1952 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
1953 wqe += sizeof (struct mthca_raddr_seg);
1954 size += sizeof (struct mthca_raddr_seg) / 16;
1955 break;
1956
1957 default:
1958 /* No extra segments required for sends */
1959 break;
1960 }
1961
1962 break;
1963
1964 case UC:
1965 switch (wr->opcode) {
1817 case IB_WR_RDMA_WRITE: 1966 case IB_WR_RDMA_WRITE:
1818 case IB_WR_RDMA_WRITE_WITH_IMM: 1967 case IB_WR_RDMA_WRITE_WITH_IMM:
1819 case IB_WR_RDMA_READ:
1820 ((struct mthca_raddr_seg *) wqe)->raddr = 1968 ((struct mthca_raddr_seg *) wqe)->raddr =
1821 cpu_to_be64(wr->wr.rdma.remote_addr); 1969 cpu_to_be64(wr->wr.rdma.remote_addr);
1822 ((struct mthca_raddr_seg *) wqe)->rkey = 1970 ((struct mthca_raddr_seg *) wqe)->rkey =
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
new file mode 100644
index 000000000000..3024c1b4547d
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -0,0 +1,81 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 */
34
35#ifndef MTHCA_USER_H
36#define MTHCA_USER_H
37
38#include <linux/types.h>
39
40/*
41 * Make sure that all structs defined in this file remain laid out so
42 * that they pack the same way on 32-bit and 64-bit architectures (to
43 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
44 * In particular do not use pointer types -- pass pointers in __u64
45 * instead.
46 */
47
48struct mthca_alloc_ucontext_resp {
49 __u32 qp_tab_size;
50 __u32 uarc_size;
51};
52
53struct mthca_alloc_pd_resp {
54 __u32 pdn;
55 __u32 reserved;
56};
57
58struct mthca_create_cq {
59 __u32 lkey;
60 __u32 pdn;
61 __u64 arm_db_page;
62 __u64 set_db_page;
63 __u32 arm_db_index;
64 __u32 set_db_index;
65};
66
67struct mthca_create_cq_resp {
68 __u32 cqn;
69 __u32 reserved;
70};
71
72struct mthca_create_qp {
73 __u32 lkey;
74 __u32 reserved;
75 __u64 sq_db_page;
76 __u64 rq_db_page;
77 __u32 sq_db_index;
78 __u32 rq_db_index;
79};
80
81#endif /* MTHCA_USER_H */
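
Since these structs cross the user/kernel boundary, a short hypothetical sketch of the userspace side may help: a driver library (libmthca-style; all names here are invented) would pack struct mthca_create_qp as the vendor payload of a create-QP command, passing doorbell pointers in __u64 fields per the layout comment above. The surrounding uverbs transport is omitted.

/*
 * Hypothetical userspace sketch: packing the mthca_create_qp payload.
 * The struct is re-declared with stdint types for self-containment.
 */
#include <stdint.h>
#include <string.h>

struct mthca_create_qp {		/* mirrors the ABI struct above */
	uint32_t lkey;
	uint32_t reserved;
	uint64_t sq_db_page;
	uint64_t rq_db_page;
	uint32_t sq_db_index;
	uint32_t rq_db_index;
};

static void pack_create_qp(struct mthca_create_qp *cmd, uint32_t buf_lkey,
			   void *sq_db, uint32_t sq_idx,
			   void *rq_db, uint32_t rq_idx)
{
	memset(cmd, 0, sizeof *cmd);		/* keep reserved fields zero */
	cmd->lkey = buf_lkey;			/* MR covering the WQE buffer */
	cmd->sq_db_page = (uintptr_t) sq_db;	/* pointer passed as __u64 */
	cmd->rq_db_page = (uintptr_t) rq_db;
	cmd->sq_db_index = sq_idx;
	cmd->rq_db_index = rq_idx;
}
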
diff --git a/drivers/infiniband/include/ib_cm.h b/drivers/infiniband/include/ib_cm.h
new file mode 100644
index 000000000000..da650115e79a
--- /dev/null
+++ b/drivers/infiniband/include/ib_cm.h
@@ -0,0 +1,569 @@
1/*
2 * Copyright (c) 2004 Intel Corporation. All rights reserved.
3 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
4 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 *
35 * $Id: ib_cm.h 2730 2005-06-28 16:43:03Z sean.hefty $
36 */
37#if !defined(IB_CM_H)
38#define IB_CM_H
39
40#include <ib_mad.h>
41#include <ib_sa.h>
42
43enum ib_cm_state {
44 IB_CM_IDLE,
45 IB_CM_LISTEN,
46 IB_CM_REQ_SENT,
47 IB_CM_REQ_RCVD,
48 IB_CM_MRA_REQ_SENT,
49 IB_CM_MRA_REQ_RCVD,
50 IB_CM_REP_SENT,
51 IB_CM_REP_RCVD,
52 IB_CM_MRA_REP_SENT,
53 IB_CM_MRA_REP_RCVD,
54 IB_CM_ESTABLISHED,
55 IB_CM_DREQ_SENT,
56 IB_CM_DREQ_RCVD,
57 IB_CM_TIMEWAIT,
58 IB_CM_SIDR_REQ_SENT,
59 IB_CM_SIDR_REQ_RCVD
60};
61
62enum ib_cm_lap_state {
63 IB_CM_LAP_IDLE,
64 IB_CM_LAP_SENT,
65 IB_CM_LAP_RCVD,
66 IB_CM_MRA_LAP_SENT,
67 IB_CM_MRA_LAP_RCVD,
68};
69
70enum ib_cm_event_type {
71 IB_CM_REQ_ERROR,
72 IB_CM_REQ_RECEIVED,
73 IB_CM_REP_ERROR,
74 IB_CM_REP_RECEIVED,
75 IB_CM_RTU_RECEIVED,
76 IB_CM_USER_ESTABLISHED,
77 IB_CM_DREQ_ERROR,
78 IB_CM_DREQ_RECEIVED,
79 IB_CM_DREP_RECEIVED,
80 IB_CM_TIMEWAIT_EXIT,
81 IB_CM_MRA_RECEIVED,
82 IB_CM_REJ_RECEIVED,
83 IB_CM_LAP_ERROR,
84 IB_CM_LAP_RECEIVED,
85 IB_CM_APR_RECEIVED,
86 IB_CM_SIDR_REQ_ERROR,
87 IB_CM_SIDR_REQ_RECEIVED,
88 IB_CM_SIDR_REP_RECEIVED
89};
90
91enum ib_cm_data_size {
92 IB_CM_REQ_PRIVATE_DATA_SIZE = 92,
93 IB_CM_MRA_PRIVATE_DATA_SIZE = 222,
94 IB_CM_REJ_PRIVATE_DATA_SIZE = 148,
95 IB_CM_REP_PRIVATE_DATA_SIZE = 196,
96 IB_CM_RTU_PRIVATE_DATA_SIZE = 224,
97 IB_CM_DREQ_PRIVATE_DATA_SIZE = 220,
98 IB_CM_DREP_PRIVATE_DATA_SIZE = 224,
99 IB_CM_REJ_ARI_LENGTH = 72,
100 IB_CM_LAP_PRIVATE_DATA_SIZE = 168,
101 IB_CM_APR_PRIVATE_DATA_SIZE = 148,
102 IB_CM_APR_INFO_LENGTH = 72,
103 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216,
104 IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136,
105 IB_CM_SIDR_REP_INFO_LENGTH = 72
106};
107
108struct ib_cm_id;
109
110struct ib_cm_req_event_param {
111 struct ib_cm_id *listen_id;
112 struct ib_device *device;
113 u8 port;
114
115 struct ib_sa_path_rec *primary_path;
116 struct ib_sa_path_rec *alternate_path;
117
118 u64 remote_ca_guid;
119 u32 remote_qkey;
120 u32 remote_qpn;
121 enum ib_qp_type qp_type;
122
123 u32 starting_psn;
124 u8 responder_resources;
125 u8 initiator_depth;
126 unsigned int local_cm_response_timeout:5;
127 unsigned int flow_control:1;
128 unsigned int remote_cm_response_timeout:5;
129 unsigned int retry_count:3;
130 unsigned int rnr_retry_count:3;
131 unsigned int srq:1;
132};
133
134struct ib_cm_rep_event_param {
135 u64 remote_ca_guid;
136 u32 remote_qkey;
137 u32 remote_qpn;
138 u32 starting_psn;
139 u8 responder_resources;
140 u8 initiator_depth;
141 unsigned int target_ack_delay:5;
142 unsigned int failover_accepted:2;
143 unsigned int flow_control:1;
144 unsigned int rnr_retry_count:3;
145 unsigned int srq:1;
146};
147
148enum ib_cm_rej_reason {
149 IB_CM_REJ_NO_QP = __constant_htons(1),
150 IB_CM_REJ_NO_EEC = __constant_htons(2),
151 IB_CM_REJ_NO_RESOURCES = __constant_htons(3),
152 IB_CM_REJ_TIMEOUT = __constant_htons(4),
153 IB_CM_REJ_UNSUPPORTED = __constant_htons(5),
154 IB_CM_REJ_INVALID_COMM_ID = __constant_htons(6),
155 IB_CM_REJ_INVALID_COMM_INSTANCE = __constant_htons(7),
156 IB_CM_REJ_INVALID_SERVICE_ID = __constant_htons(8),
157 IB_CM_REJ_INVALID_TRANSPORT_TYPE = __constant_htons(9),
158 IB_CM_REJ_STALE_CONN = __constant_htons(10),
159 IB_CM_REJ_RDC_NOT_EXIST = __constant_htons(11),
160 IB_CM_REJ_INVALID_GID = __constant_htons(12),
161 IB_CM_REJ_INVALID_LID = __constant_htons(13),
162 IB_CM_REJ_INVALID_SL = __constant_htons(14),
163 IB_CM_REJ_INVALID_TRAFFIC_CLASS = __constant_htons(15),
164 IB_CM_REJ_INVALID_HOP_LIMIT = __constant_htons(16),
165 IB_CM_REJ_INVALID_PACKET_RATE = __constant_htons(17),
166 IB_CM_REJ_INVALID_ALT_GID = __constant_htons(18),
167 IB_CM_REJ_INVALID_ALT_LID = __constant_htons(19),
168 IB_CM_REJ_INVALID_ALT_SL = __constant_htons(20),
169 IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS = __constant_htons(21),
170 IB_CM_REJ_INVALID_ALT_HOP_LIMIT = __constant_htons(22),
171 IB_CM_REJ_INVALID_ALT_PACKET_RATE = __constant_htons(23),
172 IB_CM_REJ_PORT_CM_REDIRECT = __constant_htons(24),
173 IB_CM_REJ_PORT_REDIRECT = __constant_htons(25),
174 IB_CM_REJ_INVALID_MTU = __constant_htons(26),
175 IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES = __constant_htons(27),
176 IB_CM_REJ_CONSUMER_DEFINED = __constant_htons(28),
177 IB_CM_REJ_INVALID_RNR_RETRY = __constant_htons(29),
178 IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID = __constant_htons(30),
179 IB_CM_REJ_INVALID_CLASS_VERSION = __constant_htons(31),
180 IB_CM_REJ_INVALID_FLOW_LABEL = __constant_htons(32),
181 IB_CM_REJ_INVALID_ALT_FLOW_LABEL = __constant_htons(33)
182};
183
184struct ib_cm_rej_event_param {
185 enum ib_cm_rej_reason reason;
186 void *ari;
187 u8 ari_length;
188};
189
190struct ib_cm_mra_event_param {
191 u8 service_timeout;
192};
193
194struct ib_cm_lap_event_param {
195 struct ib_sa_path_rec *alternate_path;
196};
197
198enum ib_cm_apr_status {
199 IB_CM_APR_SUCCESS,
200 IB_CM_APR_INVALID_COMM_ID,
201 IB_CM_APR_UNSUPPORTED,
202 IB_CM_APR_REJECT,
203 IB_CM_APR_REDIRECT,
204 IB_CM_APR_IS_CURRENT,
205 IB_CM_APR_INVALID_QPN_EECN,
206 IB_CM_APR_INVALID_LID,
207 IB_CM_APR_INVALID_GID,
208 IB_CM_APR_INVALID_FLOW_LABEL,
209 IB_CM_APR_INVALID_TCLASS,
210 IB_CM_APR_INVALID_HOP_LIMIT,
211 IB_CM_APR_INVALID_PACKET_RATE,
212 IB_CM_APR_INVALID_SL
213};
214
215struct ib_cm_apr_event_param {
216 enum ib_cm_apr_status ap_status;
217 void *apr_info;
218 u8 info_len;
219};
220
221struct ib_cm_sidr_req_event_param {
222 struct ib_cm_id *listen_id;
223 struct ib_device *device;
224 u8 port;
225
226 u16 pkey;
227};
228
229enum ib_cm_sidr_status {
230 IB_SIDR_SUCCESS,
231 IB_SIDR_UNSUPPORTED,
232 IB_SIDR_REJECT,
233 IB_SIDR_NO_QP,
234 IB_SIDR_REDIRECT,
235 IB_SIDR_UNSUPPORTED_VERSION
236};
237
238struct ib_cm_sidr_rep_event_param {
239 enum ib_cm_sidr_status status;
240 u32 qkey;
241 u32 qpn;
242 void *info;
243 u8 info_len;
244
245};
246
247struct ib_cm_event {
248 enum ib_cm_event_type event;
249 union {
250 struct ib_cm_req_event_param req_rcvd;
251 struct ib_cm_rep_event_param rep_rcvd;
252 /* No data for RTU received events. */
253 struct ib_cm_rej_event_param rej_rcvd;
254 struct ib_cm_mra_event_param mra_rcvd;
255 struct ib_cm_lap_event_param lap_rcvd;
256 struct ib_cm_apr_event_param apr_rcvd;
257 /* No data for DREQ/DREP received events. */
258 struct ib_cm_sidr_req_event_param sidr_req_rcvd;
259 struct ib_cm_sidr_rep_event_param sidr_rep_rcvd;
260 enum ib_wc_status send_status;
261 } param;
262
263 void *private_data;
264};
265
266/**
267 * ib_cm_handler - User-defined callback to process communication events.
268 * @cm_id: Communication identifier associated with the reported event.
269 * @event: Information about the communication event.
270 *
271 * IB_CM_REQ_RECEIVED and IB_CM_SIDR_REQ_RECEIVED communication events
272 * generated as a result of listen requests result in the allocation of a
273 * new @cm_id. The new @cm_id is returned to the user through this callback.
274 * Clients are responsible for destroying the new @cm_id. For peer-to-peer
275 * IB_CM_REQ_RECEIVED and all other events, the returned @cm_id corresponds
276 * to a user's existing communication identifier.
277 *
278 * Users may not call ib_destroy_cm_id while in the context of this callback;
279 * however, returning a non-zero value instructs the communication manager to
280 * destroy the @cm_id after the callback completes.
281 */
282typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id,
283 struct ib_cm_event *event);
284
285struct ib_cm_id {
286 ib_cm_handler cm_handler;
287 void *context;
288 u64 service_id;
289 u64 service_mask;
290 enum ib_cm_state state; /* internal CM/debug use */
291 enum ib_cm_lap_state lap_state; /* internal CM/debug use */
292 u32 local_id;
293 u32 remote_id;
294};
295
296/**
297 * ib_create_cm_id - Allocate a communication identifier.
298 * @cm_handler: Callback invoked to notify the user of CM events.
299 * @context: User specified context associated with the communication
300 * identifier.
301 *
302 * Communication identifiers are used to track connection states, service
303 * ID resolution requests, and listen requests.
304 */
305struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler,
306 void *context);
307
308/**
309 * ib_destroy_cm_id - Destroy a connection identifier.
310 * @cm_id: Connection identifier to destroy.
311 *
312 * This call blocks until the connection identifier is destroyed.
313 */
314void ib_destroy_cm_id(struct ib_cm_id *cm_id);
315
316#define IB_SERVICE_ID_AGN_MASK __constant_cpu_to_be64(0xFF00000000000000ULL)
317#define IB_CM_ASSIGN_SERVICE_ID __constant_cpu_to_be64(0x0200000000000000ULL)
318
319/**
320 * ib_cm_listen - Initiates listening on the specified service ID for
321 * connection and service ID resolution requests.
322 * @cm_id: Connection identifier associated with the listen request.
323 * @service_id: Service identifier matched against incoming connection
324 * and service ID resolution requests. The service ID should be specified
325 * in network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
326 * assign a service ID to the caller.
327 * @service_mask: Mask applied to service ID used to listen across a
328 * range of service IDs. If set to 0, the service ID is matched
329 * exactly. This parameter is ignored if %service_id is set to
330 * IB_CM_ASSIGN_SERVICE_ID.
331 */
332int ib_cm_listen(struct ib_cm_id *cm_id,
333 u64 service_id,
334 u64 service_mask);
335
336struct ib_cm_req_param {
337 struct ib_sa_path_rec *primary_path;
338 struct ib_sa_path_rec *alternate_path;
339 u64 service_id;
340 u32 qp_num;
341 enum ib_qp_type qp_type;
342 u32 starting_psn;
343 const void *private_data;
344 u8 private_data_len;
345 u8 peer_to_peer;
346 u8 responder_resources;
347 u8 initiator_depth;
348 u8 remote_cm_response_timeout;
349 u8 flow_control;
350 u8 local_cm_response_timeout;
351 u8 retry_count;
352 u8 rnr_retry_count;
353 u8 max_cm_retries;
354 u8 srq;
355};
356
357/**
358 * ib_send_cm_req - Sends a connection request to the remote node.
359 * @cm_id: Connection identifier that will be associated with the
360 * connection request.
361 * @param: Connection request information needed to establish the
362 * connection.
363 */
364int ib_send_cm_req(struct ib_cm_id *cm_id,
365 struct ib_cm_req_param *param);
366
367struct ib_cm_rep_param {
368 u32 qp_num;
369 u32 starting_psn;
370 const void *private_data;
371 u8 private_data_len;
372 u8 responder_resources;
373 u8 initiator_depth;
374 u8 target_ack_delay;
375 u8 failover_accepted;
376 u8 flow_control;
377 u8 rnr_retry_count;
378 u8 srq;
379};
380
381/**
382 * ib_send_cm_rep - Sends a connection reply in response to a connection
383 * request.
384 * @cm_id: Connection identifier that will be associated with the
385 * connection request.
386 * @param: Connection reply information needed to establish the
387 * connection.
388 */
389int ib_send_cm_rep(struct ib_cm_id *cm_id,
390 struct ib_cm_rep_param *param);
391
392/**
393 * ib_send_cm_rtu - Sends a connection ready to use message in response
394 * to a connection reply message.
395 * @cm_id: Connection identifier associated with the connection request.
396 * @private_data: Optional user-defined private data sent with the
397 * ready to use message.
398 * @private_data_len: Size of the private data buffer, in bytes.
399 */
400int ib_send_cm_rtu(struct ib_cm_id *cm_id,
401 const void *private_data,
402 u8 private_data_len);
403
404/**
405 * ib_send_cm_dreq - Sends a disconnection request for an existing
406 * connection.
407 * @cm_id: Connection identifier associated with the connection being
408 * released.
409 * @private_data: Optional user-defined private data sent with the
410 * disconnection request message.
411 * @private_data_len: Size of the private data buffer, in bytes.
412 */
413int ib_send_cm_dreq(struct ib_cm_id *cm_id,
414 const void *private_data,
415 u8 private_data_len);
416
417/**
418 * ib_send_cm_drep - Sends a disconnection reply to a disconnection request.
419 * @cm_id: Connection identifier associated with the connection being
420 * released.
421 * @private_data: Optional user-defined private data sent with the
422 * disconnection reply message.
423 * @private_data_len: Size of the private data buffer, in bytes.
424 *
425 * If the cm_id is in the correct state, the CM will transition the connection
426 * to the timewait state, even if an error occurs sending the DREP message.
427 */
428int ib_send_cm_drep(struct ib_cm_id *cm_id,
429 const void *private_data,
430 u8 private_data_len);
431
432/**
433 * ib_cm_establish - Forces a connection state to established.
434 * @cm_id: Connection identifier to transition to established.
435 *
436 * This routine should be invoked by users who receive messages on a
437 * connected QP before an RTU has been received.
438 */
439int ib_cm_establish(struct ib_cm_id *cm_id);
440
441/**
442 * ib_send_cm_rej - Sends a connection rejection message to the
443 * remote node.
444 * @cm_id: Connection identifier associated with the connection being
445 * rejected.
446 * @reason: Reason for the connection request rejection.
447 * @ari: Optional additional rejection information.
448 * @ari_length: Size of the additional rejection information, in bytes.
449 * @private_data: Optional user-defined private data sent with the
450 * rejection message.
451 * @private_data_len: Size of the private data buffer, in bytes.
452 */
453int ib_send_cm_rej(struct ib_cm_id *cm_id,
454 enum ib_cm_rej_reason reason,
455 void *ari,
456 u8 ari_length,
457 const void *private_data,
458 u8 private_data_len);
459
460/**
461 * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection
462 * message.
463 * @cm_id: Connection identifier associated with the connection message.
464 * @service_timeout: The maximum time required for the sender to reply
465 * to the connection message.
466 * @private_data: Optional user-defined private data sent with the
467 * message receipt acknowledgement.
468 * @private_data_len: Size of the private data buffer, in bytes.
469 */
470int ib_send_cm_mra(struct ib_cm_id *cm_id,
471 u8 service_timeout,
472 const void *private_data,
473 u8 private_data_len);
474
475/**
476 * ib_send_cm_lap - Sends a load alternate path request.
477 * @cm_id: Connection identifier associated with the load alternate path
478 * message.
479 * @alternate_path: A path record that identifies the alternate path to
480 * load.
481 * @private_data: Optional user-defined private data sent with the
482 * load alternate path message.
483 * @private_data_len: Size of the private data buffer, in bytes.
484 */
485int ib_send_cm_lap(struct ib_cm_id *cm_id,
486 struct ib_sa_path_rec *alternate_path,
487 const void *private_data,
488 u8 private_data_len);
489
490/**
491 * ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning
492 * to a specified QP state.
493 * @cm_id: Communication identifier associated with the QP attributes to
494 * initialize.
495 * @qp_attr: On input, specifies the desired QP state. On output, the
496 * mandatory and desired optional attributes will be set in order to
497 * modify the QP to the specified state.
498 * @qp_attr_mask: The QP attribute mask that may be used to transition the
499 * QP to the specified state.
500 *
501 * Users must set the @qp_attr->qp_state to the desired QP state. This call
502 * will set all required attributes for the given transition, along with
503 * known optional attributes. Users may override the attributes returned from
504 * this call before calling ib_modify_qp.
505 */
506int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
507 struct ib_qp_attr *qp_attr,
508 int *qp_attr_mask);
509
510/**
511 * ib_send_cm_apr - Sends an alternate path response message in response to
512 * a load alternate path request.
513 * @cm_id: Connection identifier associated with the alternate path response.
514 * @status: Reply status sent with the alternate path response.
515 * @info: Optional additional information sent with the alternate path
516 * response.
517 * @info_length: Size of the additional information, in bytes.
518 * @private_data: Optional user-defined private data sent with the
519 * alternate path response message.
520 * @private_data_len: Size of the private data buffer, in bytes.
521 */
522int ib_send_cm_apr(struct ib_cm_id *cm_id,
523 enum ib_cm_apr_status status,
524 void *info,
525 u8 info_length,
526 const void *private_data,
527 u8 private_data_len);
528
529struct ib_cm_sidr_req_param {
530 struct ib_sa_path_rec *path;
531 u64 service_id;
532 int timeout_ms;
533 const void *private_data;
534 u8 private_data_len;
535 u8 max_cm_retries;
536 u16 pkey;
537};
538
539/**
540 * ib_send_cm_sidr_req - Sends a service ID resolution request to the
541 * remote node.
542 * @cm_id: Communication identifier that will be associated with the
543 * service ID resolution request.
544 * @param: Service ID resolution request information.
545 */
546int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
547 struct ib_cm_sidr_req_param *param);
548
549struct ib_cm_sidr_rep_param {
550 u32 qp_num;
551 u32 qkey;
552 enum ib_cm_sidr_status status;
553 const void *info;
554 u8 info_length;
555 const void *private_data;
556 u8 private_data_len;
557};
558
559/**
560 * ib_send_cm_sidr_rep - Sends a service ID resolution reply to the
561 * remote node.
562 * @cm_id: Communication identifier associated with the received service ID
563 * resolution request.
564 * @param: Service ID resolution reply information.
565 */
566int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
567 struct ib_cm_sidr_rep_param *param);
568
569#endif /* IB_CM_H */
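
As a rough usage sketch of the passive side of this API (not part of the patch): allocate a cm_id, listen on a service ID, and respond to REQs from the handler. The service ID, REP parameters, and error handling below are placeholders.

/* Hedged sketch; MY_SERVICE_ID and my_rep_param are placeholders. */
#include <linux/err.h>
#include <ib_cm.h>

#define MY_SERVICE_ID __constant_cpu_to_be64(0x1000ULL)	/* arbitrary example */

static struct ib_cm_rep_param my_rep_param;	/* fill qp_num, psn, ... elsewhere */

static int my_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
	switch (event->event) {
	case IB_CM_REQ_RECEIVED:
		/* cm_id is newly allocated for this REQ; we now own it */
		return ib_send_cm_rep(cm_id, &my_rep_param);
	case IB_CM_RTU_RECEIVED:
		return 0;			/* connection is established */
	default:
		return 0;			/* non-zero would destroy cm_id */
	}
}

static int my_listen(void)
{
	struct ib_cm_id *id = ib_create_cm_id(my_cm_handler, NULL);

	if (IS_ERR(id))
		return PTR_ERR(id);
	return ib_cm_listen(id, MY_SERVICE_ID, 0);	/* mask 0: exact match */
}
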
diff --git a/drivers/infiniband/include/ib_fmr_pool.h b/drivers/infiniband/include/ib_fmr_pool.h
index e8769657cbbb..6c9e24d6e144 100644
--- a/drivers/infiniband/include/ib_fmr_pool.h
+++ b/drivers/infiniband/include/ib_fmr_pool.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 2 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -29,7 +30,7 @@
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 31 * SOFTWARE.
31 * 32 *
32 * $Id: ib_fmr_pool.h 1349 2004-12-16 21:09:43Z roland $ 33 * $Id: ib_fmr_pool.h 2730 2005-06-28 16:43:03Z sean.hefty $
33 */ 34 */
34 35
35#if !defined(IB_FMR_POOL_H) 36#if !defined(IB_FMR_POOL_H)
@@ -78,7 +79,7 @@ struct ib_pool_fmr {
78struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, 79struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
79 struct ib_fmr_pool_param *params); 80 struct ib_fmr_pool_param *params);
80 81
81int ib_destroy_fmr_pool(struct ib_fmr_pool *pool); 82void ib_destroy_fmr_pool(struct ib_fmr_pool *pool);
82 83
83int ib_flush_fmr_pool(struct ib_fmr_pool *pool); 84int ib_flush_fmr_pool(struct ib_fmr_pool *pool);
84 85
diff --git a/drivers/infiniband/include/ib_mad.h b/drivers/infiniband/include/ib_mad.h
index 4a6bf6763a97..491b6f25b3b8 100644
--- a/drivers/infiniband/include/ib_mad.h
+++ b/drivers/infiniband/include/ib_mad.h
@@ -33,12 +33,14 @@
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE. 34 * SOFTWARE.
35 * 35 *
36 * $Id: ib_mad.h 1389 2004-12-27 22:56:47Z roland $ 36 * $Id: ib_mad.h 2775 2005-07-02 13:42:12Z halr $
37 */ 37 */
38 38
39#if !defined( IB_MAD_H ) 39#if !defined( IB_MAD_H )
40#define IB_MAD_H 40#define IB_MAD_H
41 41
42#include <linux/pci.h>
43
42#include <ib_verbs.h> 44#include <ib_verbs.h>
43 45
44/* Management base version */ 46/* Management base version */
@@ -56,6 +58,8 @@
56#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30 58#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30
57#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F 59#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F
58 60
61#define IB_OPENIB_OUI (0x001405)
62
59/* Management methods */ 63/* Management methods */
60#define IB_MGMT_METHOD_GET 0x01 64#define IB_MGMT_METHOD_GET 0x01
61#define IB_MGMT_METHOD_SET 0x02 65#define IB_MGMT_METHOD_SET 0x02
@@ -70,18 +74,37 @@
70 74
71#define IB_MGMT_MAX_METHODS 128 75#define IB_MGMT_MAX_METHODS 128
72 76
77/* RMPP information */
78#define IB_MGMT_RMPP_VERSION 1
79
80#define IB_MGMT_RMPP_TYPE_DATA 1
81#define IB_MGMT_RMPP_TYPE_ACK 2
82#define IB_MGMT_RMPP_TYPE_STOP 3
83#define IB_MGMT_RMPP_TYPE_ABORT 4
84
85#define IB_MGMT_RMPP_FLAG_ACTIVE 1
86#define IB_MGMT_RMPP_FLAG_FIRST (1<<1)
87#define IB_MGMT_RMPP_FLAG_LAST (1<<2)
88
89#define IB_MGMT_RMPP_NO_RESPTIME 0x1F
90
91#define IB_MGMT_RMPP_STATUS_SUCCESS 0
92#define IB_MGMT_RMPP_STATUS_RESX 1
93#define IB_MGMT_RMPP_STATUS_T2L 118
94#define IB_MGMT_RMPP_STATUS_BAD_LEN 119
95#define IB_MGMT_RMPP_STATUS_BAD_SEG 120
96#define IB_MGMT_RMPP_STATUS_BADT 121
97#define IB_MGMT_RMPP_STATUS_W2S 122
98#define IB_MGMT_RMPP_STATUS_S2B 123
99#define IB_MGMT_RMPP_STATUS_BAD_STATUS 124
100#define IB_MGMT_RMPP_STATUS_UNV 125
101#define IB_MGMT_RMPP_STATUS_TMR 126
102#define IB_MGMT_RMPP_STATUS_UNSPEC 127
103
73#define IB_QP0 0 104#define IB_QP0 0
74#define IB_QP1 __constant_htonl(1) 105#define IB_QP1 __constant_htonl(1)
75#define IB_QP1_QKEY 0x80010000 106#define IB_QP1_QKEY 0x80010000
76 107#define IB_QP_SET_QKEY 0x80000000
77struct ib_grh {
78 u32 version_tclass_flow;
79 u16 paylen;
80 u8 next_hdr;
81 u8 hop_limit;
82 union ib_gid sgid;
83 union ib_gid dgid;
84} __attribute__ ((packed));
85 108
86struct ib_mad_hdr { 109struct ib_mad_hdr {
87 u8 base_version; 110 u8 base_version;
@@ -94,7 +117,7 @@ struct ib_mad_hdr {
94 u16 attr_id; 117 u16 attr_id;
95 u16 resv; 118 u16 resv;
96 u32 attr_mod; 119 u32 attr_mod;
97} __attribute__ ((packed)); 120};
98 121
99struct ib_rmpp_hdr { 122struct ib_rmpp_hdr {
100 u8 rmpp_version; 123 u8 rmpp_version;
@@ -103,17 +126,41 @@ struct ib_rmpp_hdr {
103 u8 rmpp_status; 126 u8 rmpp_status;
104 u32 seg_num; 127 u32 seg_num;
105 u32 paylen_newwin; 128 u32 paylen_newwin;
129};
130
131typedef u64 __bitwise ib_sa_comp_mask;
132
133#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << n))
134
135/*
136 * ib_sa_hdr and ib_sa_mad structures must be packed because they have
137 * 64-bit fields that are only 32-bit aligned. 64-bit architectures will
138 * lay them out wrong otherwise. (And unfortunately they are sent on
139 * the wire so we can't change the layout)
140 */
141struct ib_sa_hdr {
142 u64 sm_key;
143 u16 attr_offset;
144 u16 reserved;
145 ib_sa_comp_mask comp_mask;
106} __attribute__ ((packed)); 146} __attribute__ ((packed));
107 147
108struct ib_mad { 148struct ib_mad {
109 struct ib_mad_hdr mad_hdr; 149 struct ib_mad_hdr mad_hdr;
110 u8 data[232]; 150 u8 data[232];
111} __attribute__ ((packed)); 151};
112 152
113struct ib_rmpp_mad { 153struct ib_rmpp_mad {
114 struct ib_mad_hdr mad_hdr; 154 struct ib_mad_hdr mad_hdr;
115 struct ib_rmpp_hdr rmpp_hdr; 155 struct ib_rmpp_hdr rmpp_hdr;
116 u8 data[220]; 156 u8 data[220];
157};
158
159struct ib_sa_mad {
160 struct ib_mad_hdr mad_hdr;
161 struct ib_rmpp_hdr rmpp_hdr;
162 struct ib_sa_hdr sa_hdr;
163 u8 data[200];
117} __attribute__ ((packed)); 164} __attribute__ ((packed));
118 165
119struct ib_vendor_mad { 166struct ib_vendor_mad {
@@ -122,7 +169,70 @@ struct ib_vendor_mad {
122 u8 reserved; 169 u8 reserved;
123 u8 oui[3]; 170 u8 oui[3];
124 u8 data[216]; 171 u8 data[216];
125} __attribute__ ((packed)); 172};
173
174/**
175 * ib_mad_send_buf - MAD data buffer and work request for sends.
176 * @mad: References an allocated MAD data buffer. The size of the data
177 * buffer is specified in the @send_wr.length field.
178 * @mapping: DMA mapping information.
179 * @mad_agent: MAD agent that allocated the buffer.
180 * @context: User-controlled context fields.
181 * @send_wr: An initialized work request structure used when sending the MAD.
182 * The wr_id field of the work request is initialized to reference this
183 * data structure.
184 * @sge: A scatter-gather list referenced by the work request.
185 *
186 * Users are responsible for initializing the MAD buffer itself, with the
187 * exception of specifying the payload length field in any RMPP MAD.
188 */
189struct ib_mad_send_buf {
190 struct ib_mad *mad;
191 DECLARE_PCI_UNMAP_ADDR(mapping)
192 struct ib_mad_agent *mad_agent;
193 void *context[2];
194 struct ib_send_wr send_wr;
195 struct ib_sge sge;
196};
197
198/**
199 * ib_get_rmpp_resptime - Returns the RMPP response time.
200 * @rmpp_hdr: An RMPP header.
201 */
202static inline u8 ib_get_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr)
203{
204 return rmpp_hdr->rmpp_rtime_flags >> 3;
205}
206
207/**
208 * ib_get_rmpp_flags - Returns the RMPP flags.
209 * @rmpp_hdr: An RMPP header.
210 */
211static inline u8 ib_get_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr)
212{
213 return rmpp_hdr->rmpp_rtime_flags & 0x7;
214}
215
216/**
217 * ib_set_rmpp_resptime - Sets the response time in an RMPP header.
218 * @rmpp_hdr: An RMPP header.
219 * @rtime: The response time to set.
220 */
221static inline void ib_set_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr, u8 rtime)
222{
223 rmpp_hdr->rmpp_rtime_flags = ib_get_rmpp_flags(rmpp_hdr) | (rtime << 3);
224}
225
226/**
227 * ib_set_rmpp_flags - Sets the flags in an RMPP header.
228 * @rmpp_hdr: An RMPP header.
229 * @flags: The flags to set.
230 */
231static inline void ib_set_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr, u8 flags)
232{
233 rmpp_hdr->rmpp_rtime_flags = (rmpp_hdr->rmpp_rtime_flags & 0xF1) |
234 (flags & 0x7);
235}
126 236
127struct ib_mad_agent; 237struct ib_mad_agent;
128struct ib_mad_send_wc; 238struct ib_mad_send_wc;
@@ -168,6 +278,7 @@ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
168 * ib_mad_agent - Used to track MAD registration with the access layer. 278 * ib_mad_agent - Used to track MAD registration with the access layer.
169 * @device: Reference to device registration is on. 279 * @device: Reference to device registration is on.
170 * @qp: Reference to QP used for sending and receiving MADs. 280 * @qp: Reference to QP used for sending and receiving MADs.
281 * @mr: Memory region for system memory usable for DMA.
171 * @recv_handler: Callback handler for a received MAD. 282 * @recv_handler: Callback handler for a received MAD.
172 * @send_handler: Callback handler for a sent MAD. 283 * @send_handler: Callback handler for a sent MAD.
173 * @snoop_handler: Callback handler for snooped sent MADs. 284 * @snoop_handler: Callback handler for snooped sent MADs.
@@ -176,16 +287,19 @@ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
176 * Unsolicited MADs sent by this client will have the upper 32-bits 287 * Unsolicited MADs sent by this client will have the upper 32-bits
177 * of their TID set to this value. 288 * of their TID set to this value.
178 * @port_num: Port number on which QP is registered 289 * @port_num: Port number on which QP is registered
290 * @rmpp_version: If set, indicates the RMPP version used by this agent.
179 */ 291 */
180struct ib_mad_agent { 292struct ib_mad_agent {
181 struct ib_device *device; 293 struct ib_device *device;
182 struct ib_qp *qp; 294 struct ib_qp *qp;
295 struct ib_mr *mr;
183 ib_mad_recv_handler recv_handler; 296 ib_mad_recv_handler recv_handler;
184 ib_mad_send_handler send_handler; 297 ib_mad_send_handler send_handler;
185 ib_mad_snoop_handler snoop_handler; 298 ib_mad_snoop_handler snoop_handler;
186 void *context; 299 void *context;
187 u32 hi_tid; 300 u32 hi_tid;
188 u8 port_num; 301 u8 port_num;
302 u8 rmpp_version;
189}; 303};
190 304
191/** 305/**
@@ -219,6 +333,7 @@ struct ib_mad_recv_buf {
219 * ib_mad_recv_wc - received MAD information. 333 * ib_mad_recv_wc - received MAD information.
220 * @wc: Completion information for the received data. 334 * @wc: Completion information for the received data.
221 * @recv_buf: Specifies the location of the received data buffer(s). 335 * @recv_buf: Specifies the location of the received data buffer(s).
336 * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers.
222 * @mad_len: The length of the received MAD, without duplicated headers. 337 * @mad_len: The length of the received MAD, without duplicated headers.
223 * 338 *
224 * For received response, the wr_id field of the wc is set to the wr_id 339 * For received response, the wr_id field of the wc is set to the wr_id
@@ -227,6 +342,7 @@ struct ib_mad_recv_buf {
227struct ib_mad_recv_wc { 342struct ib_mad_recv_wc {
228 struct ib_wc *wc; 343 struct ib_wc *wc;
229 struct ib_mad_recv_buf recv_buf; 344 struct ib_mad_recv_buf recv_buf;
345 struct list_head rmpp_list;
230 int mad_len; 346 int mad_len;
231}; 347};
232 348
@@ -322,6 +438,16 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent);
322 * @bad_send_wr: Specifies the MAD on which an error was encountered. 438 * @bad_send_wr: Specifies the MAD on which an error was encountered.
323 * 439 *
324 * Sent MADs are not guaranteed to complete in the order that they were posted. 440 * Sent MADs are not guaranteed to complete in the order that they were posted.
441 *
442 * If the MAD requires RMPP, the data buffer should contain a single copy
443 * of the common MAD, RMPP, and class specific headers, followed by the class
444 * defined data. If the class defined data would not divide evenly into
445 * RMPP segments, then space must be allocated at the end of the referenced
446 * buffer for any required padding. To indicate the amount of class defined
447 * data being transferred, the paylen_newwin field in the RMPP header should
448 * be set to the size of the class specific header plus the amount of class
449 * defined data being transferred. The paylen_newwin field should be
450 * specified in network-byte order.
325 */ 451 */
326int ib_post_send_mad(struct ib_mad_agent *mad_agent, 452int ib_post_send_mad(struct ib_mad_agent *mad_agent,
327 struct ib_send_wr *send_wr, 453 struct ib_send_wr *send_wr,
@@ -334,15 +460,13 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent,
334 * referenced buffer should be at least the size of the mad_len specified 460 * referenced buffer should be at least the size of the mad_len specified
335 * by @mad_recv_wc. 461 * by @mad_recv_wc.
336 * 462 *
337 * This call copies a chain of received RMPP MADs into a single data buffer, 463 * This call copies a chain of received MAD segments into a single data buffer,
338 * removing duplicated headers. 464 * removing duplicated headers.
339 */ 465 */
340void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, 466void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf);
341 void *buf);
342 467
343/** 468/**
344 * ib_free_recv_mad - Returns data buffers used to receive a MAD to the 469 * ib_free_recv_mad - Returns data buffers used to receive a MAD.
345 * access layer.
346 * @mad_recv_wc: Work completion information for a received MAD. 470 * @mad_recv_wc: Work completion information for a received MAD.
347 * 471 *
348 * Clients receiving MADs through their ib_mad_recv_handler must call this 472 * Clients receiving MADs through their ib_mad_recv_handler must call this
@@ -358,8 +482,18 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc);
358 * MADs will be returned to the user through the corresponding 482 * MADs will be returned to the user through the corresponding
359 * ib_mad_send_handler. 483 * ib_mad_send_handler.
360 */ 484 */
361void ib_cancel_mad(struct ib_mad_agent *mad_agent, 485void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id);
362 u64 wr_id); 486
487/**
488 * ib_modify_mad - Modifies an outstanding send MAD operation.
489 * @mad_agent: Specifies the registration associated with sent MAD.
490 * @wr_id: Indicates the work request identifier of the MAD to modify.
491 * @timeout_ms: New timeout value for sent MAD.
492 *
493 * This call will reset the timeout value for a sent MAD to the specified
494 * value.
495 */
496int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms);
363 497
364/** 498/**
365 * ib_redirect_mad_qp - Registers a QP for MAD services. 499 * ib_redirect_mad_qp - Registers a QP for MAD services.
@@ -401,4 +535,43 @@ struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
401int ib_process_mad_wc(struct ib_mad_agent *mad_agent, 535int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
402 struct ib_wc *wc); 536 struct ib_wc *wc);
403 537
538/**
539 * ib_create_send_mad - Allocate and initialize a data buffer and work request
540 * for sending a MAD.
541 * @mad_agent: Specifies the registered MAD service to associate with the MAD.
542 * @remote_qpn: Specifies the QPN of the receiving node.
543 * @pkey_index: Specifies which PKey the MAD will be sent using. This field
544 * is valid only if the remote_qpn is QP 1.
545 * @ah: References the address handle used to transfer to the remote node.
546 * @rmpp_active: Indicates if the send will enable RMPP.
547 * @hdr_len: Indicates the size of the data header of the MAD. This length
548 * should include the common MAD header, RMPP header, plus any class
549 * specific header.
550 * @data_len: Indicates the size of any user-transferred data. The call will
551 * automatically adjust the allocated buffer size to account for any
552 * additional padding that may be necessary.
553 * @gfp_mask: GFP mask used for the memory allocation.
554 *
555 * This is a helper routine that may be used to allocate a MAD. Users are
556 * not required to allocate outbound MADs using this call. The returned
557 * MAD send buffer will reference a data buffer usable for sending a MAD, along
558 * with an initialized work request structure. Users may modify the returned
559 * MAD data buffer or work request before posting the send.
560 *
561 * The returned data buffer will be cleared. Users are responsible for
562 * initializing the common MAD and any class specific headers. If @rmpp_active
563 * is set, the RMPP header will be initialized for sending.
564 */
565struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
566 u32 remote_qpn, u16 pkey_index,
567 struct ib_ah *ah, int rmpp_active,
568 int hdr_len, int data_len,
569 unsigned int __nocast gfp_mask);
570
571/**
572 * ib_free_send_mad - Returns data buffers used to send a MAD.
573 * @send_buf: Previously allocated send data buffer.
574 */
575void ib_free_send_mad(struct ib_mad_send_buf *send_buf);
576
404#endif /* IB_MAD_H */ 577#endif /* IB_MAD_H */
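
A hedged sketch of the new send-buffer helper in action, tying ib_create_send_mad() to the RMPP paylen_newwin rule documented above. The contiguous header+data layout of msg->mad and all lengths here are assumptions for illustration, not a documented contract.

/* Hedged sketch: allocate an RMPP send buffer and post it. */
#include <linux/err.h>
#include <linux/string.h>
#include <ib_mad.h>

static int send_big_mad(struct ib_mad_agent *agent, struct ib_ah *ah,
			int class_hdr_len, void *payload, int payload_len)
{
	struct ib_mad_send_buf *msg;
	struct ib_send_wr *bad_wr;
	int hdr_len = sizeof(struct ib_mad_hdr) +
		      sizeof(struct ib_rmpp_hdr) + class_hdr_len;

	msg = ib_create_send_mad(agent, 1 /* QP1 */, 0 /* pkey index */,
				 ah, 1 /* rmpp_active */,
				 hdr_len, payload_len, GFP_KERNEL);
	if (IS_ERR(msg))
		return PTR_ERR(msg);

	/* caller fills common MAD + class headers; RMPP header is pre-set */
	memcpy((u8 *) msg->mad + hdr_len, payload, payload_len);	/* assumed layout */

	/* paylen_newwin: class header + class data, network-byte order */
	((struct ib_rmpp_mad *) msg->mad)->rmpp_hdr.paylen_newwin =
		cpu_to_be32(class_hdr_len + payload_len);

	return ib_post_send_mad(agent, &msg->send_wr, &bad_wr);
}
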
diff --git a/drivers/infiniband/include/ib_sa.h b/drivers/infiniband/include/ib_sa.h
index 00222285eb9a..6d999f7b5d93 100644
--- a/drivers/infiniband/include/ib_sa.h
+++ b/drivers/infiniband/include/ib_sa.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -29,7 +30,7 @@
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 31 * SOFTWARE.
31 * 32 *
32 * $Id: ib_sa.h 1389 2004-12-27 22:56:47Z roland $ 33 * $Id: ib_sa.h 2811 2005-07-06 18:11:43Z halr $
33 */ 34 */
34 35
35#ifndef IB_SA_H 36#ifndef IB_SA_H
@@ -41,9 +42,11 @@
41#include <ib_mad.h> 42#include <ib_mad.h>
42 43
43enum { 44enum {
44 IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */ 45 IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */
45 46
46 IB_SA_METHOD_DELETE = 0x15 47 IB_SA_METHOD_GET_TABLE = 0x12,
48 IB_SA_METHOD_GET_TABLE_RESP = 0x92,
49 IB_SA_METHOD_DELETE = 0x15
47}; 50};
48 51
49enum ib_sa_selector { 52enum ib_sa_selector {
@@ -87,10 +90,6 @@ static inline int ib_sa_rate_enum_to_int(enum ib_sa_rate rate)
87 } 90 }
88} 91}
89 92
90typedef u64 __bitwise ib_sa_comp_mask;
91
92#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << n))
93
94/* 93/*
95 * Structures for SA records are named "struct ib_sa_xxx_rec." No 94 * Structures for SA records are named "struct ib_sa_xxx_rec." No
96 * attempt is made to pack structures to match the physical layout of 95 * attempt is made to pack structures to match the physical layout of
@@ -195,6 +194,61 @@ struct ib_sa_mcmember_rec {
195 int proxy_join; 194 int proxy_join;
196}; 195};
197 196
197/* Service Record Component Mask Sec 15.2.5.14 Ver 1.1 */
198#define IB_SA_SERVICE_REC_SERVICE_ID IB_SA_COMP_MASK( 0)
199#define IB_SA_SERVICE_REC_SERVICE_GID IB_SA_COMP_MASK( 1)
200#define IB_SA_SERVICE_REC_SERVICE_PKEY IB_SA_COMP_MASK( 2)
201/* reserved: 3 */
202#define IB_SA_SERVICE_REC_SERVICE_LEASE IB_SA_COMP_MASK( 4)
203#define IB_SA_SERVICE_REC_SERVICE_KEY IB_SA_COMP_MASK( 5)
204#define IB_SA_SERVICE_REC_SERVICE_NAME IB_SA_COMP_MASK( 6)
205#define IB_SA_SERVICE_REC_SERVICE_DATA8_0 IB_SA_COMP_MASK( 7)
206#define IB_SA_SERVICE_REC_SERVICE_DATA8_1 IB_SA_COMP_MASK( 8)
207#define IB_SA_SERVICE_REC_SERVICE_DATA8_2 IB_SA_COMP_MASK( 9)
208#define IB_SA_SERVICE_REC_SERVICE_DATA8_3 IB_SA_COMP_MASK(10)
209#define IB_SA_SERVICE_REC_SERVICE_DATA8_4 IB_SA_COMP_MASK(11)
210#define IB_SA_SERVICE_REC_SERVICE_DATA8_5 IB_SA_COMP_MASK(12)
211#define IB_SA_SERVICE_REC_SERVICE_DATA8_6 IB_SA_COMP_MASK(13)
212#define IB_SA_SERVICE_REC_SERVICE_DATA8_7 IB_SA_COMP_MASK(14)
213#define IB_SA_SERVICE_REC_SERVICE_DATA8_8 IB_SA_COMP_MASK(15)
214#define IB_SA_SERVICE_REC_SERVICE_DATA8_9 IB_SA_COMP_MASK(16)
215#define IB_SA_SERVICE_REC_SERVICE_DATA8_10 IB_SA_COMP_MASK(17)
216#define IB_SA_SERVICE_REC_SERVICE_DATA8_11 IB_SA_COMP_MASK(18)
217#define IB_SA_SERVICE_REC_SERVICE_DATA8_12 IB_SA_COMP_MASK(19)
218#define IB_SA_SERVICE_REC_SERVICE_DATA8_13 IB_SA_COMP_MASK(20)
219#define IB_SA_SERVICE_REC_SERVICE_DATA8_14 IB_SA_COMP_MASK(21)
220#define IB_SA_SERVICE_REC_SERVICE_DATA8_15 IB_SA_COMP_MASK(22)
221#define IB_SA_SERVICE_REC_SERVICE_DATA16_0 IB_SA_COMP_MASK(23)
222#define IB_SA_SERVICE_REC_SERVICE_DATA16_1 IB_SA_COMP_MASK(24)
223#define IB_SA_SERVICE_REC_SERVICE_DATA16_2 IB_SA_COMP_MASK(25)
224#define IB_SA_SERVICE_REC_SERVICE_DATA16_3 IB_SA_COMP_MASK(26)
225#define IB_SA_SERVICE_REC_SERVICE_DATA16_4 IB_SA_COMP_MASK(27)
226#define IB_SA_SERVICE_REC_SERVICE_DATA16_5 IB_SA_COMP_MASK(28)
227#define IB_SA_SERVICE_REC_SERVICE_DATA16_6 IB_SA_COMP_MASK(29)
228#define IB_SA_SERVICE_REC_SERVICE_DATA16_7 IB_SA_COMP_MASK(30)
229#define IB_SA_SERVICE_REC_SERVICE_DATA32_0 IB_SA_COMP_MASK(31)
230#define IB_SA_SERVICE_REC_SERVICE_DATA32_1 IB_SA_COMP_MASK(32)
231#define IB_SA_SERVICE_REC_SERVICE_DATA32_2 IB_SA_COMP_MASK(33)
232#define IB_SA_SERVICE_REC_SERVICE_DATA32_3 IB_SA_COMP_MASK(34)
233#define IB_SA_SERVICE_REC_SERVICE_DATA64_0 IB_SA_COMP_MASK(35)
234#define IB_SA_SERVICE_REC_SERVICE_DATA64_1 IB_SA_COMP_MASK(36)
235
236#define IB_DEFAULT_SERVICE_LEASE 0xFFFFFFFF
237
238struct ib_sa_service_rec {
239 u64 id;
240 union ib_gid gid;
241 u16 pkey;
242 /* reserved */
243 u32 lease;
244 u8 key[16];
245 u8 name[64];
246 u8 data8[16];
247 u16 data16[8];
248 u32 data32[4];
249 u64 data64[2];
250};
251
198struct ib_sa_query; 252struct ib_sa_query;
199 253
200void ib_sa_cancel_query(int id, struct ib_sa_query *query); 254void ib_sa_cancel_query(int id, struct ib_sa_query *query);
@@ -202,7 +256,7 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query);
202int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, 256int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
203 struct ib_sa_path_rec *rec, 257 struct ib_sa_path_rec *rec,
204 ib_sa_comp_mask comp_mask, 258 ib_sa_comp_mask comp_mask,
205 int timeout_ms, int gfp_mask, 259 int timeout_ms, unsigned int __nocast gfp_mask,
206 void (*callback)(int status, 260 void (*callback)(int status,
207 struct ib_sa_path_rec *resp, 261 struct ib_sa_path_rec *resp,
208 void *context), 262 void *context),
@@ -213,13 +267,24 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
213 u8 method, 267 u8 method,
214 struct ib_sa_mcmember_rec *rec, 268 struct ib_sa_mcmember_rec *rec,
215 ib_sa_comp_mask comp_mask, 269 ib_sa_comp_mask comp_mask,
216 int timeout_ms, int gfp_mask, 270 int timeout_ms, unsigned int __nocast gfp_mask,
217 void (*callback)(int status, 271 void (*callback)(int status,
218 struct ib_sa_mcmember_rec *resp, 272 struct ib_sa_mcmember_rec *resp,
219 void *context), 273 void *context),
220 void *context, 274 void *context,
221 struct ib_sa_query **query); 275 struct ib_sa_query **query);
222 276
277int ib_sa_service_rec_query(struct ib_device *device, u8 port_num,
278 u8 method,
279 struct ib_sa_service_rec *rec,
280 ib_sa_comp_mask comp_mask,
281 int timeout_ms, unsigned int __nocast gfp_mask,
282 void (*callback)(int status,
283 struct ib_sa_service_rec *resp,
284 void *context),
285 void *context,
286 struct ib_sa_query **sa_query);
287
223/** 288/**
224 * ib_sa_mcmember_rec_set - Start an MCMember set query 289 * ib_sa_mcmember_rec_set - Start an MCMember set query
225 * @device:device to send query on 290 * @device:device to send query on
@@ -248,7 +313,7 @@ static inline int
248ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num, 313ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num,
249 struct ib_sa_mcmember_rec *rec, 314 struct ib_sa_mcmember_rec *rec,
250 ib_sa_comp_mask comp_mask, 315 ib_sa_comp_mask comp_mask,
251 int timeout_ms, int gfp_mask, 316 int timeout_ms, unsigned int __nocast gfp_mask,
252 void (*callback)(int status, 317 void (*callback)(int status,
253 struct ib_sa_mcmember_rec *resp, 318 struct ib_sa_mcmember_rec *resp,
254 void *context), 319 void *context),
@@ -290,7 +355,7 @@ static inline int
290ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num, 355ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num,
291 struct ib_sa_mcmember_rec *rec, 356 struct ib_sa_mcmember_rec *rec,
292 ib_sa_comp_mask comp_mask, 357 ib_sa_comp_mask comp_mask,
293 int timeout_ms, int gfp_mask, 358 int timeout_ms, unsigned int __nocast gfp_mask,
294 void (*callback)(int status, 359 void (*callback)(int status,
295 struct ib_sa_mcmember_rec *resp, 360 struct ib_sa_mcmember_rec *resp,
296 void *context), 361 void *context),
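
For the new service record query, a hedged caller sketch follows; the device, port, timeout, and component mask choice are placeholders. On success the call returns a request ID usable with ib_sa_cancel_query(); a negative value indicates an error.

/* Hedged sketch of an asynchronous ServiceRecord GET. */
#include <ib_sa.h>

static void my_service_cb(int status, struct ib_sa_service_rec *resp,
			  void *context)
{
	if (status)
		return;		/* query failed or timed out */
	/* resp is valid only during the callback; copy fields if needed */
}

static int lookup_service(struct ib_device *dev, u8 port, u64 service_id)
{
	struct ib_sa_service_rec rec = { .id = service_id };
	struct ib_sa_query *query;

	return ib_sa_service_rec_query(dev, port, IB_MGMT_METHOD_GET, &rec,
				       IB_SA_SERVICE_REC_SERVICE_ID,
				       1000 /* ms */, GFP_KERNEL,
				       my_service_cb, NULL, &query);
}
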
diff --git a/drivers/infiniband/include/ib_user_cm.h b/drivers/infiniband/include/ib_user_cm.h
new file mode 100644
index 000000000000..500b1af6ff77
--- /dev/null
+++ b/drivers/infiniband/include/ib_user_cm.h
@@ -0,0 +1,328 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ib_user_cm.h 2576 2005-06-09 17:00:30Z libor $
33 */
34
35#ifndef IB_USER_CM_H
36#define IB_USER_CM_H
37
38#include <linux/types.h>
39
40#define IB_USER_CM_ABI_VERSION 1
41
42enum {
43 IB_USER_CM_CMD_CREATE_ID,
44 IB_USER_CM_CMD_DESTROY_ID,
45 IB_USER_CM_CMD_ATTR_ID,
46
47 IB_USER_CM_CMD_LISTEN,
48 IB_USER_CM_CMD_ESTABLISH,
49
50 IB_USER_CM_CMD_SEND_REQ,
51 IB_USER_CM_CMD_SEND_REP,
52 IB_USER_CM_CMD_SEND_RTU,
53 IB_USER_CM_CMD_SEND_DREQ,
54 IB_USER_CM_CMD_SEND_DREP,
55 IB_USER_CM_CMD_SEND_REJ,
56 IB_USER_CM_CMD_SEND_MRA,
57 IB_USER_CM_CMD_SEND_LAP,
58 IB_USER_CM_CMD_SEND_APR,
59 IB_USER_CM_CMD_SEND_SIDR_REQ,
60 IB_USER_CM_CMD_SEND_SIDR_REP,
61
62 IB_USER_CM_CMD_EVENT,
63};
64/*
65 * command ABI structures.
66 */
67struct ib_ucm_cmd_hdr {
68 __u32 cmd;
69 __u16 in;
70 __u16 out;
71};
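
This header frames every command written to the ucm device. A hypothetical userspace sketch of marshalling CREATE_ID follows; the write()-based framing and in/out semantics are assumptions drawn from the struct layout, and the file descriptor is expected to come from opening the ucm character device.

/* Hypothetical userspace sketch: issue CREATE_ID through the ucm ABI. */
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <ib_user_cm.h>

struct msg {
	struct ib_ucm_cmd_hdr hdr;
	struct ib_ucm_create_id cmd;
};

static int ucm_create_id(int fd, struct ib_ucm_create_id_resp *resp)
{
	struct msg m;

	memset(&m, 0, sizeof m);
	m.hdr.cmd = IB_USER_CM_CMD_CREATE_ID;
	m.hdr.in  = sizeof m.cmd;		/* payload written to kernel */
	m.hdr.out = sizeof *resp;		/* response buffer size */
	m.cmd.response = (uintptr_t) resp;	/* pointer passed as __u64 */

	return write(fd, &m, sizeof m) == sizeof m ? 0 : -1;
}
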
72
73struct ib_ucm_create_id {
74 __u64 response;
75};
76
77struct ib_ucm_create_id_resp {
78 __u32 id;
79};
80
81struct ib_ucm_destroy_id {
82 __u32 id;
83};
84
85struct ib_ucm_attr_id {
86 __u64 response;
87 __u32 id;
88};
89
90struct ib_ucm_attr_id_resp {
91 __u64 service_id;
92 __u64 service_mask;
93 __u32 local_id;
94 __u32 remote_id;
95};
96
97struct ib_ucm_listen {
98 __u64 service_id;
99 __u64 service_mask;
100 __u32 id;
101};
102
103struct ib_ucm_establish {
104 __u32 id;
105};
106
107struct ib_ucm_private_data {
108 __u64 data;
109 __u32 id;
110 __u8 len;
111 __u8 reserved[3];
112};
113
114struct ib_ucm_path_rec {
115 __u8 dgid[16];
116 __u8 sgid[16];
117 __u16 dlid;
118 __u16 slid;
119 __u32 raw_traffic;
120 __u32 flow_label;
121 __u32 reversible;
122 __u32 mtu;
123 __u16 pkey;
124 __u8 hop_limit;
125 __u8 traffic_class;
126 __u8 numb_path;
127 __u8 sl;
128 __u8 mtu_selector;
129 __u8 rate_selector;
130 __u8 rate;
131 __u8 packet_life_time_selector;
132 __u8 packet_life_time;
133 __u8 preference;
134};
135
136struct ib_ucm_req {
137 __u32 id;
138 __u32 qpn;
139 __u32 qp_type;
140 __u32 psn;
141 __u64 sid;
142 __u64 data;
143 __u64 primary_path;
144 __u64 alternate_path;
145 __u8 len;
146 __u8 peer_to_peer;
147 __u8 responder_resources;
148 __u8 initiator_depth;
149 __u8 remote_cm_response_timeout;
150 __u8 flow_control;
151 __u8 local_cm_response_timeout;
152 __u8 retry_count;
153 __u8 rnr_retry_count;
154 __u8 max_cm_retries;
155 __u8 srq;
156 __u8 reserved[1];
157};
158
159struct ib_ucm_rep {
160 __u64 data;
161 __u32 id;
162 __u32 qpn;
163 __u32 psn;
164 __u8 len;
165 __u8 responder_resources;
166 __u8 initiator_depth;
167 __u8 target_ack_delay;
168 __u8 failover_accepted;
169 __u8 flow_control;
170 __u8 rnr_retry_count;
171 __u8 srq;
172};
173
174struct ib_ucm_info {
175 __u32 id;
176 __u32 status;
177 __u64 info;
178 __u64 data;
179 __u8 info_len;
180 __u8 data_len;
181 __u8 reserved[2];
182};
183
184struct ib_ucm_mra {
185 __u64 data;
186 __u32 id;
187 __u8 len;
188 __u8 timeout;
189 __u8 reserved[2];
190};
191
192struct ib_ucm_lap {
193 __u64 path;
194 __u64 data;
195 __u32 id;
196 __u8 len;
197 __u8 reserved[3];
198};
199
200struct ib_ucm_sidr_req {
201 __u32 id;
202 __u32 timeout;
203 __u64 sid;
204 __u64 data;
205 __u64 path;
206 __u16 pkey;
207 __u8 len;
208 __u8 max_cm_retries;
209};
210
211struct ib_ucm_sidr_rep {
212 __u32 id;
213 __u32 qpn;
214 __u32 qkey;
215 __u32 status;
216 __u64 info;
217 __u64 data;
218 __u8 info_len;
219 __u8 data_len;
220 __u8 reserved[2];
221};
222/*
223 * event notification ABI structures.
224 */
225struct ib_ucm_event_get {
226 __u64 response;
227 __u64 data;
228 __u64 info;
229 __u8 data_len;
230 __u8 info_len;
231 __u8 reserved[2];
232};
233
234struct ib_ucm_req_event_resp {
235 __u32 listen_id;
236 /* device */
237 /* port */
238 struct ib_ucm_path_rec primary_path;
239 struct ib_ucm_path_rec alternate_path;
240 __u64 remote_ca_guid;
241 __u32 remote_qkey;
242 __u32 remote_qpn;
243 __u32 qp_type;
244 __u32 starting_psn;
245 __u8 responder_resources;
246 __u8 initiator_depth;
247 __u8 local_cm_response_timeout;
248 __u8 flow_control;
249 __u8 remote_cm_response_timeout;
250 __u8 retry_count;
251 __u8 rnr_retry_count;
252 __u8 srq;
253};
254
255struct ib_ucm_rep_event_resp {
256 __u64 remote_ca_guid;
257 __u32 remote_qkey;
258 __u32 remote_qpn;
259 __u32 starting_psn;
260 __u8 responder_resources;
261 __u8 initiator_depth;
262 __u8 target_ack_delay;
263 __u8 failover_accepted;
264 __u8 flow_control;
265 __u8 rnr_retry_count;
266 __u8 srq;
267 __u8 reserved[1];
268};
269
270struct ib_ucm_rej_event_resp {
271 __u32 reason;
272 /* ari in ib_ucm_event_get info field. */
273};
274
275struct ib_ucm_mra_event_resp {
276 __u8 timeout;
277 __u8 reserved[3];
278};
279
280struct ib_ucm_lap_event_resp {
281 struct ib_ucm_path_rec path;
282};
283
284struct ib_ucm_apr_event_resp {
285 __u32 status;
286 /* apr info in ib_ucm_event_get info field. */
287};
288
289struct ib_ucm_sidr_req_event_resp {
290 __u32 listen_id;
291 /* device */
292 /* port */
293 __u16 pkey;
294 __u8 reserved[2];
295};
296
297struct ib_ucm_sidr_rep_event_resp {
298 __u32 status;
299 __u32 qkey;
300 __u32 qpn;
301 /* info in ib_ucm_event_get info field. */
302};
303
304#define IB_UCM_PRES_DATA 0x01
305#define IB_UCM_PRES_INFO 0x02
306#define IB_UCM_PRES_PRIMARY 0x04
307#define IB_UCM_PRES_ALTERNATE 0x08
308
309struct ib_ucm_event_resp {
310 __u32 id;
311 __u32 event;
312 __u32 present;
313 union {
314 struct ib_ucm_req_event_resp req_resp;
315 struct ib_ucm_rep_event_resp rep_resp;
316 struct ib_ucm_rej_event_resp rej_resp;
317 struct ib_ucm_mra_event_resp mra_resp;
318 struct ib_ucm_lap_event_resp lap_resp;
319 struct ib_ucm_apr_event_resp apr_resp;
320
321 struct ib_ucm_sidr_req_event_resp sidr_req_resp;
322 struct ib_ucm_sidr_rep_event_resp sidr_rep_resp;
323
324 __u32 send_status;
325 } u;
326};
327
328#endif /* IB_USER_CM_H */
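
The present bitmask above tells the consumer which optional pieces of an event were actually delivered. A reader-side sketch for a REQ event already fetched via IB_USER_CM_CMD_EVENT -- resp is the struct ib_ucm_event_resp the kernel filled in, data/data_len come from the buffer registered in struct ib_ucm_event_get, and the consume_* helpers are hypothetical:

if (resp.present & IB_UCM_PRES_DATA)
	consume_private_data(data, data_len);

if (resp.present & IB_UCM_PRES_PRIMARY)
	consume_path(&resp.u.req_resp.primary_path);

if (resp.present & IB_UCM_PRES_ALTERNATE)
	consume_path(&resp.u.req_resp.alternate_path);
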
diff --git a/drivers/infiniband/include/ib_user_mad.h b/drivers/infiniband/include/ib_user_mad.h
index 06ad4a6075fa..a9a56b50aacc 100644
--- a/drivers/infiniband/include/ib_user_mad.h
+++ b/drivers/infiniband/include/ib_user_mad.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -29,7 +30,7 @@
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 31 * SOFTWARE.
31 * 32 *
32 * $Id: ib_user_mad.h 1389 2004-12-27 22:56:47Z roland $ 33 * $Id: ib_user_mad.h 2814 2005-07-06 19:14:09Z halr $
33 */ 34 */
34 35
35#ifndef IB_USER_MAD_H 36#ifndef IB_USER_MAD_H
@@ -42,7 +43,7 @@
42 * Increment this value if any changes that break userspace ABI 43 * Increment this value if any changes that break userspace ABI
43 * compatibility are made. 44 * compatibility are made.
44 */ 45 */
45#define IB_USER_MAD_ABI_VERSION 2 46#define IB_USER_MAD_ABI_VERSION 5
46 47
47/* 48/*
48 * Make sure that all structs defined in this file remain laid out so 49 * Make sure that all structs defined in this file remain laid out so
@@ -51,13 +52,13 @@
51 */ 52 */
52 53
53/** 54/**
54 * ib_user_mad - MAD packet 55 * ib_user_mad_hdr - MAD packet header
55 * @data - Contents of MAD
56 * @id - ID of agent MAD received with/to be sent with 56 * @id - ID of agent MAD received with/to be sent with
57 * @status - 0 on successful receive, ETIMEDOUT if no response 57 * @status - 0 on successful receive, ETIMEDOUT if no response
58 * received (transaction ID in data[] will be set to TID of original 58 * received (transaction ID in data[] will be set to TID of original
59 * request) (ignored on send) 59 * request) (ignored on send)
60 * @timeout_ms - Milliseconds to wait for response (unset on receive) 60 * @timeout_ms - Milliseconds to wait for response (unset on receive)
61 * @retries - Number of automatic retries to attempt
61 * @qpn - Remote QP number received from/to be sent to 62 * @qpn - Remote QP number received from/to be sent to
62 * @qkey - Remote Q_Key to be sent with (unset on receive) 63 * @qkey - Remote Q_Key to be sent with (unset on receive)
63 * @lid - Remote lid received from/to be sent to 64 * @lid - Remote lid received from/to be sent to
@@ -72,11 +73,12 @@
72 * 73 *
73 * All multi-byte quantities are stored in network (big endian) byte order. 74 * All multi-byte quantities are stored in network (big endian) byte order.
74 */ 75 */
75struct ib_user_mad { 76struct ib_user_mad_hdr {
76 __u8 data[256];
77 __u32 id; 77 __u32 id;
78 __u32 status; 78 __u32 status;
79 __u32 timeout_ms; 79 __u32 timeout_ms;
80 __u32 retries;
81 __u32 length;
80 __u32 qpn; 82 __u32 qpn;
81 __u32 qkey; 83 __u32 qkey;
82 __u16 lid; 84 __u16 lid;
@@ -91,6 +93,17 @@ struct ib_user_mad {
91}; 93};
92 94
93/** 95/**
96 * ib_user_mad - MAD packet
97 * @hdr - MAD packet header
98 * @data - Contents of MAD
99 *
100 */
101struct ib_user_mad {
102 struct ib_user_mad_hdr hdr;
103 __u8 data[0];
104};
105
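Splitting the packet into ib_user_mad_hdr plus a zero-length data[] array means the caller now sizes the buffer itself -- 256 bytes covers one unsegmented MAD, larger buffers hold reassembled RMPP transfers, and hdr.length reports the actual data size on receive. A userspace sketch (consume_mad is a hypothetical helper):

#include <stdlib.h>
#include <unistd.h>
#include "ib_user_mad.h"

static void read_one_mad(int fd)
{
	size_t len = sizeof(struct ib_user_mad) + 256;
	struct ib_user_mad *umad = malloc(len);

	if (umad && read(fd, umad, len) > 0)
		consume_mad(umad->data, umad->hdr.length);
	free(umad);
}
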
106/**
94 * ib_user_mad_reg_req - MAD registration request 107 * ib_user_mad_reg_req - MAD registration request
95 * @id - Set by the kernel; used to identify agent in future requests. 108 * @id - Set by the kernel; used to identify agent in future requests.
96 * @qpn - Queue pair number; must be 0 or 1. 109 * @qpn - Queue pair number; must be 0 or 1.
@@ -103,6 +116,8 @@ struct ib_user_mad {
103 * management class to receive. 116 * management class to receive.
104 * @oui: Indicates IEEE OUI when mgmt_class is a vendor class 117 * @oui: Indicates IEEE OUI when mgmt_class is a vendor class
105 * in the range from 0x30 to 0x4f. Otherwise not used. 118 * in the range from 0x30 to 0x4f. Otherwise not used.
119 * @rmpp_version: If set, indicates the RMPP version used.
120 *
106 */ 121 */
107struct ib_user_mad_reg_req { 122struct ib_user_mad_reg_req {
108 __u32 id; 123 __u32 id;
@@ -111,6 +126,7 @@ struct ib_user_mad_reg_req {
111 __u8 mgmt_class; 126 __u8 mgmt_class;
112 __u8 mgmt_class_version; 127 __u8 mgmt_class_version;
113 __u8 oui[3]; 128 __u8 oui[3];
129 __u8 rmpp_version;
114}; 130};
115 131
116#define IB_IOCTL_MAGIC 0x1b 132#define IB_IOCTL_MAGIC 0x1b
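
The new rmpp_version field lets an agent declare RMPP support at registration time. A sketch -- the subnet administration class value (0x03) and the IB_USER_MAD_REGISTER_AGENT ioctl come from the wider umad interface and are assumptions here, since only rmpp_version is defined in this hunk:

#include <string.h>
#include <sys/ioctl.h>
#include "ib_user_mad.h"

static int register_rmpp_agent(int fd, __u32 *agent_id)
{
	struct ib_user_mad_reg_req req;

	memset(&req, 0, sizeof req);
	req.qpn                = 1;     /* GSI */
	req.mgmt_class         = 0x03;  /* assumed: subnet administration */
	req.mgmt_class_version = 2;
	req.rmpp_version       = 1;     /* 0 means no RMPP */

	if (ioctl(fd, IB_USER_MAD_REGISTER_AGENT, &req))
		return -1;
	*agent_id = req.id;             /* set by the kernel */
	return 0;
}
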
diff --git a/drivers/infiniband/include/ib_user_verbs.h b/drivers/infiniband/include/ib_user_verbs.h
new file mode 100644
index 000000000000..7c613706af72
--- /dev/null
+++ b/drivers/infiniband/include/ib_user_verbs.h
@@ -0,0 +1,389 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: ib_user_verbs.h 2708 2005-06-24 17:27:21Z roland $
34 */
35
36#ifndef IB_USER_VERBS_H
37#define IB_USER_VERBS_H
38
39#include <linux/types.h>
40
41/*
42 * Increment this value if any changes that break userspace ABI
43 * compatibility are made.
44 */
45#define IB_USER_VERBS_ABI_VERSION 1
46
47enum {
48 IB_USER_VERBS_CMD_QUERY_PARAMS,
49 IB_USER_VERBS_CMD_GET_CONTEXT,
50 IB_USER_VERBS_CMD_QUERY_DEVICE,
51 IB_USER_VERBS_CMD_QUERY_PORT,
52 IB_USER_VERBS_CMD_QUERY_GID,
53 IB_USER_VERBS_CMD_QUERY_PKEY,
54 IB_USER_VERBS_CMD_ALLOC_PD,
55 IB_USER_VERBS_CMD_DEALLOC_PD,
56 IB_USER_VERBS_CMD_CREATE_AH,
57 IB_USER_VERBS_CMD_MODIFY_AH,
58 IB_USER_VERBS_CMD_QUERY_AH,
59 IB_USER_VERBS_CMD_DESTROY_AH,
60 IB_USER_VERBS_CMD_REG_MR,
61 IB_USER_VERBS_CMD_REG_SMR,
62 IB_USER_VERBS_CMD_REREG_MR,
63 IB_USER_VERBS_CMD_QUERY_MR,
64 IB_USER_VERBS_CMD_DEREG_MR,
65 IB_USER_VERBS_CMD_ALLOC_MW,
66 IB_USER_VERBS_CMD_BIND_MW,
67 IB_USER_VERBS_CMD_DEALLOC_MW,
68 IB_USER_VERBS_CMD_CREATE_CQ,
69 IB_USER_VERBS_CMD_RESIZE_CQ,
70 IB_USER_VERBS_CMD_DESTROY_CQ,
71 IB_USER_VERBS_CMD_POLL_CQ,
72 IB_USER_VERBS_CMD_PEEK_CQ,
73 IB_USER_VERBS_CMD_REQ_NOTIFY_CQ,
74 IB_USER_VERBS_CMD_CREATE_QP,
75 IB_USER_VERBS_CMD_QUERY_QP,
76 IB_USER_VERBS_CMD_MODIFY_QP,
77 IB_USER_VERBS_CMD_DESTROY_QP,
78 IB_USER_VERBS_CMD_POST_SEND,
79 IB_USER_VERBS_CMD_POST_RECV,
80 IB_USER_VERBS_CMD_ATTACH_MCAST,
81 IB_USER_VERBS_CMD_DETACH_MCAST
82};
83
84/*
85 * Make sure that all structs defined in this file remain laid out so
86 * that they pack the same way on 32-bit and 64-bit architectures (to
87 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
88 * In particular do not use pointer types -- pass pointers in __u64
89 * instead.
90 */
91
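The portable way to satisfy the no-pointers rule is to cast through unsigned long, so a 32-bit process zero-extends its addresses into the __u64 fields cleanly. A one-line helper (a sketch, not part of this header):

#include <linux/types.h>

static inline __u64 ptr_to_u64(const void *p)
{
	return (__u64) (unsigned long) p;
}
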
92struct ib_uverbs_async_event_desc {
93 __u64 element;
94 __u32 event_type; /* enum ib_event_type */
95 __u32 reserved;
96};
97
98struct ib_uverbs_comp_event_desc {
99 __u64 cq_handle;
100};
101
102/*
103 * All commands from userspace should start with a __u32 command field
104 * followed by __u16 in_words and out_words fields (which give the
105 * length of the command block and response buffer, if any, in 32-bit
106 * words). The kernel driver will read these fields first and read
107 * the rest of the command struct based on these values.
108 */
109
110struct ib_uverbs_cmd_hdr {
111 __u32 command;
112 __u16 in_words;
113 __u16 out_words;
114};
115
116/*
117 * No driver_data for "query params" command, since this is intended
118 * to be a core function with no possible device dependence.
119 */
120struct ib_uverbs_query_params {
121 __u64 response;
122};
123
124struct ib_uverbs_query_params_resp {
125 __u32 num_cq_events;
126};
127
128struct ib_uverbs_get_context {
129 __u64 response;
130 __u64 cq_fd_tab;
131 __u64 driver_data[0];
132};
133
134struct ib_uverbs_get_context_resp {
135 __u32 async_fd;
136 __u32 reserved;
137};
138
139struct ib_uverbs_query_device {
140 __u64 response;
141 __u64 driver_data[0];
142};
143
144struct ib_uverbs_query_device_resp {
145 __u64 fw_ver;
146 __u64 node_guid;
147 __u64 sys_image_guid;
148 __u64 max_mr_size;
149 __u64 page_size_cap;
150 __u32 vendor_id;
151 __u32 vendor_part_id;
152 __u32 hw_ver;
153 __u32 max_qp;
154 __u32 max_qp_wr;
155 __u32 device_cap_flags;
156 __u32 max_sge;
157 __u32 max_sge_rd;
158 __u32 max_cq;
159 __u32 max_cqe;
160 __u32 max_mr;
161 __u32 max_pd;
162 __u32 max_qp_rd_atom;
163 __u32 max_ee_rd_atom;
164 __u32 max_res_rd_atom;
165 __u32 max_qp_init_rd_atom;
166 __u32 max_ee_init_rd_atom;
167 __u32 atomic_cap;
168 __u32 max_ee;
169 __u32 max_rdd;
170 __u32 max_mw;
171 __u32 max_raw_ipv6_qp;
172 __u32 max_raw_ethy_qp;
173 __u32 max_mcast_grp;
174 __u32 max_mcast_qp_attach;
175 __u32 max_total_mcast_qp_attach;
176 __u32 max_ah;
177 __u32 max_fmr;
178 __u32 max_map_per_fmr;
179 __u32 max_srq;
180 __u32 max_srq_wr;
181 __u32 max_srq_sge;
182 __u16 max_pkeys;
183 __u8 local_ca_ack_delay;
184 __u8 phys_port_cnt;
185 __u8 reserved[4];
186};
187
188struct ib_uverbs_query_port {
189 __u64 response;
190 __u8 port_num;
191 __u8 reserved[7];
192 __u64 driver_data[0];
193};
194
195struct ib_uverbs_query_port_resp {
196 __u32 port_cap_flags;
197 __u32 max_msg_sz;
198 __u32 bad_pkey_cntr;
199 __u32 qkey_viol_cntr;
200 __u32 gid_tbl_len;
201 __u16 pkey_tbl_len;
202 __u16 lid;
203 __u16 sm_lid;
204 __u8 state;
205 __u8 max_mtu;
206 __u8 active_mtu;
207 __u8 lmc;
208 __u8 max_vl_num;
209 __u8 sm_sl;
210 __u8 subnet_timeout;
211 __u8 init_type_reply;
212 __u8 active_width;
213 __u8 active_speed;
214 __u8 phys_state;
215 __u8 reserved[3];
216};
217
218struct ib_uverbs_query_gid {
219 __u64 response;
220 __u8 port_num;
221 __u8 index;
222 __u8 reserved[6];
223 __u64 driver_data[0];
224};
225
226struct ib_uverbs_query_gid_resp {
227 __u8 gid[16];
228};
229
230struct ib_uverbs_query_pkey {
231 __u64 response;
232 __u8 port_num;
233 __u8 index;
234 __u8 reserved[6];
235 __u64 driver_data[0];
236};
237
238struct ib_uverbs_query_pkey_resp {
239 __u16 pkey;
240 __u16 reserved;
241};
242
243struct ib_uverbs_alloc_pd {
244 __u64 response;
245 __u64 driver_data[0];
246};
247
248struct ib_uverbs_alloc_pd_resp {
249 __u32 pd_handle;
250};
251
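Tying the command-header convention above to the PD structs just defined: in_words is taken here to count the whole write, header included, in 32-bit words, and fd to be an open uverbs device file (both assumptions). A userspace sketch:

#include <unistd.h>
#include "ib_user_verbs.h"

static int uverbs_alloc_pd(int fd, __u32 *pd_handle)
{
	struct {
		struct ib_uverbs_cmd_hdr  hdr;
		struct ib_uverbs_alloc_pd cmd;
	} msg;
	struct ib_uverbs_alloc_pd_resp resp;

	msg.hdr.command   = IB_USER_VERBS_CMD_ALLOC_PD;
	msg.hdr.in_words  = sizeof msg / 4;
	msg.hdr.out_words = sizeof resp / 4;
	msg.cmd.response  = (__u64) (unsigned long) &resp;

	if (write(fd, &msg, sizeof msg) != (ssize_t) sizeof msg)
		return -1;
	*pd_handle = resp.pd_handle;    /* names the PD in later commands */
	return 0;
}
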
252struct ib_uverbs_dealloc_pd {
253 __u32 pd_handle;
254};
255
256struct ib_uverbs_reg_mr {
257 __u64 response;
258 __u64 start;
259 __u64 length;
260 __u64 hca_va;
261 __u32 pd_handle;
262 __u32 access_flags;
263 __u64 driver_data[0];
264};
265
266struct ib_uverbs_reg_mr_resp {
267 __u32 mr_handle;
268 __u32 lkey;
269 __u32 rkey;
270};
271
272struct ib_uverbs_dereg_mr {
273 __u32 mr_handle;
274};
275
276struct ib_uverbs_create_cq {
277 __u64 response;
278 __u64 user_handle;
279 __u32 cqe;
280 __u32 event_handler;
281 __u64 driver_data[0];
282};
283
284struct ib_uverbs_create_cq_resp {
285 __u32 cq_handle;
286 __u32 cqe;
287};
288
289struct ib_uverbs_destroy_cq {
290 __u32 cq_handle;
291};
292
293struct ib_uverbs_create_qp {
294 __u64 response;
295 __u64 user_handle;
296 __u32 pd_handle;
297 __u32 send_cq_handle;
298 __u32 recv_cq_handle;
299 __u32 srq_handle;
300 __u32 max_send_wr;
301 __u32 max_recv_wr;
302 __u32 max_send_sge;
303 __u32 max_recv_sge;
304 __u32 max_inline_data;
305 __u8 sq_sig_all;
306 __u8 qp_type;
307 __u8 is_srq;
308 __u8 reserved;
309 __u64 driver_data[0];
310};
311
312struct ib_uverbs_create_qp_resp {
313 __u32 qp_handle;
314 __u32 qpn;
315};
316
317/*
318 * This struct needs to remain a multiple of 8 bytes to keep the
319 * alignment of the modify QP parameters.
320 */
321struct ib_uverbs_qp_dest {
322 __u8 dgid[16];
323 __u32 flow_label;
324 __u16 dlid;
325 __u16 reserved;
326 __u8 sgid_index;
327 __u8 hop_limit;
328 __u8 traffic_class;
329 __u8 sl;
330 __u8 src_path_bits;
331 __u8 static_rate;
332 __u8 is_global;
333 __u8 port_num;
334};
335
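One way to keep the multiple-of-8 invariant above from regressing silently is a compile-time check next to whatever embeds the struct -- a sketch, not part of this header:

BUILD_BUG_ON(sizeof(struct ib_uverbs_qp_dest) % 8 != 0);
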
336struct ib_uverbs_modify_qp {
337 struct ib_uverbs_qp_dest dest;
338 struct ib_uverbs_qp_dest alt_dest;
339 __u32 qp_handle;
340 __u32 attr_mask;
341 __u32 qkey;
342 __u32 rq_psn;
343 __u32 sq_psn;
344 __u32 dest_qp_num;
345 __u32 qp_access_flags;
346 __u16 pkey_index;
347 __u16 alt_pkey_index;
348 __u8 qp_state;
349 __u8 cur_qp_state;
350 __u8 path_mtu;
351 __u8 path_mig_state;
352 __u8 en_sqd_async_notify;
353 __u8 max_rd_atomic;
354 __u8 max_dest_rd_atomic;
355 __u8 min_rnr_timer;
356 __u8 port_num;
357 __u8 timeout;
358 __u8 retry_cnt;
359 __u8 rnr_retry;
360 __u8 alt_port_num;
361 __u8 alt_timeout;
362 __u8 reserved[2];
363 __u64 driver_data[0];
364};
365
366struct ib_uverbs_modify_qp_resp {
367};
368
369struct ib_uverbs_destroy_qp {
370 __u32 qp_handle;
371};
372
373struct ib_uverbs_attach_mcast {
374 __u8 gid[16];
375 __u32 qp_handle;
376 __u16 mlid;
377 __u16 reserved;
378 __u64 driver_data[0];
379};
380
381struct ib_uverbs_detach_mcast {
382 __u8 gid[16];
383 __u32 qp_handle;
384 __u16 mlid;
385 __u16 reserved;
386 __u64 driver_data[0];
387};
388
389#endif /* IB_USER_VERBS_H */
diff --git a/drivers/infiniband/include/ib_verbs.h b/drivers/infiniband/include/ib_verbs.h
index cf01f044a223..5d24edaa66e6 100644
--- a/drivers/infiniband/include/ib_verbs.h
+++ b/drivers/infiniband/include/ib_verbs.h
@@ -4,6 +4,7 @@
4 * Copyright (c) 2004 Intel Corporation. All rights reserved. 4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved. 6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 * Copyright (c) 2005 Cisco Systems. All rights reserved.
7 * 8 *
8 * This software is available to you under a choice of one of two 9 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU 10 * licenses. You may choose to be licensed under the terms of the GNU
@@ -41,7 +42,10 @@
41 42
42#include <linux/types.h> 43#include <linux/types.h>
43#include <linux/device.h> 44#include <linux/device.h>
45
44#include <asm/atomic.h> 46#include <asm/atomic.h>
47#include <asm/scatterlist.h>
48#include <asm/uaccess.h>
45 49
46union ib_gid { 50union ib_gid {
47 u8 raw[16]; 51 u8 raw[16];
@@ -285,6 +289,15 @@ struct ib_global_route {
285 u8 traffic_class; 289 u8 traffic_class;
286}; 290};
287 291
292struct ib_grh {
293 u32 version_tclass_flow;
294 u16 paylen;
295 u8 next_hdr;
296 u8 hop_limit;
297 union ib_gid sgid;
298 union ib_gid dgid;
299};
300
288enum { 301enum {
289 IB_MULTICAST_QPN = 0xffffff 302 IB_MULTICAST_QPN = 0xffffff
290}; 303};
@@ -544,7 +557,7 @@ struct ib_send_wr {
544 int num_sge; 557 int num_sge;
545 enum ib_wr_opcode opcode; 558 enum ib_wr_opcode opcode;
546 int send_flags; 559 int send_flags;
547 u32 imm_data; 560 __be32 imm_data;
548 union { 561 union {
549 struct { 562 struct {
550 u64 remote_addr; 563 u64 remote_addr;
@@ -562,6 +575,7 @@ struct ib_send_wr {
562 u32 remote_qpn; 575 u32 remote_qpn;
563 u32 remote_qkey; 576 u32 remote_qkey;
564 int timeout_ms; /* valid for MADs only */ 577 int timeout_ms; /* valid for MADs only */
578 int retries; /* valid for MADs only */
565 u16 pkey_index; /* valid for GSI only */ 579 u16 pkey_index; /* valid for GSI only */
566 u8 port_num; /* valid for DR SMPs on switch only */ 580 u8 port_num; /* valid for DR SMPs on switch only */
567 } ud; 581 } ud;
@@ -618,29 +632,86 @@ struct ib_fmr_attr {
618 u8 page_size; 632 u8 page_size;
619}; 633};
620 634
635struct ib_ucontext {
636 struct ib_device *device;
637 struct list_head pd_list;
638 struct list_head mr_list;
639 struct list_head mw_list;
640 struct list_head cq_list;
641 struct list_head qp_list;
642 struct list_head srq_list;
643 struct list_head ah_list;
644 spinlock_t lock;
645};
646
647struct ib_uobject {
648 u64 user_handle; /* handle given to us by userspace */
649 struct ib_ucontext *context; /* associated user context */
650 struct list_head list; /* link to context's list */
651 u32 id; /* index into kernel idr */
652};
653
654struct ib_umem {
655 unsigned long user_base;
656 unsigned long virt_base;
657 size_t length;
658 int offset;
659 int page_size;
660 int writable;
661 struct list_head chunk_list;
662};
663
664struct ib_umem_chunk {
665 struct list_head list;
666 int nents;
667 int nmap;
668 struct scatterlist page_list[0];
669};
670
671struct ib_udata {
672 void __user *inbuf;
673 void __user *outbuf;
674 size_t inlen;
675 size_t outlen;
676};
677
678#define IB_UMEM_MAX_PAGE_CHUNK \
679 ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \
680 ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \
681 (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
682
683struct ib_umem_object {
684 struct ib_uobject uobject;
685 struct ib_umem umem;
686};
687
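A sketch of the bookkeeping these structs imply: each object created on behalf of userspace gets linked into its owning ucontext under the context lock, so everything can be found and torn down when the process exits (uobj/context/cmd are hypothetical locals, and the exact locking in uverbs may differ):

uobj->user_handle = cmd.user_handle;
uobj->context     = context;

spin_lock(&context->lock);
list_add_tail(&uobj->list, &context->pd_list);  /* list matching the type */
spin_unlock(&context->lock);
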
621struct ib_pd { 688struct ib_pd {
622 struct ib_device *device; 689 struct ib_device *device;
623 atomic_t usecnt; /* count all resources */ 690 struct ib_uobject *uobject;
691 atomic_t usecnt; /* count all resources */
624}; 692};
625 693
626struct ib_ah { 694struct ib_ah {
627 struct ib_device *device; 695 struct ib_device *device;
628 struct ib_pd *pd; 696 struct ib_pd *pd;
697 struct ib_uobject *uobject;
629}; 698};
630 699
631typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); 700typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
632 701
633struct ib_cq { 702struct ib_cq {
634 struct ib_device *device; 703 struct ib_device *device;
635 ib_comp_handler comp_handler; 704 struct ib_uobject *uobject;
636 void (*event_handler)(struct ib_event *, void *); 705 ib_comp_handler comp_handler;
637 void * cq_context; 706 void (*event_handler)(struct ib_event *, void *);
638 int cqe; 707 void * cq_context;
639 atomic_t usecnt; /* count number of work queues */ 708 int cqe;
709 atomic_t usecnt; /* count number of work queues */
640}; 710};
641 711
642struct ib_srq { 712struct ib_srq {
643 struct ib_device *device; 713 struct ib_device *device;
714 struct ib_uobject *uobject;
644 struct ib_pd *pd; 715 struct ib_pd *pd;
645 void *srq_context; 716 void *srq_context;
646 atomic_t usecnt; 717 atomic_t usecnt;
@@ -652,6 +723,7 @@ struct ib_qp {
652 struct ib_cq *send_cq; 723 struct ib_cq *send_cq;
653 struct ib_cq *recv_cq; 724 struct ib_cq *recv_cq;
654 struct ib_srq *srq; 725 struct ib_srq *srq;
726 struct ib_uobject *uobject;
655 void (*event_handler)(struct ib_event *, void *); 727 void (*event_handler)(struct ib_event *, void *);
656 void *qp_context; 728 void *qp_context;
657 u32 qp_num; 729 u32 qp_num;
@@ -659,16 +731,18 @@ struct ib_qp {
659}; 731};
660 732
661struct ib_mr { 733struct ib_mr {
662 struct ib_device *device; 734 struct ib_device *device;
663 struct ib_pd *pd; 735 struct ib_pd *pd;
664 u32 lkey; 736 struct ib_uobject *uobject;
665 u32 rkey; 737 u32 lkey;
666 atomic_t usecnt; /* count number of MWs */ 738 u32 rkey;
739 atomic_t usecnt; /* count number of MWs */
667}; 740};
668 741
669struct ib_mw { 742struct ib_mw {
670 struct ib_device *device; 743 struct ib_device *device;
671 struct ib_pd *pd; 744 struct ib_pd *pd;
745 struct ib_uobject *uobject;
672 u32 rkey; 746 u32 rkey;
673}; 747};
674 748
@@ -737,7 +811,14 @@ struct ib_device {
737 int (*modify_port)(struct ib_device *device, 811 int (*modify_port)(struct ib_device *device,
738 u8 port_num, int port_modify_mask, 812 u8 port_num, int port_modify_mask,
739 struct ib_port_modify *port_modify); 813 struct ib_port_modify *port_modify);
740 struct ib_pd * (*alloc_pd)(struct ib_device *device); 814 struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device,
815 struct ib_udata *udata);
816 int (*dealloc_ucontext)(struct ib_ucontext *context);
817 int (*mmap)(struct ib_ucontext *context,
818 struct vm_area_struct *vma);
819 struct ib_pd * (*alloc_pd)(struct ib_device *device,
820 struct ib_ucontext *context,
821 struct ib_udata *udata);
741 int (*dealloc_pd)(struct ib_pd *pd); 822 int (*dealloc_pd)(struct ib_pd *pd);
742 struct ib_ah * (*create_ah)(struct ib_pd *pd, 823 struct ib_ah * (*create_ah)(struct ib_pd *pd,
743 struct ib_ah_attr *ah_attr); 824 struct ib_ah_attr *ah_attr);
@@ -747,7 +828,8 @@ struct ib_device {
747 struct ib_ah_attr *ah_attr); 828 struct ib_ah_attr *ah_attr);
748 int (*destroy_ah)(struct ib_ah *ah); 829 int (*destroy_ah)(struct ib_ah *ah);
749 struct ib_qp * (*create_qp)(struct ib_pd *pd, 830 struct ib_qp * (*create_qp)(struct ib_pd *pd,
750 struct ib_qp_init_attr *qp_init_attr); 831 struct ib_qp_init_attr *qp_init_attr,
832 struct ib_udata *udata);
751 int (*modify_qp)(struct ib_qp *qp, 833 int (*modify_qp)(struct ib_qp *qp,
752 struct ib_qp_attr *qp_attr, 834 struct ib_qp_attr *qp_attr,
753 int qp_attr_mask); 835 int qp_attr_mask);
@@ -762,8 +844,9 @@ struct ib_device {
762 int (*post_recv)(struct ib_qp *qp, 844 int (*post_recv)(struct ib_qp *qp,
763 struct ib_recv_wr *recv_wr, 845 struct ib_recv_wr *recv_wr,
764 struct ib_recv_wr **bad_recv_wr); 846 struct ib_recv_wr **bad_recv_wr);
765 struct ib_cq * (*create_cq)(struct ib_device *device, 847 struct ib_cq * (*create_cq)(struct ib_device *device, int cqe,
766 int cqe); 848 struct ib_ucontext *context,
849 struct ib_udata *udata);
767 int (*destroy_cq)(struct ib_cq *cq); 850 int (*destroy_cq)(struct ib_cq *cq);
768 int (*resize_cq)(struct ib_cq *cq, int *cqe); 851 int (*resize_cq)(struct ib_cq *cq, int *cqe);
769 int (*poll_cq)(struct ib_cq *cq, int num_entries, 852 int (*poll_cq)(struct ib_cq *cq, int num_entries,
@@ -780,6 +863,10 @@ struct ib_device {
780 int num_phys_buf, 863 int num_phys_buf,
781 int mr_access_flags, 864 int mr_access_flags,
782 u64 *iova_start); 865 u64 *iova_start);
866 struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
867 struct ib_umem *region,
868 int mr_access_flags,
869 struct ib_udata *udata);
783 int (*query_mr)(struct ib_mr *mr, 870 int (*query_mr)(struct ib_mr *mr,
784 struct ib_mr_attr *mr_attr); 871 struct ib_mr_attr *mr_attr);
785 int (*dereg_mr)(struct ib_mr *mr); 872 int (*dereg_mr)(struct ib_mr *mr);
@@ -817,6 +904,7 @@ struct ib_device {
817 struct ib_mad *in_mad, 904 struct ib_mad *in_mad,
818 struct ib_mad *out_mad); 905 struct ib_mad *out_mad);
819 906
907 struct module *owner;
820 struct class_device class_dev; 908 struct class_device class_dev;
821 struct kobject ports_parent; 909 struct kobject ports_parent;
822 struct list_head port_list; 910 struct list_head port_list;
@@ -852,6 +940,16 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
852void ib_set_client_data(struct ib_device *device, struct ib_client *client, 940void ib_set_client_data(struct ib_device *device, struct ib_client *client,
853 void *data); 941 void *data);
854 942
943static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
944{
945 return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
946}
947
948static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
949{
950 return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
951}
952
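These helpers are what a userspace-aware driver method uses to move ABI structs across the boundary. A sketch of the response side of an alloc_pd method -- every mydrv_* name is made up for illustration:

static struct ib_pd *mydrv_alloc_pd(struct ib_device *ibdev,
				    struct ib_ucontext *context,
				    struct ib_udata *udata)
{
	struct mydrv_pd *pd = mydrv_pd_create(ibdev);   /* hypothetical */
	struct mydrv_alloc_pd_resp resp;                /* hypothetical */

	if (!pd)
		return ERR_PTR(-ENOMEM);

	if (context) {
		resp.pdn = pd->pdn;                     /* hypothetical */
		if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
			mydrv_pd_destroy(pd);           /* hypothetical */
			return ERR_PTR(-EFAULT);
		}
	}
	return &pd->ibpd;                               /* hypothetical member */
}
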
855int ib_register_event_handler (struct ib_event_handler *event_handler); 953int ib_register_event_handler (struct ib_event_handler *event_handler);
856int ib_unregister_event_handler(struct ib_event_handler *event_handler); 954int ib_unregister_event_handler(struct ib_event_handler *event_handler);
857void ib_dispatch_event(struct ib_event *event); 955void ib_dispatch_event(struct ib_event *event);
@@ -902,6 +1000,21 @@ int ib_dealloc_pd(struct ib_pd *pd);
902struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); 1000struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
903 1001
904/** 1002/**
1003 * ib_create_ah_from_wc - Creates an address handle associated with the
1004 * sender of the specified work completion.
1005 * @pd: The protection domain associated with the address handle.
1006 * @wc: Work completion information associated with a received message.
1007 * @grh: References the received global route header. This parameter is
1008 * ignored unless the work completion indicates that the GRH is valid.
1009 * @port_num: The outbound port number to associate with the address.
1010 *
1011 * The address handle is used to reference a local or global destination
1012 * in all UD QP post sends.
1013 */
1014struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
1015 struct ib_grh *grh, u8 port_num);
1016
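The typical caller is replying to a datagram: UD QPs deposit the 40-byte GRH, when present, at the head of the receive buffer, which is exactly the layout this function expects. A sketch:

struct ib_ah *ah;

ah = ib_create_ah_from_wc(pd, &wc, (struct ib_grh *) recv_buf, port_num);
if (IS_ERR(ah))
	return PTR_ERR(ah);
/* post the reply with wr.ud.ah = ah, then ib_destroy_ah(ah) */
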
1017/**
905 * ib_modify_ah - Modifies the address vector associated with an address 1018 * ib_modify_ah - Modifies the address vector associated with an address
906 * handle. 1019 * handle.
907 * @ah: The address handle to modify. 1020 * @ah: The address handle to modify.
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 8238766746b2..eee82363167d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -81,7 +81,7 @@ void ipoib_free_ah(struct kref *kref)
81 81
82 unsigned long flags; 82 unsigned long flags;
83 83
84 if (ah->last_send <= priv->tx_tail) { 84 if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
85 ipoib_dbg(priv, "Freeing ah %p\n", ah->ah); 85 ipoib_dbg(priv, "Freeing ah %p\n", ah->ah);
86 ib_destroy_ah(ah->ah); 86 ib_destroy_ah(ah->ah);
87 kfree(ah); 87 kfree(ah);
@@ -355,7 +355,7 @@ static void __ipoib_reap_ah(struct net_device *dev)
355 355
356 spin_lock_irq(&priv->lock); 356 spin_lock_irq(&priv->lock);
357 list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) 357 list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
358 if (ah->last_send <= priv->tx_tail) { 358 if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
359 list_del(&ah->list); 359 list_del(&ah->list);
360 list_add_tail(&ah->list, &remove_list); 360 list_add_tail(&ah->list, &remove_list);
361 } 361 }
@@ -486,7 +486,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
486 * assume the HW is wedged and just free up 486 * assume the HW is wedged and just free up
487 * all our pending work requests. 487 * all our pending work requests.
488 */ 488 */
489 while (priv->tx_tail < priv->tx_head) { 489 while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
490 tx_req = &priv->tx_ring[priv->tx_tail & 490 tx_req = &priv->tx_ring[priv->tx_tail &
491 (IPOIB_TX_RING_SIZE - 1)]; 491 (IPOIB_TX_RING_SIZE - 1)];
492 dma_unmap_single(priv->ca->dma_device, 492 dma_unmap_single(priv->ca->dma_device,
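
All three hunks above replace direct <= comparisons of free-running counters with a signed difference, so the tests stay correct when tx_head/tx_tail wrap past 2^32. The idiom in isolation, as a standalone sketch:

static inline int seq_before_eq(unsigned int a, unsigned int b)
{
	/* "a <= b" in wrapped sequence space; valid while the counters
	 * stay within 2^31 of each other
	 */
	return (int) (b - a) >= 0;
}

/* seq_before_eq(4294967295u, 2u) is true: the counter has wrapped. */
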
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 6f60abbaebd5..fa00816a3cf7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -600,9 +600,10 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
600 600
601 ipoib_mcast_send(dev, (union ib_gid *) (phdr->hwaddr + 4), skb); 601 ipoib_mcast_send(dev, (union ib_gid *) (phdr->hwaddr + 4), skb);
602 } else { 602 } else {
603 /* unicast GID -- should be ARP reply */ 603 /* unicast GID -- should be ARP or RARP reply */
604 604
605 if (be16_to_cpup((u16 *) skb->data) != ETH_P_ARP) { 605 if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) &&
606 (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) {
606 ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x " 607 ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x "
607 IPOIB_GID_FMT "\n", 608 IPOIB_GID_FMT "\n",
608 skb->dst ? "neigh" : "dst", 609 skb->dst ? "neigh" : "dst",