aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/Kconfig10
-rw-r--r--drivers/infiniband/core/Makefile5
-rw-r--r--drivers/infiniband/core/uverbs.h132
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c1006
-rw-r--r--drivers/infiniband/core/uverbs_main.c698
-rw-r--r--drivers/infiniband/core/uverbs_mem.c221
-rw-r--r--drivers/infiniband/core/verbs.c32
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c76
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h6
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c141
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.h14
-rw-r--r--drivers/infiniband/hw/mthca/mthca_pd.c24
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c330
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h16
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c215
-rw-r--r--drivers/infiniband/hw/mthca/mthca_user.h81
-rw-r--r--drivers/infiniband/include/ib_user_verbs.h389
-rw-r--r--drivers/infiniband/include/ib_verbs.h124
19 files changed, 3348 insertions, 174 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 3cc3ff0cccb1..79c8e2dd9c33 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -7,6 +7,16 @@ config INFINIBAND
7 any protocols you wish to use as well as drivers for your 7 any protocols you wish to use as well as drivers for your
8 InfiniBand hardware. 8 InfiniBand hardware.
9 9
10config INFINIBAND_USER_VERBS
11 tristate "InfiniBand userspace verbs support"
12 depends on INFINIBAND
13 ---help---
14 Userspace InfiniBand verbs support. This is the kernel side
15 of userspace verbs, which allows userspace processes to
16 directly access InfiniBand hardware for fast-path
17 operations. You will also need libibverbs and a hardware
18 driver library from <http://www.openib.org>.
19
10source "drivers/infiniband/hw/mthca/Kconfig" 20source "drivers/infiniband/hw/mthca/Kconfig"
11 21
12source "drivers/infiniband/ulp/ipoib/Kconfig" 22source "drivers/infiniband/ulp/ipoib/Kconfig"
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index d2dbfb52c0a3..e1a7cf3e8636 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,6 +1,7 @@
1EXTRA_CFLAGS += -Idrivers/infiniband/include 1EXTRA_CFLAGS += -Idrivers/infiniband/include
2 2
3obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o ib_umad.o 3obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o ib_umad.o
4obj-$(CONFIG_INFINIBAND_USER_VERBS) += ib_uverbs.o
4 5
5ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ 6ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
6 device.o fmr_pool.o cache.o 7 device.o fmr_pool.o cache.o
@@ -10,3 +11,5 @@ ib_mad-y := mad.o smi.o agent.o
10ib_sa-y := sa_query.o 11ib_sa-y := sa_query.o
11 12
12ib_umad-y := user_mad.o 13ib_umad-y := user_mad.o
14
15ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
new file mode 100644
index 000000000000..57347f1e82c1
--- /dev/null
+++ b/drivers/infiniband/core/uverbs.h
@@ -0,0 +1,132 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: uverbs.h 2559 2005-06-06 19:43:16Z roland $
34 */
35
36#ifndef UVERBS_H
37#define UVERBS_H
38
39/* Include device.h and fs.h until cdev.h is self-sufficient */
40#include <linux/fs.h>
41#include <linux/device.h>
42#include <linux/cdev.h>
43#include <linux/kref.h>
44#include <linux/idr.h>
45
46#include <ib_verbs.h>
47#include <ib_user_verbs.h>
48
49struct ib_uverbs_device {
50 int devnum;
51 struct cdev dev;
52 struct class_device class_dev;
53 struct ib_device *ib_dev;
54 int num_comp;
55};
56
57struct ib_uverbs_event_file {
58 struct kref ref;
59 struct ib_uverbs_file *uverbs_file;
60 spinlock_t lock;
61 int fd;
62 int is_async;
63 wait_queue_head_t poll_wait;
64 struct list_head event_list;
65};
66
67struct ib_uverbs_file {
68 struct kref ref;
69 struct ib_uverbs_device *device;
70 struct ib_ucontext *ucontext;
71 struct ib_event_handler event_handler;
72 struct ib_uverbs_event_file async_file;
73 struct ib_uverbs_event_file comp_file[1];
74};
75
76struct ib_uverbs_async_event {
77 struct ib_uverbs_async_event_desc desc;
78 struct list_head list;
79};
80
81struct ib_uverbs_comp_event {
82 struct ib_uverbs_comp_event_desc desc;
83 struct list_head list;
84};
85
86struct ib_uobject_mr {
87 struct ib_uobject uobj;
88 struct page *page_list;
89 struct scatterlist *sg_list;
90};
91
92extern struct semaphore ib_uverbs_idr_mutex;
93extern struct idr ib_uverbs_pd_idr;
94extern struct idr ib_uverbs_mr_idr;
95extern struct idr ib_uverbs_mw_idr;
96extern struct idr ib_uverbs_ah_idr;
97extern struct idr ib_uverbs_cq_idr;
98extern struct idr ib_uverbs_qp_idr;
99
100void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
101void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
102void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
103
104int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
105 void *addr, size_t size, int write);
106void ib_umem_release(struct ib_device *dev, struct ib_umem *umem);
107void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
108
109#define IB_UVERBS_DECLARE_CMD(name) \
110 ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
111 const char __user *buf, int in_len, \
112 int out_len)
113
114IB_UVERBS_DECLARE_CMD(query_params);
115IB_UVERBS_DECLARE_CMD(get_context);
116IB_UVERBS_DECLARE_CMD(query_device);
117IB_UVERBS_DECLARE_CMD(query_port);
118IB_UVERBS_DECLARE_CMD(query_gid);
119IB_UVERBS_DECLARE_CMD(query_pkey);
120IB_UVERBS_DECLARE_CMD(alloc_pd);
121IB_UVERBS_DECLARE_CMD(dealloc_pd);
122IB_UVERBS_DECLARE_CMD(reg_mr);
123IB_UVERBS_DECLARE_CMD(dereg_mr);
124IB_UVERBS_DECLARE_CMD(create_cq);
125IB_UVERBS_DECLARE_CMD(destroy_cq);
126IB_UVERBS_DECLARE_CMD(create_qp);
127IB_UVERBS_DECLARE_CMD(modify_qp);
128IB_UVERBS_DECLARE_CMD(destroy_qp);
129IB_UVERBS_DECLARE_CMD(attach_mcast);
130IB_UVERBS_DECLARE_CMD(detach_mcast);
131
132#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
new file mode 100644
index 000000000000..5f2bbcda4c73
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -0,0 +1,1006 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $
34 */
35
36#include <asm/uaccess.h>
37
38#include "uverbs.h"
39
40#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
41 do { \
42 (udata)->inbuf = (void __user *) (ibuf); \
43 (udata)->outbuf = (void __user *) (obuf); \
44 (udata)->inlen = (ilen); \
45 (udata)->outlen = (olen); \
46 } while (0)
47
48ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file,
49 const char __user *buf,
50 int in_len, int out_len)
51{
52 struct ib_uverbs_query_params cmd;
53 struct ib_uverbs_query_params_resp resp;
54
55 if (out_len < sizeof resp)
56 return -ENOSPC;
57
58 if (copy_from_user(&cmd, buf, sizeof cmd))
59 return -EFAULT;
60
61 memset(&resp, 0, sizeof resp);
62
63 resp.num_cq_events = file->device->num_comp;
64
65 if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp))
66 return -EFAULT;
67
68 return in_len;
69}
70
71ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
72 const char __user *buf,
73 int in_len, int out_len)
74{
75 struct ib_uverbs_get_context cmd;
76 struct ib_uverbs_get_context_resp resp;
77 struct ib_udata udata;
78 struct ib_device *ibdev = file->device->ib_dev;
79 int i;
80 int ret = in_len;
81
82 if (out_len < sizeof resp)
83 return -ENOSPC;
84
85 if (copy_from_user(&cmd, buf, sizeof cmd))
86 return -EFAULT;
87
88 INIT_UDATA(&udata, buf + sizeof cmd,
89 (unsigned long) cmd.response + sizeof resp,
90 in_len - sizeof cmd, out_len - sizeof resp);
91
92 file->ucontext = ibdev->alloc_ucontext(ibdev, &udata);
93 if (IS_ERR(file->ucontext)) {
94 ret = PTR_ERR(file->ucontext);
95 file->ucontext = NULL;
96 return ret;
97 }
98
99 file->ucontext->device = ibdev;
100 INIT_LIST_HEAD(&file->ucontext->pd_list);
101 INIT_LIST_HEAD(&file->ucontext->mr_list);
102 INIT_LIST_HEAD(&file->ucontext->mw_list);
103 INIT_LIST_HEAD(&file->ucontext->cq_list);
104 INIT_LIST_HEAD(&file->ucontext->qp_list);
105 INIT_LIST_HEAD(&file->ucontext->srq_list);
106 INIT_LIST_HEAD(&file->ucontext->ah_list);
107 spin_lock_init(&file->ucontext->lock);
108
109 resp.async_fd = file->async_file.fd;
110 for (i = 0; i < file->device->num_comp; ++i)
111 if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab +
112 i * sizeof (__u32),
113 &file->comp_file[i].fd, sizeof (__u32)))
114 goto err;
115
116 if (copy_to_user((void __user *) (unsigned long) cmd.response,
117 &resp, sizeof resp))
118 goto err;
119
120 return in_len;
121
122err:
123 ibdev->dealloc_ucontext(file->ucontext);
124 file->ucontext = NULL;
125
126 return -EFAULT;
127}
128
129ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
130 const char __user *buf,
131 int in_len, int out_len)
132{
133 struct ib_uverbs_query_device cmd;
134 struct ib_uverbs_query_device_resp resp;
135 struct ib_device_attr attr;
136 int ret;
137
138 if (out_len < sizeof resp)
139 return -ENOSPC;
140
141 if (copy_from_user(&cmd, buf, sizeof cmd))
142 return -EFAULT;
143
144 ret = ib_query_device(file->device->ib_dev, &attr);
145 if (ret)
146 return ret;
147
148 memset(&resp, 0, sizeof resp);
149
150 resp.fw_ver = attr.fw_ver;
151 resp.node_guid = attr.node_guid;
152 resp.sys_image_guid = attr.sys_image_guid;
153 resp.max_mr_size = attr.max_mr_size;
154 resp.page_size_cap = attr.page_size_cap;
155 resp.vendor_id = attr.vendor_id;
156 resp.vendor_part_id = attr.vendor_part_id;
157 resp.hw_ver = attr.hw_ver;
158 resp.max_qp = attr.max_qp;
159 resp.max_qp_wr = attr.max_qp_wr;
160 resp.device_cap_flags = attr.device_cap_flags;
161 resp.max_sge = attr.max_sge;
162 resp.max_sge_rd = attr.max_sge_rd;
163 resp.max_cq = attr.max_cq;
164 resp.max_cqe = attr.max_cqe;
165 resp.max_mr = attr.max_mr;
166 resp.max_pd = attr.max_pd;
167 resp.max_qp_rd_atom = attr.max_qp_rd_atom;
168 resp.max_ee_rd_atom = attr.max_ee_rd_atom;
169 resp.max_res_rd_atom = attr.max_res_rd_atom;
170 resp.max_qp_init_rd_atom = attr.max_qp_init_rd_atom;
171 resp.max_ee_init_rd_atom = attr.max_ee_init_rd_atom;
172 resp.atomic_cap = attr.atomic_cap;
173 resp.max_ee = attr.max_ee;
174 resp.max_rdd = attr.max_rdd;
175 resp.max_mw = attr.max_mw;
176 resp.max_raw_ipv6_qp = attr.max_raw_ipv6_qp;
177 resp.max_raw_ethy_qp = attr.max_raw_ethy_qp;
178 resp.max_mcast_grp = attr.max_mcast_grp;
179 resp.max_mcast_qp_attach = attr.max_mcast_qp_attach;
180 resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach;
181 resp.max_ah = attr.max_ah;
182 resp.max_fmr = attr.max_fmr;
183 resp.max_map_per_fmr = attr.max_map_per_fmr;
184 resp.max_srq = attr.max_srq;
185 resp.max_srq_wr = attr.max_srq_wr;
186 resp.max_srq_sge = attr.max_srq_sge;
187 resp.max_pkeys = attr.max_pkeys;
188 resp.local_ca_ack_delay = attr.local_ca_ack_delay;
189 resp.phys_port_cnt = file->device->ib_dev->phys_port_cnt;
190
191 if (copy_to_user((void __user *) (unsigned long) cmd.response,
192 &resp, sizeof resp))
193 return -EFAULT;
194
195 return in_len;
196}
197
198ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
199 const char __user *buf,
200 int in_len, int out_len)
201{
202 struct ib_uverbs_query_port cmd;
203 struct ib_uverbs_query_port_resp resp;
204 struct ib_port_attr attr;
205 int ret;
206
207 if (out_len < sizeof resp)
208 return -ENOSPC;
209
210 if (copy_from_user(&cmd, buf, sizeof cmd))
211 return -EFAULT;
212
213 ret = ib_query_port(file->device->ib_dev, cmd.port_num, &attr);
214 if (ret)
215 return ret;
216
217 memset(&resp, 0, sizeof resp);
218
219 resp.state = attr.state;
220 resp.max_mtu = attr.max_mtu;
221 resp.active_mtu = attr.active_mtu;
222 resp.gid_tbl_len = attr.gid_tbl_len;
223 resp.port_cap_flags = attr.port_cap_flags;
224 resp.max_msg_sz = attr.max_msg_sz;
225 resp.bad_pkey_cntr = attr.bad_pkey_cntr;
226 resp.qkey_viol_cntr = attr.qkey_viol_cntr;
227 resp.pkey_tbl_len = attr.pkey_tbl_len;
228 resp.lid = attr.lid;
229 resp.sm_lid = attr.sm_lid;
230 resp.lmc = attr.lmc;
231 resp.max_vl_num = attr.max_vl_num;
232 resp.sm_sl = attr.sm_sl;
233 resp.subnet_timeout = attr.subnet_timeout;
234 resp.init_type_reply = attr.init_type_reply;
235 resp.active_width = attr.active_width;
236 resp.active_speed = attr.active_speed;
237 resp.phys_state = attr.phys_state;
238
239 if (copy_to_user((void __user *) (unsigned long) cmd.response,
240 &resp, sizeof resp))
241 return -EFAULT;
242
243 return in_len;
244}
245
246ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file,
247 const char __user *buf,
248 int in_len, int out_len)
249{
250 struct ib_uverbs_query_gid cmd;
251 struct ib_uverbs_query_gid_resp resp;
252 int ret;
253
254 if (out_len < sizeof resp)
255 return -ENOSPC;
256
257 if (copy_from_user(&cmd, buf, sizeof cmd))
258 return -EFAULT;
259
260 memset(&resp, 0, sizeof resp);
261
262 ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index,
263 (union ib_gid *) resp.gid);
264 if (ret)
265 return ret;
266
267 if (copy_to_user((void __user *) (unsigned long) cmd.response,
268 &resp, sizeof resp))
269 return -EFAULT;
270
271 return in_len;
272}
273
274ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file,
275 const char __user *buf,
276 int in_len, int out_len)
277{
278 struct ib_uverbs_query_pkey cmd;
279 struct ib_uverbs_query_pkey_resp resp;
280 int ret;
281
282 if (out_len < sizeof resp)
283 return -ENOSPC;
284
285 if (copy_from_user(&cmd, buf, sizeof cmd))
286 return -EFAULT;
287
288 memset(&resp, 0, sizeof resp);
289
290 ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index,
291 &resp.pkey);
292 if (ret)
293 return ret;
294
295 if (copy_to_user((void __user *) (unsigned long) cmd.response,
296 &resp, sizeof resp))
297 return -EFAULT;
298
299 return in_len;
300}
301
302ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
303 const char __user *buf,
304 int in_len, int out_len)
305{
306 struct ib_uverbs_alloc_pd cmd;
307 struct ib_uverbs_alloc_pd_resp resp;
308 struct ib_udata udata;
309 struct ib_uobject *uobj;
310 struct ib_pd *pd;
311 int ret;
312
313 if (out_len < sizeof resp)
314 return -ENOSPC;
315
316 if (copy_from_user(&cmd, buf, sizeof cmd))
317 return -EFAULT;
318
319 INIT_UDATA(&udata, buf + sizeof cmd,
320 (unsigned long) cmd.response + sizeof resp,
321 in_len - sizeof cmd, out_len - sizeof resp);
322
323 uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
324 if (!uobj)
325 return -ENOMEM;
326
327 uobj->context = file->ucontext;
328
329 pd = file->device->ib_dev->alloc_pd(file->device->ib_dev,
330 file->ucontext, &udata);
331 if (IS_ERR(pd)) {
332 ret = PTR_ERR(pd);
333 goto err;
334 }
335
336 pd->device = file->device->ib_dev;
337 pd->uobject = uobj;
338 atomic_set(&pd->usecnt, 0);
339
340retry:
341 if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) {
342 ret = -ENOMEM;
343 goto err_pd;
344 }
345
346 down(&ib_uverbs_idr_mutex);
347 ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id);
348 up(&ib_uverbs_idr_mutex);
349
350 if (ret == -EAGAIN)
351 goto retry;
352 if (ret)
353 goto err_pd;
354
355 spin_lock_irq(&file->ucontext->lock);
356 list_add_tail(&uobj->list, &file->ucontext->pd_list);
357 spin_unlock_irq(&file->ucontext->lock);
358
359 memset(&resp, 0, sizeof resp);
360 resp.pd_handle = uobj->id;
361
362 if (copy_to_user((void __user *) (unsigned long) cmd.response,
363 &resp, sizeof resp)) {
364 ret = -EFAULT;
365 goto err_list;
366 }
367
368 return in_len;
369
370err_list:
371 spin_lock_irq(&file->ucontext->lock);
372 list_del(&uobj->list);
373 spin_unlock_irq(&file->ucontext->lock);
374
375 down(&ib_uverbs_idr_mutex);
376 idr_remove(&ib_uverbs_pd_idr, uobj->id);
377 up(&ib_uverbs_idr_mutex);
378
379err_pd:
380 ib_dealloc_pd(pd);
381
382err:
383 kfree(uobj);
384 return ret;
385}
386
387ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
388 const char __user *buf,
389 int in_len, int out_len)
390{
391 struct ib_uverbs_dealloc_pd cmd;
392 struct ib_pd *pd;
393 struct ib_uobject *uobj;
394 int ret = -EINVAL;
395
396 if (copy_from_user(&cmd, buf, sizeof cmd))
397 return -EFAULT;
398
399 down(&ib_uverbs_idr_mutex);
400
401 pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
402 if (!pd || pd->uobject->context != file->ucontext)
403 goto out;
404
405 uobj = pd->uobject;
406
407 ret = ib_dealloc_pd(pd);
408 if (ret)
409 goto out;
410
411 idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle);
412
413 spin_lock_irq(&file->ucontext->lock);
414 list_del(&uobj->list);
415 spin_unlock_irq(&file->ucontext->lock);
416
417 kfree(uobj);
418
419out:
420 up(&ib_uverbs_idr_mutex);
421
422 return ret ? ret : in_len;
423}
424
425ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
426 const char __user *buf, int in_len,
427 int out_len)
428{
429 struct ib_uverbs_reg_mr cmd;
430 struct ib_uverbs_reg_mr_resp resp;
431 struct ib_udata udata;
432 struct ib_umem_object *obj;
433 struct ib_pd *pd;
434 struct ib_mr *mr;
435 int ret;
436
437 if (out_len < sizeof resp)
438 return -ENOSPC;
439
440 if (copy_from_user(&cmd, buf, sizeof cmd))
441 return -EFAULT;
442
443 INIT_UDATA(&udata, buf + sizeof cmd,
444 (unsigned long) cmd.response + sizeof resp,
445 in_len - sizeof cmd, out_len - sizeof resp);
446
447 if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
448 return -EINVAL;
449
450 obj = kmalloc(sizeof *obj, GFP_KERNEL);
451 if (!obj)
452 return -ENOMEM;
453
454 obj->uobject.context = file->ucontext;
455
456 /*
457 * We ask for writable memory if any access flags other than
458 * "remote read" are set. "Local write" and "remote write"
459 * obviously require write access. "Remote atomic" can do
460 * things like fetch and add, which will modify memory, and
461 * "MW bind" can change permissions by binding a window.
462 */
463 ret = ib_umem_get(file->device->ib_dev, &obj->umem,
464 (void *) (unsigned long) cmd.start, cmd.length,
465 !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ));
466 if (ret)
467 goto err_free;
468
469 obj->umem.virt_base = cmd.hca_va;
470
471 down(&ib_uverbs_idr_mutex);
472
473 pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
474 if (!pd || pd->uobject->context != file->ucontext) {
475 ret = -EINVAL;
476 goto err_up;
477 }
478
479 if (!pd->device->reg_user_mr) {
480 ret = -ENOSYS;
481 goto err_up;
482 }
483
484 mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata);
485 if (IS_ERR(mr)) {
486 ret = PTR_ERR(mr);
487 goto err_up;
488 }
489
490 mr->device = pd->device;
491 mr->pd = pd;
492 mr->uobject = &obj->uobject;
493 atomic_inc(&pd->usecnt);
494 atomic_set(&mr->usecnt, 0);
495
496 memset(&resp, 0, sizeof resp);
497 resp.lkey = mr->lkey;
498 resp.rkey = mr->rkey;
499
500retry:
501 if (!idr_pre_get(&ib_uverbs_mr_idr, GFP_KERNEL)) {
502 ret = -ENOMEM;
503 goto err_unreg;
504 }
505
506 ret = idr_get_new(&ib_uverbs_mr_idr, mr, &obj->uobject.id);
507
508 if (ret == -EAGAIN)
509 goto retry;
510 if (ret)
511 goto err_unreg;
512
513 resp.mr_handle = obj->uobject.id;
514
515 spin_lock_irq(&file->ucontext->lock);
516 list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
517 spin_unlock_irq(&file->ucontext->lock);
518
519 if (copy_to_user((void __user *) (unsigned long) cmd.response,
520 &resp, sizeof resp)) {
521 ret = -EFAULT;
522 goto err_list;
523 }
524
525 up(&ib_uverbs_idr_mutex);
526
527 return in_len;
528
529err_list:
530 spin_lock_irq(&file->ucontext->lock);
531 list_del(&obj->uobject.list);
532 spin_unlock_irq(&file->ucontext->lock);
533
534err_unreg:
535 ib_dereg_mr(mr);
536
537err_up:
538 up(&ib_uverbs_idr_mutex);
539
540 ib_umem_release(file->device->ib_dev, &obj->umem);
541
542err_free:
543 kfree(obj);
544 return ret;
545}
546
547ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
548 const char __user *buf, int in_len,
549 int out_len)
550{
551 struct ib_uverbs_dereg_mr cmd;
552 struct ib_mr *mr;
553 struct ib_umem_object *memobj;
554 int ret = -EINVAL;
555
556 if (copy_from_user(&cmd, buf, sizeof cmd))
557 return -EFAULT;
558
559 down(&ib_uverbs_idr_mutex);
560
561 mr = idr_find(&ib_uverbs_mr_idr, cmd.mr_handle);
562 if (!mr || mr->uobject->context != file->ucontext)
563 goto out;
564
565 memobj = container_of(mr->uobject, struct ib_umem_object, uobject);
566
567 ret = ib_dereg_mr(mr);
568 if (ret)
569 goto out;
570
571 idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle);
572
573 spin_lock_irq(&file->ucontext->lock);
574 list_del(&memobj->uobject.list);
575 spin_unlock_irq(&file->ucontext->lock);
576
577 ib_umem_release(file->device->ib_dev, &memobj->umem);
578 kfree(memobj);
579
580out:
581 up(&ib_uverbs_idr_mutex);
582
583 return ret ? ret : in_len;
584}
585
586ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
587 const char __user *buf, int in_len,
588 int out_len)
589{
590 struct ib_uverbs_create_cq cmd;
591 struct ib_uverbs_create_cq_resp resp;
592 struct ib_udata udata;
593 struct ib_uobject *uobj;
594 struct ib_cq *cq;
595 int ret;
596
597 if (out_len < sizeof resp)
598 return -ENOSPC;
599
600 if (copy_from_user(&cmd, buf, sizeof cmd))
601 return -EFAULT;
602
603 INIT_UDATA(&udata, buf + sizeof cmd,
604 (unsigned long) cmd.response + sizeof resp,
605 in_len - sizeof cmd, out_len - sizeof resp);
606
607 if (cmd.event_handler >= file->device->num_comp)
608 return -EINVAL;
609
610 uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
611 if (!uobj)
612 return -ENOMEM;
613
614 uobj->user_handle = cmd.user_handle;
615 uobj->context = file->ucontext;
616
617 cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe,
618 file->ucontext, &udata);
619 if (IS_ERR(cq)) {
620 ret = PTR_ERR(cq);
621 goto err;
622 }
623
624 cq->device = file->device->ib_dev;
625 cq->uobject = uobj;
626 cq->comp_handler = ib_uverbs_comp_handler;
627 cq->event_handler = ib_uverbs_cq_event_handler;
628 cq->cq_context = file;
629 atomic_set(&cq->usecnt, 0);
630
631retry:
632 if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) {
633 ret = -ENOMEM;
634 goto err_cq;
635 }
636
637 down(&ib_uverbs_idr_mutex);
638 ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->id);
639 up(&ib_uverbs_idr_mutex);
640
641 if (ret == -EAGAIN)
642 goto retry;
643 if (ret)
644 goto err_cq;
645
646 spin_lock_irq(&file->ucontext->lock);
647 list_add_tail(&uobj->list, &file->ucontext->cq_list);
648 spin_unlock_irq(&file->ucontext->lock);
649
650 memset(&resp, 0, sizeof resp);
651 resp.cq_handle = uobj->id;
652 resp.cqe = cq->cqe;
653
654 if (copy_to_user((void __user *) (unsigned long) cmd.response,
655 &resp, sizeof resp)) {
656 ret = -EFAULT;
657 goto err_list;
658 }
659
660 return in_len;
661
662err_list:
663 spin_lock_irq(&file->ucontext->lock);
664 list_del(&uobj->list);
665 spin_unlock_irq(&file->ucontext->lock);
666
667 down(&ib_uverbs_idr_mutex);
668 idr_remove(&ib_uverbs_cq_idr, uobj->id);
669 up(&ib_uverbs_idr_mutex);
670
671err_cq:
672 ib_destroy_cq(cq);
673
674err:
675 kfree(uobj);
676 return ret;
677}
678
679ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
680 const char __user *buf, int in_len,
681 int out_len)
682{
683 struct ib_uverbs_destroy_cq cmd;
684 struct ib_cq *cq;
685 struct ib_uobject *uobj;
686 int ret = -EINVAL;
687
688 if (copy_from_user(&cmd, buf, sizeof cmd))
689 return -EFAULT;
690
691 down(&ib_uverbs_idr_mutex);
692
693 cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
694 if (!cq || cq->uobject->context != file->ucontext)
695 goto out;
696
697 uobj = cq->uobject;
698
699 ret = ib_destroy_cq(cq);
700 if (ret)
701 goto out;
702
703 idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle);
704
705 spin_lock_irq(&file->ucontext->lock);
706 list_del(&uobj->list);
707 spin_unlock_irq(&file->ucontext->lock);
708
709 kfree(uobj);
710
711out:
712 up(&ib_uverbs_idr_mutex);
713
714 return ret ? ret : in_len;
715}
716
717ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
718 const char __user *buf, int in_len,
719 int out_len)
720{
721 struct ib_uverbs_create_qp cmd;
722 struct ib_uverbs_create_qp_resp resp;
723 struct ib_udata udata;
724 struct ib_uobject *uobj;
725 struct ib_pd *pd;
726 struct ib_cq *scq, *rcq;
727 struct ib_qp *qp;
728 struct ib_qp_init_attr attr;
729 int ret;
730
731 if (out_len < sizeof resp)
732 return -ENOSPC;
733
734 if (copy_from_user(&cmd, buf, sizeof cmd))
735 return -EFAULT;
736
737 INIT_UDATA(&udata, buf + sizeof cmd,
738 (unsigned long) cmd.response + sizeof resp,
739 in_len - sizeof cmd, out_len - sizeof resp);
740
741 uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
742 if (!uobj)
743 return -ENOMEM;
744
745 down(&ib_uverbs_idr_mutex);
746
747 pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
748 scq = idr_find(&ib_uverbs_cq_idr, cmd.send_cq_handle);
749 rcq = idr_find(&ib_uverbs_cq_idr, cmd.recv_cq_handle);
750
751 if (!pd || pd->uobject->context != file->ucontext ||
752 !scq || scq->uobject->context != file->ucontext ||
753 !rcq || rcq->uobject->context != file->ucontext) {
754 ret = -EINVAL;
755 goto err_up;
756 }
757
758 attr.event_handler = ib_uverbs_qp_event_handler;
759 attr.qp_context = file;
760 attr.send_cq = scq;
761 attr.recv_cq = rcq;
762 attr.srq = NULL;
763 attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
764 attr.qp_type = cmd.qp_type;
765
766 attr.cap.max_send_wr = cmd.max_send_wr;
767 attr.cap.max_recv_wr = cmd.max_recv_wr;
768 attr.cap.max_send_sge = cmd.max_send_sge;
769 attr.cap.max_recv_sge = cmd.max_recv_sge;
770 attr.cap.max_inline_data = cmd.max_inline_data;
771
772 uobj->user_handle = cmd.user_handle;
773 uobj->context = file->ucontext;
774
775 qp = pd->device->create_qp(pd, &attr, &udata);
776 if (IS_ERR(qp)) {
777 ret = PTR_ERR(qp);
778 goto err_up;
779 }
780
781 qp->device = pd->device;
782 qp->pd = pd;
783 qp->send_cq = attr.send_cq;
784 qp->recv_cq = attr.recv_cq;
785 qp->srq = attr.srq;
786 qp->uobject = uobj;
787 qp->event_handler = attr.event_handler;
788 qp->qp_context = attr.qp_context;
789 qp->qp_type = attr.qp_type;
790 atomic_inc(&pd->usecnt);
791 atomic_inc(&attr.send_cq->usecnt);
792 atomic_inc(&attr.recv_cq->usecnt);
793 if (attr.srq)
794 atomic_inc(&attr.srq->usecnt);
795
796 memset(&resp, 0, sizeof resp);
797 resp.qpn = qp->qp_num;
798
799retry:
800 if (!idr_pre_get(&ib_uverbs_qp_idr, GFP_KERNEL)) {
801 ret = -ENOMEM;
802 goto err_destroy;
803 }
804
805 ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->id);
806
807 if (ret == -EAGAIN)
808 goto retry;
809 if (ret)
810 goto err_destroy;
811
812 resp.qp_handle = uobj->id;
813
814 spin_lock_irq(&file->ucontext->lock);
815 list_add_tail(&uobj->list, &file->ucontext->qp_list);
816 spin_unlock_irq(&file->ucontext->lock);
817
818 if (copy_to_user((void __user *) (unsigned long) cmd.response,
819 &resp, sizeof resp)) {
820 ret = -EFAULT;
821 goto err_list;
822 }
823
824 up(&ib_uverbs_idr_mutex);
825
826 return in_len;
827
828err_list:
829 spin_lock_irq(&file->ucontext->lock);
830 list_del(&uobj->list);
831 spin_unlock_irq(&file->ucontext->lock);
832
833err_destroy:
834 ib_destroy_qp(qp);
835
836err_up:
837 up(&ib_uverbs_idr_mutex);
838
839 kfree(uobj);
840 return ret;
841}
842
843ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
844 const char __user *buf, int in_len,
845 int out_len)
846{
847 struct ib_uverbs_modify_qp cmd;
848 struct ib_qp *qp;
849 struct ib_qp_attr *attr;
850 int ret;
851
852 if (copy_from_user(&cmd, buf, sizeof cmd))
853 return -EFAULT;
854
855 attr = kmalloc(sizeof *attr, GFP_KERNEL);
856 if (!attr)
857 return -ENOMEM;
858
859 down(&ib_uverbs_idr_mutex);
860
861 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
862 if (!qp || qp->uobject->context != file->ucontext) {
863 ret = -EINVAL;
864 goto out;
865 }
866
867 attr->qp_state = cmd.qp_state;
868 attr->cur_qp_state = cmd.cur_qp_state;
869 attr->path_mtu = cmd.path_mtu;
870 attr->path_mig_state = cmd.path_mig_state;
871 attr->qkey = cmd.qkey;
872 attr->rq_psn = cmd.rq_psn;
873 attr->sq_psn = cmd.sq_psn;
874 attr->dest_qp_num = cmd.dest_qp_num;
875 attr->qp_access_flags = cmd.qp_access_flags;
876 attr->pkey_index = cmd.pkey_index;
877 attr->alt_pkey_index = cmd.pkey_index;
878 attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
879 attr->max_rd_atomic = cmd.max_rd_atomic;
880 attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic;
881 attr->min_rnr_timer = cmd.min_rnr_timer;
882 attr->port_num = cmd.port_num;
883 attr->timeout = cmd.timeout;
884 attr->retry_cnt = cmd.retry_cnt;
885 attr->rnr_retry = cmd.rnr_retry;
886 attr->alt_port_num = cmd.alt_port_num;
887 attr->alt_timeout = cmd.alt_timeout;
888
889 memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
890 attr->ah_attr.grh.flow_label = cmd.dest.flow_label;
891 attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index;
892 attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit;
893 attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class;
894 attr->ah_attr.dlid = cmd.dest.dlid;
895 attr->ah_attr.sl = cmd.dest.sl;
896 attr->ah_attr.src_path_bits = cmd.dest.src_path_bits;
897 attr->ah_attr.static_rate = cmd.dest.static_rate;
898 attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0;
899 attr->ah_attr.port_num = cmd.dest.port_num;
900
901 memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
902 attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label;
903 attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index;
904 attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit;
905 attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
906 attr->alt_ah_attr.dlid = cmd.alt_dest.dlid;
907 attr->alt_ah_attr.sl = cmd.alt_dest.sl;
908 attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits;
909 attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate;
910 attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
911 attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
912
913 ret = ib_modify_qp(qp, attr, cmd.attr_mask);
914 if (ret)
915 goto out;
916
917 ret = in_len;
918
919out:
920 up(&ib_uverbs_idr_mutex);
921 kfree(attr);
922
923 return ret;
924}
925
926ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
927 const char __user *buf, int in_len,
928 int out_len)
929{
930 struct ib_uverbs_destroy_qp cmd;
931 struct ib_qp *qp;
932 struct ib_uobject *uobj;
933 int ret = -EINVAL;
934
935 if (copy_from_user(&cmd, buf, sizeof cmd))
936 return -EFAULT;
937
938 down(&ib_uverbs_idr_mutex);
939
940 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
941 if (!qp || qp->uobject->context != file->ucontext)
942 goto out;
943
944 uobj = qp->uobject;
945
946 ret = ib_destroy_qp(qp);
947 if (ret)
948 goto out;
949
950 idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle);
951
952 spin_lock_irq(&file->ucontext->lock);
953 list_del(&uobj->list);
954 spin_unlock_irq(&file->ucontext->lock);
955
956 kfree(uobj);
957
958out:
959 up(&ib_uverbs_idr_mutex);
960
961 return ret ? ret : in_len;
962}
963
964ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
965 const char __user *buf, int in_len,
966 int out_len)
967{
968 struct ib_uverbs_attach_mcast cmd;
969 struct ib_qp *qp;
970 int ret = -EINVAL;
971
972 if (copy_from_user(&cmd, buf, sizeof cmd))
973 return -EFAULT;
974
975 down(&ib_uverbs_idr_mutex);
976
977 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
978 if (qp && qp->uobject->context == file->ucontext)
979 ret = ib_attach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
980
981 up(&ib_uverbs_idr_mutex);
982
983 return ret ? ret : in_len;
984}
985
986ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
987 const char __user *buf, int in_len,
988 int out_len)
989{
990 struct ib_uverbs_detach_mcast cmd;
991 struct ib_qp *qp;
992 int ret = -EINVAL;
993
994 if (copy_from_user(&cmd, buf, sizeof cmd))
995 return -EFAULT;
996
997 down(&ib_uverbs_idr_mutex);
998
999 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
1000 if (qp && qp->uobject->context == file->ucontext)
1001 ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
1002
1003 up(&ib_uverbs_idr_mutex);
1004
1005 return ret ? ret : in_len;
1006}
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
new file mode 100644
index 000000000000..fbbe03d8c901
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -0,0 +1,698 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: uverbs_main.c 2733 2005-06-28 19:14:34Z roland $
34 */
35
36#include <linux/module.h>
37#include <linux/init.h>
38#include <linux/device.h>
39#include <linux/err.h>
40#include <linux/fs.h>
41#include <linux/poll.h>
42#include <linux/file.h>
43#include <linux/mount.h>
44
45#include <asm/uaccess.h>
46
47#include "uverbs.h"
48
49MODULE_AUTHOR("Roland Dreier");
50MODULE_DESCRIPTION("InfiniBand userspace verbs access");
51MODULE_LICENSE("Dual BSD/GPL");
52
53#define INFINIBANDEVENTFS_MAGIC 0x49426576 /* "IBev" */
54
55enum {
56 IB_UVERBS_MAJOR = 231,
57 IB_UVERBS_BASE_MINOR = 192,
58 IB_UVERBS_MAX_DEVICES = 32
59};
60
61#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
62
63DECLARE_MUTEX(ib_uverbs_idr_mutex);
64DEFINE_IDR(ib_uverbs_pd_idr);
65DEFINE_IDR(ib_uverbs_mr_idr);
66DEFINE_IDR(ib_uverbs_mw_idr);
67DEFINE_IDR(ib_uverbs_ah_idr);
68DEFINE_IDR(ib_uverbs_cq_idr);
69DEFINE_IDR(ib_uverbs_qp_idr);
70
71static spinlock_t map_lock;
72static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
73
74static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
75 const char __user *buf, int in_len,
76 int out_len) = {
77 [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params,
78 [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
79 [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
80 [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
81 [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid,
82 [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey,
83 [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
84 [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
85 [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
86 [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
87 [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
88 [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
89 [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
90 [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
91 [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
92 [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
93 [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
94};
95
96static struct vfsmount *uverbs_event_mnt;
97
98static void ib_uverbs_add_one(struct ib_device *device);
99static void ib_uverbs_remove_one(struct ib_device *device);
100
101static int ib_dealloc_ucontext(struct ib_ucontext *context)
102{
103 struct ib_uobject *uobj, *tmp;
104
105 if (!context)
106 return 0;
107
108 down(&ib_uverbs_idr_mutex);
109
110 /* XXX Free AHs */
111
112 list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
113 struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id);
114 idr_remove(&ib_uverbs_qp_idr, uobj->id);
115 ib_destroy_qp(qp);
116 list_del(&uobj->list);
117 kfree(uobj);
118 }
119
120 list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
121 struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id);
122 idr_remove(&ib_uverbs_cq_idr, uobj->id);
123 ib_destroy_cq(cq);
124 list_del(&uobj->list);
125 kfree(uobj);
126 }
127
128 /* XXX Free SRQs */
129 /* XXX Free MWs */
130
131 list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
132 struct ib_mr *mr = idr_find(&ib_uverbs_mr_idr, uobj->id);
133 struct ib_umem_object *memobj;
134
135 idr_remove(&ib_uverbs_mr_idr, uobj->id);
136 ib_dereg_mr(mr);
137
138 memobj = container_of(uobj, struct ib_umem_object, uobject);
139 ib_umem_release_on_close(mr->device, &memobj->umem);
140
141 list_del(&uobj->list);
142 kfree(memobj);
143 }
144
145 list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
146 struct ib_pd *pd = idr_find(&ib_uverbs_pd_idr, uobj->id);
147 idr_remove(&ib_uverbs_pd_idr, uobj->id);
148 ib_dealloc_pd(pd);
149 list_del(&uobj->list);
150 kfree(uobj);
151 }
152
153 up(&ib_uverbs_idr_mutex);
154
155 return context->device->dealloc_ucontext(context);
156}
157
158static void ib_uverbs_release_file(struct kref *ref)
159{
160 struct ib_uverbs_file *file =
161 container_of(ref, struct ib_uverbs_file, ref);
162
163 module_put(file->device->ib_dev->owner);
164 kfree(file);
165}
166
167static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
168 size_t count, loff_t *pos)
169{
170 struct ib_uverbs_event_file *file = filp->private_data;
171 void *event;
172 int eventsz;
173 int ret = 0;
174
175 spin_lock_irq(&file->lock);
176
177 while (list_empty(&file->event_list) && file->fd >= 0) {
178 spin_unlock_irq(&file->lock);
179
180 if (filp->f_flags & O_NONBLOCK)
181 return -EAGAIN;
182
183 if (wait_event_interruptible(file->poll_wait,
184 !list_empty(&file->event_list) ||
185 file->fd < 0))
186 return -ERESTARTSYS;
187
188 spin_lock_irq(&file->lock);
189 }
190
191 if (file->fd < 0) {
192 spin_unlock_irq(&file->lock);
193 return -ENODEV;
194 }
195
196 if (file->is_async) {
197 event = list_entry(file->event_list.next,
198 struct ib_uverbs_async_event, list);
199 eventsz = sizeof (struct ib_uverbs_async_event_desc);
200 } else {
201 event = list_entry(file->event_list.next,
202 struct ib_uverbs_comp_event, list);
203 eventsz = sizeof (struct ib_uverbs_comp_event_desc);
204 }
205
206 if (eventsz > count) {
207 ret = -EINVAL;
208 event = NULL;
209 } else
210 list_del(file->event_list.next);
211
212 spin_unlock_irq(&file->lock);
213
214 if (event) {
215 if (copy_to_user(buf, event, eventsz))
216 ret = -EFAULT;
217 else
218 ret = eventsz;
219 }
220
221 kfree(event);
222
223 return ret;
224}
225
226static unsigned int ib_uverbs_event_poll(struct file *filp,
227 struct poll_table_struct *wait)
228{
229 unsigned int pollflags = 0;
230 struct ib_uverbs_event_file *file = filp->private_data;
231
232 poll_wait(filp, &file->poll_wait, wait);
233
234 spin_lock_irq(&file->lock);
235 if (file->fd < 0)
236 pollflags = POLLERR;
237 else if (!list_empty(&file->event_list))
238 pollflags = POLLIN | POLLRDNORM;
239 spin_unlock_irq(&file->lock);
240
241 return pollflags;
242}
243
244static void ib_uverbs_event_release(struct ib_uverbs_event_file *file)
245{
246 struct list_head *entry, *tmp;
247
248 spin_lock_irq(&file->lock);
249 if (file->fd != -1) {
250 file->fd = -1;
251 list_for_each_safe(entry, tmp, &file->event_list)
252 if (file->is_async)
253 kfree(list_entry(entry, struct ib_uverbs_async_event, list));
254 else
255 kfree(list_entry(entry, struct ib_uverbs_comp_event, list));
256 }
257 spin_unlock_irq(&file->lock);
258}
259
260static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
261{
262 struct ib_uverbs_event_file *file = filp->private_data;
263
264 ib_uverbs_event_release(file);
265 kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
266
267 return 0;
268}
269
270static struct file_operations uverbs_event_fops = {
271 /*
272 * No .owner field since we artificially create event files,
273 * so there is no increment to the module reference count in
274 * the open path. All event files come from a uverbs command
275 * file, which already takes a module reference, so this is OK.
276 */
277 .read = ib_uverbs_event_read,
278 .poll = ib_uverbs_event_poll,
279 .release = ib_uverbs_event_close
280};
281
282void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
283{
284 struct ib_uverbs_file *file = cq_context;
285 struct ib_uverbs_comp_event *entry;
286 unsigned long flags;
287
288 entry = kmalloc(sizeof *entry, GFP_ATOMIC);
289 if (!entry)
290 return;
291
292 entry->desc.cq_handle = cq->uobject->user_handle;
293
294 spin_lock_irqsave(&file->comp_file[0].lock, flags);
295 list_add_tail(&entry->list, &file->comp_file[0].event_list);
296 spin_unlock_irqrestore(&file->comp_file[0].lock, flags);
297
298 wake_up_interruptible(&file->comp_file[0].poll_wait);
299}
300
301static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
302 __u64 element, __u64 event)
303{
304 struct ib_uverbs_async_event *entry;
305 unsigned long flags;
306
307 entry = kmalloc(sizeof *entry, GFP_ATOMIC);
308 if (!entry)
309 return;
310
311 entry->desc.element = element;
312 entry->desc.event_type = event;
313
314 spin_lock_irqsave(&file->async_file.lock, flags);
315 list_add_tail(&entry->list, &file->async_file.event_list);
316 spin_unlock_irqrestore(&file->async_file.lock, flags);
317
318 wake_up_interruptible(&file->async_file.poll_wait);
319}
320
321void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
322{
323 ib_uverbs_async_handler(context_ptr,
324 event->element.cq->uobject->user_handle,
325 event->event);
326}
327
328void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
329{
330 ib_uverbs_async_handler(context_ptr,
331 event->element.qp->uobject->user_handle,
332 event->event);
333}
334
335static void ib_uverbs_event_handler(struct ib_event_handler *handler,
336 struct ib_event *event)
337{
338 struct ib_uverbs_file *file =
339 container_of(handler, struct ib_uverbs_file, event_handler);
340
341 ib_uverbs_async_handler(file, event->element.port_num, event->event);
342}
343
344static int ib_uverbs_event_init(struct ib_uverbs_event_file *file,
345 struct ib_uverbs_file *uverbs_file)
346{
347 struct file *filp;
348
349 spin_lock_init(&file->lock);
350 INIT_LIST_HEAD(&file->event_list);
351 init_waitqueue_head(&file->poll_wait);
352 file->uverbs_file = uverbs_file;
353
354 file->fd = get_unused_fd();
355 if (file->fd < 0)
356 return file->fd;
357
358 filp = get_empty_filp();
359 if (!filp) {
360 put_unused_fd(file->fd);
361 return -ENFILE;
362 }
363
364 filp->f_op = &uverbs_event_fops;
365 filp->f_vfsmnt = mntget(uverbs_event_mnt);
366 filp->f_dentry = dget(uverbs_event_mnt->mnt_root);
367 filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
368 filp->f_flags = O_RDONLY;
369 filp->f_mode = FMODE_READ;
370 filp->private_data = file;
371
372 fd_install(file->fd, filp);
373
374 return 0;
375}
376
377static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
378 size_t count, loff_t *pos)
379{
380 struct ib_uverbs_file *file = filp->private_data;
381 struct ib_uverbs_cmd_hdr hdr;
382
383 if (count < sizeof hdr)
384 return -EINVAL;
385
386 if (copy_from_user(&hdr, buf, sizeof hdr))
387 return -EFAULT;
388
389 if (hdr.in_words * 4 != count)
390 return -EINVAL;
391
392 if (hdr.command < 0 || hdr.command >= ARRAY_SIZE(uverbs_cmd_table))
393 return -EINVAL;
394
395 if (!file->ucontext &&
396 hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS &&
397 hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
398 return -EINVAL;
399
400 return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
401 hdr.in_words * 4, hdr.out_words * 4);
402}
403
404static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
405{
406 struct ib_uverbs_file *file = filp->private_data;
407
408 if (!file->ucontext)
409 return -ENODEV;
410 else
411 return file->device->ib_dev->mmap(file->ucontext, vma);
412}
413
414static int ib_uverbs_open(struct inode *inode, struct file *filp)
415{
416 struct ib_uverbs_device *dev =
417 container_of(inode->i_cdev, struct ib_uverbs_device, dev);
418 struct ib_uverbs_file *file;
419 int i = 0;
420 int ret;
421
422 if (!try_module_get(dev->ib_dev->owner))
423 return -ENODEV;
424
425 file = kmalloc(sizeof *file +
426 (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file),
427 GFP_KERNEL);
428 if (!file)
429 return -ENOMEM;
430
431 file->device = dev;
432 kref_init(&file->ref);
433
434 file->ucontext = NULL;
435
436 ret = ib_uverbs_event_init(&file->async_file, file);
437 if (ret)
438 goto err;
439
440 file->async_file.is_async = 1;
441
442 kref_get(&file->ref);
443
444 for (i = 0; i < dev->num_comp; ++i) {
445 ret = ib_uverbs_event_init(&file->comp_file[i], file);
446 if (ret)
447 goto err_async;
448 kref_get(&file->ref);
449 file->comp_file[i].is_async = 0;
450 }
451
452
453 filp->private_data = file;
454
455 INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev,
456 ib_uverbs_event_handler);
457 if (ib_register_event_handler(&file->event_handler))
458 goto err_async;
459
460 return 0;
461
462err_async:
463 while (i--)
464 ib_uverbs_event_release(&file->comp_file[i]);
465
466 ib_uverbs_event_release(&file->async_file);
467
468err:
469 kref_put(&file->ref, ib_uverbs_release_file);
470
471 return ret;
472}
473
474static int ib_uverbs_close(struct inode *inode, struct file *filp)
475{
476 struct ib_uverbs_file *file = filp->private_data;
477 int i;
478
479 ib_unregister_event_handler(&file->event_handler);
480 ib_uverbs_event_release(&file->async_file);
481 ib_dealloc_ucontext(file->ucontext);
482
483 for (i = 0; i < file->device->num_comp; ++i)
484 ib_uverbs_event_release(&file->comp_file[i]);
485
486 kref_put(&file->ref, ib_uverbs_release_file);
487
488 return 0;
489}
490
491static struct file_operations uverbs_fops = {
492 .owner = THIS_MODULE,
493 .write = ib_uverbs_write,
494 .open = ib_uverbs_open,
495 .release = ib_uverbs_close
496};
497
498static struct file_operations uverbs_mmap_fops = {
499 .owner = THIS_MODULE,
500 .write = ib_uverbs_write,
501 .mmap = ib_uverbs_mmap,
502 .open = ib_uverbs_open,
503 .release = ib_uverbs_close
504};
505
506static struct ib_client uverbs_client = {
507 .name = "uverbs",
508 .add = ib_uverbs_add_one,
509 .remove = ib_uverbs_remove_one
510};
511
512static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
513{
514 struct ib_uverbs_device *dev =
515 container_of(class_dev, struct ib_uverbs_device, class_dev);
516
517 return sprintf(buf, "%s\n", dev->ib_dev->name);
518}
519static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
520
521static void ib_uverbs_release_class_dev(struct class_device *class_dev)
522{
523 struct ib_uverbs_device *dev =
524 container_of(class_dev, struct ib_uverbs_device, class_dev);
525
526 cdev_del(&dev->dev);
527 clear_bit(dev->devnum, dev_map);
528 kfree(dev);
529}
530
531static struct class uverbs_class = {
532 .name = "infiniband_verbs",
533 .release = ib_uverbs_release_class_dev
534};
535
536static ssize_t show_abi_version(struct class *class, char *buf)
537{
538 return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION);
539}
540static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
541
542static void ib_uverbs_add_one(struct ib_device *device)
543{
544 struct ib_uverbs_device *uverbs_dev;
545
546 if (!device->alloc_ucontext)
547 return;
548
549 uverbs_dev = kmalloc(sizeof *uverbs_dev, GFP_KERNEL);
550 if (!uverbs_dev)
551 return;
552
553 memset(uverbs_dev, 0, sizeof *uverbs_dev);
554
555 spin_lock(&map_lock);
556 uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
557 if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) {
558 spin_unlock(&map_lock);
559 goto err;
560 }
561 set_bit(uverbs_dev->devnum, dev_map);
562 spin_unlock(&map_lock);
563
564 uverbs_dev->ib_dev = device;
565 uverbs_dev->num_comp = 1;
566
567 if (device->mmap)
568 cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops);
569 else
570 cdev_init(&uverbs_dev->dev, &uverbs_fops);
571 uverbs_dev->dev.owner = THIS_MODULE;
572 kobject_set_name(&uverbs_dev->dev.kobj, "uverbs%d", uverbs_dev->devnum);
573 if (cdev_add(&uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
574 goto err;
575
576 uverbs_dev->class_dev.class = &uverbs_class;
577 uverbs_dev->class_dev.dev = device->dma_device;
578 uverbs_dev->class_dev.devt = uverbs_dev->dev.dev;
579 snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum);
580 if (class_device_register(&uverbs_dev->class_dev))
581 goto err_cdev;
582
583 if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev))
584 goto err_class;
585
586 ib_set_client_data(device, &uverbs_client, uverbs_dev);
587
588 return;
589
590err_class:
591 class_device_unregister(&uverbs_dev->class_dev);
592
593err_cdev:
594 cdev_del(&uverbs_dev->dev);
595 clear_bit(uverbs_dev->devnum, dev_map);
596
597err:
598 kfree(uverbs_dev);
599 return;
600}
601
602static void ib_uverbs_remove_one(struct ib_device *device)
603{
604 struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);
605
606 if (!uverbs_dev)
607 return;
608
609 class_device_unregister(&uverbs_dev->class_dev);
610}
611
612static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
613 const char *dev_name, void *data)
614{
615 return get_sb_pseudo(fs_type, "infinibandevent:", NULL,
616 INFINIBANDEVENTFS_MAGIC);
617}
618
619static struct file_system_type uverbs_event_fs = {
620 /* No owner field so module can be unloaded */
621 .name = "infinibandeventfs",
622 .get_sb = uverbs_event_get_sb,
623 .kill_sb = kill_litter_super
624};
625
626static int __init ib_uverbs_init(void)
627{
628 int ret;
629
630 spin_lock_init(&map_lock);
631
632 ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
633 "infiniband_verbs");
634 if (ret) {
635 printk(KERN_ERR "user_verbs: couldn't register device number\n");
636 goto out;
637 }
638
639 ret = class_register(&uverbs_class);
640 if (ret) {
641 printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
642 goto out_chrdev;
643 }
644
645 ret = class_create_file(&uverbs_class, &class_attr_abi_version);
646 if (ret) {
647 printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
648 goto out_class;
649 }
650
651 ret = register_filesystem(&uverbs_event_fs);
652 if (ret) {
653 printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n");
654 goto out_class;
655 }
656
657 uverbs_event_mnt = kern_mount(&uverbs_event_fs);
658 if (IS_ERR(uverbs_event_mnt)) {
659 ret = PTR_ERR(uverbs_event_mnt);
660 printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n");
661 goto out_fs;
662 }
663
664 ret = ib_register_client(&uverbs_client);
665 if (ret) {
666 printk(KERN_ERR "user_verbs: couldn't register client\n");
667 goto out_mnt;
668 }
669
670 return 0;
671
672out_mnt:
673 mntput(uverbs_event_mnt);
674
675out_fs:
676 unregister_filesystem(&uverbs_event_fs);
677
678out_class:
679 class_unregister(&uverbs_class);
680
681out_chrdev:
682 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
683
684out:
685 return ret;
686}
687
688static void __exit ib_uverbs_cleanup(void)
689{
690 ib_unregister_client(&uverbs_client);
691 mntput(uverbs_event_mnt);
692 unregister_filesystem(&uverbs_event_fs);
693 class_unregister(&uverbs_class);
694 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
695}
696
697module_init(ib_uverbs_init);
698module_exit(ib_uverbs_cleanup);
diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/uverbs_mem.c
new file mode 100644
index 000000000000..ed550f6595bd
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_mem.c
@@ -0,0 +1,221 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $
34 */
35
36#include <linux/mm.h>
37#include <linux/dma-mapping.h>
38
39#include "uverbs.h"
40
41struct ib_umem_account_work {
42 struct work_struct work;
43 struct mm_struct *mm;
44 unsigned long diff;
45};
46
47
48static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
49{
50 struct ib_umem_chunk *chunk, *tmp;
51 int i;
52
53 list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
54 dma_unmap_sg(dev->dma_device, chunk->page_list,
55 chunk->nents, DMA_BIDIRECTIONAL);
56 for (i = 0; i < chunk->nents; ++i) {
57 if (umem->writable && dirty)
58 set_page_dirty_lock(chunk->page_list[i].page);
59 put_page(chunk->page_list[i].page);
60 }
61
62 kfree(chunk);
63 }
64}
65
66int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
67 void *addr, size_t size, int write)
68{
69 struct page **page_list;
70 struct ib_umem_chunk *chunk;
71 unsigned long locked;
72 unsigned long lock_limit;
73 unsigned long cur_base;
74 unsigned long npages;
75 int ret = 0;
76 int off;
77 int i;
78
79 if (!can_do_mlock())
80 return -EPERM;
81
82 page_list = (struct page **) __get_free_page(GFP_KERNEL);
83 if (!page_list)
84 return -ENOMEM;
85
86 mem->user_base = (unsigned long) addr;
87 mem->length = size;
88 mem->offset = (unsigned long) addr & ~PAGE_MASK;
89 mem->page_size = PAGE_SIZE;
90 mem->writable = write;
91
92 INIT_LIST_HEAD(&mem->chunk_list);
93
94 npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT;
95
96 down_write(&current->mm->mmap_sem);
97
98 locked = npages + current->mm->locked_vm;
99 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
100
101 if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
102 ret = -ENOMEM;
103 goto out;
104 }
105
106 cur_base = (unsigned long) addr & PAGE_MASK;
107
108 while (npages) {
109 ret = get_user_pages(current, current->mm, cur_base,
110 min_t(int, npages,
111 PAGE_SIZE / sizeof (struct page *)),
112 1, !write, page_list, NULL);
113
114 if (ret < 0)
115 goto out;
116
117 cur_base += ret * PAGE_SIZE;
118 npages -= ret;
119
120 off = 0;
121
122 while (ret) {
123 chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
124 min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
125 GFP_KERNEL);
126 if (!chunk) {
127 ret = -ENOMEM;
128 goto out;
129 }
130
131 chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
132 for (i = 0; i < chunk->nents; ++i) {
133 chunk->page_list[i].page = page_list[i + off];
134 chunk->page_list[i].offset = 0;
135 chunk->page_list[i].length = PAGE_SIZE;
136 }
137
138 chunk->nmap = dma_map_sg(dev->dma_device,
139 &chunk->page_list[0],
140 chunk->nents,
141 DMA_BIDIRECTIONAL);
142 if (chunk->nmap <= 0) {
143 for (i = 0; i < chunk->nents; ++i)
144 put_page(chunk->page_list[i].page);
145 kfree(chunk);
146
147 ret = -ENOMEM;
148 goto out;
149 }
150
151 ret -= chunk->nents;
152 off += chunk->nents;
153 list_add_tail(&chunk->list, &mem->chunk_list);
154 }
155
156 ret = 0;
157 }
158
159out:
160 if (ret < 0)
161 __ib_umem_release(dev, mem, 0);
162 else
163 current->mm->locked_vm = locked;
164
165 up_write(&current->mm->mmap_sem);
166 free_page((unsigned long) page_list);
167
168 return ret;
169}
170
171void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
172{
173 __ib_umem_release(dev, umem, 1);
174
175 down_write(&current->mm->mmap_sem);
176 current->mm->locked_vm -=
177 PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
178 up_write(&current->mm->mmap_sem);
179}
180
181static void ib_umem_account(void *work_ptr)
182{
183 struct ib_umem_account_work *work = work_ptr;
184
185 down_write(&work->mm->mmap_sem);
186 work->mm->locked_vm -= work->diff;
187 up_write(&work->mm->mmap_sem);
188 mmput(work->mm);
189 kfree(work);
190}
191
192void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem)
193{
194 struct ib_umem_account_work *work;
195 struct mm_struct *mm;
196
197 __ib_umem_release(dev, umem, 1);
198
199 mm = get_task_mm(current);
200 if (!mm)
201 return;
202
203 /*
204 * We may be called with the mm's mmap_sem already held. This
205 * can happen when a userspace munmap() is the call that drops
206 * the last reference to our file and calls our release
207 * method. If there are memory regions to destroy, we'll end
208 * up here and not be able to take the mmap_sem. Therefore we
209 * defer the vm_locked accounting to the system workqueue.
210 */
211
212 work = kmalloc(sizeof *work, GFP_KERNEL);
213 if (!work)
214 return;
215
216 INIT_WORK(&work->work, ib_umem_account, work);
217 work->mm = mm;
218 work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
219
220 schedule_work(&work->work);
221}
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 7c08ed0cd7dd..2516f9646515 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -4,6 +4,7 @@
4 * Copyright (c) 2004 Intel Corporation. All rights reserved. 4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved. 6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 * Copyright (c) 2005 Cisco Systems. All rights reserved.
7 * 8 *
8 * This software is available to you under a choice of one of two 9 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU 10 * licenses. You may choose to be licensed under the terms of the GNU
@@ -47,10 +48,11 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device)
47{ 48{
48 struct ib_pd *pd; 49 struct ib_pd *pd;
49 50
50 pd = device->alloc_pd(device); 51 pd = device->alloc_pd(device, NULL, NULL);
51 52
52 if (!IS_ERR(pd)) { 53 if (!IS_ERR(pd)) {
53 pd->device = device; 54 pd->device = device;
55 pd->uobject = NULL;
54 atomic_set(&pd->usecnt, 0); 56 atomic_set(&pd->usecnt, 0);
55 } 57 }
56 58
@@ -76,8 +78,9 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
76 ah = pd->device->create_ah(pd, ah_attr); 78 ah = pd->device->create_ah(pd, ah_attr);
77 79
78 if (!IS_ERR(ah)) { 80 if (!IS_ERR(ah)) {
79 ah->device = pd->device; 81 ah->device = pd->device;
80 ah->pd = pd; 82 ah->pd = pd;
83 ah->uobject = NULL;
81 atomic_inc(&pd->usecnt); 84 atomic_inc(&pd->usecnt);
82 } 85 }
83 86
@@ -122,7 +125,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
122{ 125{
123 struct ib_qp *qp; 126 struct ib_qp *qp;
124 127
125 qp = pd->device->create_qp(pd, qp_init_attr); 128 qp = pd->device->create_qp(pd, qp_init_attr, NULL);
126 129
127 if (!IS_ERR(qp)) { 130 if (!IS_ERR(qp)) {
128 qp->device = pd->device; 131 qp->device = pd->device;
@@ -130,6 +133,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
130 qp->send_cq = qp_init_attr->send_cq; 133 qp->send_cq = qp_init_attr->send_cq;
131 qp->recv_cq = qp_init_attr->recv_cq; 134 qp->recv_cq = qp_init_attr->recv_cq;
132 qp->srq = qp_init_attr->srq; 135 qp->srq = qp_init_attr->srq;
136 qp->uobject = NULL;
133 qp->event_handler = qp_init_attr->event_handler; 137 qp->event_handler = qp_init_attr->event_handler;
134 qp->qp_context = qp_init_attr->qp_context; 138 qp->qp_context = qp_init_attr->qp_context;
135 qp->qp_type = qp_init_attr->qp_type; 139 qp->qp_type = qp_init_attr->qp_type;
@@ -197,10 +201,11 @@ struct ib_cq *ib_create_cq(struct ib_device *device,
197{ 201{
198 struct ib_cq *cq; 202 struct ib_cq *cq;
199 203
200 cq = device->create_cq(device, cqe); 204 cq = device->create_cq(device, cqe, NULL, NULL);
201 205
202 if (!IS_ERR(cq)) { 206 if (!IS_ERR(cq)) {
203 cq->device = device; 207 cq->device = device;
208 cq->uobject = NULL;
204 cq->comp_handler = comp_handler; 209 cq->comp_handler = comp_handler;
205 cq->event_handler = event_handler; 210 cq->event_handler = event_handler;
206 cq->cq_context = cq_context; 211 cq->cq_context = cq_context;
@@ -245,8 +250,9 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
245 mr = pd->device->get_dma_mr(pd, mr_access_flags); 250 mr = pd->device->get_dma_mr(pd, mr_access_flags);
246 251
247 if (!IS_ERR(mr)) { 252 if (!IS_ERR(mr)) {
248 mr->device = pd->device; 253 mr->device = pd->device;
249 mr->pd = pd; 254 mr->pd = pd;
255 mr->uobject = NULL;
250 atomic_inc(&pd->usecnt); 256 atomic_inc(&pd->usecnt);
251 atomic_set(&mr->usecnt, 0); 257 atomic_set(&mr->usecnt, 0);
252 } 258 }
@@ -267,8 +273,9 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
267 mr_access_flags, iova_start); 273 mr_access_flags, iova_start);
268 274
269 if (!IS_ERR(mr)) { 275 if (!IS_ERR(mr)) {
270 mr->device = pd->device; 276 mr->device = pd->device;
271 mr->pd = pd; 277 mr->pd = pd;
278 mr->uobject = NULL;
272 atomic_inc(&pd->usecnt); 279 atomic_inc(&pd->usecnt);
273 atomic_set(&mr->usecnt, 0); 280 atomic_set(&mr->usecnt, 0);
274 } 281 }
@@ -344,8 +351,9 @@ struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
344 351
345 mw = pd->device->alloc_mw(pd); 352 mw = pd->device->alloc_mw(pd);
346 if (!IS_ERR(mw)) { 353 if (!IS_ERR(mw)) {
347 mw->device = pd->device; 354 mw->device = pd->device;
348 mw->pd = pd; 355 mw->pd = pd;
356 mw->uobject = NULL;
349 atomic_inc(&pd->usecnt); 357 atomic_inc(&pd->usecnt);
350 } 358 }
351 359
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 766e9031ec45..b5aea7b869f6 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4 * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
4 * 5 *
5 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 7 * licenses. You may choose to be licensed under the terms of the GNU
@@ -742,6 +743,7 @@ err_out:
742} 743}
743 744
744int mthca_init_cq(struct mthca_dev *dev, int nent, 745int mthca_init_cq(struct mthca_dev *dev, int nent,
746 struct mthca_ucontext *ctx, u32 pdn,
745 struct mthca_cq *cq) 747 struct mthca_cq *cq)
746{ 748{
747 int size = nent * MTHCA_CQ_ENTRY_SIZE; 749 int size = nent * MTHCA_CQ_ENTRY_SIZE;
@@ -753,30 +755,33 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
753 755
754 might_sleep(); 756 might_sleep();
755 757
756 cq->ibcq.cqe = nent - 1; 758 cq->ibcq.cqe = nent - 1;
759 cq->is_kernel = !ctx;
757 760
758 cq->cqn = mthca_alloc(&dev->cq_table.alloc); 761 cq->cqn = mthca_alloc(&dev->cq_table.alloc);
759 if (cq->cqn == -1) 762 if (cq->cqn == -1)
760 return -ENOMEM; 763 return -ENOMEM;
761 764
762 if (mthca_is_memfree(dev)) { 765 if (mthca_is_memfree(dev)) {
763 cq->arm_sn = 1;
764
765 err = mthca_table_get(dev, dev->cq_table.table, cq->cqn); 766 err = mthca_table_get(dev, dev->cq_table.table, cq->cqn);
766 if (err) 767 if (err)
767 goto err_out; 768 goto err_out;
768 769
769 err = -ENOMEM; 770 if (cq->is_kernel) {
771 cq->arm_sn = 1;
772
773 err = -ENOMEM;
770 774
771 cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, 775 cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI,
772 cq->cqn, &cq->set_ci_db); 776 cq->cqn, &cq->set_ci_db);
773 if (cq->set_ci_db_index < 0) 777 if (cq->set_ci_db_index < 0)
774 goto err_out_icm; 778 goto err_out_icm;
775 779
776 cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM, 780 cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
777 cq->cqn, &cq->arm_db); 781 cq->cqn, &cq->arm_db);
778 if (cq->arm_db_index < 0) 782 if (cq->arm_db_index < 0)
779 goto err_out_ci; 783 goto err_out_ci;
784 }
780 } 785 }
781 786
782 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); 787 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
@@ -785,12 +790,14 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
785 790
786 cq_context = mailbox->buf; 791 cq_context = mailbox->buf;
787 792
788 err = mthca_alloc_cq_buf(dev, size, cq); 793 if (cq->is_kernel) {
789 if (err) 794 err = mthca_alloc_cq_buf(dev, size, cq);
790 goto err_out_mailbox; 795 if (err)
796 goto err_out_mailbox;
791 797
792 for (i = 0; i < nent; ++i) 798 for (i = 0; i < nent; ++i)
793 set_cqe_hw(get_cqe(cq, i)); 799 set_cqe_hw(get_cqe(cq, i));
800 }
794 801
795 spin_lock_init(&cq->lock); 802 spin_lock_init(&cq->lock);
796 atomic_set(&cq->refcount, 1); 803 atomic_set(&cq->refcount, 1);
@@ -801,11 +808,14 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
801 MTHCA_CQ_STATE_DISARMED | 808 MTHCA_CQ_STATE_DISARMED |
802 MTHCA_CQ_FLAG_TR); 809 MTHCA_CQ_FLAG_TR);
803 cq_context->start = cpu_to_be64(0); 810 cq_context->start = cpu_to_be64(0);
804 cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 | 811 cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
805 dev->driver_uar.index); 812 if (ctx)
813 cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index);
814 else
815 cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
806 cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); 816 cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
807 cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn); 817 cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
808 cq_context->pd = cpu_to_be32(dev->driver_pd.pd_num); 818 cq_context->pd = cpu_to_be32(pdn);
809 cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey); 819 cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey);
810 cq_context->cqn = cpu_to_be32(cq->cqn); 820 cq_context->cqn = cpu_to_be32(cq->cqn);
811 821
@@ -843,18 +853,20 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
843 return 0; 853 return 0;
844 854
845err_out_free_mr: 855err_out_free_mr:
846 mthca_free_mr(dev, &cq->mr); 856 if (cq->is_kernel) {
847 mthca_free_cq_buf(dev, cq); 857 mthca_free_mr(dev, &cq->mr);
858 mthca_free_cq_buf(dev, cq);
859 }
848 860
849err_out_mailbox: 861err_out_mailbox:
850 mthca_free_mailbox(dev, mailbox); 862 mthca_free_mailbox(dev, mailbox);
851 863
852err_out_arm: 864err_out_arm:
853 if (mthca_is_memfree(dev)) 865 if (cq->is_kernel && mthca_is_memfree(dev))
854 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); 866 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
855 867
856err_out_ci: 868err_out_ci:
857 if (mthca_is_memfree(dev)) 869 if (cq->is_kernel && mthca_is_memfree(dev))
858 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); 870 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
859 871
860err_out_icm: 872err_out_icm:
@@ -892,7 +904,8 @@ void mthca_free_cq(struct mthca_dev *dev,
892 int j; 904 int j;
893 905
894 printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n", 906 printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n",
895 cq->cqn, cq->cons_index, !!next_cqe_sw(cq)); 907 cq->cqn, cq->cons_index,
908 cq->is_kernel ? !!next_cqe_sw(cq) : 0);
896 for (j = 0; j < 16; ++j) 909 for (j = 0; j < 16; ++j)
897 printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j])); 910 printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j]));
898 } 911 }
@@ -910,12 +923,13 @@ void mthca_free_cq(struct mthca_dev *dev,
910 atomic_dec(&cq->refcount); 923 atomic_dec(&cq->refcount);
911 wait_event(cq->wait, !atomic_read(&cq->refcount)); 924 wait_event(cq->wait, !atomic_read(&cq->refcount));
912 925
913 mthca_free_mr(dev, &cq->mr); 926 if (cq->is_kernel) {
914 mthca_free_cq_buf(dev, cq); 927 mthca_free_mr(dev, &cq->mr);
915 928 mthca_free_cq_buf(dev, cq);
916 if (mthca_is_memfree(dev)) { 929 if (mthca_is_memfree(dev)) {
917 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); 930 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
918 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); 931 mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
932 }
919 } 933 }
920 934
921 mthca_table_put(dev, dev->cq_table.table, cq->cqn); 935 mthca_table_put(dev, dev->cq_table.table, cq->cqn);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 4127f09dc5ec..5ecdd2eeeb0f 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -1,6 +1,7 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * 5 *
5 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 7 * licenses. You may choose to be licensed under the terms of the GNU
@@ -378,7 +379,7 @@ void mthca_unregister_device(struct mthca_dev *dev);
378int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); 379int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
379void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); 380void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
380 381
381int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd); 382int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd);
382void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd); 383void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
383 384
384struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size); 385struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size);
@@ -413,6 +414,7 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
413int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); 414int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
414int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); 415int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
415int mthca_init_cq(struct mthca_dev *dev, int nent, 416int mthca_init_cq(struct mthca_dev *dev, int nent,
417 struct mthca_ucontext *ctx, u32 pdn,
416 struct mthca_cq *cq); 418 struct mthca_cq *cq);
417void mthca_free_cq(struct mthca_dev *dev, 419void mthca_free_cq(struct mthca_dev *dev,
418 struct mthca_cq *cq); 420 struct mthca_cq *cq);
@@ -438,12 +440,14 @@ int mthca_alloc_qp(struct mthca_dev *dev,
438 struct mthca_cq *recv_cq, 440 struct mthca_cq *recv_cq,
439 enum ib_qp_type type, 441 enum ib_qp_type type,
440 enum ib_sig_type send_policy, 442 enum ib_sig_type send_policy,
443 struct ib_qp_cap *cap,
441 struct mthca_qp *qp); 444 struct mthca_qp *qp);
442int mthca_alloc_sqp(struct mthca_dev *dev, 445int mthca_alloc_sqp(struct mthca_dev *dev,
443 struct mthca_pd *pd, 446 struct mthca_pd *pd,
444 struct mthca_cq *send_cq, 447 struct mthca_cq *send_cq,
445 struct mthca_cq *recv_cq, 448 struct mthca_cq *recv_cq,
446 enum ib_sig_type send_policy, 449 enum ib_sig_type send_policy,
450 struct ib_qp_cap *cap,
447 int qpn, 451 int qpn,
448 int port, 452 int port,
449 struct mthca_sqp *sqp); 453 struct mthca_sqp *sqp);
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 09519b604c08..2ef916859e17 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -665,7 +665,7 @@ static int __devinit mthca_setup_hca(struct mthca_dev *dev)
665 goto err_pd_table_free; 665 goto err_pd_table_free;
666 } 666 }
667 667
668 err = mthca_pd_alloc(dev, &dev->driver_pd); 668 err = mthca_pd_alloc(dev, 1, &dev->driver_pd);
669 if (err) { 669 if (err) {
670 mthca_err(dev, "Failed to create driver PD, " 670 mthca_err(dev, "Failed to create driver PD, "
671 "aborting.\n"); 671 "aborting.\n");
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 6d3b05dd9e3f..2a8646150355 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -47,6 +48,15 @@ enum {
47 MTHCA_TABLE_CHUNK_SIZE = 1 << 18 48 MTHCA_TABLE_CHUNK_SIZE = 1 << 18
48}; 49};
49 50
51struct mthca_user_db_table {
52 struct semaphore mutex;
53 struct {
54 u64 uvirt;
55 struct scatterlist mem;
56 int refcount;
57 } page[0];
58};
59
50void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm) 60void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
51{ 61{
52 struct mthca_icm_chunk *chunk, *tmp; 62 struct mthca_icm_chunk *chunk, *tmp;
@@ -344,13 +354,133 @@ void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
344 kfree(table); 354 kfree(table);
345} 355}
346 356
347static u64 mthca_uarc_virt(struct mthca_dev *dev, int page) 357static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page)
348{ 358{
349 return dev->uar_table.uarc_base + 359 return dev->uar_table.uarc_base +
350 dev->driver_uar.index * dev->uar_table.uarc_size + 360 uar->index * dev->uar_table.uarc_size +
351 page * 4096; 361 page * 4096;
352} 362}
353 363
364int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
365 struct mthca_user_db_table *db_tab, int index, u64 uaddr)
366{
367 int ret = 0;
368 u8 status;
369 int i;
370
371 if (!mthca_is_memfree(dev))
372 return 0;
373
374 if (index < 0 || index > dev->uar_table.uarc_size / 8)
375 return -EINVAL;
376
377 down(&db_tab->mutex);
378
379 i = index / MTHCA_DB_REC_PER_PAGE;
380
381 if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) ||
382 (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) ||
383 (uaddr & 4095)) {
384 ret = -EINVAL;
385 goto out;
386 }
387
388 if (db_tab->page[i].refcount) {
389 ++db_tab->page[i].refcount;
390 goto out;
391 }
392
393 ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,
394 &db_tab->page[i].mem.page, NULL);
395 if (ret < 0)
396 goto out;
397
398 db_tab->page[i].mem.length = 4096;
399 db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK;
400
401 ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
402 if (ret < 0) {
403 put_page(db_tab->page[i].mem.page);
404 goto out;
405 }
406
407 ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
408 mthca_uarc_virt(dev, uar, i), &status);
409 if (!ret && status)
410 ret = -EINVAL;
411 if (ret) {
412 pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
413 put_page(db_tab->page[i].mem.page);
414 goto out;
415 }
416
417 db_tab->page[i].uvirt = uaddr;
418 db_tab->page[i].refcount = 1;
419
420out:
421 up(&db_tab->mutex);
422 return ret;
423}
424
425void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
426 struct mthca_user_db_table *db_tab, int index)
427{
428 if (!mthca_is_memfree(dev))
429 return;
430
431 /*
432 * To make our bookkeeping simpler, we don't unmap DB
433 * pages until we clean up the whole db table.
434 */
435
436 down(&db_tab->mutex);
437
438 --db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount;
439
440 up(&db_tab->mutex);
441}
442
443struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)
444{
445 struct mthca_user_db_table *db_tab;
446 int npages;
447 int i;
448
449 if (!mthca_is_memfree(dev))
450 return NULL;
451
452 npages = dev->uar_table.uarc_size / 4096;
453 db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL);
454 if (!db_tab)
455 return ERR_PTR(-ENOMEM);
456
457 init_MUTEX(&db_tab->mutex);
458 for (i = 0; i < npages; ++i) {
459 db_tab->page[i].refcount = 0;
460 db_tab->page[i].uvirt = 0;
461 }
462
463 return db_tab;
464}
465
466void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
467 struct mthca_user_db_table *db_tab)
468{
469 int i;
470 u8 status;
471
472 if (!mthca_is_memfree(dev))
473 return;
474
475 for (i = 0; i < dev->uar_table.uarc_size / 4096; ++i) {
476 if (db_tab->page[i].uvirt) {
477 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);
478 pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
479 put_page(db_tab->page[i].mem.page);
480 }
481 }
482}
483
354int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db) 484int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db)
355{ 485{
356 int group; 486 int group;
@@ -407,7 +537,8 @@ int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db)
407 } 537 }
408 memset(page->db_rec, 0, 4096); 538 memset(page->db_rec, 0, 4096);
409 539
410 ret = mthca_MAP_ICM_page(dev, page->mapping, mthca_uarc_virt(dev, i), &status); 540 ret = mthca_MAP_ICM_page(dev, page->mapping,
541 mthca_uarc_virt(dev, &dev->driver_uar, i), &status);
411 if (!ret && status) 542 if (!ret && status)
412 ret = -EINVAL; 543 ret = -EINVAL;
413 if (ret) { 544 if (ret) {
@@ -461,7 +592,7 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
461 592
462 if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) && 593 if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
463 i >= dev->db_tab->max_group1 - 1) { 594 i >= dev->db_tab->max_group1 - 1) {
464 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status); 595 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
465 596
466 dma_free_coherent(&dev->pdev->dev, 4096, 597 dma_free_coherent(&dev->pdev->dev, 4096,
467 page->db_rec, page->mapping); 598 page->db_rec, page->mapping);
@@ -530,7 +661,7 @@ void mthca_cleanup_db_tab(struct mthca_dev *dev)
530 if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE)) 661 if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
531 mthca_warn(dev, "Kernel UARC page %d not empty\n", i); 662 mthca_warn(dev, "Kernel UARC page %d not empty\n", i);
532 663
533 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status); 664 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
534 665
535 dma_free_coherent(&dev->pdev->dev, 4096, 666 dma_free_coherent(&dev->pdev->dev, 4096,
536 dev->db_tab->page[i].db_rec, 667 dev->db_tab->page[i].db_rec,
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index fe7be2a6bc4a..4761d844cb5f 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -148,7 +149,7 @@ struct mthca_db_table {
148 struct semaphore mutex; 149 struct semaphore mutex;
149}; 150};
150 151
151enum { 152enum mthca_db_type {
152 MTHCA_DB_TYPE_INVALID = 0x0, 153 MTHCA_DB_TYPE_INVALID = 0x0,
153 MTHCA_DB_TYPE_CQ_SET_CI = 0x1, 154 MTHCA_DB_TYPE_CQ_SET_CI = 0x1,
154 MTHCA_DB_TYPE_CQ_ARM = 0x2, 155 MTHCA_DB_TYPE_CQ_ARM = 0x2,
@@ -158,6 +159,17 @@ enum {
158 MTHCA_DB_TYPE_GROUP_SEP = 0x7 159 MTHCA_DB_TYPE_GROUP_SEP = 0x7
159}; 160};
160 161
162struct mthca_user_db_table;
163struct mthca_uar;
164
165int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
166 struct mthca_user_db_table *db_tab, int index, u64 uaddr);
167void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
168 struct mthca_user_db_table *db_tab, int index);
169struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev);
170void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
171 struct mthca_user_db_table *db_tab);
172
161int mthca_init_db_tab(struct mthca_dev *dev); 173int mthca_init_db_tab(struct mthca_dev *dev);
162void mthca_cleanup_db_tab(struct mthca_dev *dev); 174void mthca_cleanup_db_tab(struct mthca_dev *dev);
163int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db); 175int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db);
diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c
index ea66847e4ea3..c2c899844e98 100644
--- a/drivers/infiniband/hw/mthca/mthca_pd.c
+++ b/drivers/infiniband/hw/mthca/mthca_pd.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -37,23 +38,27 @@
37 38
38#include "mthca_dev.h" 39#include "mthca_dev.h"
39 40
40int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd) 41int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd)
41{ 42{
42 int err; 43 int err = 0;
43 44
44 might_sleep(); 45 might_sleep();
45 46
47 pd->privileged = privileged;
48
46 atomic_set(&pd->sqp_count, 0); 49 atomic_set(&pd->sqp_count, 0);
47 pd->pd_num = mthca_alloc(&dev->pd_table.alloc); 50 pd->pd_num = mthca_alloc(&dev->pd_table.alloc);
48 if (pd->pd_num == -1) 51 if (pd->pd_num == -1)
49 return -ENOMEM; 52 return -ENOMEM;
50 53
51 err = mthca_mr_alloc_notrans(dev, pd->pd_num, 54 if (privileged) {
52 MTHCA_MPT_FLAG_LOCAL_READ | 55 err = mthca_mr_alloc_notrans(dev, pd->pd_num,
53 MTHCA_MPT_FLAG_LOCAL_WRITE, 56 MTHCA_MPT_FLAG_LOCAL_READ |
54 &pd->ntmr); 57 MTHCA_MPT_FLAG_LOCAL_WRITE,
55 if (err) 58 &pd->ntmr);
56 mthca_free(&dev->pd_table.alloc, pd->pd_num); 59 if (err)
60 mthca_free(&dev->pd_table.alloc, pd->pd_num);
61 }
57 62
58 return err; 63 return err;
59} 64}
@@ -61,7 +66,8 @@ int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd)
61void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd) 66void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd)
62{ 67{
63 might_sleep(); 68 might_sleep();
64 mthca_free_mr(dev, &pd->ntmr); 69 if (pd->privileged)
70 mthca_free_mr(dev, &pd->ntmr);
65 mthca_free(&dev->pd_table.alloc, pd->pd_num); 71 mthca_free(&dev->pd_table.alloc, pd->pd_num);
66} 72}
67 73
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 0b5adfd91597..7a58ce90e179 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * 5 *
5 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 7 * licenses. You may choose to be licensed under the terms of the GNU
@@ -34,9 +35,12 @@
34 */ 35 */
35 36
36#include <ib_smi.h> 37#include <ib_smi.h>
38#include <linux/mm.h>
37 39
38#include "mthca_dev.h" 40#include "mthca_dev.h"
39#include "mthca_cmd.h" 41#include "mthca_cmd.h"
42#include "mthca_user.h"
43#include "mthca_memfree.h"
40 44
41static int mthca_query_device(struct ib_device *ibdev, 45static int mthca_query_device(struct ib_device *ibdev,
42 struct ib_device_attr *props) 46 struct ib_device_attr *props)
@@ -284,7 +288,78 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port,
284 return err; 288 return err;
285} 289}
286 290
287static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev) 291static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
292 struct ib_udata *udata)
293{
294 struct mthca_alloc_ucontext_resp uresp;
295 struct mthca_ucontext *context;
296 int err;
297
298 memset(&uresp, 0, sizeof uresp);
299
300 uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
301 if (mthca_is_memfree(to_mdev(ibdev)))
302 uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
303 else
304 uresp.uarc_size = 0;
305
306 context = kmalloc(sizeof *context, GFP_KERNEL);
307 if (!context)
308 return ERR_PTR(-ENOMEM);
309
310 err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
311 if (err) {
312 kfree(context);
313 return ERR_PTR(err);
314 }
315
316 context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
317 if (IS_ERR(context->db_tab)) {
318 err = PTR_ERR(context->db_tab);
319 mthca_uar_free(to_mdev(ibdev), &context->uar);
320 kfree(context);
321 return ERR_PTR(err);
322 }
323
324 if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
325 mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
326 mthca_uar_free(to_mdev(ibdev), &context->uar);
327 kfree(context);
328 return ERR_PTR(-EFAULT);
329 }
330
331 return &context->ibucontext;
332}
333
334static int mthca_dealloc_ucontext(struct ib_ucontext *context)
335{
336 mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
337 to_mucontext(context)->db_tab);
338 mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
339 kfree(to_mucontext(context));
340
341 return 0;
342}
343
344static int mthca_mmap_uar(struct ib_ucontext *context,
345 struct vm_area_struct *vma)
346{
347 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
348 return -EINVAL;
349
350 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
351
352 if (remap_pfn_range(vma, vma->vm_start,
353 to_mucontext(context)->uar.pfn,
354 PAGE_SIZE, vma->vm_page_prot))
355 return -EAGAIN;
356
357 return 0;
358}
359
360static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
361 struct ib_ucontext *context,
362 struct ib_udata *udata)
288{ 363{
289 struct mthca_pd *pd; 364 struct mthca_pd *pd;
290 int err; 365 int err;
@@ -293,12 +368,20 @@ static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev)
293 if (!pd) 368 if (!pd)
294 return ERR_PTR(-ENOMEM); 369 return ERR_PTR(-ENOMEM);
295 370
296 err = mthca_pd_alloc(to_mdev(ibdev), pd); 371 err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
297 if (err) { 372 if (err) {
298 kfree(pd); 373 kfree(pd);
299 return ERR_PTR(err); 374 return ERR_PTR(err);
300 } 375 }
301 376
377 if (context) {
378 if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
379 mthca_pd_free(to_mdev(ibdev), pd);
380 kfree(pd);
381 return ERR_PTR(-EFAULT);
382 }
383 }
384
302 return &pd->ibpd; 385 return &pd->ibpd;
303} 386}
304 387
@@ -338,8 +421,10 @@ static int mthca_ah_destroy(struct ib_ah *ah)
338} 421}
339 422
340static struct ib_qp *mthca_create_qp(struct ib_pd *pd, 423static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
341 struct ib_qp_init_attr *init_attr) 424 struct ib_qp_init_attr *init_attr,
425 struct ib_udata *udata)
342{ 426{
427 struct mthca_create_qp ucmd;
343 struct mthca_qp *qp; 428 struct mthca_qp *qp;
344 int err; 429 int err;
345 430
@@ -348,41 +433,82 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
348 case IB_QPT_UC: 433 case IB_QPT_UC:
349 case IB_QPT_UD: 434 case IB_QPT_UD:
350 { 435 {
436 struct mthca_ucontext *context;
437
351 qp = kmalloc(sizeof *qp, GFP_KERNEL); 438 qp = kmalloc(sizeof *qp, GFP_KERNEL);
352 if (!qp) 439 if (!qp)
353 return ERR_PTR(-ENOMEM); 440 return ERR_PTR(-ENOMEM);
354 441
355 qp->sq.max = init_attr->cap.max_send_wr; 442 if (pd->uobject) {
356 qp->rq.max = init_attr->cap.max_recv_wr; 443 context = to_mucontext(pd->uobject->context);
357 qp->sq.max_gs = init_attr->cap.max_send_sge; 444
358 qp->rq.max_gs = init_attr->cap.max_recv_sge; 445 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
446 return ERR_PTR(-EFAULT);
447
448 err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
449 context->db_tab,
450 ucmd.sq_db_index, ucmd.sq_db_page);
451 if (err) {
452 kfree(qp);
453 return ERR_PTR(err);
454 }
455
456 err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
457 context->db_tab,
458 ucmd.rq_db_index, ucmd.rq_db_page);
459 if (err) {
460 mthca_unmap_user_db(to_mdev(pd->device),
461 &context->uar,
462 context->db_tab,
463 ucmd.sq_db_index);
464 kfree(qp);
465 return ERR_PTR(err);
466 }
467
468 qp->mr.ibmr.lkey = ucmd.lkey;
469 qp->sq.db_index = ucmd.sq_db_index;
470 qp->rq.db_index = ucmd.rq_db_index;
471 }
359 472
360 err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd), 473 err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
361 to_mcq(init_attr->send_cq), 474 to_mcq(init_attr->send_cq),
362 to_mcq(init_attr->recv_cq), 475 to_mcq(init_attr->recv_cq),
363 init_attr->qp_type, init_attr->sq_sig_type, 476 init_attr->qp_type, init_attr->sq_sig_type,
364 qp); 477 &init_attr->cap, qp);
478
479 if (err && pd->uobject) {
480 context = to_mucontext(pd->uobject->context);
481
482 mthca_unmap_user_db(to_mdev(pd->device),
483 &context->uar,
484 context->db_tab,
485 ucmd.sq_db_index);
486 mthca_unmap_user_db(to_mdev(pd->device),
487 &context->uar,
488 context->db_tab,
489 ucmd.rq_db_index);
490 }
491
365 qp->ibqp.qp_num = qp->qpn; 492 qp->ibqp.qp_num = qp->qpn;
366 break; 493 break;
367 } 494 }
368 case IB_QPT_SMI: 495 case IB_QPT_SMI:
369 case IB_QPT_GSI: 496 case IB_QPT_GSI:
370 { 497 {
498 /* Don't allow userspace to create special QPs */
499 if (pd->uobject)
500 return ERR_PTR(-EINVAL);
501
371 qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); 502 qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
372 if (!qp) 503 if (!qp)
373 return ERR_PTR(-ENOMEM); 504 return ERR_PTR(-ENOMEM);
374 505
375 qp->sq.max = init_attr->cap.max_send_wr;
376 qp->rq.max = init_attr->cap.max_recv_wr;
377 qp->sq.max_gs = init_attr->cap.max_send_sge;
378 qp->rq.max_gs = init_attr->cap.max_recv_sge;
379
380 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; 506 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
381 507
382 err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd), 508 err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
383 to_mcq(init_attr->send_cq), 509 to_mcq(init_attr->send_cq),
384 to_mcq(init_attr->recv_cq), 510 to_mcq(init_attr->recv_cq),
385 init_attr->sq_sig_type, 511 init_attr->sq_sig_type, &init_attr->cap,
386 qp->ibqp.qp_num, init_attr->port_num, 512 qp->ibqp.qp_num, init_attr->port_num,
387 to_msqp(qp)); 513 to_msqp(qp));
388 break; 514 break;
@@ -397,42 +523,115 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
397 return ERR_PTR(err); 523 return ERR_PTR(err);
398 } 524 }
399 525
400 init_attr->cap.max_inline_data = 0; 526 init_attr->cap.max_inline_data = 0;
527 init_attr->cap.max_send_wr = qp->sq.max;
528 init_attr->cap.max_recv_wr = qp->rq.max;
529 init_attr->cap.max_send_sge = qp->sq.max_gs;
530 init_attr->cap.max_recv_sge = qp->rq.max_gs;
401 531
402 return &qp->ibqp; 532 return &qp->ibqp;
403} 533}
404 534
405static int mthca_destroy_qp(struct ib_qp *qp) 535static int mthca_destroy_qp(struct ib_qp *qp)
406{ 536{
537 if (qp->uobject) {
538 mthca_unmap_user_db(to_mdev(qp->device),
539 &to_mucontext(qp->uobject->context)->uar,
540 to_mucontext(qp->uobject->context)->db_tab,
541 to_mqp(qp)->sq.db_index);
542 mthca_unmap_user_db(to_mdev(qp->device),
543 &to_mucontext(qp->uobject->context)->uar,
544 to_mucontext(qp->uobject->context)->db_tab,
545 to_mqp(qp)->rq.db_index);
546 }
407 mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); 547 mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
408 kfree(qp); 548 kfree(qp);
409 return 0; 549 return 0;
410} 550}
411 551
412static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries) 552static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
553 struct ib_ucontext *context,
554 struct ib_udata *udata)
413{ 555{
556 struct mthca_create_cq ucmd;
414 struct mthca_cq *cq; 557 struct mthca_cq *cq;
415 int nent; 558 int nent;
416 int err; 559 int err;
417 560
561 if (context) {
562 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
563 return ERR_PTR(-EFAULT);
564
565 err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
566 to_mucontext(context)->db_tab,
567 ucmd.set_db_index, ucmd.set_db_page);
568 if (err)
569 return ERR_PTR(err);
570
571 err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
572 to_mucontext(context)->db_tab,
573 ucmd.arm_db_index, ucmd.arm_db_page);
574 if (err)
575 goto err_unmap_set;
576 }
577
418 cq = kmalloc(sizeof *cq, GFP_KERNEL); 578 cq = kmalloc(sizeof *cq, GFP_KERNEL);
419 if (!cq) 579 if (!cq) {
420 return ERR_PTR(-ENOMEM); 580 err = -ENOMEM;
581 goto err_unmap_arm;
582 }
583
584 if (context) {
585 cq->mr.ibmr.lkey = ucmd.lkey;
586 cq->set_ci_db_index = ucmd.set_db_index;
587 cq->arm_db_index = ucmd.arm_db_index;
588 }
421 589
422 for (nent = 1; nent <= entries; nent <<= 1) 590 for (nent = 1; nent <= entries; nent <<= 1)
423 ; /* nothing */ 591 ; /* nothing */
424 592
425 err = mthca_init_cq(to_mdev(ibdev), nent, cq); 593 err = mthca_init_cq(to_mdev(ibdev), nent,
426 if (err) { 594 context ? to_mucontext(context) : NULL,
427 kfree(cq); 595 context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
428 cq = ERR_PTR(err); 596 cq);
597 if (err)
598 goto err_free;
599
600 if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
601 mthca_free_cq(to_mdev(ibdev), cq);
602 goto err_free;
429 } 603 }
430 604
431 return &cq->ibcq; 605 return &cq->ibcq;
606
607err_free:
608 kfree(cq);
609
610err_unmap_arm:
611 if (context)
612 mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
613 to_mucontext(context)->db_tab, ucmd.arm_db_index);
614
615err_unmap_set:
616 if (context)
617 mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
618 to_mucontext(context)->db_tab, ucmd.set_db_index);
619
620 return ERR_PTR(err);
432} 621}
433 622
434static int mthca_destroy_cq(struct ib_cq *cq) 623static int mthca_destroy_cq(struct ib_cq *cq)
435{ 624{
625 if (cq->uobject) {
626 mthca_unmap_user_db(to_mdev(cq->device),
627 &to_mucontext(cq->uobject->context)->uar,
628 to_mucontext(cq->uobject->context)->db_tab,
629 to_mcq(cq)->arm_db_index);
630 mthca_unmap_user_db(to_mdev(cq->device),
631 &to_mucontext(cq->uobject->context)->uar,
632 to_mucontext(cq->uobject->context)->db_tab,
633 to_mcq(cq)->set_ci_db_index);
634 }
436 mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); 635 mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
437 kfree(cq); 636 kfree(cq);
438 637
@@ -568,6 +767,87 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
568 return &mr->ibmr; 767 return &mr->ibmr;
569} 768}
570 769
770static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
771 int acc, struct ib_udata *udata)
772{
773 struct mthca_dev *dev = to_mdev(pd->device);
774 struct ib_umem_chunk *chunk;
775 struct mthca_mr *mr;
776 u64 *pages;
777 int shift, n, len;
778 int i, j, k;
779 int err = 0;
780
781 shift = ffs(region->page_size) - 1;
782
783 mr = kmalloc(sizeof *mr, GFP_KERNEL);
784 if (!mr)
785 return ERR_PTR(-ENOMEM);
786
787 n = 0;
788 list_for_each_entry(chunk, &region->chunk_list, list)
789 n += chunk->nents;
790
791 mr->mtt = mthca_alloc_mtt(dev, n);
792 if (IS_ERR(mr->mtt)) {
793 err = PTR_ERR(mr->mtt);
794 goto err;
795 }
796
797 pages = (u64 *) __get_free_page(GFP_KERNEL);
798 if (!pages) {
799 err = -ENOMEM;
800 goto err_mtt;
801 }
802
803 i = n = 0;
804
805 list_for_each_entry(chunk, &region->chunk_list, list)
806 for (j = 0; j < chunk->nmap; ++j) {
807 len = sg_dma_len(&chunk->page_list[j]) >> shift;
808 for (k = 0; k < len; ++k) {
809 pages[i++] = sg_dma_address(&chunk->page_list[j]) +
810 region->page_size * k;
811 /*
812 * Be friendly to WRITE_MTT command
813 * and leave two empty slots for the
814 * index and reserved fields of the
815 * mailbox.
816 */
817 if (i == PAGE_SIZE / sizeof (u64) - 2) {
818 err = mthca_write_mtt(dev, mr->mtt,
819 n, pages, i);
820 if (err)
821 goto mtt_done;
822 n += i;
823 i = 0;
824 }
825 }
826 }
827
828 if (i)
829 err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
830mtt_done:
831 free_page((unsigned long) pages);
832 if (err)
833 goto err_mtt;
834
835 err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base,
836 region->length, convert_access(acc), mr);
837
838 if (err)
839 goto err_mtt;
840
841 return &mr->ibmr;
842
843err_mtt:
844 mthca_free_mtt(dev, mr->mtt);
845
846err:
847 kfree(mr);
848 return ERR_PTR(err);
849}
850
571static int mthca_dereg_mr(struct ib_mr *mr) 851static int mthca_dereg_mr(struct ib_mr *mr)
572{ 852{
573 struct mthca_mr *mmr = to_mmr(mr); 853 struct mthca_mr *mmr = to_mmr(mr);
@@ -692,6 +972,8 @@ int mthca_register_device(struct mthca_dev *dev)
692 int i; 972 int i;
693 973
694 strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); 974 strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
975 dev->ib_dev.owner = THIS_MODULE;
976
695 dev->ib_dev.node_type = IB_NODE_CA; 977 dev->ib_dev.node_type = IB_NODE_CA;
696 dev->ib_dev.phys_port_cnt = dev->limits.num_ports; 978 dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
697 dev->ib_dev.dma_device = &dev->pdev->dev; 979 dev->ib_dev.dma_device = &dev->pdev->dev;
@@ -701,6 +983,9 @@ int mthca_register_device(struct mthca_dev *dev)
701 dev->ib_dev.modify_port = mthca_modify_port; 983 dev->ib_dev.modify_port = mthca_modify_port;
702 dev->ib_dev.query_pkey = mthca_query_pkey; 984 dev->ib_dev.query_pkey = mthca_query_pkey;
703 dev->ib_dev.query_gid = mthca_query_gid; 985 dev->ib_dev.query_gid = mthca_query_gid;
986 dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext;
987 dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext;
988 dev->ib_dev.mmap = mthca_mmap_uar;
704 dev->ib_dev.alloc_pd = mthca_alloc_pd; 989 dev->ib_dev.alloc_pd = mthca_alloc_pd;
705 dev->ib_dev.dealloc_pd = mthca_dealloc_pd; 990 dev->ib_dev.dealloc_pd = mthca_dealloc_pd;
706 dev->ib_dev.create_ah = mthca_ah_create; 991 dev->ib_dev.create_ah = mthca_ah_create;
@@ -713,6 +998,7 @@ int mthca_register_device(struct mthca_dev *dev)
713 dev->ib_dev.poll_cq = mthca_poll_cq; 998 dev->ib_dev.poll_cq = mthca_poll_cq;
714 dev->ib_dev.get_dma_mr = mthca_get_dma_mr; 999 dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
715 dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr; 1000 dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
1001 dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
716 dev->ib_dev.dereg_mr = mthca_dereg_mr; 1002 dev->ib_dev.dereg_mr = mthca_dereg_mr;
717 1003
718 if (dev->mthca_flags & MTHCA_FLAG_FMR) { 1004 if (dev->mthca_flags & MTHCA_FLAG_FMR) {
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 4d976cccb1a8..1d032791cc8b 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -54,6 +55,14 @@ struct mthca_uar {
54 int index; 55 int index;
55}; 56};
56 57
58struct mthca_user_db_table;
59
60struct mthca_ucontext {
61 struct ib_ucontext ibucontext;
62 struct mthca_uar uar;
63 struct mthca_user_db_table *db_tab;
64};
65
57struct mthca_mtt; 66struct mthca_mtt;
58 67
59struct mthca_mr { 68struct mthca_mr {
@@ -83,6 +92,7 @@ struct mthca_pd {
83 u32 pd_num; 92 u32 pd_num;
84 atomic_t sqp_count; 93 atomic_t sqp_count;
85 struct mthca_mr ntmr; 94 struct mthca_mr ntmr;
95 int privileged;
86}; 96};
87 97
88struct mthca_eq { 98struct mthca_eq {
@@ -167,6 +177,7 @@ struct mthca_cq {
167 int cqn; 177 int cqn;
168 u32 cons_index; 178 u32 cons_index;
169 int is_direct; 179 int is_direct;
180 int is_kernel;
170 181
171 /* Next fields are Arbel only */ 182 /* Next fields are Arbel only */
172 int set_ci_db_index; 183 int set_ci_db_index;
@@ -236,6 +247,11 @@ struct mthca_sqp {
236 dma_addr_t header_dma; 247 dma_addr_t header_dma;
237}; 248};
238 249
250static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
251{
252 return container_of(ibucontext, struct mthca_ucontext, ibucontext);
253}
254
239static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr) 255static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr)
240{ 256{
241 return container_of(ibmr, struct mthca_fmr, ibmr); 257 return container_of(ibmr, struct mthca_fmr, ibmr);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 163a8ef4186f..f7126b14d5ae 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 6 * licenses. You may choose to be licensed under the terms of the GNU
@@ -46,7 +47,9 @@ enum {
46 MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE, 47 MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
47 MTHCA_ACK_REQ_FREQ = 10, 48 MTHCA_ACK_REQ_FREQ = 10,
48 MTHCA_FLIGHT_LIMIT = 9, 49 MTHCA_FLIGHT_LIMIT = 9,
49 MTHCA_UD_HEADER_SIZE = 72 /* largest UD header possible */ 50 MTHCA_UD_HEADER_SIZE = 72, /* largest UD header possible */
51 MTHCA_INLINE_HEADER_SIZE = 4, /* data segment overhead for inline */
52 MTHCA_INLINE_CHUNK_SIZE = 16 /* inline data segment chunk */
50}; 53};
51 54
52enum { 55enum {
@@ -689,7 +692,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
689 692
690 /* leave arbel_sched_queue as 0 */ 693 /* leave arbel_sched_queue as 0 */
691 694
692 qp_context->usr_page = cpu_to_be32(dev->driver_uar.index); 695 if (qp->ibqp.uobject)
696 qp_context->usr_page =
697 cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index);
698 else
699 qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
693 qp_context->local_qpn = cpu_to_be32(qp->qpn); 700 qp_context->local_qpn = cpu_to_be32(qp->qpn);
694 if (attr_mask & IB_QP_DEST_QPN) { 701 if (attr_mask & IB_QP_DEST_QPN) {
695 qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num); 702 qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
@@ -954,6 +961,15 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
954 961
955 qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift, 962 qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
956 1 << qp->sq.wqe_shift); 963 1 << qp->sq.wqe_shift);
964
965 /*
966 * If this is a userspace QP, we don't actually have to
967 * allocate anything. All we need is to calculate the WQE
968 * sizes and the send_wqe_offset, so we're done now.
969 */
970 if (pd->ibpd.uobject)
971 return 0;
972
957 size = PAGE_ALIGN(qp->send_wqe_offset + 973 size = PAGE_ALIGN(qp->send_wqe_offset +
958 (qp->sq.max << qp->sq.wqe_shift)); 974 (qp->sq.max << qp->sq.wqe_shift));
959 975
@@ -1053,10 +1069,32 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
1053 return err; 1069 return err;
1054} 1070}
1055 1071
1056static int mthca_alloc_memfree(struct mthca_dev *dev, 1072static void mthca_free_wqe_buf(struct mthca_dev *dev,
1057 struct mthca_qp *qp) 1073 struct mthca_qp *qp)
1058{ 1074{
1059 int ret = 0; 1075 int i;
1076 int size = PAGE_ALIGN(qp->send_wqe_offset +
1077 (qp->sq.max << qp->sq.wqe_shift));
1078
1079 if (qp->is_direct) {
1080 dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf,
1081 pci_unmap_addr(&qp->queue.direct, mapping));
1082 } else {
1083 for (i = 0; i < size / PAGE_SIZE; ++i) {
1084 dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
1085 qp->queue.page_list[i].buf,
1086 pci_unmap_addr(&qp->queue.page_list[i],
1087 mapping));
1088 }
1089 }
1090
1091 kfree(qp->wrid);
1092}
1093
1094static int mthca_map_memfree(struct mthca_dev *dev,
1095 struct mthca_qp *qp)
1096{
1097 int ret;
1060 1098
1061 if (mthca_is_memfree(dev)) { 1099 if (mthca_is_memfree(dev)) {
1062 ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn); 1100 ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
@@ -1067,35 +1105,15 @@ static int mthca_alloc_memfree(struct mthca_dev *dev,
1067 if (ret) 1105 if (ret)
1068 goto err_qpc; 1106 goto err_qpc;
1069 1107
1070 ret = mthca_table_get(dev, dev->qp_table.rdb_table, 1108 ret = mthca_table_get(dev, dev->qp_table.rdb_table,
1071 qp->qpn << dev->qp_table.rdb_shift); 1109 qp->qpn << dev->qp_table.rdb_shift);
1072 if (ret) 1110 if (ret)
1073 goto err_eqpc; 1111 goto err_eqpc;
1074
1075 qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
1076 qp->qpn, &qp->rq.db);
1077 if (qp->rq.db_index < 0) {
1078 ret = -ENOMEM;
1079 goto err_rdb;
1080 }
1081 1112
1082 qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
1083 qp->qpn, &qp->sq.db);
1084 if (qp->sq.db_index < 0) {
1085 ret = -ENOMEM;
1086 goto err_rq_db;
1087 }
1088 } 1113 }
1089 1114
1090 return 0; 1115 return 0;
1091 1116
1092err_rq_db:
1093 mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
1094
1095err_rdb:
1096 mthca_table_put(dev, dev->qp_table.rdb_table,
1097 qp->qpn << dev->qp_table.rdb_shift);
1098
1099err_eqpc: 1117err_eqpc:
1100 mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); 1118 mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
1101 1119
@@ -1105,6 +1123,35 @@ err_qpc:
1105 return ret; 1123 return ret;
1106} 1124}
1107 1125
1126static void mthca_unmap_memfree(struct mthca_dev *dev,
1127 struct mthca_qp *qp)
1128{
1129 mthca_table_put(dev, dev->qp_table.rdb_table,
1130 qp->qpn << dev->qp_table.rdb_shift);
1131 mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
1132 mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
1133}
1134
1135static int mthca_alloc_memfree(struct mthca_dev *dev,
1136 struct mthca_qp *qp)
1137{
1138 int ret = 0;
1139
1140 if (mthca_is_memfree(dev)) {
1141 qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
1142 qp->qpn, &qp->rq.db);
1143 if (qp->rq.db_index < 0)
1144 return ret;
1145
1146 qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
1147 qp->qpn, &qp->sq.db);
1148 if (qp->sq.db_index < 0)
1149 mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
1150 }
1151
1152 return ret;
1153}
1154
1108static void mthca_free_memfree(struct mthca_dev *dev, 1155static void mthca_free_memfree(struct mthca_dev *dev,
1109 struct mthca_qp *qp) 1156 struct mthca_qp *qp)
1110{ 1157{
@@ -1112,11 +1159,6 @@ static void mthca_free_memfree(struct mthca_dev *dev,
1112 mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index); 1159 mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
1113 mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); 1160 mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
1114 } 1161 }
1115
1116 mthca_table_put(dev, dev->qp_table.rdb_table,
1117 qp->qpn << dev->qp_table.rdb_shift);
1118 mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
1119 mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
1120} 1162}
1121 1163
1122static void mthca_wq_init(struct mthca_wq* wq) 1164static void mthca_wq_init(struct mthca_wq* wq)
@@ -1147,13 +1189,28 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
1147 mthca_wq_init(&qp->sq); 1189 mthca_wq_init(&qp->sq);
1148 mthca_wq_init(&qp->rq); 1190 mthca_wq_init(&qp->rq);
1149 1191
1150 ret = mthca_alloc_memfree(dev, qp); 1192 ret = mthca_map_memfree(dev, qp);
1151 if (ret) 1193 if (ret)
1152 return ret; 1194 return ret;
1153 1195
1154 ret = mthca_alloc_wqe_buf(dev, pd, qp); 1196 ret = mthca_alloc_wqe_buf(dev, pd, qp);
1155 if (ret) { 1197 if (ret) {
1156 mthca_free_memfree(dev, qp); 1198 mthca_unmap_memfree(dev, qp);
1199 return ret;
1200 }
1201
1202 /*
1203 * If this is a userspace QP, we're done now. The doorbells
1204 * will be allocated and buffers will be initialized in
1205 * userspace.
1206 */
1207 if (pd->ibpd.uobject)
1208 return 0;
1209
1210 ret = mthca_alloc_memfree(dev, qp);
1211 if (ret) {
1212 mthca_free_wqe_buf(dev, qp);
1213 mthca_unmap_memfree(dev, qp);
1157 return ret; 1214 return ret;
1158 } 1215 }
1159 1216
@@ -1186,22 +1243,39 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
1186 return 0; 1243 return 0;
1187} 1244}
1188 1245
1189static void mthca_align_qp_size(struct mthca_dev *dev, struct mthca_qp *qp) 1246static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
1247 struct mthca_qp *qp)
1190{ 1248{
1191 int i; 1249 /* Sanity check QP size before proceeding */
1192 1250 if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 ||
1193 if (!mthca_is_memfree(dev)) 1251 cap->max_send_sge > 64 || cap->max_recv_sge > 64)
1194 return; 1252 return -EINVAL;
1195 1253
1196 for (i = 0; 1 << i < qp->rq.max; ++i) 1254 if (mthca_is_memfree(dev)) {
1197 ; /* nothing */ 1255 qp->rq.max = cap->max_recv_wr ?
1256 roundup_pow_of_two(cap->max_recv_wr) : 0;
1257 qp->sq.max = cap->max_send_wr ?
1258 roundup_pow_of_two(cap->max_send_wr) : 0;
1259 } else {
1260 qp->rq.max = cap->max_recv_wr;
1261 qp->sq.max = cap->max_send_wr;
1262 }
1198 1263
1199 qp->rq.max = 1 << i; 1264 qp->rq.max_gs = cap->max_recv_sge;
1265 qp->sq.max_gs = max_t(int, cap->max_send_sge,
1266 ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE,
1267 MTHCA_INLINE_CHUNK_SIZE) /
1268 sizeof (struct mthca_data_seg));
1200 1269
1201 for (i = 0; 1 << i < qp->sq.max; ++i) 1270 /*
1202 ; /* nothing */ 1271 * For MLX transport we need 2 extra S/G entries:
1272 * one for the header and one for the checksum at the end
1273 */
1274 if ((qp->transport == MLX && qp->sq.max_gs + 2 > dev->limits.max_sg) ||
1275 qp->sq.max_gs > dev->limits.max_sg || qp->rq.max_gs > dev->limits.max_sg)
1276 return -EINVAL;
1203 1277
1204 qp->sq.max = 1 << i; 1278 return 0;
1205} 1279}
1206 1280
1207int mthca_alloc_qp(struct mthca_dev *dev, 1281int mthca_alloc_qp(struct mthca_dev *dev,
@@ -1210,11 +1284,14 @@ int mthca_alloc_qp(struct mthca_dev *dev,
1210 struct mthca_cq *recv_cq, 1284 struct mthca_cq *recv_cq,
1211 enum ib_qp_type type, 1285 enum ib_qp_type type,
1212 enum ib_sig_type send_policy, 1286 enum ib_sig_type send_policy,
1287 struct ib_qp_cap *cap,
1213 struct mthca_qp *qp) 1288 struct mthca_qp *qp)
1214{ 1289{
1215 int err; 1290 int err;
1216 1291
1217 mthca_align_qp_size(dev, qp); 1292 err = mthca_set_qp_size(dev, cap, qp);
1293 if (err)
1294 return err;
1218 1295
1219 switch (type) { 1296 switch (type) {
1220 case IB_QPT_RC: qp->transport = RC; break; 1297 case IB_QPT_RC: qp->transport = RC; break;
@@ -1247,14 +1324,17 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
1247 struct mthca_cq *send_cq, 1324 struct mthca_cq *send_cq,
1248 struct mthca_cq *recv_cq, 1325 struct mthca_cq *recv_cq,
1249 enum ib_sig_type send_policy, 1326 enum ib_sig_type send_policy,
1327 struct ib_qp_cap *cap,
1250 int qpn, 1328 int qpn,
1251 int port, 1329 int port,
1252 struct mthca_sqp *sqp) 1330 struct mthca_sqp *sqp)
1253{ 1331{
1254 int err = 0;
1255 u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; 1332 u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
1333 int err;
1256 1334
1257 mthca_align_qp_size(dev, &sqp->qp); 1335 err = mthca_set_qp_size(dev, cap, &sqp->qp);
1336 if (err)
1337 return err;
1258 1338
1259 sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE; 1339 sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
1260 sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size, 1340 sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
@@ -1313,8 +1393,6 @@ void mthca_free_qp(struct mthca_dev *dev,
1313 struct mthca_qp *qp) 1393 struct mthca_qp *qp)
1314{ 1394{
1315 u8 status; 1395 u8 status;
1316 int size;
1317 int i;
1318 struct mthca_cq *send_cq; 1396 struct mthca_cq *send_cq;
1319 struct mthca_cq *recv_cq; 1397 struct mthca_cq *recv_cq;
1320 1398
@@ -1344,31 +1422,22 @@ void mthca_free_qp(struct mthca_dev *dev,
1344 if (qp->state != IB_QPS_RESET) 1422 if (qp->state != IB_QPS_RESET)
1345 mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status); 1423 mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status);
1346 1424
1347 mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn); 1425 /*
1348 if (qp->ibqp.send_cq != qp->ibqp.recv_cq) 1426 * If this is a userspace QP, the buffers, MR, CQs and so on
1349 mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn); 1427 * will be cleaned up in userspace, so all we have to do is
1350 1428 * unref the mem-free tables and free the QPN in our table.
1351 mthca_free_mr(dev, &qp->mr); 1429 */
1352 1430 if (!qp->ibqp.uobject) {
1353 size = PAGE_ALIGN(qp->send_wqe_offset + 1431 mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn);
1354 (qp->sq.max << qp->sq.wqe_shift)); 1432 if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
1433 mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn);
1355 1434
1356 if (qp->is_direct) { 1435 mthca_free_mr(dev, &qp->mr);
1357 pci_free_consistent(dev->pdev, size, 1436 mthca_free_memfree(dev, qp);
1358 qp->queue.direct.buf, 1437 mthca_free_wqe_buf(dev, qp);
1359 pci_unmap_addr(&qp->queue.direct, mapping));
1360 } else {
1361 for (i = 0; i < size / PAGE_SIZE; ++i) {
1362 pci_free_consistent(dev->pdev, PAGE_SIZE,
1363 qp->queue.page_list[i].buf,
1364 pci_unmap_addr(&qp->queue.page_list[i],
1365 mapping));
1366 }
1367 } 1438 }
1368 1439
1369 kfree(qp->wrid); 1440 mthca_unmap_memfree(dev, qp);
1370
1371 mthca_free_memfree(dev, qp);
1372 1441
1373 if (is_sqp(dev, qp)) { 1442 if (is_sqp(dev, qp)) {
1374 atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count)); 1443 atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
new file mode 100644
index 000000000000..3024c1b4547d
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -0,0 +1,81 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 */
34
35#ifndef MTHCA_USER_H
36#define MTHCA_USER_H
37
38#include <linux/types.h>
39
40/*
41 * Make sure that all structs defined in this file remain laid out so
42 * that they pack the same way on 32-bit and 64-bit architectures (to
43 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
44 * In particular do not use pointer types -- pass pointers in __u64
45 * instead.
46 */
47
48struct mthca_alloc_ucontext_resp {
49 __u32 qp_tab_size;
50 __u32 uarc_size;
51};
52
53struct mthca_alloc_pd_resp {
54 __u32 pdn;
55 __u32 reserved;
56};
57
58struct mthca_create_cq {
59 __u32 lkey;
60 __u32 pdn;
61 __u64 arm_db_page;
62 __u64 set_db_page;
63 __u32 arm_db_index;
64 __u32 set_db_index;
65};
66
67struct mthca_create_cq_resp {
68 __u32 cqn;
69 __u32 reserved;
70};
71
72struct mthca_create_qp {
73 __u32 lkey;
74 __u32 reserved;
75 __u64 sq_db_page;
76 __u64 rq_db_page;
77 __u32 sq_db_index;
78 __u32 rq_db_index;
79};
80
81#endif /* MTHCA_USER_H */
diff --git a/drivers/infiniband/include/ib_user_verbs.h b/drivers/infiniband/include/ib_user_verbs.h
new file mode 100644
index 000000000000..7c613706af72
--- /dev/null
+++ b/drivers/infiniband/include/ib_user_verbs.h
@@ -0,0 +1,389 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: ib_user_verbs.h 2708 2005-06-24 17:27:21Z roland $
34 */
35
36#ifndef IB_USER_VERBS_H
37#define IB_USER_VERBS_H
38
39#include <linux/types.h>
40
41/*
42 * Increment this value if any changes that break userspace ABI
43 * compatibility are made.
44 */
45#define IB_USER_VERBS_ABI_VERSION 1
46
47enum {
48 IB_USER_VERBS_CMD_QUERY_PARAMS,
49 IB_USER_VERBS_CMD_GET_CONTEXT,
50 IB_USER_VERBS_CMD_QUERY_DEVICE,
51 IB_USER_VERBS_CMD_QUERY_PORT,
52 IB_USER_VERBS_CMD_QUERY_GID,
53 IB_USER_VERBS_CMD_QUERY_PKEY,
54 IB_USER_VERBS_CMD_ALLOC_PD,
55 IB_USER_VERBS_CMD_DEALLOC_PD,
56 IB_USER_VERBS_CMD_CREATE_AH,
57 IB_USER_VERBS_CMD_MODIFY_AH,
58 IB_USER_VERBS_CMD_QUERY_AH,
59 IB_USER_VERBS_CMD_DESTROY_AH,
60 IB_USER_VERBS_CMD_REG_MR,
61 IB_USER_VERBS_CMD_REG_SMR,
62 IB_USER_VERBS_CMD_REREG_MR,
63 IB_USER_VERBS_CMD_QUERY_MR,
64 IB_USER_VERBS_CMD_DEREG_MR,
65 IB_USER_VERBS_CMD_ALLOC_MW,
66 IB_USER_VERBS_CMD_BIND_MW,
67 IB_USER_VERBS_CMD_DEALLOC_MW,
68 IB_USER_VERBS_CMD_CREATE_CQ,
69 IB_USER_VERBS_CMD_RESIZE_CQ,
70 IB_USER_VERBS_CMD_DESTROY_CQ,
71 IB_USER_VERBS_CMD_POLL_CQ,
72 IB_USER_VERBS_CMD_PEEK_CQ,
73 IB_USER_VERBS_CMD_REQ_NOTIFY_CQ,
74 IB_USER_VERBS_CMD_CREATE_QP,
75 IB_USER_VERBS_CMD_QUERY_QP,
76 IB_USER_VERBS_CMD_MODIFY_QP,
77 IB_USER_VERBS_CMD_DESTROY_QP,
78 IB_USER_VERBS_CMD_POST_SEND,
79 IB_USER_VERBS_CMD_POST_RECV,
80 IB_USER_VERBS_CMD_ATTACH_MCAST,
81 IB_USER_VERBS_CMD_DETACH_MCAST
82};
83
84/*
85 * Make sure that all structs defined in this file remain laid out so
86 * that they pack the same way on 32-bit and 64-bit architectures (to
87 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
88 * In particular do not use pointer types -- pass pointers in __u64
89 * instead.
90 */
91
92struct ib_uverbs_async_event_desc {
93 __u64 element;
94 __u32 event_type; /* enum ib_event_type */
95 __u32 reserved;
96};
97
98struct ib_uverbs_comp_event_desc {
99 __u64 cq_handle;
100};
101
102/*
103 * All commands from userspace should start with a __u32 command field
104 * followed by __u16 in_words and out_words fields (which give the
105 * length of the command block and response buffer if any in 32-bit
106 * words). The kernel driver will read these fields first and read
107 * the rest of the command struct based on these value.
108 */
109
110struct ib_uverbs_cmd_hdr {
111 __u32 command;
112 __u16 in_words;
113 __u16 out_words;
114};
115
116/*
117 * No driver_data for "query params" command, since this is intended
118 * to be a core function with no possible device dependence.
119 */
120struct ib_uverbs_query_params {
121 __u64 response;
122};
123
124struct ib_uverbs_query_params_resp {
125 __u32 num_cq_events;
126};
127
128struct ib_uverbs_get_context {
129 __u64 response;
130 __u64 cq_fd_tab;
131 __u64 driver_data[0];
132};
133
134struct ib_uverbs_get_context_resp {
135 __u32 async_fd;
136 __u32 reserved;
137};
138
139struct ib_uverbs_query_device {
140 __u64 response;
141 __u64 driver_data[0];
142};
143
144struct ib_uverbs_query_device_resp {
145 __u64 fw_ver;
146 __u64 node_guid;
147 __u64 sys_image_guid;
148 __u64 max_mr_size;
149 __u64 page_size_cap;
150 __u32 vendor_id;
151 __u32 vendor_part_id;
152 __u32 hw_ver;
153 __u32 max_qp;
154 __u32 max_qp_wr;
155 __u32 device_cap_flags;
156 __u32 max_sge;
157 __u32 max_sge_rd;
158 __u32 max_cq;
159 __u32 max_cqe;
160 __u32 max_mr;
161 __u32 max_pd;
162 __u32 max_qp_rd_atom;
163 __u32 max_ee_rd_atom;
164 __u32 max_res_rd_atom;
165 __u32 max_qp_init_rd_atom;
166 __u32 max_ee_init_rd_atom;
167 __u32 atomic_cap;
168 __u32 max_ee;
169 __u32 max_rdd;
170 __u32 max_mw;
171 __u32 max_raw_ipv6_qp;
172 __u32 max_raw_ethy_qp;
173 __u32 max_mcast_grp;
174 __u32 max_mcast_qp_attach;
175 __u32 max_total_mcast_qp_attach;
176 __u32 max_ah;
177 __u32 max_fmr;
178 __u32 max_map_per_fmr;
179 __u32 max_srq;
180 __u32 max_srq_wr;
181 __u32 max_srq_sge;
182 __u16 max_pkeys;
183 __u8 local_ca_ack_delay;
184 __u8 phys_port_cnt;
185 __u8 reserved[4];
186};
187
188struct ib_uverbs_query_port {
189 __u64 response;
190 __u8 port_num;
191 __u8 reserved[7];
192 __u64 driver_data[0];
193};
194
195struct ib_uverbs_query_port_resp {
196 __u32 port_cap_flags;
197 __u32 max_msg_sz;
198 __u32 bad_pkey_cntr;
199 __u32 qkey_viol_cntr;
200 __u32 gid_tbl_len;
201 __u16 pkey_tbl_len;
202 __u16 lid;
203 __u16 sm_lid;
204 __u8 state;
205 __u8 max_mtu;
206 __u8 active_mtu;
207 __u8 lmc;
208 __u8 max_vl_num;
209 __u8 sm_sl;
210 __u8 subnet_timeout;
211 __u8 init_type_reply;
212 __u8 active_width;
213 __u8 active_speed;
214 __u8 phys_state;
215 __u8 reserved[3];
216};
217
218struct ib_uverbs_query_gid {
219 __u64 response;
220 __u8 port_num;
221 __u8 index;
222 __u8 reserved[6];
223 __u64 driver_data[0];
224};
225
226struct ib_uverbs_query_gid_resp {
227 __u8 gid[16];
228};
229
230struct ib_uverbs_query_pkey {
231 __u64 response;
232 __u8 port_num;
233 __u8 index;
234 __u8 reserved[6];
235 __u64 driver_data[0];
236};
237
238struct ib_uverbs_query_pkey_resp {
239 __u16 pkey;
240 __u16 reserved;
241};
242
243struct ib_uverbs_alloc_pd {
244 __u64 response;
245 __u64 driver_data[0];
246};
247
248struct ib_uverbs_alloc_pd_resp {
249 __u32 pd_handle;
250};
251
252struct ib_uverbs_dealloc_pd {
253 __u32 pd_handle;
254};
255
256struct ib_uverbs_reg_mr {
257 __u64 response;
258 __u64 start;
259 __u64 length;
260 __u64 hca_va;
261 __u32 pd_handle;
262 __u32 access_flags;
263 __u64 driver_data[0];
264};
265
266struct ib_uverbs_reg_mr_resp {
267 __u32 mr_handle;
268 __u32 lkey;
269 __u32 rkey;
270};
271
272struct ib_uverbs_dereg_mr {
273 __u32 mr_handle;
274};
275
276struct ib_uverbs_create_cq {
277 __u64 response;
278 __u64 user_handle;
279 __u32 cqe;
280 __u32 event_handler;
281 __u64 driver_data[0];
282};
283
284struct ib_uverbs_create_cq_resp {
285 __u32 cq_handle;
286 __u32 cqe;
287};
288
289struct ib_uverbs_destroy_cq {
290 __u32 cq_handle;
291};
292
293struct ib_uverbs_create_qp {
294 __u64 response;
295 __u64 user_handle;
296 __u32 pd_handle;
297 __u32 send_cq_handle;
298 __u32 recv_cq_handle;
299 __u32 srq_handle;
300 __u32 max_send_wr;
301 __u32 max_recv_wr;
302 __u32 max_send_sge;
303 __u32 max_recv_sge;
304 __u32 max_inline_data;
305 __u8 sq_sig_all;
306 __u8 qp_type;
307 __u8 is_srq;
308 __u8 reserved;
309 __u64 driver_data[0];
310};
311
312struct ib_uverbs_create_qp_resp {
313 __u32 qp_handle;
314 __u32 qpn;
315};
316
317/*
318 * This struct needs to remain a multiple of 8 bytes to keep the
319 * alignment of the modify QP parameters.
320 */
321struct ib_uverbs_qp_dest {
322 __u8 dgid[16];
323 __u32 flow_label;
324 __u16 dlid;
325 __u16 reserved;
326 __u8 sgid_index;
327 __u8 hop_limit;
328 __u8 traffic_class;
329 __u8 sl;
330 __u8 src_path_bits;
331 __u8 static_rate;
332 __u8 is_global;
333 __u8 port_num;
334};
335
336struct ib_uverbs_modify_qp {
337 struct ib_uverbs_qp_dest dest;
338 struct ib_uverbs_qp_dest alt_dest;
339 __u32 qp_handle;
340 __u32 attr_mask;
341 __u32 qkey;
342 __u32 rq_psn;
343 __u32 sq_psn;
344 __u32 dest_qp_num;
345 __u32 qp_access_flags;
346 __u16 pkey_index;
347 __u16 alt_pkey_index;
348 __u8 qp_state;
349 __u8 cur_qp_state;
350 __u8 path_mtu;
351 __u8 path_mig_state;
352 __u8 en_sqd_async_notify;
353 __u8 max_rd_atomic;
354 __u8 max_dest_rd_atomic;
355 __u8 min_rnr_timer;
356 __u8 port_num;
357 __u8 timeout;
358 __u8 retry_cnt;
359 __u8 rnr_retry;
360 __u8 alt_port_num;
361 __u8 alt_timeout;
362 __u8 reserved[2];
363 __u64 driver_data[0];
364};
365
366struct ib_uverbs_modify_qp_resp {
367};
368
369struct ib_uverbs_destroy_qp {
370 __u32 qp_handle;
371};
372
373struct ib_uverbs_attach_mcast {
374 __u8 gid[16];
375 __u32 qp_handle;
376 __u16 mlid;
377 __u16 reserved;
378 __u64 driver_data[0];
379};
380
381struct ib_uverbs_detach_mcast {
382 __u8 gid[16];
383 __u32 qp_handle;
384 __u16 mlid;
385 __u16 reserved;
386 __u64 driver_data[0];
387};
388
389#endif /* IB_USER_VERBS_H */
diff --git a/drivers/infiniband/include/ib_verbs.h b/drivers/infiniband/include/ib_verbs.h
index cf01f044a223..e5bd9a10c201 100644
--- a/drivers/infiniband/include/ib_verbs.h
+++ b/drivers/infiniband/include/ib_verbs.h
@@ -4,6 +4,7 @@
4 * Copyright (c) 2004 Intel Corporation. All rights reserved. 4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved. 6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 * Copyright (c) 2005 Cisco Systems. All rights reserved.
7 * 8 *
8 * This software is available to you under a choice of one of two 9 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU 10 * licenses. You may choose to be licensed under the terms of the GNU
@@ -41,7 +42,10 @@
41 42
42#include <linux/types.h> 43#include <linux/types.h>
43#include <linux/device.h> 44#include <linux/device.h>
45
44#include <asm/atomic.h> 46#include <asm/atomic.h>
47#include <asm/scatterlist.h>
48#include <asm/uaccess.h>
45 49
46union ib_gid { 50union ib_gid {
47 u8 raw[16]; 51 u8 raw[16];
@@ -544,7 +548,7 @@ struct ib_send_wr {
544 int num_sge; 548 int num_sge;
545 enum ib_wr_opcode opcode; 549 enum ib_wr_opcode opcode;
546 int send_flags; 550 int send_flags;
547 u32 imm_data; 551 __be32 imm_data;
548 union { 552 union {
549 struct { 553 struct {
550 u64 remote_addr; 554 u64 remote_addr;
@@ -618,29 +622,86 @@ struct ib_fmr_attr {
618 u8 page_size; 622 u8 page_size;
619}; 623};
620 624
625struct ib_ucontext {
626 struct ib_device *device;
627 struct list_head pd_list;
628 struct list_head mr_list;
629 struct list_head mw_list;
630 struct list_head cq_list;
631 struct list_head qp_list;
632 struct list_head srq_list;
633 struct list_head ah_list;
634 spinlock_t lock;
635};
636
637struct ib_uobject {
638 u64 user_handle; /* handle given to us by userspace */
639 struct ib_ucontext *context; /* associated user context */
640 struct list_head list; /* link to context's list */
641 u32 id; /* index into kernel idr */
642};
643
644struct ib_umem {
645 unsigned long user_base;
646 unsigned long virt_base;
647 size_t length;
648 int offset;
649 int page_size;
650 int writable;
651 struct list_head chunk_list;
652};
653
654struct ib_umem_chunk {
655 struct list_head list;
656 int nents;
657 int nmap;
658 struct scatterlist page_list[0];
659};
660
661struct ib_udata {
662 void __user *inbuf;
663 void __user *outbuf;
664 size_t inlen;
665 size_t outlen;
666};
667
668#define IB_UMEM_MAX_PAGE_CHUNK \
669 ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \
670 ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \
671 (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
672
673struct ib_umem_object {
674 struct ib_uobject uobject;
675 struct ib_umem umem;
676};
677
621struct ib_pd { 678struct ib_pd {
622 struct ib_device *device; 679 struct ib_device *device;
623 atomic_t usecnt; /* count all resources */ 680 struct ib_uobject *uobject;
681 atomic_t usecnt; /* count all resources */
624}; 682};
625 683
626struct ib_ah { 684struct ib_ah {
627 struct ib_device *device; 685 struct ib_device *device;
628 struct ib_pd *pd; 686 struct ib_pd *pd;
687 struct ib_uobject *uobject;
629}; 688};
630 689
631typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); 690typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
632 691
633struct ib_cq { 692struct ib_cq {
634 struct ib_device *device; 693 struct ib_device *device;
635 ib_comp_handler comp_handler; 694 struct ib_uobject *uobject;
636 void (*event_handler)(struct ib_event *, void *); 695 ib_comp_handler comp_handler;
637 void * cq_context; 696 void (*event_handler)(struct ib_event *, void *);
638 int cqe; 697 void * cq_context;
639 atomic_t usecnt; /* count number of work queues */ 698 int cqe;
699 atomic_t usecnt; /* count number of work queues */
640}; 700};
641 701
642struct ib_srq { 702struct ib_srq {
643 struct ib_device *device; 703 struct ib_device *device;
704 struct ib_uobject *uobject;
644 struct ib_pd *pd; 705 struct ib_pd *pd;
645 void *srq_context; 706 void *srq_context;
646 atomic_t usecnt; 707 atomic_t usecnt;
@@ -652,6 +713,7 @@ struct ib_qp {
652 struct ib_cq *send_cq; 713 struct ib_cq *send_cq;
653 struct ib_cq *recv_cq; 714 struct ib_cq *recv_cq;
654 struct ib_srq *srq; 715 struct ib_srq *srq;
716 struct ib_uobject *uobject;
655 void (*event_handler)(struct ib_event *, void *); 717 void (*event_handler)(struct ib_event *, void *);
656 void *qp_context; 718 void *qp_context;
657 u32 qp_num; 719 u32 qp_num;
@@ -659,16 +721,18 @@ struct ib_qp {
659}; 721};
660 722
661struct ib_mr { 723struct ib_mr {
662 struct ib_device *device; 724 struct ib_device *device;
663 struct ib_pd *pd; 725 struct ib_pd *pd;
664 u32 lkey; 726 struct ib_uobject *uobject;
665 u32 rkey; 727 u32 lkey;
666 atomic_t usecnt; /* count number of MWs */ 728 u32 rkey;
729 atomic_t usecnt; /* count number of MWs */
667}; 730};
668 731
669struct ib_mw { 732struct ib_mw {
670 struct ib_device *device; 733 struct ib_device *device;
671 struct ib_pd *pd; 734 struct ib_pd *pd;
735 struct ib_uobject *uobject;
672 u32 rkey; 736 u32 rkey;
673}; 737};
674 738
@@ -737,7 +801,14 @@ struct ib_device {
737 int (*modify_port)(struct ib_device *device, 801 int (*modify_port)(struct ib_device *device,
738 u8 port_num, int port_modify_mask, 802 u8 port_num, int port_modify_mask,
739 struct ib_port_modify *port_modify); 803 struct ib_port_modify *port_modify);
740 struct ib_pd * (*alloc_pd)(struct ib_device *device); 804 struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device,
805 struct ib_udata *udata);
806 int (*dealloc_ucontext)(struct ib_ucontext *context);
807 int (*mmap)(struct ib_ucontext *context,
808 struct vm_area_struct *vma);
809 struct ib_pd * (*alloc_pd)(struct ib_device *device,
810 struct ib_ucontext *context,
811 struct ib_udata *udata);
741 int (*dealloc_pd)(struct ib_pd *pd); 812 int (*dealloc_pd)(struct ib_pd *pd);
742 struct ib_ah * (*create_ah)(struct ib_pd *pd, 813 struct ib_ah * (*create_ah)(struct ib_pd *pd,
743 struct ib_ah_attr *ah_attr); 814 struct ib_ah_attr *ah_attr);
@@ -747,7 +818,8 @@ struct ib_device {
747 struct ib_ah_attr *ah_attr); 818 struct ib_ah_attr *ah_attr);
748 int (*destroy_ah)(struct ib_ah *ah); 819 int (*destroy_ah)(struct ib_ah *ah);
749 struct ib_qp * (*create_qp)(struct ib_pd *pd, 820 struct ib_qp * (*create_qp)(struct ib_pd *pd,
750 struct ib_qp_init_attr *qp_init_attr); 821 struct ib_qp_init_attr *qp_init_attr,
822 struct ib_udata *udata);
751 int (*modify_qp)(struct ib_qp *qp, 823 int (*modify_qp)(struct ib_qp *qp,
752 struct ib_qp_attr *qp_attr, 824 struct ib_qp_attr *qp_attr,
753 int qp_attr_mask); 825 int qp_attr_mask);
@@ -762,8 +834,9 @@ struct ib_device {
762 int (*post_recv)(struct ib_qp *qp, 834 int (*post_recv)(struct ib_qp *qp,
763 struct ib_recv_wr *recv_wr, 835 struct ib_recv_wr *recv_wr,
764 struct ib_recv_wr **bad_recv_wr); 836 struct ib_recv_wr **bad_recv_wr);
765 struct ib_cq * (*create_cq)(struct ib_device *device, 837 struct ib_cq * (*create_cq)(struct ib_device *device, int cqe,
766 int cqe); 838 struct ib_ucontext *context,
839 struct ib_udata *udata);
767 int (*destroy_cq)(struct ib_cq *cq); 840 int (*destroy_cq)(struct ib_cq *cq);
768 int (*resize_cq)(struct ib_cq *cq, int *cqe); 841 int (*resize_cq)(struct ib_cq *cq, int *cqe);
769 int (*poll_cq)(struct ib_cq *cq, int num_entries, 842 int (*poll_cq)(struct ib_cq *cq, int num_entries,
@@ -780,6 +853,10 @@ struct ib_device {
780 int num_phys_buf, 853 int num_phys_buf,
781 int mr_access_flags, 854 int mr_access_flags,
782 u64 *iova_start); 855 u64 *iova_start);
856 struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
857 struct ib_umem *region,
858 int mr_access_flags,
859 struct ib_udata *udata);
783 int (*query_mr)(struct ib_mr *mr, 860 int (*query_mr)(struct ib_mr *mr,
784 struct ib_mr_attr *mr_attr); 861 struct ib_mr_attr *mr_attr);
785 int (*dereg_mr)(struct ib_mr *mr); 862 int (*dereg_mr)(struct ib_mr *mr);
@@ -817,6 +894,7 @@ struct ib_device {
817 struct ib_mad *in_mad, 894 struct ib_mad *in_mad,
818 struct ib_mad *out_mad); 895 struct ib_mad *out_mad);
819 896
897 struct module *owner;
820 struct class_device class_dev; 898 struct class_device class_dev;
821 struct kobject ports_parent; 899 struct kobject ports_parent;
822 struct list_head port_list; 900 struct list_head port_list;
@@ -852,6 +930,16 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
852void ib_set_client_data(struct ib_device *device, struct ib_client *client, 930void ib_set_client_data(struct ib_device *device, struct ib_client *client,
853 void *data); 931 void *data);
854 932
933static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
934{
935 return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
936}
937
938static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
939{
940 return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
941}
942
855int ib_register_event_handler (struct ib_event_handler *event_handler); 943int ib_register_event_handler (struct ib_event_handler *event_handler);
856int ib_unregister_event_handler(struct ib_event_handler *event_handler); 944int ib_unregister_event_handler(struct ib_event_handler *event_handler);
857void ib_dispatch_event(struct ib_event *event); 945void ib_dispatch_event(struct ib_event *event);