aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
author Roland Dreier <rolandd@cisco.com> 2007-03-04 19:15:11 -0500
committer Roland Dreier <rolandd@cisco.com> 2007-05-08 21:00:37 -0400
commit f7c6a7b5d59980b076abbf2ceeb8735591290285 (patch)
tree 29c35b47052bba87f031a4744d8ad12ff5187149 /drivers/infiniband
parent 36f021b579d195cdc5fa6f3e2bab198b4bf70643 (diff)
IB/uverbs: Export ib_umem_get()/ib_umem_release() to modules
Export ib_umem_get()/ib_umem_release() and put low-level drivers in control of when to call ib_umem_get() to pin and DMA map userspace memory, rather than always calling it in ib_uverbs_reg_mr() before calling the low-level driver's reg_user_mr method.

Also move these functions to be in the ib_core module instead of ib_uverbs, so that driver modules using them do not depend on ib_uverbs.

This has a number of advantages:

- It is better design from the standpoint of making generic code a library that can be used or overridden by device-specific code as the details of specific devices dictate.

- Drivers that do not need to pin userspace memory regions do not need to take the performance hit of calling ib_umem_get(). For example, although I have not tried to implement it in this patch, the ipath driver should be able to avoid pinning memory and just use copy_{to,from}_user() to access userspace memory regions.

- Buffers that need special mapping treatment can be identified by the low-level driver. For example, it may be possible to solve some Altix-specific memory ordering issues with mthca CQs in userspace by mapping CQ buffers with extra flags.

- Drivers that need to pin and DMA map userspace memory for things other than memory regions can use ib_umem_get() directly, instead of hacks using extra parameters to their reg_phys_mr method. For example, the mlx4 driver that is pending being merged needs to pin and DMA map QP and CQ buffers, but it does not need to create a memory key for these buffers. So the cleanest solution is for mlx4 to call ib_umem_get() in the create_qp and create_cq methods.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- drivers/infiniband/Kconfig 5
-rw-r--r-- drivers/infiniband/core/Makefile 4
-rw-r--r-- drivers/infiniband/core/device.c 2
-rw-r--r-- drivers/infiniband/core/umem.c (renamed from drivers/infiniband/core/uverbs_mem.c) 136
-rw-r--r-- drivers/infiniband/core/uverbs.h 6
-rw-r--r-- drivers/infiniband/core/uverbs_cmd.c 60
-rw-r--r-- drivers/infiniband/core/uverbs_main.c 11
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_provider.c 42
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_provider.h 1
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_provider.c 28
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_provider.h 1
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_classes.h 1
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_iverbs.h 3
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_mrmw.c 69
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_mr.c 38
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_verbs.h 5
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_provider.c 38
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_provider.h 1
18 files changed, 273 insertions, 178 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 66b36de9fa6f..82afba5c0bf4 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS
29 libibverbs, libibcm and a hardware driver library from 29 libibverbs, libibcm and a hardware driver library from
30 <http://www.openib.org>. 30 <http://www.openib.org>.
31 31
32config INFINIBAND_USER_MEM
33 bool
34 depends on INFINIBAND_USER_ACCESS != n
35 default y
36
32config INFINIBAND_ADDR_TRANS 37config INFINIBAND_ADDR_TRANS
33 bool 38 bool
34 depends on INFINIBAND && INET 39 depends on INFINIBAND && INET
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 189e5d4b9b17..cb1ab3ea4998 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
9 9
10ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ 10ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
11 device.o fmr_pool.o cache.o 11 device.o fmr_pool.o cache.o
12ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
12 13
13ib_mad-y := mad.o smi.o agent.o mad_rmpp.o 14ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
14 15
@@ -28,5 +29,4 @@ ib_umad-y := user_mad.o
28 29
29ib_ucm-y := ucm.o 30ib_ucm-y := ucm.o
30 31
31ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ 32ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
32 uverbs_marshall.o
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 7fabb425b033..592c90aa3183 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -613,6 +613,8 @@ static void __exit ib_core_cleanup(void)
613{ 613{
614 ib_cache_cleanup(); 614 ib_cache_cleanup();
615 ib_sysfs_cleanup(); 615 ib_sysfs_cleanup();
616 /* Make sure that any pending umem accounting work is done. */
617 flush_scheduled_work();
616} 618}
617 619
618module_init(ib_core_init); 620module_init(ib_core_init);
diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/umem.c
index c95fe952abd5..48e854cf416f 100644
--- a/drivers/infiniband/core/uverbs_mem.c
+++ b/drivers/infiniband/core/umem.c
@@ -64,35 +64,56 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
64 } 64 }
65} 65}
66 66
67int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, 67/**
68 void *addr, size_t size, int write) 68 * ib_umem_get - Pin and DMA map userspace memory.
69 * @context: userspace context to pin memory for
70 * @addr: userspace virtual address to start at
71 * @size: length of region to pin
72 * @access: IB_ACCESS_xxx flags for memory being pinned
73 */
74struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
75 size_t size, int access)
69{ 76{
77 struct ib_umem *umem;
70 struct page **page_list; 78 struct page **page_list;
71 struct ib_umem_chunk *chunk; 79 struct ib_umem_chunk *chunk;
72 unsigned long locked; 80 unsigned long locked;
73 unsigned long lock_limit; 81 unsigned long lock_limit;
74 unsigned long cur_base; 82 unsigned long cur_base;
75 unsigned long npages; 83 unsigned long npages;
76 int ret = 0; 84 int ret;
77 int off; 85 int off;
78 int i; 86 int i;
79 87
80 if (!can_do_mlock()) 88 if (!can_do_mlock())
81 return -EPERM; 89 return ERR_PTR(-EPERM);
82 90
83 page_list = (struct page **) __get_free_page(GFP_KERNEL); 91 umem = kmalloc(sizeof *umem, GFP_KERNEL);
84 if (!page_list) 92 if (!umem)
85 return -ENOMEM; 93 return ERR_PTR(-ENOMEM);
86 94
87 mem->user_base = (unsigned long) addr; 95 umem->context = context;
88 mem->length = size; 96 umem->length = size;
89 mem->offset = (unsigned long) addr & ~PAGE_MASK; 97 umem->offset = addr & ~PAGE_MASK;
90 mem->page_size = PAGE_SIZE; 98 umem->page_size = PAGE_SIZE;
91 mem->writable = write; 99 /*
100 * We ask for writable memory if any access flags other than
101 * "remote read" are set. "Local write" and "remote write"
102 * obviously require write access. "Remote atomic" can do
103 * things like fetch and add, which will modify memory, and
104 * "MW bind" can change permissions by binding a window.
105 */
106 umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
92 107
93 INIT_LIST_HEAD(&mem->chunk_list); 108 INIT_LIST_HEAD(&umem->chunk_list);
109
110 page_list = (struct page **) __get_free_page(GFP_KERNEL);
111 if (!page_list) {
112 kfree(umem);
113 return ERR_PTR(-ENOMEM);
114 }
94 115
95 npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT; 116 npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
96 117
97 down_write(&current->mm->mmap_sem); 118 down_write(&current->mm->mmap_sem);
98 119
@@ -104,13 +125,13 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
104 goto out; 125 goto out;
105 } 126 }
106 127
107 cur_base = (unsigned long) addr & PAGE_MASK; 128 cur_base = addr & PAGE_MASK;
108 129
109 while (npages) { 130 while (npages) {
110 ret = get_user_pages(current, current->mm, cur_base, 131 ret = get_user_pages(current, current->mm, cur_base,
111 min_t(int, npages, 132 min_t(int, npages,
112 PAGE_SIZE / sizeof (struct page *)), 133 PAGE_SIZE / sizeof (struct page *)),
113 1, !write, page_list, NULL); 134 1, !umem->writable, page_list, NULL);
114 135
115 if (ret < 0) 136 if (ret < 0)
116 goto out; 137 goto out;
@@ -136,7 +157,7 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
136 chunk->page_list[i].length = PAGE_SIZE; 157 chunk->page_list[i].length = PAGE_SIZE;
137 } 158 }
138 159
139 chunk->nmap = ib_dma_map_sg(dev, 160 chunk->nmap = ib_dma_map_sg(context->device,
140 &chunk->page_list[0], 161 &chunk->page_list[0],
141 chunk->nents, 162 chunk->nents,
142 DMA_BIDIRECTIONAL); 163 DMA_BIDIRECTIONAL);
@@ -151,33 +172,25 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
151 172
152 ret -= chunk->nents; 173 ret -= chunk->nents;
153 off += chunk->nents; 174 off += chunk->nents;
154 list_add_tail(&chunk->list, &mem->chunk_list); 175 list_add_tail(&chunk->list, &umem->chunk_list);
155 } 176 }
156 177
157 ret = 0; 178 ret = 0;
158 } 179 }
159 180
160out: 181out:
161 if (ret < 0) 182 if (ret < 0) {
162 __ib_umem_release(dev, mem, 0); 183 __ib_umem_release(context->device, umem, 0);
163 else 184 kfree(umem);
185 } else
164 current->mm->locked_vm = locked; 186 current->mm->locked_vm = locked;
165 187
166 up_write(&current->mm->mmap_sem); 188 up_write(&current->mm->mmap_sem);
167 free_page((unsigned long) page_list); 189 free_page((unsigned long) page_list);
168 190
169 return ret; 191 return ret < 0 ? ERR_PTR(ret) : umem;
170}
171
172void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
173{
174 __ib_umem_release(dev, umem, 1);
175
176 down_write(&current->mm->mmap_sem);
177 current->mm->locked_vm -=
178 PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
179 up_write(&current->mm->mmap_sem);
180} 192}
193EXPORT_SYMBOL(ib_umem_get);
181 194
182static void ib_umem_account(struct work_struct *_work) 195static void ib_umem_account(struct work_struct *_work)
183{ 196{
@@ -191,35 +204,70 @@ static void ib_umem_account(struct work_struct *_work)
191 kfree(work); 204 kfree(work);
192} 205}
193 206
194void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem) 207/**
208 * ib_umem_release - release memory pinned with ib_umem_get
209 * @umem: umem struct to release
210 */
211void ib_umem_release(struct ib_umem *umem)
195{ 212{
196 struct ib_umem_account_work *work; 213 struct ib_umem_account_work *work;
214 struct ib_ucontext *context = umem->context;
197 struct mm_struct *mm; 215 struct mm_struct *mm;
216 unsigned long diff;
198 217
199 __ib_umem_release(dev, umem, 1); 218 __ib_umem_release(umem->context->device, umem, 1);
200 219
201 mm = get_task_mm(current); 220 mm = get_task_mm(current);
202 if (!mm) 221 if (!mm)
203 return; 222 return;
204 223
224 diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
225 kfree(umem);
226
205 /* 227 /*
206 * We may be called with the mm's mmap_sem already held. This 228 * We may be called with the mm's mmap_sem already held. This
207 * can happen when a userspace munmap() is the call that drops 229 * can happen when a userspace munmap() is the call that drops
208 * the last reference to our file and calls our release 230 * the last reference to our file and calls our release
209 * method. If there are memory regions to destroy, we'll end 231 * method. If there are memory regions to destroy, we'll end
210 * up here and not be able to take the mmap_sem. Therefore we 232 * up here and not be able to take the mmap_sem. In that case
211 * defer the vm_locked accounting to the system workqueue. 233 * we defer the vm_locked accounting to the system workqueue.
212 */ 234 */
235 if (context->closing && !down_write_trylock(&mm->mmap_sem)) {
236 work = kmalloc(sizeof *work, GFP_KERNEL);
237 if (!work) {
238 mmput(mm);
239 return;
240 }
213 241
214 work = kmalloc(sizeof *work, GFP_KERNEL); 242 INIT_WORK(&work->work, ib_umem_account);
215 if (!work) { 243 work->mm = mm;
216 mmput(mm); 244 work->diff = diff;
245
246 schedule_work(&work->work);
217 return; 247 return;
218 } 248 } else
249 down_write(&mm->mmap_sem);
250
251 current->mm->locked_vm -= diff;
252 up_write(&mm->mmap_sem);
253 mmput(mm);
254}
255EXPORT_SYMBOL(ib_umem_release);
256
257int ib_umem_page_count(struct ib_umem *umem)
258{
259 struct ib_umem_chunk *chunk;
260 int shift;
261 int i;
262 int n;
263
264 shift = ilog2(umem->page_size);
219 265
220 INIT_WORK(&work->work, ib_umem_account); 266 n = 0;
221 work->mm = mm; 267 list_for_each_entry(chunk, &umem->chunk_list, list)
222 work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; 268 for (i = 0; i < chunk->nmap; ++i)
269 n += sg_dma_len(&chunk->page_list[i]) >> shift;
223 270
224 schedule_work(&work->work); 271 return n;
225} 272}
273EXPORT_SYMBOL(ib_umem_page_count);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 102a59c033ff..c33546f9e961 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -45,6 +45,7 @@
45#include <linux/completion.h> 45#include <linux/completion.h>
46 46
47#include <rdma/ib_verbs.h> 47#include <rdma/ib_verbs.h>
48#include <rdma/ib_umem.h>
48#include <rdma/ib_user_verbs.h> 49#include <rdma/ib_user_verbs.h>
49 50
50/* 51/*
@@ -163,11 +164,6 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
163void ib_uverbs_event_handler(struct ib_event_handler *handler, 164void ib_uverbs_event_handler(struct ib_event_handler *handler,
164 struct ib_event *event); 165 struct ib_event *event);
165 166
166int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
167 void *addr, size_t size, int write);
168void ib_umem_release(struct ib_device *dev, struct ib_umem *umem);
169void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
170
171#define IB_UVERBS_DECLARE_CMD(name) \ 167#define IB_UVERBS_DECLARE_CMD(name) \
172 ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ 168 ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
173 const char __user *buf, int in_len, \ 169 const char __user *buf, int in_len, \
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index bab66769be14..01d70084aebe 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. 3 * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
4 * Copyright (c) 2005 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
5 * Copyright (c) 2006 Mellanox Technologies. All rights reserved. 5 * Copyright (c) 2006 Mellanox Technologies. All rights reserved.
6 * 6 *
@@ -295,6 +295,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
295 INIT_LIST_HEAD(&ucontext->qp_list); 295 INIT_LIST_HEAD(&ucontext->qp_list);
296 INIT_LIST_HEAD(&ucontext->srq_list); 296 INIT_LIST_HEAD(&ucontext->srq_list);
297 INIT_LIST_HEAD(&ucontext->ah_list); 297 INIT_LIST_HEAD(&ucontext->ah_list);
298 ucontext->closing = 0;
298 299
299 resp.num_comp_vectors = file->device->num_comp_vectors; 300 resp.num_comp_vectors = file->device->num_comp_vectors;
300 301
@@ -573,7 +574,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
573 struct ib_uverbs_reg_mr cmd; 574 struct ib_uverbs_reg_mr cmd;
574 struct ib_uverbs_reg_mr_resp resp; 575 struct ib_uverbs_reg_mr_resp resp;
575 struct ib_udata udata; 576 struct ib_udata udata;
576 struct ib_umem_object *obj; 577 struct ib_uobject *uobj;
577 struct ib_pd *pd; 578 struct ib_pd *pd;
578 struct ib_mr *mr; 579 struct ib_mr *mr;
579 int ret; 580 int ret;
@@ -599,35 +600,21 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
599 !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) 600 !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
600 return -EINVAL; 601 return -EINVAL;
601 602
602 obj = kmalloc(sizeof *obj, GFP_KERNEL); 603 uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
603 if (!obj) 604 if (!uobj)
604 return -ENOMEM; 605 return -ENOMEM;
605 606
606 init_uobj(&obj->uobject, 0, file->ucontext, &mr_lock_key); 607 init_uobj(uobj, 0, file->ucontext, &mr_lock_key);
607 down_write(&obj->uobject.mutex); 608 down_write(&uobj->mutex);
608
609 /*
610 * We ask for writable memory if any access flags other than
611 * "remote read" are set. "Local write" and "remote write"
612 * obviously require write access. "Remote atomic" can do
613 * things like fetch and add, which will modify memory, and
614 * "MW bind" can change permissions by binding a window.
615 */
616 ret = ib_umem_get(file->device->ib_dev, &obj->umem,
617 (void *) (unsigned long) cmd.start, cmd.length,
618 !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ));
619 if (ret)
620 goto err_free;
621
622 obj->umem.virt_base = cmd.hca_va;
623 609
624 pd = idr_read_pd(cmd.pd_handle, file->ucontext); 610 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
625 if (!pd) { 611 if (!pd) {
626 ret = -EINVAL; 612 ret = -EINVAL;
627 goto err_release; 613 goto err_free;
628 } 614 }
629 615
630 mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata); 616 mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
617 cmd.access_flags, &udata);
631 if (IS_ERR(mr)) { 618 if (IS_ERR(mr)) {
632 ret = PTR_ERR(mr); 619 ret = PTR_ERR(mr);
633 goto err_put; 620 goto err_put;
@@ -635,19 +622,19 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
635 622
636 mr->device = pd->device; 623 mr->device = pd->device;
637 mr->pd = pd; 624 mr->pd = pd;
638 mr->uobject = &obj->uobject; 625 mr->uobject = uobj;
639 atomic_inc(&pd->usecnt); 626 atomic_inc(&pd->usecnt);
640 atomic_set(&mr->usecnt, 0); 627 atomic_set(&mr->usecnt, 0);
641 628
642 obj->uobject.object = mr; 629 uobj->object = mr;
643 ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uobject); 630 ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
644 if (ret) 631 if (ret)
645 goto err_unreg; 632 goto err_unreg;
646 633
647 memset(&resp, 0, sizeof resp); 634 memset(&resp, 0, sizeof resp);
648 resp.lkey = mr->lkey; 635 resp.lkey = mr->lkey;
649 resp.rkey = mr->rkey; 636 resp.rkey = mr->rkey;
650 resp.mr_handle = obj->uobject.id; 637 resp.mr_handle = uobj->id;
651 638
652 if (copy_to_user((void __user *) (unsigned long) cmd.response, 639 if (copy_to_user((void __user *) (unsigned long) cmd.response,
653 &resp, sizeof resp)) { 640 &resp, sizeof resp)) {
@@ -658,17 +645,17 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
658 put_pd_read(pd); 645 put_pd_read(pd);
659 646
660 mutex_lock(&file->mutex); 647 mutex_lock(&file->mutex);
661 list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); 648 list_add_tail(&uobj->list, &file->ucontext->mr_list);
662 mutex_unlock(&file->mutex); 649 mutex_unlock(&file->mutex);
663 650
664 obj->uobject.live = 1; 651 uobj->live = 1;
665 652
666 up_write(&obj->uobject.mutex); 653 up_write(&uobj->mutex);
667 654
668 return in_len; 655 return in_len;
669 656
670err_copy: 657err_copy:
671 idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uobject); 658 idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
672 659
673err_unreg: 660err_unreg:
674 ib_dereg_mr(mr); 661 ib_dereg_mr(mr);
@@ -676,11 +663,8 @@ err_unreg:
676err_put: 663err_put:
677 put_pd_read(pd); 664 put_pd_read(pd);
678 665
679err_release:
680 ib_umem_release(file->device->ib_dev, &obj->umem);
681
682err_free: 666err_free:
683 put_uobj_write(&obj->uobject); 667 put_uobj_write(uobj);
684 return ret; 668 return ret;
685} 669}
686 670
@@ -691,7 +675,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
691 struct ib_uverbs_dereg_mr cmd; 675 struct ib_uverbs_dereg_mr cmd;
692 struct ib_mr *mr; 676 struct ib_mr *mr;
693 struct ib_uobject *uobj; 677 struct ib_uobject *uobj;
694 struct ib_umem_object *memobj;
695 int ret = -EINVAL; 678 int ret = -EINVAL;
696 679
697 if (copy_from_user(&cmd, buf, sizeof cmd)) 680 if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -701,8 +684,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
701 if (!uobj) 684 if (!uobj)
702 return -EINVAL; 685 return -EINVAL;
703 686
704 memobj = container_of(uobj, struct ib_umem_object, uobject); 687 mr = uobj->object;
705 mr = uobj->object;
706 688
707 ret = ib_dereg_mr(mr); 689 ret = ib_dereg_mr(mr);
708 if (!ret) 690 if (!ret)
@@ -719,8 +701,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
719 list_del(&uobj->list); 701 list_del(&uobj->list);
720 mutex_unlock(&file->mutex); 702 mutex_unlock(&file->mutex);
721 703
722 ib_umem_release(file->device->ib_dev, &memobj->umem);
723
724 put_uobj(uobj); 704 put_uobj(uobj);
725 705
726 return in_len; 706 return in_len;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index d44e54799651..14d7ccd89195 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -183,6 +183,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
183 if (!context) 183 if (!context)
184 return 0; 184 return 0;
185 185
186 context->closing = 1;
187
186 list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { 188 list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
187 struct ib_ah *ah = uobj->object; 189 struct ib_ah *ah = uobj->object;
188 190
@@ -230,16 +232,10 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
230 232
231 list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { 233 list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
232 struct ib_mr *mr = uobj->object; 234 struct ib_mr *mr = uobj->object;
233 struct ib_device *mrdev = mr->device;
234 struct ib_umem_object *memobj;
235 235
236 idr_remove_uobj(&ib_uverbs_mr_idr, uobj); 236 idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
237 ib_dereg_mr(mr); 237 ib_dereg_mr(mr);
238 238 kfree(uobj);
239 memobj = container_of(uobj, struct ib_umem_object, uobject);
240 ib_umem_release_on_close(mrdev, &memobj->umem);
241
242 kfree(memobj);
243 } 239 }
244 240
245 list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { 241 list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
@@ -906,7 +902,6 @@ static void __exit ib_uverbs_cleanup(void)
906 unregister_filesystem(&uverbs_event_fs); 902 unregister_filesystem(&uverbs_event_fs);
907 class_destroy(uverbs_class); 903 class_destroy(uverbs_class);
908 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); 904 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
909 flush_scheduled_work();
910 idr_destroy(&ib_uverbs_pd_idr); 905 idr_destroy(&ib_uverbs_pd_idr);
911 idr_destroy(&ib_uverbs_mr_idr); 906 idr_destroy(&ib_uverbs_mr_idr);
912 idr_destroy(&ib_uverbs_mw_idr); 907 idr_destroy(&ib_uverbs_mw_idr);
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 109166223c09..997cf1530762 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -56,6 +56,7 @@
56#include <asm/byteorder.h> 56#include <asm/byteorder.h>
57 57
58#include <rdma/ib_smi.h> 58#include <rdma/ib_smi.h>
59#include <rdma/ib_umem.h>
59#include <rdma/ib_user_verbs.h> 60#include <rdma/ib_user_verbs.h>
60#include "c2.h" 61#include "c2.h"
61#include "c2_provider.h" 62#include "c2_provider.h"
@@ -396,6 +397,7 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
396 } 397 }
397 398
398 mr->pd = to_c2pd(ib_pd); 399 mr->pd = to_c2pd(ib_pd);
400 mr->umem = NULL;
399 pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, " 401 pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
400 "*iova_start %llx, first pa %llx, last pa %llx\n", 402 "*iova_start %llx, first pa %llx, last pa %llx\n",
401 __FUNCTION__, page_shift, pbl_depth, total_len, 403 __FUNCTION__, page_shift, pbl_depth, total_len,
@@ -428,8 +430,8 @@ static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
428 return c2_reg_phys_mr(pd, &bl, 1, acc, &kva); 430 return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
429} 431}
430 432
431static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, 433static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
432 int acc, struct ib_udata *udata) 434 u64 virt, int acc, struct ib_udata *udata)
433{ 435{
434 u64 *pages; 436 u64 *pages;
435 u64 kva = 0; 437 u64 kva = 0;
@@ -441,15 +443,23 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
441 struct c2_mr *c2mr; 443 struct c2_mr *c2mr;
442 444
443 pr_debug("%s:%u\n", __FUNCTION__, __LINE__); 445 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
444 shift = ffs(region->page_size) - 1;
445 446
446 c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL); 447 c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
447 if (!c2mr) 448 if (!c2mr)
448 return ERR_PTR(-ENOMEM); 449 return ERR_PTR(-ENOMEM);
449 c2mr->pd = c2pd; 450 c2mr->pd = c2pd;
450 451
452 c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
453 if (IS_ERR(c2mr->umem)) {
454 err = PTR_ERR(c2mr->umem);
455 kfree(c2mr);
456 return ERR_PTR(err);
457 }
458
459 shift = ffs(c2mr->umem->page_size) - 1;
460
451 n = 0; 461 n = 0;
452 list_for_each_entry(chunk, &region->chunk_list, list) 462 list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
453 n += chunk->nents; 463 n += chunk->nents;
454 464
455 pages = kmalloc(n * sizeof(u64), GFP_KERNEL); 465 pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
@@ -459,35 +469,34 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
459 } 469 }
460 470
461 i = 0; 471 i = 0;
462 list_for_each_entry(chunk, &region->chunk_list, list) { 472 list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
463 for (j = 0; j < chunk->nmap; ++j) { 473 for (j = 0; j < chunk->nmap; ++j) {
464 len = sg_dma_len(&chunk->page_list[j]) >> shift; 474 len = sg_dma_len(&chunk->page_list[j]) >> shift;
465 for (k = 0; k < len; ++k) { 475 for (k = 0; k < len; ++k) {
466 pages[i++] = 476 pages[i++] =
467 sg_dma_address(&chunk->page_list[j]) + 477 sg_dma_address(&chunk->page_list[j]) +
468 (region->page_size * k); 478 (c2mr->umem->page_size * k);
469 } 479 }
470 } 480 }
471 } 481 }
472 482
473 kva = (u64)region->virt_base; 483 kva = virt;
474 err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), 484 err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
475 pages, 485 pages,
476 region->page_size, 486 c2mr->umem->page_size,
477 i, 487 i,
478 region->length, 488 length,
479 region->offset, 489 c2mr->umem->offset,
480 &kva, 490 &kva,
481 c2_convert_access(acc), 491 c2_convert_access(acc),
482 c2mr); 492 c2mr);
483 kfree(pages); 493 kfree(pages);
484 if (err) { 494 if (err)
485 kfree(c2mr); 495 goto err;
486 return ERR_PTR(err);
487 }
488 return &c2mr->ibmr; 496 return &c2mr->ibmr;
489 497
490err: 498err:
499 ib_umem_release(c2mr->umem);
491 kfree(c2mr); 500 kfree(c2mr);
492 return ERR_PTR(err); 501 return ERR_PTR(err);
493} 502}
@@ -502,8 +511,11 @@ static int c2_dereg_mr(struct ib_mr *ib_mr)
502 err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey); 511 err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
503 if (err) 512 if (err)
504 pr_debug("c2_stag_dealloc failed: %d\n", err); 513 pr_debug("c2_stag_dealloc failed: %d\n", err);
505 else 514 else {
515 if (mr->umem)
516 ib_umem_release(mr->umem);
506 kfree(mr); 517 kfree(mr);
518 }
507 519
508 return err; 520 return err;
509} 521}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h
index fc906223220f..1076df2ee96a 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.h
+++ b/drivers/infiniband/hw/amso1100/c2_provider.h
@@ -73,6 +73,7 @@ struct c2_pd {
73struct c2_mr { 73struct c2_mr {
74 struct ib_mr ibmr; 74 struct ib_mr ibmr;
75 struct c2_pd *pd; 75 struct c2_pd *pd;
76 struct ib_umem *umem;
76}; 77};
77 78
78struct c2_av; 79struct c2_av;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index a891493fd340..e7c2c3948037 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -47,6 +47,7 @@
47#include <rdma/iw_cm.h> 47#include <rdma/iw_cm.h>
48#include <rdma/ib_verbs.h> 48#include <rdma/ib_verbs.h>
49#include <rdma/ib_smi.h> 49#include <rdma/ib_smi.h>
50#include <rdma/ib_umem.h>
50#include <rdma/ib_user_verbs.h> 51#include <rdma/ib_user_verbs.h>
51 52
52#include "cxio_hal.h" 53#include "cxio_hal.h"
@@ -443,6 +444,8 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
443 remove_handle(rhp, &rhp->mmidr, mmid); 444 remove_handle(rhp, &rhp->mmidr, mmid);
444 if (mhp->kva) 445 if (mhp->kva)
445 kfree((void *) (unsigned long) mhp->kva); 446 kfree((void *) (unsigned long) mhp->kva);
447 if (mhp->umem)
448 ib_umem_release(mhp->umem);
446 PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp); 449 PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp);
447 kfree(mhp); 450 kfree(mhp);
448 return 0; 451 return 0;
@@ -577,8 +580,8 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
577} 580}
578 581
579 582
580static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, 583static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
581 int acc, struct ib_udata *udata) 584 u64 virt, int acc, struct ib_udata *udata)
582{ 585{
583 __be64 *pages; 586 __be64 *pages;
584 int shift, n, len; 587 int shift, n, len;
@@ -591,7 +594,6 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
591 struct iwch_reg_user_mr_resp uresp; 594 struct iwch_reg_user_mr_resp uresp;
592 595
593 PDBG("%s ib_pd %p\n", __FUNCTION__, pd); 596 PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
594 shift = ffs(region->page_size) - 1;
595 597
596 php = to_iwch_pd(pd); 598 php = to_iwch_pd(pd);
597 rhp = php->rhp; 599 rhp = php->rhp;
@@ -599,8 +601,17 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
599 if (!mhp) 601 if (!mhp)
600 return ERR_PTR(-ENOMEM); 602 return ERR_PTR(-ENOMEM);
601 603
604 mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc);
605 if (IS_ERR(mhp->umem)) {
606 err = PTR_ERR(mhp->umem);
607 kfree(mhp);
608 return ERR_PTR(err);
609 }
610
611 shift = ffs(mhp->umem->page_size) - 1;
612
602 n = 0; 613 n = 0;
603 list_for_each_entry(chunk, &region->chunk_list, list) 614 list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
604 n += chunk->nents; 615 n += chunk->nents;
605 616
606 pages = kmalloc(n * sizeof(u64), GFP_KERNEL); 617 pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
@@ -611,13 +622,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
611 622
612 i = n = 0; 623 i = n = 0;
613 624
614 list_for_each_entry(chunk, &region->chunk_list, list) 625 list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
615 for (j = 0; j < chunk->nmap; ++j) { 626 for (j = 0; j < chunk->nmap; ++j) {
616 len = sg_dma_len(&chunk->page_list[j]) >> shift; 627 len = sg_dma_len(&chunk->page_list[j]) >> shift;
617 for (k = 0; k < len; ++k) { 628 for (k = 0; k < len; ++k) {
618 pages[i++] = cpu_to_be64(sg_dma_address( 629 pages[i++] = cpu_to_be64(sg_dma_address(
619 &chunk->page_list[j]) + 630 &chunk->page_list[j]) +
620 region->page_size * k); 631 mhp->umem->page_size * k);
621 } 632 }
622 } 633 }
623 634
@@ -625,9 +636,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
625 mhp->attr.pdid = php->pdid; 636 mhp->attr.pdid = php->pdid;
626 mhp->attr.zbva = 0; 637 mhp->attr.zbva = 0;
627 mhp->attr.perms = iwch_ib_to_tpt_access(acc); 638 mhp->attr.perms = iwch_ib_to_tpt_access(acc);
628 mhp->attr.va_fbo = region->virt_base; 639 mhp->attr.va_fbo = virt;
629 mhp->attr.page_size = shift - 12; 640 mhp->attr.page_size = shift - 12;
630 mhp->attr.len = (u32) region->length; 641 mhp->attr.len = (u32) length;
631 mhp->attr.pbl_size = i; 642 mhp->attr.pbl_size = i;
632 err = iwch_register_mem(rhp, php, mhp, shift, pages); 643 err = iwch_register_mem(rhp, php, mhp, shift, pages);
633 kfree(pages); 644 kfree(pages);
@@ -650,6 +661,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
650 return &mhp->ibmr; 661 return &mhp->ibmr;
651 662
652err: 663err:
664 ib_umem_release(mhp->umem);
653 kfree(mhp); 665 kfree(mhp);
654 return ERR_PTR(err); 666 return ERR_PTR(err);
655} 667}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 93bcc56756bd..48833f3f3bd0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -73,6 +73,7 @@ struct tpt_attributes {
73 73
74struct iwch_mr { 74struct iwch_mr {
75 struct ib_mr ibmr; 75 struct ib_mr ibmr;
76 struct ib_umem *umem;
76 struct iwch_dev *rhp; 77 struct iwch_dev *rhp;
77 u64 kva; 78 u64 kva;
78 struct tpt_attributes attr; 79 struct tpt_attributes attr;
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 10fb8fbafa0c..f64d42b08674 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -176,6 +176,7 @@ struct ehca_mr {
176 struct ib_mr ib_mr; /* must always be first in ehca_mr */ 176 struct ib_mr ib_mr; /* must always be first in ehca_mr */
177 struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ 177 struct ib_fmr ib_fmr; /* must always be first in ehca_mr */
178 } ib; 178 } ib;
179 struct ib_umem *umem;
179 spinlock_t mrlock; 180 spinlock_t mrlock;
180 181
181 enum ehca_mr_flag flags; 182 enum ehca_mr_flag flags;
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index e14b029332c8..37e7fe0908cf 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -78,8 +78,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
78 int num_phys_buf, 78 int num_phys_buf,
79 int mr_access_flags, u64 *iova_start); 79 int mr_access_flags, u64 *iova_start);
80 80
81struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, 81struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
82 struct ib_umem *region,
83 int mr_access_flags, struct ib_udata *udata); 82 int mr_access_flags, struct ib_udata *udata);
84 83
85int ehca_rereg_phys_mr(struct ib_mr *mr, 84int ehca_rereg_phys_mr(struct ib_mr *mr,
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index d22ab563633f..84c5bb498563 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -39,6 +39,8 @@
39 * POSSIBILITY OF SUCH DAMAGE. 39 * POSSIBILITY OF SUCH DAMAGE.
40 */ 40 */
41 41
42#include <rdma/ib_umem.h>
43
42#include <asm/current.h> 44#include <asm/current.h>
43 45
44#include "ehca_iverbs.h" 46#include "ehca_iverbs.h"
@@ -238,10 +240,8 @@ reg_phys_mr_exit0:
238 240
239/*----------------------------------------------------------------------*/ 241/*----------------------------------------------------------------------*/
240 242
241struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, 243struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
242 struct ib_umem *region, 244 int mr_access_flags, struct ib_udata *udata)
243 int mr_access_flags,
244 struct ib_udata *udata)
245{ 245{
246 struct ib_mr *ib_mr; 246 struct ib_mr *ib_mr;
247 struct ehca_mr *e_mr; 247 struct ehca_mr *e_mr;
@@ -257,11 +257,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
257 ehca_gen_err("bad pd=%p", pd); 257 ehca_gen_err("bad pd=%p", pd);
258 return ERR_PTR(-EFAULT); 258 return ERR_PTR(-EFAULT);
259 } 259 }
260 if (!region) { 260
261 ehca_err(pd->device, "bad input values: region=%p", region);
262 ib_mr = ERR_PTR(-EINVAL);
263 goto reg_user_mr_exit0;
264 }
265 if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && 261 if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
266 !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || 262 !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
267 ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && 263 ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
@@ -275,17 +271,10 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
275 ib_mr = ERR_PTR(-EINVAL); 271 ib_mr = ERR_PTR(-EINVAL);
276 goto reg_user_mr_exit0; 272 goto reg_user_mr_exit0;
277 } 273 }
278 if (region->page_size != PAGE_SIZE) {
279 ehca_err(pd->device, "page size not supported, "
280 "region->page_size=%x", region->page_size);
281 ib_mr = ERR_PTR(-EINVAL);
282 goto reg_user_mr_exit0;
283 }
284 274
285 if ((region->length == 0) || 275 if (length == 0 || virt + length < virt) {
286 ((region->virt_base + region->length) < region->virt_base)) {
287 ehca_err(pd->device, "bad input values: length=%lx " 276 ehca_err(pd->device, "bad input values: length=%lx "
288 "virt_base=%lx", region->length, region->virt_base); 277 "virt_base=%lx", length, virt);
289 ib_mr = ERR_PTR(-EINVAL); 278 ib_mr = ERR_PTR(-EINVAL);
290 goto reg_user_mr_exit0; 279 goto reg_user_mr_exit0;
291 } 280 }
@@ -297,40 +286,55 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
297 goto reg_user_mr_exit0; 286 goto reg_user_mr_exit0;
298 } 287 }
299 288
289 e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
290 mr_access_flags);
291 if (IS_ERR(e_mr->umem)) {
292 ib_mr = (void *) e_mr->umem;
293 goto reg_user_mr_exit1;
294 }
295
296 if (e_mr->umem->page_size != PAGE_SIZE) {
297 ehca_err(pd->device, "page size not supported, "
298 "e_mr->umem->page_size=%x", e_mr->umem->page_size);
299 ib_mr = ERR_PTR(-EINVAL);
300 goto reg_user_mr_exit2;
301 }
302
300 /* determine number of MR pages */ 303 /* determine number of MR pages */
301 num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length + 304 num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) /
302 PAGE_SIZE - 1) / PAGE_SIZE); 305 PAGE_SIZE);
303 num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length + 306 num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) /
304 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); 307 EHCA_PAGESIZE);
305 308
306 /* register MR on HCA */ 309 /* register MR on HCA */
307 pginfo.type = EHCA_MR_PGI_USER; 310 pginfo.type = EHCA_MR_PGI_USER;
308 pginfo.num_pages = num_pages_mr; 311 pginfo.num_pages = num_pages_mr;
309 pginfo.num_4k = num_pages_4k; 312 pginfo.num_4k = num_pages_4k;
310 pginfo.region = region; 313 pginfo.region = e_mr->umem;
311 pginfo.next_4k = region->offset / EHCA_PAGESIZE; 314 pginfo.next_4k = e_mr->umem->offset / EHCA_PAGESIZE;
312 pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk, 315 pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk,
313 (&region->chunk_list), 316 (&e_mr->umem->chunk_list),
314 list); 317 list);
315 318
316 ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base, 319 ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd,
317 region->length, mr_access_flags, e_pd, &pginfo, 320 &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
318 &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
319 if (ret) { 321 if (ret) {
320 ib_mr = ERR_PTR(ret); 322 ib_mr = ERR_PTR(ret);
321 goto reg_user_mr_exit1; 323 goto reg_user_mr_exit2;
322 } 324 }
323 325
324 /* successful registration of all pages */ 326 /* successful registration of all pages */
325 return &e_mr->ib.ib_mr; 327 return &e_mr->ib.ib_mr;
326 328
329reg_user_mr_exit2:
330 ib_umem_release(e_mr->umem);
327reg_user_mr_exit1: 331reg_user_mr_exit1:
328 ehca_mr_delete(e_mr); 332 ehca_mr_delete(e_mr);
329reg_user_mr_exit0: 333reg_user_mr_exit0:
330 if (IS_ERR(ib_mr)) 334 if (IS_ERR(ib_mr))
331 ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x" 335 ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x"
332 " udata=%p", 336 " udata=%p",
333 PTR_ERR(ib_mr), pd, region, mr_access_flags, udata); 337 PTR_ERR(ib_mr), pd, mr_access_flags, udata);
334 return ib_mr; 338 return ib_mr;
335} /* end ehca_reg_user_mr() */ 339} /* end ehca_reg_user_mr() */
336 340
@@ -596,6 +600,9 @@ int ehca_dereg_mr(struct ib_mr *mr)
596 goto dereg_mr_exit0; 600 goto dereg_mr_exit0;
597 } 601 }
598 602
603 if (e_mr->umem)
604 ib_umem_release(e_mr->umem);
605
599 /* successful deregistration */ 606 /* successful deregistration */
600 ehca_mr_delete(e_mr); 607 ehca_mr_delete(e_mr);
601 608
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 31e70732e369..bdeef8d4f279 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -31,6 +31,7 @@
31 * SOFTWARE. 31 * SOFTWARE.
32 */ 32 */
33 33
34#include <rdma/ib_umem.h>
34#include <rdma/ib_pack.h> 35#include <rdma/ib_pack.h>
35#include <rdma/ib_smi.h> 36#include <rdma/ib_smi.h>
36 37
@@ -147,6 +148,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
147 mr->mr.offset = 0; 148 mr->mr.offset = 0;
148 mr->mr.access_flags = acc; 149 mr->mr.access_flags = acc;
149 mr->mr.max_segs = num_phys_buf; 150 mr->mr.max_segs = num_phys_buf;
151 mr->umem = NULL;
150 152
151 m = 0; 153 m = 0;
152 n = 0; 154 n = 0;
@@ -170,46 +172,56 @@ bail:
170/** 172/**
171 * ipath_reg_user_mr - register a userspace memory region 173 * ipath_reg_user_mr - register a userspace memory region
172 * @pd: protection domain for this memory region 174 * @pd: protection domain for this memory region
173 * @region: the user memory region 175 * @start: starting userspace address
176 * @length: length of region to register
177 * @virt_addr: virtual address to use (from HCA's point of view)
174 * @mr_access_flags: access flags for this memory region 178 * @mr_access_flags: access flags for this memory region
175 * @udata: unused by the InfiniPath driver 179 * @udata: unused by the InfiniPath driver
176 * 180 *
177 * Returns the memory region on success, otherwise returns an errno. 181 * Returns the memory region on success, otherwise returns an errno.
178 */ 182 */
179struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, 183struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
180 int mr_access_flags, struct ib_udata *udata) 184 u64 virt_addr, int mr_access_flags,
185 struct ib_udata *udata)
181{ 186{
182 struct ipath_mr *mr; 187 struct ipath_mr *mr;
188 struct ib_umem *umem;
183 struct ib_umem_chunk *chunk; 189 struct ib_umem_chunk *chunk;
184 int n, m, i; 190 int n, m, i;
185 struct ib_mr *ret; 191 struct ib_mr *ret;
186 192
187 if (region->length == 0) { 193 if (length == 0) {
188 ret = ERR_PTR(-EINVAL); 194 ret = ERR_PTR(-EINVAL);
189 goto bail; 195 goto bail;
190 } 196 }
191 197
198 umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags);
199 if (IS_ERR(umem))
200 return (void *) umem;
201
192 n = 0; 202 n = 0;
193 list_for_each_entry(chunk, &region->chunk_list, list) 203 list_for_each_entry(chunk, &umem->chunk_list, list)
194 n += chunk->nents; 204 n += chunk->nents;
195 205
196 mr = alloc_mr(n, &to_idev(pd->device)->lk_table); 206 mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
197 if (!mr) { 207 if (!mr) {
198 ret = ERR_PTR(-ENOMEM); 208 ret = ERR_PTR(-ENOMEM);
209 ib_umem_release(umem);
199 goto bail; 210 goto bail;
200 } 211 }
201 212
202 mr->mr.pd = pd; 213 mr->mr.pd = pd;
203 mr->mr.user_base = region->user_base; 214 mr->mr.user_base = start;
204 mr->mr.iova = region->virt_base; 215 mr->mr.iova = virt_addr;
205 mr->mr.length = region->length; 216 mr->mr.length = length;
206 mr->mr.offset = region->offset; 217 mr->mr.offset = umem->offset;
207 mr->mr.access_flags = mr_access_flags; 218 mr->mr.access_flags = mr_access_flags;
208 mr->mr.max_segs = n; 219 mr->mr.max_segs = n;
220 mr->umem = umem;
209 221
210 m = 0; 222 m = 0;
211 n = 0; 223 n = 0;
212 list_for_each_entry(chunk, &region->chunk_list, list) { 224 list_for_each_entry(chunk, &umem->chunk_list, list) {
213 for (i = 0; i < chunk->nents; i++) { 225 for (i = 0; i < chunk->nents; i++) {
214 void *vaddr; 226 void *vaddr;
215 227
@@ -219,7 +231,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
219 goto bail; 231 goto bail;
220 } 232 }
221 mr->mr.map[m]->segs[n].vaddr = vaddr; 233 mr->mr.map[m]->segs[n].vaddr = vaddr;
222 mr->mr.map[m]->segs[n].length = region->page_size; 234 mr->mr.map[m]->segs[n].length = umem->page_size;
223 n++; 235 n++;
224 if (n == IPATH_SEGSZ) { 236 if (n == IPATH_SEGSZ) {
225 m++; 237 m++;
@@ -253,6 +265,10 @@ int ipath_dereg_mr(struct ib_mr *ibmr)
253 i--; 265 i--;
254 kfree(mr->mr.map[i]); 266 kfree(mr->mr.map[i]);
255 } 267 }
268
269 if (mr->umem)
270 ib_umem_release(mr->umem);
271
256 kfree(mr); 272 kfree(mr);
257 return 0; 273 return 0;
258} 274}
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 7064fc222727..088b837ebea8 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -251,6 +251,7 @@ struct ipath_sge {
251/* Memory region */ 251/* Memory region */
252struct ipath_mr { 252struct ipath_mr {
253 struct ib_mr ibmr; 253 struct ib_mr ibmr;
254 struct ib_umem *umem;
254 struct ipath_mregion mr; /* must be last */ 255 struct ipath_mregion mr; /* must be last */
255}; 256};
256 257
@@ -751,8 +752,8 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
751 struct ib_phys_buf *buffer_list, 752 struct ib_phys_buf *buffer_list,
752 int num_phys_buf, int acc, u64 *iova_start); 753 int num_phys_buf, int acc, u64 *iova_start);
753 754
754struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, 755struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
755 int mr_access_flags, 756 u64 virt_addr, int mr_access_flags,
756 struct ib_udata *udata); 757 struct ib_udata *udata);
757 758
758int ipath_dereg_mr(struct ib_mr *ibmr); 759int ipath_dereg_mr(struct ib_mr *ibmr);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 1c05486c3c68..6bcde1cb9688 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -37,6 +37,7 @@
37 */ 37 */
38 38
39#include <rdma/ib_smi.h> 39#include <rdma/ib_smi.h>
40#include <rdma/ib_umem.h>
40#include <rdma/ib_user_verbs.h> 41#include <rdma/ib_user_verbs.h>
41#include <linux/mm.h> 42#include <linux/mm.h>
42 43
@@ -908,6 +909,8 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
908 return ERR_PTR(err); 909 return ERR_PTR(err);
909 } 910 }
910 911
912 mr->umem = NULL;
913
911 return &mr->ibmr; 914 return &mr->ibmr;
912} 915}
913 916
@@ -1003,11 +1006,13 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
1003 } 1006 }
1004 1007
1005 kfree(page_list); 1008 kfree(page_list);
1009 mr->umem = NULL;
1010
1006 return &mr->ibmr; 1011 return &mr->ibmr;
1007} 1012}
1008 1013
1009static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, 1014static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1010 int acc, struct ib_udata *udata) 1015 u64 virt, int acc, struct ib_udata *udata)
1011{ 1016{
1012 struct mthca_dev *dev = to_mdev(pd->device); 1017 struct mthca_dev *dev = to_mdev(pd->device);
1013 struct ib_umem_chunk *chunk; 1018 struct ib_umem_chunk *chunk;
@@ -1018,20 +1023,26 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
1018 int err = 0; 1023 int err = 0;
1019 int write_mtt_size; 1024 int write_mtt_size;
1020 1025
1021 shift = ffs(region->page_size) - 1;
1022
1023 mr = kmalloc(sizeof *mr, GFP_KERNEL); 1026 mr = kmalloc(sizeof *mr, GFP_KERNEL);
1024 if (!mr) 1027 if (!mr)
1025 return ERR_PTR(-ENOMEM); 1028 return ERR_PTR(-ENOMEM);
1026 1029
1030 mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
1031 if (IS_ERR(mr->umem)) {
1032 err = PTR_ERR(mr->umem);
1033 goto err;
1034 }
1035
1036 shift = ffs(mr->umem->page_size) - 1;
1037
1027 n = 0; 1038 n = 0;
1028 list_for_each_entry(chunk, &region->chunk_list, list) 1039 list_for_each_entry(chunk, &mr->umem->chunk_list, list)
1029 n += chunk->nents; 1040 n += chunk->nents;
1030 1041
1031 mr->mtt = mthca_alloc_mtt(dev, n); 1042 mr->mtt = mthca_alloc_mtt(dev, n);
1032 if (IS_ERR(mr->mtt)) { 1043 if (IS_ERR(mr->mtt)) {
1033 err = PTR_ERR(mr->mtt); 1044 err = PTR_ERR(mr->mtt);
1034 goto err; 1045 goto err_umem;
1035 } 1046 }
1036 1047
1037 pages = (u64 *) __get_free_page(GFP_KERNEL); 1048 pages = (u64 *) __get_free_page(GFP_KERNEL);
@@ -1044,12 +1055,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
1044 1055
1045 write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages)); 1056 write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
1046 1057
1047 list_for_each_entry(chunk, &region->chunk_list, list) 1058 list_for_each_entry(chunk, &mr->umem->chunk_list, list)
1048 for (j = 0; j < chunk->nmap; ++j) { 1059 for (j = 0; j < chunk->nmap; ++j) {
1049 len = sg_dma_len(&chunk->page_list[j]) >> shift; 1060 len = sg_dma_len(&chunk->page_list[j]) >> shift;
1050 for (k = 0; k < len; ++k) { 1061 for (k = 0; k < len; ++k) {
1051 pages[i++] = sg_dma_address(&chunk->page_list[j]) + 1062 pages[i++] = sg_dma_address(&chunk->page_list[j]) +
1052 region->page_size * k; 1063 mr->umem->page_size * k;
1053 /* 1064 /*
1054 * Be friendly to write_mtt and pass it chunks 1065 * Be friendly to write_mtt and pass it chunks
1055 * of appropriate size. 1066 * of appropriate size.
@@ -1071,8 +1082,8 @@ mtt_done:
1071 if (err) 1082 if (err)
1072 goto err_mtt; 1083 goto err_mtt;
1073 1084
1074 err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base, 1085 err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length,
1075 region->length, convert_access(acc), mr); 1086 convert_access(acc), mr);
1076 1087
1077 if (err) 1088 if (err)
1078 goto err_mtt; 1089 goto err_mtt;
@@ -1082,6 +1093,9 @@ mtt_done:
1082err_mtt: 1093err_mtt:
1083 mthca_free_mtt(dev, mr->mtt); 1094 mthca_free_mtt(dev, mr->mtt);
1084 1095
1096err_umem:
1097 ib_umem_release(mr->umem);
1098
1085err: 1099err:
1086 kfree(mr); 1100 kfree(mr);
1087 return ERR_PTR(err); 1101 return ERR_PTR(err);
@@ -1090,8 +1104,12 @@ err:
1090static int mthca_dereg_mr(struct ib_mr *mr) 1104static int mthca_dereg_mr(struct ib_mr *mr)
1091{ 1105{
1092 struct mthca_mr *mmr = to_mmr(mr); 1106 struct mthca_mr *mmr = to_mmr(mr);
1107
1093 mthca_free_mr(to_mdev(mr->device), mmr); 1108 mthca_free_mr(to_mdev(mr->device), mmr);
1109 if (mmr->umem)
1110 ib_umem_release(mmr->umem);
1094 kfree(mmr); 1111 kfree(mmr);
1112
1095 return 0; 1113 return 0;
1096} 1114}
1097 1115
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 1d266ac2e094..262616c8ebb6 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -73,6 +73,7 @@ struct mthca_mtt;
73 73
74struct mthca_mr { 74struct mthca_mr {
75 struct ib_mr ibmr; 75 struct ib_mr ibmr;
76 struct ib_umem *umem;
76 struct mthca_mtt *mtt; 77 struct mthca_mtt *mtt;
77}; 78};
78 79