aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRoland Dreier <rolandd@cisco.com>2005-07-07 20:57:14 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2005-07-07 21:23:48 -0400
commiteb8ffbfed50e7945c024a80e3688d5beffa3b641 (patch)
tree6b32628db0560e2f093efc50e7a0630b9bb678d0
parentbc38a6abdd5a50e007d0fcd9b9b6280132b79e62 (diff)
[PATCH] IB uverbs: memory pinning implementation
Add support for pinning userspace memory regions and returning a list of pages in the region. This includes tracking pinned memory against locked_vm and preventing unprivileged users from exceeding RLIMIT_MEMLOCK. Signed-off-by: Roland Dreier <rolandd@cisco.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--drivers/infiniband/core/uverbs_mem.c221
1 files changed, 221 insertions, 0 deletions
diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/uverbs_mem.c
new file mode 100644
index 000000000000..ed550f6595bd
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_mem.c
@@ -0,0 +1,221 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $
34 */
35
36#include <linux/mm.h>
37#include <linux/dma-mapping.h>
38
39#include "uverbs.h"
40
41struct ib_umem_account_work {
42 struct work_struct work;
43 struct mm_struct *mm;
44 unsigned long diff;
45};
46
47
48static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
49{
50 struct ib_umem_chunk *chunk, *tmp;
51 int i;
52
53 list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
54 dma_unmap_sg(dev->dma_device, chunk->page_list,
55 chunk->nents, DMA_BIDIRECTIONAL);
56 for (i = 0; i < chunk->nents; ++i) {
57 if (umem->writable && dirty)
58 set_page_dirty_lock(chunk->page_list[i].page);
59 put_page(chunk->page_list[i].page);
60 }
61
62 kfree(chunk);
63 }
64}
65
66int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
67 void *addr, size_t size, int write)
68{
69 struct page **page_list;
70 struct ib_umem_chunk *chunk;
71 unsigned long locked;
72 unsigned long lock_limit;
73 unsigned long cur_base;
74 unsigned long npages;
75 int ret = 0;
76 int off;
77 int i;
78
79 if (!can_do_mlock())
80 return -EPERM;
81
82 page_list = (struct page **) __get_free_page(GFP_KERNEL);
83 if (!page_list)
84 return -ENOMEM;
85
86 mem->user_base = (unsigned long) addr;
87 mem->length = size;
88 mem->offset = (unsigned long) addr & ~PAGE_MASK;
89 mem->page_size = PAGE_SIZE;
90 mem->writable = write;
91
92 INIT_LIST_HEAD(&mem->chunk_list);
93
94 npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT;
95
96 down_write(&current->mm->mmap_sem);
97
98 locked = npages + current->mm->locked_vm;
99 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
100
101 if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
102 ret = -ENOMEM;
103 goto out;
104 }
105
106 cur_base = (unsigned long) addr & PAGE_MASK;
107
108 while (npages) {
109 ret = get_user_pages(current, current->mm, cur_base,
110 min_t(int, npages,
111 PAGE_SIZE / sizeof (struct page *)),
112 1, !write, page_list, NULL);
113
114 if (ret < 0)
115 goto out;
116
117 cur_base += ret * PAGE_SIZE;
118 npages -= ret;
119
120 off = 0;
121
122 while (ret) {
123 chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
124 min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
125 GFP_KERNEL);
126 if (!chunk) {
127 ret = -ENOMEM;
128 goto out;
129 }
130
131 chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
132 for (i = 0; i < chunk->nents; ++i) {
133 chunk->page_list[i].page = page_list[i + off];
134 chunk->page_list[i].offset = 0;
135 chunk->page_list[i].length = PAGE_SIZE;
136 }
137
138 chunk->nmap = dma_map_sg(dev->dma_device,
139 &chunk->page_list[0],
140 chunk->nents,
141 DMA_BIDIRECTIONAL);
142 if (chunk->nmap <= 0) {
143 for (i = 0; i < chunk->nents; ++i)
144 put_page(chunk->page_list[i].page);
145 kfree(chunk);
146
147 ret = -ENOMEM;
148 goto out;
149 }
150
151 ret -= chunk->nents;
152 off += chunk->nents;
153 list_add_tail(&chunk->list, &mem->chunk_list);
154 }
155
156 ret = 0;
157 }
158
159out:
160 if (ret < 0)
161 __ib_umem_release(dev, mem, 0);
162 else
163 current->mm->locked_vm = locked;
164
165 up_write(&current->mm->mmap_sem);
166 free_page((unsigned long) page_list);
167
168 return ret;
169}
170
171void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
172{
173 __ib_umem_release(dev, umem, 1);
174
175 down_write(&current->mm->mmap_sem);
176 current->mm->locked_vm -=
177 PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
178 up_write(&current->mm->mmap_sem);
179}
180
181static void ib_umem_account(void *work_ptr)
182{
183 struct ib_umem_account_work *work = work_ptr;
184
185 down_write(&work->mm->mmap_sem);
186 work->mm->locked_vm -= work->diff;
187 up_write(&work->mm->mmap_sem);
188 mmput(work->mm);
189 kfree(work);
190}
191
192void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem)
193{
194 struct ib_umem_account_work *work;
195 struct mm_struct *mm;
196
197 __ib_umem_release(dev, umem, 1);
198
199 mm = get_task_mm(current);
200 if (!mm)
201 return;
202
203 /*
204 * We may be called with the mm's mmap_sem already held. This
205 * can happen when a userspace munmap() is the call that drops
206 * the last reference to our file and calls our release
207 * method. If there are memory regions to destroy, we'll end
208 * up here and not be able to take the mmap_sem. Therefore we
209 * defer the vm_locked accounting to the system workqueue.
210 */
211
212 work = kmalloc(sizeof *work, GFP_KERNEL);
213 if (!work)
214 return;
215
216 INIT_WORK(&work->work, ib_umem_account, work);
217 work->mm = mm;
218 work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
219
220 schedule_work(&work->work);
221}