author     Preetham Chandru R <pchandru@nvidia.com>              2018-09-20 09:07:13 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>   2018-11-29 17:36:30 -0500
commit     d6278955f66dbfb5f992f9298e32ca86d66c6259 (patch)
tree       e09e7fab2584398e8d6b370cea424f28343a2014 /drivers/gpu/nvgpu/os
parent     b3bda98fbdd8673dc31073bddee09386cfc3622c (diff)
gpu: nvgpu: RDMA implementation
This change adds RDMA support for Tegra iGPU.

1. The CUDA process allocates the memory and passes the VA and size to the custom kernel driver.
2. The custom kernel driver maps the user-allocated buffer and does the DMA to/from it.
3. Only iGPU + cudaHostAlloc sysmem is supported.
4. Works only for a given process.
5. The address must be sysmem page aligned and the size must be a multiple of the sysmem page size.
6. The custom kernel driver must register a free_callback when the get_pages() function is called.

Bug 200438879

Signed-off-by: Preetham Chandru R <pchandru@nvidia.com>
Change-Id: I43ec45734eb46d30341d0701550206c16e051106
Reviewed-on: https://git-master.nvidia.com/r/1953780
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
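For reference, below is a minimal sketch (not part of this change) of how a custom kernel driver might use the API added by this patch. The rdma_ctx structure, rdma_free_callback() and rdma_pin_and_map() are hypothetical names used only for illustration; the nvidia_p2p_* calls and the header are the ones introduced in the diff below.

#include <linux/dma-mapping.h>
#include <nvgpu/linux/nvidia_p2p.h>

/* Hypothetical per-buffer context kept by the custom kernel driver. */
struct rdma_ctx {
	struct nvidia_p2p_page_table *page_table;
	struct nvidia_p2p_dma_mapping *dma_mapping;
};

/*
 * Invoked via the MMU notifier if the pinned VA range goes away
 * (process exit or unmap); release the mapping and the page table.
 */
static void rdma_free_callback(void *data)
{
	struct rdma_ctx *ctx = data;

	nvidia_p2p_free_dma_mapping(ctx->dma_mapping);
	nvidia_p2p_free_page_table(ctx->page_table);
}

/* Pin a page-aligned user VA range and map it for DMA. */
static int rdma_pin_and_map(struct device *dev, struct rdma_ctx *ctx,
		u64 vaddr, u64 size)
{
	int ret;

	ret = nvidia_p2p_get_pages(vaddr, size, &ctx->page_table,
			rdma_free_callback, ctx);
	if (ret)
		return ret;

	ret = nvidia_p2p_map_pages(dev, ctx->page_table,
			&ctx->dma_mapping, DMA_BIDIRECTIONAL);
	if (ret)
		nvidia_p2p_put_pages(ctx->page_table);

	return ret;
}

In the normal (non-callback) teardown path the driver would call nvidia_p2p_unmap_pages(), nvidia_p2p_put_pages() and nvidia_p2p_free_page_table() in that order.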
Diffstat (limited to 'drivers/gpu/nvgpu/os')
-rw-r--r--  drivers/gpu/nvgpu/os/linux/nvidia_p2p.c  296
1 file changed, 296 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/os/linux/nvidia_p2p.c b/drivers/gpu/nvgpu/os/linux/nvidia_p2p.c
new file mode 100644
index 00000000..dcd37bee
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/nvidia_p2p.c
@@ -0,0 +1,296 @@
/*
 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <linux/slab.h>
#include <nvgpu/linux/nvidia_p2p.h>

static void nvidia_p2p_mn_release(struct mmu_notifier *mn,
		struct mm_struct *mm)
{
	struct nvidia_p2p_page_table *page_table = container_of(mn,
				struct nvidia_p2p_page_table,
				mn);

	page_table->free_callback(page_table->data);
}

static void nvidia_p2p_mn_invl_range_start(struct mmu_notifier *mn,
		struct mm_struct *mm, unsigned long start, unsigned long end)
{
	struct nvidia_p2p_page_table *page_table = container_of(mn,
				struct nvidia_p2p_page_table,
				mn);
	u64 vaddr = 0;
	u64 size = 0;

	vaddr = page_table->vaddr;
	size = page_table->size;

	if (vaddr >= start && vaddr <= end) {
		mmu_notifier_unregister_no_release(&page_table->mn, page_table->mm);
		page_table->free_callback(page_table->data);
	}
}

static struct mmu_notifier_ops nvidia_p2p_mmu_ops = {
	.release = nvidia_p2p_mn_release,
	.invalidate_range_start = nvidia_p2p_mn_invl_range_start,
};

int nvidia_p2p_get_pages(u64 vaddr, u64 size,
		struct nvidia_p2p_page_table **page_table,
		void (*free_callback)(void *data), void *data)
{
	int ret = 0;
	int user_pages = 0;
	int locked = 0;
	int nr_pages = size >> PAGE_SHIFT;
	struct page **pages;

	if (nr_pages <= 0) {
		return -EINVAL;
	}

	*page_table = kzalloc(sizeof(**page_table), GFP_KERNEL);
	if (!*page_table) {
		return -ENOMEM;
	}

	pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages) {
		ret = -ENOMEM;
		goto free_page_table;
	}
	down_read(&current->mm->mmap_sem);
	locked = 1;
	user_pages = get_user_pages_locked(vaddr & PAGE_MASK, nr_pages,
			FOLL_WRITE | FOLL_FORCE,
			pages, &locked);
	up_read(&current->mm->mmap_sem);
	if (user_pages != nr_pages) {
		ret = user_pages < 0 ? user_pages : -ENOMEM;
		goto free_pages;
	}

	(*page_table)->pages = pages;
	(*page_table)->entries = user_pages;
	(*page_table)->page_size = NVIDIA_P2P_PAGE_SIZE_4KB;
	(*page_table)->size = size;

	(*page_table)->mn.ops = &nvidia_p2p_mmu_ops;
	(*page_table)->mm = current->mm;
	(*page_table)->free_callback = free_callback;
	(*page_table)->data = data;
	(*page_table)->vaddr = vaddr;
	nvgpu_mutex_init(&(*page_table)->lock);
	(*page_table)->mapped = NVIDIA_P2P_PINNED;

	ret = mmu_notifier_register(&(*page_table)->mn, (*page_table)->mm);
	if (ret) {
		goto free_pages;
	}

	return 0;
free_pages:
	while (--user_pages >= 0) {
		put_page(pages[user_pages]);
	}
	kfree(pages);
free_page_table:
	kfree(*page_table);
	*page_table = NULL;
	return ret;
}
EXPORT_SYMBOL(nvidia_p2p_get_pages);

int nvidia_p2p_put_pages(struct nvidia_p2p_page_table *page_table)
{
	if (!page_table) {
		return -EINVAL;
	}

	mmu_notifier_unregister(&page_table->mn, page_table->mm);

	return 0;
}
EXPORT_SYMBOL(nvidia_p2p_put_pages);

int nvidia_p2p_free_page_table(struct nvidia_p2p_page_table *page_table)
{
	int user_pages = 0;
	struct page **pages = NULL;

	if (!page_table) {
		return 0;
	}

	nvgpu_mutex_acquire(&page_table->lock);

	if (page_table->mapped & NVIDIA_P2P_MAPPED) {
		WARN(1, "Attempting to free unmapped pages");
	}

	if (page_table->mapped & NVIDIA_P2P_PINNED) {
		pages = page_table->pages;
		user_pages = page_table->entries;

		while (--user_pages >= 0) {
			put_page(pages[user_pages]);
		}

		kfree(pages);
		page_table->mapped &= (u32)~NVIDIA_P2P_PINNED;
	}

	nvgpu_mutex_release(&page_table->lock);

	return 0;
}
EXPORT_SYMBOL(nvidia_p2p_free_page_table);

int nvidia_p2p_map_pages(struct device *dev,
		struct nvidia_p2p_page_table *page_table,
		struct nvidia_p2p_dma_mapping **dma_mapping,
		enum dma_data_direction direction)
{
	struct sg_table *sgt = NULL;
	struct scatterlist *sg;
	struct page **pages = NULL;
	u32 nr_pages = 0;
	int ret = 0;
	int i, count;

	if (!page_table) {
		return -EINVAL;
	}

	nvgpu_mutex_acquire(&page_table->lock);

	pages = page_table->pages;
	nr_pages = page_table->entries;
	if (nr_pages <= 0) {
		nvgpu_mutex_release(&page_table->lock);
		return -EINVAL;
	}

	*dma_mapping = kzalloc(sizeof(**dma_mapping), GFP_KERNEL);
	if (!*dma_mapping) {
		nvgpu_mutex_release(&page_table->lock);
		return -ENOMEM;
	}
	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
	if (!sgt) {
		ret = -ENOMEM;
		goto free_dma_mapping;
	}
	ret = sg_alloc_table_from_pages(sgt, pages,
			nr_pages, 0, page_table->size, GFP_KERNEL);
	if (ret) {
		goto free_sgt;
	}
	(*dma_mapping)->sgt = sgt;
	(*dma_mapping)->dev = dev;
	(*dma_mapping)->direction = direction;
	(*dma_mapping)->page_table = page_table;

	count = dma_map_sg(dev, sgt->sgl, sgt->nents, direction);
	if (count < 1) {
		goto free_sg_table;
	}

	(*dma_mapping)->entries = count;

	(*dma_mapping)->hw_address = kcalloc(count, sizeof(u64), GFP_KERNEL);
	if (!((*dma_mapping)->hw_address)) {
		ret = -ENOMEM;
		goto unmap_sg;
	}
	(*dma_mapping)->hw_len = kcalloc(count, sizeof(u64), GFP_KERNEL);
	if (!((*dma_mapping)->hw_len)) {
		ret = -ENOMEM;
		goto free_hw_address;
	}

	for_each_sg(sgt->sgl, sg, count, i) {
		(*dma_mapping)->hw_address[i] = sg_dma_address(sg);
		(*dma_mapping)->hw_len[i] = sg_dma_len(sg);
	}
	(*dma_mapping)->page_table->mapped |= NVIDIA_P2P_MAPPED;
	nvgpu_mutex_release(&page_table->lock);

	return 0;
free_hw_address:
	kfree((*dma_mapping)->hw_address);
unmap_sg:
	dma_unmap_sg(dev, sgt->sgl,
			sgt->nents, direction);
free_sg_table:
	sg_free_table(sgt);
free_sgt:
	kfree(sgt);
free_dma_mapping:
	kfree(*dma_mapping);
	*dma_mapping = NULL;
	nvgpu_mutex_release(&page_table->lock);

	return ret;
}
EXPORT_SYMBOL(nvidia_p2p_map_pages);

int nvidia_p2p_unmap_pages(struct nvidia_p2p_dma_mapping *dma_mapping)
{
	struct nvidia_p2p_page_table *page_table = NULL;

	if (!dma_mapping) {
		return -EINVAL;
	}

	page_table = dma_mapping->page_table;
	if (!page_table) {
		return -EFAULT;
	}

	nvgpu_mutex_acquire(&page_table->lock);
	if (page_table->mapped & NVIDIA_P2P_MAPPED) {
		kfree(dma_mapping->hw_len);
		kfree(dma_mapping->hw_address);
		if (dma_mapping->entries)
			dma_unmap_sg(dma_mapping->dev,
				dma_mapping->sgt->sgl,
				dma_mapping->sgt->nents,
				dma_mapping->direction);
		sg_free_table(dma_mapping->sgt);
		kfree(dma_mapping->sgt);
		kfree(dma_mapping);
		page_table->mapped &= (u32)~NVIDIA_P2P_MAPPED;
	}
	nvgpu_mutex_release(&page_table->lock);

	return 0;
}
EXPORT_SYMBOL(nvidia_p2p_unmap_pages);

int nvidia_p2p_free_dma_mapping(struct nvidia_p2p_dma_mapping *dma_mapping)
{
	return nvidia_p2p_unmap_pages(dma_mapping);
}
EXPORT_SYMBOL(nvidia_p2p_free_dma_mapping);