author      Preetham Chandru R <pchandru@nvidia.com>            2018-09-20 09:07:13 -0400
committer   mobile promotions <svcmobile_promotions@nvidia.com> 2018-11-29 17:36:30 -0500
commit      d6278955f66dbfb5f992f9298e32ca86d66c6259 (patch)
tree        e09e7fab2584398e8d6b370cea424f28343a2014 /drivers
parent      b3bda98fbdd8673dc31073bddee09386cfc3622c (diff)
gpu: nvgpu: RDMA implementation

This change adds RDMA support for the Tegra iGPU:

1. A CUDA process allocates the memory and passes the VA and size to the custom kernel driver.
2. The custom kernel driver maps the user-allocated buffer and performs DMA to/from it.
3. Only iGPU + cudaHostAlloc sysmem is supported.
4. Works only within a given process.
5. The address must be sysmem page aligned and the size must be a multiple of the sysmem page size.
6. The custom kernel driver must register a free_callback when nvidia_p2p_get_pages() is called.

Bug 200438879

Signed-off-by: Preetham Chandru R <pchandru@nvidia.com>
Change-Id: I43ec45734eb46d30341d0701550206c16e051106
Reviewed-on: https://git-master.nvidia.com/r/1953780
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
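As a rough illustration of the flow above, a hypothetical third-party kernel driver might use the new interface as sketched below. Only the nvidia_p2p_*() calls come from this change; my_rdma_ctx, my_free_callback and my_pin_and_map are illustrative names, not part of the patch.

    struct my_rdma_ctx {
            struct nvidia_p2p_page_table *page_table;
            struct nvidia_p2p_dma_mapping *dma_mapping;
    };

    /* Invoked by nvgpu if the pinned pages go away underneath the driver. */
    static void my_free_callback(void *data)
    {
            struct my_rdma_ctx *ctx = data;

            nvidia_p2p_free_dma_mapping(ctx->dma_mapping);
            nvidia_p2p_free_page_table(ctx->page_table);
    }

    static int my_pin_and_map(struct device *dev, u64 vaddr, u64 size,
                    struct my_rdma_ctx *ctx)
    {
            int err;

            /* vaddr/size describe the cudaHostAlloc() sysmem buffer passed in
             * from the CUDA process; both must be page aligned/sized. */
            err = nvidia_p2p_get_pages(vaddr, size, &ctx->page_table,
                            my_free_callback, ctx);
            if (err)
                    return err;

            err = nvidia_p2p_map_pages(dev, ctx->page_table, &ctx->dma_mapping,
                            DMA_BIDIRECTIONAL);
            if (err) {
                    nvidia_p2p_put_pages(ctx->page_table);
                    return err;
            }

            /*
             * DMA can now target ctx->dma_mapping->hw_address[i] for
             * ctx->dma_mapping->hw_len[i] bytes, for each of
             * ctx->dma_mapping->entries segments.
             */
            return 0;
    }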
Diffstat (limited to 'drivers')
-rw-r--r--   drivers/gpu/nvgpu/Kconfig                              7
-rw-r--r--   drivers/gpu/nvgpu/Makefile                             3
-rw-r--r--   drivers/gpu/nvgpu/include/nvgpu/linux/nvidia_p2p.h   196
-rw-r--r--   drivers/gpu/nvgpu/os/linux/nvidia_p2p.c              296
4 files changed, 502 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/Kconfig b/drivers/gpu/nvgpu/Kconfig
index bbd5f6f0..7dba61a3 100644
--- a/drivers/gpu/nvgpu/Kconfig
+++ b/drivers/gpu/nvgpu/Kconfig
@@ -40,6 +40,13 @@ config GK20A_PM_QOS
           Enable support to pass PM_QOS constraints to devfreq based
           scaling.
 
+config GK20A_RDMA
+        bool "Support GK20A RDMA"
+        depends on GK20A && MMU_NOTIFIER
+        default n
+        help
+          Say Y here to enable GK20A RDMA features.
+
 config NVGPU_TRACK_MEM_USAGE
         bool "Track the usage of system memory in nvgpu"
         depends on GK20A
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index e0fd70e6..fdfaf092 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -176,6 +176,9 @@ nvgpu-$(CONFIG_NVGPU_SUPPORT_CDE) += \
         os/linux/cde_gm20b.o \
         os/linux/cde_gp10b.o
 
+nvgpu-$(CONFIG_GK20A_RDMA) += \
+        os/linux/nvidia_p2p.o
+
 ifeq ($(CONFIG_DEBUG_FS),y)
 nvgpu-$(CONFIG_NVGPU_SUPPORT_CDE) += \
         os/linux/debug_cde.o
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/nvidia_p2p.h b/drivers/gpu/nvgpu/include/nvgpu/linux/nvidia_p2p.h
new file mode 100644
index 00000000..c1dee7cf
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/nvidia_p2p.h
@@ -0,0 +1,196 @@
/*
 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef __NVIDIA_P2P_H__
#define __NVIDIA_P2P_H__

#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/mutex.h>
#include <nvgpu/linux/lock.h>

#define NVIDIA_P2P_UNINITIALIZED        0x0
#define NVIDIA_P2P_PINNED               0x1
#define NVIDIA_P2P_MAPPED               0x2

enum nvidia_p2p_page_size_type {
        NVIDIA_P2P_PAGE_SIZE_4KB = 0,
        NVIDIA_P2P_PAGE_SIZE_64KB,
        NVIDIA_P2P_PAGE_SIZE_128KB,
        NVIDIA_P2P_PAGE_SIZE_COUNT
};

struct nvidia_p2p_page_table {
        u32 page_size;
        u64 size;
        u32 entries;
        struct page **pages;

        u64 vaddr;
        u32 mapped;

        struct mm_struct *mm;
        struct mmu_notifier mn;
        struct nvgpu_mutex lock;
        void (*free_callback)(void *data);
        void *data;
};

struct nvidia_p2p_dma_mapping {
        dma_addr_t *hw_address;
        u32 *hw_len;
        u32 entries;

        struct sg_table *sgt;
        struct device *dev;
        struct nvidia_p2p_page_table *page_table;
        enum dma_data_direction direction;
};

/*
 * @brief
 *      Make the pages underlying a range of GPU virtual memory
 *      accessible to a third-party device.
 *
 * @param[in]   vaddr
 *      A GPU Virtual Address
 * @param[in]   size
 *      The size of the requested mapping.
 *      Size must be a multiple of the page size.
 * @param[out]  **page_table
 *      A pointer to struct nvidia_p2p_page_table
 * @param[in]   free_callback
 *      A non-NULL pointer to the function to be invoked when the pages
 *      underlying the virtual address range are freed implicitly.
 * @param[in]   data
 *      A non-NULL opaque pointer to private data to be passed to the
 *      callback function.
 *
 * @return
 *      0 upon successful completion.
 *      Negative number on any error
 */
int nvidia_p2p_get_pages(u64 vaddr, u64 size,
                struct nvidia_p2p_page_table **page_table,
                void (*free_callback)(void *data), void *data);

/*
 * @brief
 *      Release the pages previously made accessible to
 *      a third-party device.
 *
 * @param[in]   *page_table
 *      A pointer to struct nvidia_p2p_page_table
 *
 * @return
 *      0 upon successful completion.
 *      -ENOMEM if the driver failed to allocate memory or if
 *      insufficient resources were available to complete the operation.
 *      Negative number on any other error
 */
int nvidia_p2p_put_pages(struct nvidia_p2p_page_table *page_table);

/*
 * @brief
 *      Release the pages previously made accessible to
 *      a third-party device. This is called during the
 *      execution of the free_callback().
 *
 * @param[in]   *page_table
 *      A pointer to struct nvidia_p2p_page_table
 *
 * @return
 *      0 upon successful completion.
 *      -ENOMEM if the driver failed to allocate memory or if
 *      insufficient resources were available to complete the operation.
 *      Negative number on any other error
 */
int nvidia_p2p_free_page_table(struct nvidia_p2p_page_table *page_table);

/*
 * @brief
 *      Map the pages retrieved using nvidia_p2p_get_pages and
 *      pass the DMA addresses to a third-party device.
 *
 * @param[in]   *dev
 *      The peer device that needs to DMA to/from the
 *      mapping.
 * @param[in]   *page_table
 *      A pointer to struct nvidia_p2p_page_table
 * @param[out]  **map
 *      A pointer to struct nvidia_p2p_dma_mapping.
 *      The DMA mapping containing the DMA addresses to use.
 * @param[in]   direction
 *      DMA direction
 *
 * @return
 *      0 upon successful completion.
 *      Negative number on any error
 */
int nvidia_p2p_map_pages(struct device *dev,
                struct nvidia_p2p_page_table *page_table,
                struct nvidia_p2p_dma_mapping **map,
                enum dma_data_direction direction);

/*
 * @brief
 *      Unmap the pages previously mapped using nvidia_p2p_map_pages.
 *
 * @param[in]   *map
 *      A pointer to struct nvidia_p2p_dma_mapping.
 *      The DMA mapping containing the DMA addresses to use.
 *
 * @return
 *      0 upon successful completion.
 *      Negative number on any error
 */
int nvidia_p2p_unmap_pages(struct nvidia_p2p_dma_mapping *map);

/*
 * @brief
 *      Unmap the pages previously mapped using nvidia_p2p_map_pages.
 *      This is called during the execution of the free_callback().
 *
 * @param[in]   *map
 *      A pointer to struct nvidia_p2p_dma_mapping.
 *      The DMA mapping containing the DMA addresses to use.
 *
 * @return
 *      0 upon successful completion.
 *      Negative number on any error
 */
int nvidia_p2p_free_dma_mapping(struct nvidia_p2p_dma_mapping *dma_mapping);

#endif
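For the normal (non-callback) teardown path, the declarations above suggest the ordering sketched below; this is a hedged illustration reusing the hypothetical my_rdma_ctx from the earlier sketch, while the *_free_*() variants stay reserved for the free_callback path as documented above.

    static void my_unmap_and_unpin(struct my_rdma_ctx *ctx)
    {
            /* Undo nvidia_p2p_map_pages(): DMA-unmap and drop the sg table. */
            nvidia_p2p_unmap_pages(ctx->dma_mapping);

            /*
             * Undo nvidia_p2p_get_pages(): unregisters the MMU notifier, which
             * runs the registered free_callback to release the pinned pages.
             */
            nvidia_p2p_put_pages(ctx->page_table);
    }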
diff --git a/drivers/gpu/nvgpu/os/linux/nvidia_p2p.c b/drivers/gpu/nvgpu/os/linux/nvidia_p2p.c
new file mode 100644
index 00000000..dcd37bee
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/nvidia_p2p.c
@@ -0,0 +1,296 @@
/*
 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <linux/slab.h>
#include <nvgpu/linux/nvidia_p2p.h>

/*
 * MMU notifier release: the address space is going away, so notify the
 * owner of the pinned range via its registered free_callback.
 */
static void nvidia_p2p_mn_release(struct mmu_notifier *mn,
                struct mm_struct *mm)
{
        struct nvidia_p2p_page_table *page_table = container_of(mn,
                        struct nvidia_p2p_page_table,
                        mn);

        page_table->free_callback(page_table->data);
}

/*
 * MMU notifier invalidate_range_start: if the invalidated range covers the
 * pinned buffer, drop the notifier and notify the owner.
 */
static void nvidia_p2p_mn_invl_range_start(struct mmu_notifier *mn,
                struct mm_struct *mm, unsigned long start, unsigned long end)
{
        struct nvidia_p2p_page_table *page_table = container_of(mn,
                        struct nvidia_p2p_page_table,
                        mn);
        u64 vaddr = 0;
        u64 size = 0;

        vaddr = page_table->vaddr;
        size = page_table->size;

        if (vaddr >= start && vaddr <= end) {
                mmu_notifier_unregister_no_release(&page_table->mn,
                                page_table->mm);
                page_table->free_callback(page_table->data);
        }
}

static struct mmu_notifier_ops nvidia_p2p_mmu_ops = {
        .release                = nvidia_p2p_mn_release,
        .invalidate_range_start = nvidia_p2p_mn_invl_range_start,
};

int nvidia_p2p_get_pages(u64 vaddr, u64 size,
                struct nvidia_p2p_page_table **page_table,
                void (*free_callback)(void *data), void *data)
{
        int ret = 0;
        int user_pages = 0;
        int locked = 0;
        int nr_pages = size >> PAGE_SHIFT;
        struct page **pages;

        if (nr_pages <= 0) {
                return -EINVAL;
        }

        *page_table = kzalloc(sizeof(**page_table), GFP_KERNEL);
        if (!*page_table) {
                return -ENOMEM;
        }

        pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
        if (!pages) {
                ret = -ENOMEM;
                goto free_page_table;
        }

        /* Pin the user pages backing [vaddr, vaddr + size). */
        down_read(&current->mm->mmap_sem);
        locked = 1;
        user_pages = get_user_pages_locked(vaddr & PAGE_MASK, nr_pages,
                        FOLL_WRITE | FOLL_FORCE,
                        pages, &locked);
        /* get_user_pages_locked() may drop mmap_sem and clear 'locked'. */
        if (locked)
                up_read(&current->mm->mmap_sem);
        if (user_pages != nr_pages) {
                ret = user_pages < 0 ? user_pages : -ENOMEM;
                goto free_pages;
        }

        (*page_table)->pages = pages;
        (*page_table)->entries = user_pages;
        (*page_table)->page_size = NVIDIA_P2P_PAGE_SIZE_4KB;
        (*page_table)->size = size;

        /* Register an MMU notifier so the owner hears about unmap/exit. */
        (*page_table)->mn.ops = &nvidia_p2p_mmu_ops;
        (*page_table)->mm = current->mm;
        (*page_table)->free_callback = free_callback;
        (*page_table)->data = data;
        (*page_table)->vaddr = vaddr;
        nvgpu_mutex_init(&(*page_table)->lock);
        (*page_table)->mapped = NVIDIA_P2P_PINNED;

        ret = mmu_notifier_register(&(*page_table)->mn, (*page_table)->mm);
        if (ret) {
                goto free_pages;
        }

        return 0;
free_pages:
        while (--user_pages >= 0) {
                put_page(pages[user_pages]);
        }
        kfree(pages);
free_page_table:
        kfree(*page_table);
        *page_table = NULL;
        return ret;
}
EXPORT_SYMBOL(nvidia_p2p_get_pages);

int nvidia_p2p_put_pages(struct nvidia_p2p_page_table *page_table)
{
        if (!page_table) {
                return -EINVAL;
        }

        /* Runs the ->release notifier, which invokes the free_callback. */
        mmu_notifier_unregister(&page_table->mn, page_table->mm);

        return 0;
}
EXPORT_SYMBOL(nvidia_p2p_put_pages);

int nvidia_p2p_free_page_table(struct nvidia_p2p_page_table *page_table)
{
        int user_pages = 0;
        struct page **pages = NULL;

        if (!page_table) {
                return 0;
        }

        nvgpu_mutex_acquire(&page_table->lock);

        if (page_table->mapped & NVIDIA_P2P_MAPPED) {
                WARN(1, "Attempting to free pages that are still mapped");
        }

        if (page_table->mapped & NVIDIA_P2P_PINNED) {
                pages = page_table->pages;
                user_pages = page_table->entries;

                while (--user_pages >= 0) {
                        put_page(pages[user_pages]);
                }

                kfree(pages);
                page_table->mapped &= (u32)~NVIDIA_P2P_PINNED;
        }

        nvgpu_mutex_release(&page_table->lock);

        return 0;
}
EXPORT_SYMBOL(nvidia_p2p_free_page_table);

int nvidia_p2p_map_pages(struct device *dev,
                struct nvidia_p2p_page_table *page_table,
                struct nvidia_p2p_dma_mapping **dma_mapping,
                enum dma_data_direction direction)
{
        struct sg_table *sgt = NULL;
        struct scatterlist *sg;
        struct page **pages = NULL;
        u32 nr_pages = 0;
        int ret = 0;
        int i, count;

        if (!page_table) {
                return -EINVAL;
        }

        nvgpu_mutex_acquire(&page_table->lock);

        pages = page_table->pages;
        nr_pages = page_table->entries;
        if (nr_pages <= 0) {
                nvgpu_mutex_release(&page_table->lock);
                return -EINVAL;
        }

        *dma_mapping = kzalloc(sizeof(**dma_mapping), GFP_KERNEL);
        if (!*dma_mapping) {
                nvgpu_mutex_release(&page_table->lock);
                return -ENOMEM;
        }
        sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
        if (!sgt) {
                ret = -ENOMEM;
                goto free_dma_mapping;
        }
        /* Build a scatterlist over the pinned pages and map it for DMA. */
        ret = sg_alloc_table_from_pages(sgt, pages,
                        nr_pages, 0, page_table->size, GFP_KERNEL);
        if (ret) {
                goto free_sgt;
        }
        (*dma_mapping)->sgt = sgt;
        (*dma_mapping)->dev = dev;
        (*dma_mapping)->direction = direction;
        (*dma_mapping)->page_table = page_table;

        count = dma_map_sg(dev, sgt->sgl, sgt->nents, direction);
        if (count < 1) {
                /* dma_map_sg() returns 0 on failure; don't report success. */
                ret = -EINVAL;
                goto free_sg_table;
        }

        (*dma_mapping)->entries = count;

        (*dma_mapping)->hw_address = kcalloc(count,
                        sizeof(*(*dma_mapping)->hw_address), GFP_KERNEL);
        if (!((*dma_mapping)->hw_address)) {
                ret = -ENOMEM;
                goto unmap_sg;
        }
        (*dma_mapping)->hw_len = kcalloc(count,
                        sizeof(*(*dma_mapping)->hw_len), GFP_KERNEL);
        if (!((*dma_mapping)->hw_len)) {
                ret = -ENOMEM;
                goto free_hw_address;
        }

        /* Export one DMA address/length pair per mapped sg entry. */
        for_each_sg(sgt->sgl, sg, count, i) {
                (*dma_mapping)->hw_address[i] = sg_dma_address(sg);
                (*dma_mapping)->hw_len[i] = sg_dma_len(sg);
        }
        (*dma_mapping)->page_table->mapped |= NVIDIA_P2P_MAPPED;
        nvgpu_mutex_release(&page_table->lock);

        return 0;
free_hw_address:
        kfree((*dma_mapping)->hw_address);
unmap_sg:
        dma_unmap_sg(dev, sgt->sgl,
                        sgt->nents, direction);
free_sg_table:
        sg_free_table(sgt);
free_sgt:
        kfree(sgt);
free_dma_mapping:
        kfree(*dma_mapping);
        *dma_mapping = NULL;
        nvgpu_mutex_release(&page_table->lock);

        return ret;
}
EXPORT_SYMBOL(nvidia_p2p_map_pages);

int nvidia_p2p_unmap_pages(struct nvidia_p2p_dma_mapping *dma_mapping)
{
        struct nvidia_p2p_page_table *page_table = NULL;

        if (!dma_mapping) {
                return -EINVAL;
        }

        page_table = dma_mapping->page_table;
        if (!page_table) {
                return -EFAULT;
        }

        nvgpu_mutex_acquire(&page_table->lock);
        if (page_table->mapped & NVIDIA_P2P_MAPPED) {
                kfree(dma_mapping->hw_len);
                kfree(dma_mapping->hw_address);
                if (dma_mapping->entries)
                        dma_unmap_sg(dma_mapping->dev,
                                        dma_mapping->sgt->sgl,
                                        dma_mapping->sgt->nents,
                                        dma_mapping->direction);
                sg_free_table(dma_mapping->sgt);
                kfree(dma_mapping->sgt);
                kfree(dma_mapping);
                page_table->mapped &= (u32)~NVIDIA_P2P_MAPPED;
        }
        nvgpu_mutex_release(&page_table->lock);

        return 0;
}
EXPORT_SYMBOL(nvidia_p2p_unmap_pages);

int nvidia_p2p_free_dma_mapping(struct nvidia_p2p_dma_mapping *dma_mapping)
{
        return nvidia_p2p_unmap_pages(dma_mapping);
}
EXPORT_SYMBOL(nvidia_p2p_free_dma_mapping);
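The commit message requires the user address to be sysmem page aligned and the size to be a multiple of the sysmem page size, while nvidia_p2p_get_pages() itself only rejects a zero-length range. A caller might therefore validate the arguments up front; a minimal, hypothetical check (assuming <linux/mm.h> for offset_in_page()/PAGE_ALIGNED()):

    /* Hypothetical pre-check mirroring restriction (5) in the commit message. */
    static int my_check_rdma_args(u64 vaddr, u64 size)
    {
            if (size == 0 || offset_in_page(vaddr) || !PAGE_ALIGNED(size))
                    return -EINVAL;
            return 0;
    }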