path: root/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
author     Aingara Paramakuru <aparamakuru@nvidia.com>   2014-05-05 21:14:22 -0400
committer  Dan Willemsen <dwillemsen@nvidia.com>         2015-03-18 15:11:01 -0400
commit     1fd722f592c2e0523c5e399a2406a4e387057188 (patch)
tree       3425fb1a08ec2ccc6397e39c73a5579117e00a05 /drivers/gpu/nvgpu/vgpu/mm_vgpu.c
parent     69e0cd3dfd8f39bc8d3529325001dcacd774f669 (diff)
gpu: nvgpu: support gk20a virtualization
The nvgpu driver now uses the Tegra graphics virtualization interfaces to
support gk20a in a virtualized environment.

Bug 1509608

Change-Id: I6ede15ee7bf0b0ad8a13e8eb5f557c3516ead676
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/440122
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/vgpu/mm_vgpu.c')
-rw-r--r--   drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 425
1 file changed, 425 insertions(+), 0 deletions(-)
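Every memory-management operation added below follows the same shape: fill in a tegra_vgpu_cmd_msg, send it to the vgpu server with vgpu_comm_sendrecv(), and check both the transport error and the server's msg.ret. The following is a minimal sketch of that pattern, not part of the commit; the function name is illustrative, and the struct and constant definitions are assumed to come from "vgpu/vgpu.h", which is not shown in this diff.

static void vgpu_send_as_invalidate_sketch(struct vm_gk20a *vm,
                struct gk20a_platform *platform)
{
        struct tegra_vgpu_cmd_msg msg;
        int err;

        msg.cmd = TEGRA_VGPU_CMD_AS_INVALIDATE;         /* which operation */
        msg.handle = platform->virt_handle;             /* this guest's vgpu handle */
        msg.params.as_invalidate.handle = vm->handle;   /* per-command parameters */

        /* synchronous round trip; the server's reply lands back in msg */
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        WARN_ON(err || msg.ret);        /* transport or server-side failure */
}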
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
new file mode 100644
index 00000000..6ed1dece
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -0,0 +1,425 @@
/*
 * Virtualized GPU Memory Management
 *
 * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/dma-mapping.h>
#include "vgpu/vgpu.h"

/* note: keep the page sizes sorted lowest to highest here */
static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };

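/*
 * One-time SW-side MM setup: record the supported page sizes and derive
 * the PDE stride and channel VA size. Subsequent calls are skipped via
 * mm->sw_ready.
 */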
static int vgpu_init_mm_setup_sw(struct gk20a *g)
{
        struct mm_gk20a *mm = &g->mm;

        gk20a_dbg_fn("");

        if (mm->sw_ready) {
                gk20a_dbg_fn("skip init");
                return 0;
        }

        mm->g = g;
        mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
        mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
        mm->pde_stride = mm->big_page_size << 10;
        mm->pde_stride_shift = ilog2(mm->pde_stride);
        BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */

        /* TBD: make channel vm size configurable */
        mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;

        gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));

        mm->sw_ready = true;

        return 0;
}

int vgpu_init_mm_support(struct gk20a *g)
{
        gk20a_dbg_fn("");

        return vgpu_init_mm_setup_sw(g);
}

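/*
 * Map a buffer into the channel VA space. The VA is allocated (or validated)
 * locally, but the GMMU page table update itself is requested from the vgpu
 * server with a TEGRA_VGPU_CMD_AS_MAP message.
 */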
static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
                                u64 map_offset,
                                struct sg_table *sgt,
                                u64 buffer_offset,
                                u64 size,
                                int pgsz_idx,
                                u8 kind_v,
                                u32 ctag_offset,
                                u32 flags,
                                int rw_flag,
                                bool clear_ctags)
{
        int err = 0;
        struct device *d = dev_from_vm(vm);
        struct gk20a *g = gk20a_from_vm(vm);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
        u64 addr = gk20a_mm_iova_addr(sgt->sgl);
        u8 prot;

        gk20a_dbg_fn("");

        /* Allocate (or validate when map_offset != 0) the virtual address. */
        if (!map_offset) {
                map_offset = gk20a_vm_alloc_va(vm, size, pgsz_idx);
                if (!map_offset) {
                        gk20a_err(d, "failed to allocate va space");
                        err = -ENOMEM;
                        goto fail;
                }
        }

        if (rw_flag == gk20a_mem_flag_read_only)
                prot = TEGRA_VGPU_MAP_PROT_READ_ONLY;
        else if (rw_flag == gk20a_mem_flag_write_only)
                prot = TEGRA_VGPU_MAP_PROT_WRITE_ONLY;
        else
                prot = TEGRA_VGPU_MAP_PROT_NONE;

        msg.cmd = TEGRA_VGPU_CMD_AS_MAP;
        msg.handle = platform->virt_handle;
        p->handle = vm->handle;
        p->addr = addr;
        p->gpu_va = map_offset;
        p->size = size;
        p->pgsz_idx = pgsz_idx;
        p->iova = mapping ? 1 : 0;
        p->kind = kind_v;
        p->cacheable =
                (flags & NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE) ? 1 : 0;
        p->prot = prot;
        p->ctag_offset = ctag_offset;
        p->clear_ctags = clear_ctags;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                goto fail;

        vm->tlb_dirty = true;
        return map_offset;
fail:
        gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
        return 0;
}

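/*
 * Unmap a buffer: free the locally allocated VA (if any) and ask the vgpu
 * server to clear the GMMU entries via TEGRA_VGPU_CMD_AS_UNMAP.
 */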
static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
                                u64 vaddr,
                                u64 size,
                                int pgsz_idx,
                                bool va_allocated,
                                int rw_flag)
{
        struct gk20a *g = gk20a_from_vm(vm);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
        int err;

        gk20a_dbg_fn("");

        if (va_allocated) {
                err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
                if (err) {
                        dev_err(dev_from_vm(vm),
                                "failed to free va");
                        return;
                }
        }

        msg.cmd = TEGRA_VGPU_CMD_AS_UNMAP;
        msg.handle = platform->virt_handle;
        p->handle = vm->handle;
        p->gpu_va = vaddr;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                dev_err(dev_from_vm(vm),
                        "failed to update gmmu ptes on unmap");

        vm->tlb_dirty = true;
}

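/*
 * Tear down a VM: unmap all remaining buffers, drop reserved VA areas,
 * release the address-space share on the server side, then free the local
 * allocators and the vm structure itself.
 */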
static void vgpu_vm_remove_support(struct vm_gk20a *vm)
{
        struct gk20a *g = vm->mm->g;
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct mapped_buffer_node *mapped_buffer;
        struct vm_reserved_va_node *va_node, *va_node_tmp;
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
        struct rb_node *node;
        int err;

        gk20a_dbg_fn("");
        mutex_lock(&vm->update_gmmu_lock);

        /* TBD: add a flag here for the unmap code to recognize teardown
         * and short-circuit any otherwise expensive operations. */

        node = rb_first(&vm->mapped_buffers);
        while (node) {
                mapped_buffer =
                        container_of(node, struct mapped_buffer_node, node);
                gk20a_vm_unmap_locked(mapped_buffer);
                node = rb_first(&vm->mapped_buffers);
        }

        /* destroy remaining reserved memory areas */
        list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
                                 reserved_va_list) {
                list_del(&va_node->reserved_va_list);
                kfree(va_node);
        }

        msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
        msg.handle = platform->virt_handle;
        p->handle = vm->handle;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        WARN_ON(err || msg.ret);

        gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
        gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);

        mutex_unlock(&vm->update_gmmu_lock);

        /* release zero page if used */
        if (vm->zero_page_cpuva)
                dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
                                  vm->zero_page_cpuva, vm->zero_page_iova);

        /* vm is not used anymore. release it. */
        kfree(vm);
}

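/* Map a buffer into BAR1 through the vgpu server (TEGRA_VGPU_CMD_MAP_BAR1). */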
u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct dma_iommu_mapping *mapping =
                        to_dma_iommu_mapping(dev_from_gk20a(g));
        u64 addr = gk20a_mm_iova_addr((*sgt)->sgl);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
        int err;

        msg.cmd = TEGRA_VGPU_CMD_MAP_BAR1;
        msg.handle = platform->virt_handle;
        p->addr = addr;
        p->size = size;
        p->iova = mapping ? 1 : 0;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                addr = 0;
        else
                addr = p->gpu_va;

        return addr;
}

/* address space interfaces for the gk20a module */
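/*
 * Allocate an address-space share on the vgpu server, then set up the local
 * VA allocators: small pages in the lower half of the channel VA range
 * (minus the low hole below va_start), big pages in the upper half.
 */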
static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share)
{
        struct gk20a_as *as = as_share->as;
        struct gk20a *g = gk20a_from_as(as);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
        struct mm_gk20a *mm = &g->mm;
        struct vm_gk20a *vm;
        u64 vma_size;
        u32 num_pages, low_hole_pages;
        char name[32];
        int err;

        gk20a_dbg_fn("");

        vm = kzalloc(sizeof(*vm), GFP_KERNEL);
        if (!vm)
                return -ENOMEM;

        as_share->vm = vm;

        vm->mm = mm;
        vm->as_share = as_share;

        vm->big_pages = true;

        vm->va_start = mm->pde_stride; /* create a one pde hole */
        vm->va_limit = mm->channel.size; /* note this means channel.size is
                                            really just the max */

        msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE;
        msg.handle = platform->virt_handle;
        p->size = vm->va_limit;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                return -ENOMEM;

        vm->handle = p->handle;

        /* low-half: alloc small pages */
        /* high-half: alloc big pages */
        vma_size = mm->channel.size >> 1;

        snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
                 gmmu_page_sizes[gmmu_page_size_small]>>10);
        num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);

        /* num_pages above is without regard to the low-side hole. */
        low_hole_pages = (vm->va_start >>
                          gmmu_page_shifts[gmmu_page_size_small]);

        gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name,
                             low_hole_pages,              /* start */
                             num_pages - low_hole_pages,  /* length */
                             1);                          /* align */

        snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
                 gmmu_page_sizes[gmmu_page_size_big]>>10);

        num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
        gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name,
                             num_pages,  /* start */
                             num_pages,  /* length */
                             1);         /* align */

        vm->mapped_buffers = RB_ROOT;

        mutex_init(&vm->update_gmmu_lock);
        kref_init(&vm->ref);
        INIT_LIST_HEAD(&vm->reserved_va_list);

        vm->enable_ctag = true;

        return 0;
}

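/* Bind a channel to this address space via TEGRA_VGPU_CMD_AS_BIND_SHARE. */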
static int vgpu_vm_bind_channel(struct gk20a_as_share *as_share,
                                struct channel_gk20a *ch)
{
        struct vm_gk20a *vm = as_share->vm;
        struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_bind_share_params *p = &msg.params.as_bind_share;
        int err;

        gk20a_dbg_fn("");

        ch->vm = vm;
        msg.cmd = TEGRA_VGPU_CMD_AS_BIND_SHARE;
        msg.handle = platform->virt_handle;
        p->as_handle = vm->handle;
        p->chan_handle = ch->virt_ctx;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

        if (err || msg.ret) {
                ch->vm = NULL;
                err = -ENOMEM;
        }

        return err;
}

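/* Common helper: request an FB/L2 cache maintenance operation from the server. */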
static void vgpu_cache_maint(u64 handle, u8 op)
{
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_cache_maint_params *p = &msg.params.cache_maint;
        int err;

        msg.cmd = TEGRA_VGPU_CMD_CACHE_MAINT;
        msg.handle = handle;
        p->op = op;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        WARN_ON(err || msg.ret);
}

static int vgpu_mm_fb_flush(struct gk20a *g)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);

        gk20a_dbg_fn("");

        vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_FB_FLUSH);
        return 0;
}

static void vgpu_mm_l2_invalidate(struct gk20a *g)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);

        gk20a_dbg_fn("");

        vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_L2_MAINT_INV);
}

static void vgpu_mm_l2_flush(struct gk20a *g, bool invalidate)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        u8 op;

        gk20a_dbg_fn("");

        if (invalidate)
                op = TEGRA_VGPU_L2_MAINT_FLUSH_INV;
        else
                op = TEGRA_VGPU_L2_MAINT_FLUSH;

        vgpu_cache_maint(platform->virt_handle, op);
}

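/* Invalidate the GMMU TLB, but only if a map/unmap has marked it dirty. */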
static void vgpu_mm_tlb_invalidate(struct vm_gk20a *vm)
{
        struct gk20a *g = gk20a_from_vm(vm);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_invalidate_params *p = &msg.params.as_invalidate;
        int err;

        gk20a_dbg_fn("");

        /* No need to invalidate if tlb is clean */
        mutex_lock(&vm->update_gmmu_lock);
        if (!vm->tlb_dirty) {
                mutex_unlock(&vm->update_gmmu_lock);
                return;
        }

        msg.cmd = TEGRA_VGPU_CMD_AS_INVALIDATE;
        msg.handle = platform->virt_handle;
        p->handle = vm->handle;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        WARN_ON(err || msg.ret);
        vm->tlb_dirty = false;
        mutex_unlock(&vm->update_gmmu_lock);
}

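/* Hook the vgpu implementations into the per-GPU MM ops table. */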
void vgpu_init_mm_ops(struct gpu_ops *gops)
{
        gops->mm.gmmu_map = vgpu_locked_gmmu_map;
        gops->mm.gmmu_unmap = vgpu_locked_gmmu_unmap;
        gops->mm.vm_remove = vgpu_vm_remove_support;
        gops->mm.vm_alloc_share = vgpu_vm_alloc_share;
        gops->mm.vm_bind_channel = vgpu_vm_bind_channel;
        gops->mm.fb_flush = vgpu_mm_fb_flush;
        gops->mm.l2_invalidate = vgpu_mm_l2_invalidate;
        gops->mm.l2_flush = vgpu_mm_l2_flush;
        gops->mm.tlb_invalidate = vgpu_mm_tlb_invalidate;
}