author     Aingara Paramakuru <aparamakuru@nvidia.com>   2014-05-05 21:14:22 -0400
committer  Dan Willemsen <dwillemsen@nvidia.com>         2015-03-18 15:11:01 -0400
commit     1fd722f592c2e0523c5e399a2406a4e387057188 (patch)
tree       3425fb1a08ec2ccc6397e39c73a5579117e00a05 /drivers/gpu/nvgpu/vgpu/mm_vgpu.c
parent     69e0cd3dfd8f39bc8d3529325001dcacd774f669 (diff)
gpu: nvgpu: support gk20a virtualization
The nvgpu driver now supports running gk20a in a virtualized environment,
using the Tegra graphics virtualization interfaces.
Bug 1509608
Change-Id: I6ede15ee7bf0b0ad8a13e8eb5f557c3516ead676
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/440122
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
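
Every entry point in the new file follows the same shape: fill in a
tegra_vgpu_cmd_msg, send it to the virtualization server with
vgpu_comm_sendrecv(), and treat a non-zero err or msg.ret as failure. Below is
a minimal, self-contained sketch of that round trip; the struct layout, field
names, and the stub transport are illustrative assumptions for this note, not
the driver's definitions (the real ones live in the vgpu headers added by this
series).

#include <stdio.h>
#include <stddef.h>

/* Hypothetical stand-ins for the tegra_vgpu message types. */
struct as_map_params { unsigned long long addr, gpu_va, size; };
struct cmd_msg {
        unsigned int cmd;               /* command code, e.g. an AS map */
        unsigned long long handle;      /* server-side object handle */
        int ret;                        /* status filled in by the server */
        union { struct as_map_params as_map; } params;
};

/* Stub for vgpu_comm_sendrecv(): the real call performs a synchronous
 * request/response exchange with the virtualization server. */
static int sendrecv_stub(struct cmd_msg *msg, size_t size_in, size_t size_out)
{
        (void)size_in; (void)size_out;
        msg->ret = 0;                   /* pretend the server accepted it */
        return 0;
}

int main(void)
{
        struct cmd_msg msg = { .cmd = 1, .handle = 42 };
        msg.params.as_map.addr = 0x1000;
        msg.params.as_map.size = 0x2000;

        int err = sendrecv_stub(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)             /* the failure check used throughout */
                fprintf(stderr, "command failed\n");
        return 0;
}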
Diffstat (limited to 'drivers/gpu/nvgpu/vgpu/mm_vgpu.c')
-rw-r--r--  drivers/gpu/nvgpu/vgpu/mm_vgpu.c  425
1 file changed, 425 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
new file mode 100644
index 00000000..6ed1dece
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -0,0 +1,425 @@
/*
 * Virtualized GPU Memory Management
 *
 * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/dma-mapping.h>
#include "vgpu/vgpu.h"

/* note: keep the page sizes sorted lowest to highest here */
static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };

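/*
 * One-time SW-side MM setup: record the page/PDE geometry and the
 * channel VA size. Repeat calls are skipped via mm->sw_ready.
 */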
static int vgpu_init_mm_setup_sw(struct gk20a *g)
{
        struct mm_gk20a *mm = &g->mm;

        gk20a_dbg_fn("");

        if (mm->sw_ready) {
                gk20a_dbg_fn("skip init");
                return 0;
        }

        mm->g = g;
        mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
        mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
        mm->pde_stride = mm->big_page_size << 10;
        mm->pde_stride_shift = ilog2(mm->pde_stride);
        BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */

        /* TBD: make channel vm size configurable */
        mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;

        gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));

        mm->sw_ready = true;

        return 0;
}

int vgpu_init_mm_support(struct gk20a *g)
{
        gk20a_dbg_fn("");

        return vgpu_init_mm_setup_sw(g);
}

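/*
 * Map a buffer into the channel's GPU VA space. Unlike the native gk20a
 * path, no page tables are written here: the request is forwarded to the
 * virtualization server via TEGRA_VGPU_CMD_AS_MAP, which owns the real
 * GMMU state. Returns the GPU VA on success, 0 on failure.
 */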
static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
                                u64 map_offset,
                                struct sg_table *sgt,
                                u64 buffer_offset,
                                u64 size,
                                int pgsz_idx,
                                u8 kind_v,
                                u32 ctag_offset,
                                u32 flags,
                                int rw_flag,
                                bool clear_ctags)
{
        int err = 0;
        struct device *d = dev_from_vm(vm);
        struct gk20a *g = gk20a_from_vm(vm);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
        u64 addr = gk20a_mm_iova_addr(sgt->sgl);
        u8 prot;

        gk20a_dbg_fn("");

        /* Allocate (or validate when map_offset != 0) the virtual address. */
        if (!map_offset) {
                map_offset = gk20a_vm_alloc_va(vm, size, pgsz_idx);
                if (!map_offset) {
                        gk20a_err(d, "failed to allocate va space");
                        err = -ENOMEM;
                        goto fail;
                }
        }

        if (rw_flag == gk20a_mem_flag_read_only)
                prot = TEGRA_VGPU_MAP_PROT_READ_ONLY;
        else if (rw_flag == gk20a_mem_flag_write_only)
                prot = TEGRA_VGPU_MAP_PROT_WRITE_ONLY;
        else
                prot = TEGRA_VGPU_MAP_PROT_NONE;

        msg.cmd = TEGRA_VGPU_CMD_AS_MAP;
        msg.handle = platform->virt_handle;
        p->handle = vm->handle;
        p->addr = addr;
        p->gpu_va = map_offset;
        p->size = size;
        p->pgsz_idx = pgsz_idx;
        p->iova = mapping ? 1 : 0;
        p->kind = kind_v;
        p->cacheable =
                (flags & NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE) ? 1 : 0;
        p->prot = prot;
        p->ctag_offset = ctag_offset;
        p->clear_ctags = clear_ctags;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                goto fail;

        vm->tlb_dirty = true;
        return map_offset;
fail:
        gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
        return 0;
}

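/*
 * Undo vgpu_locked_gmmu_map(): release the local VA allocation (if this
 * mapping owned one), then ask the server to clear the GMMU PTEs via
 * TEGRA_VGPU_CMD_AS_UNMAP.
 */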
static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
                                u64 vaddr,
                                u64 size,
                                int pgsz_idx,
                                bool va_allocated,
                                int rw_flag)
{
        struct gk20a *g = gk20a_from_vm(vm);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
        int err;

        gk20a_dbg_fn("");

        if (va_allocated) {
                err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
                if (err) {
                        dev_err(dev_from_vm(vm), "failed to free va");
                        return;
                }
        }

        msg.cmd = TEGRA_VGPU_CMD_AS_UNMAP;
        msg.handle = platform->virt_handle;
        p->handle = vm->handle;
        p->gpu_va = vaddr;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                dev_err(dev_from_vm(vm),
                        "failed to update gmmu ptes on unmap");

        vm->tlb_dirty = true;
}

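/*
 * Full VM teardown: unmap all remaining buffers, free the reserved VA
 * areas, release the server-side address space share, and destroy the
 * local VA allocators before freeing the vm itself.
 */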
static void vgpu_vm_remove_support(struct vm_gk20a *vm)
{
        struct gk20a *g = vm->mm->g;
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct mapped_buffer_node *mapped_buffer;
        struct vm_reserved_va_node *va_node, *va_node_tmp;
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
        struct rb_node *node;
        int err;

        gk20a_dbg_fn("");
        mutex_lock(&vm->update_gmmu_lock);

        /* TBD: add a flag here for the unmap code to recognize teardown
         * and short-circuit any otherwise expensive operations. */

        node = rb_first(&vm->mapped_buffers);
        while (node) {
                mapped_buffer =
                        container_of(node, struct mapped_buffer_node, node);
                gk20a_vm_unmap_locked(mapped_buffer);
                node = rb_first(&vm->mapped_buffers);
        }

        /* destroy remaining reserved memory areas */
        list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
                reserved_va_list) {
                list_del(&va_node->reserved_va_list);
                kfree(va_node);
        }

        msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
        msg.handle = platform->virt_handle;
        p->handle = vm->handle;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        WARN_ON(err || msg.ret);

        gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
        gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);

        mutex_unlock(&vm->update_gmmu_lock);

        /* release zero page if used */
        if (vm->zero_page_cpuva)
                dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
                                  vm->zero_page_cpuva, vm->zero_page_iova);

        /* vm is not used anymore. release it. */
        kfree(vm);
}

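/*
 * Map a buffer through BAR1 by way of the server; the server picks the
 * GPU VA, which is returned here, or 0 on failure.
 */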
u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct dma_iommu_mapping *mapping =
                to_dma_iommu_mapping(dev_from_gk20a(g));
        u64 addr = gk20a_mm_iova_addr((*sgt)->sgl);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
        int err;

        msg.cmd = TEGRA_VGPU_CMD_MAP_BAR1;
        msg.handle = platform->virt_handle;
        p->addr = addr;
        p->size = size;
        p->iova = mapping ? 1 : 0;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                addr = 0;
        else
                addr = p->gpu_va;

        return addr;
}

/* address space interfaces for the gk20a module */
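/*
 * Create a new address space share: allocate the server-side AS with
 * TEGRA_VGPU_CMD_AS_ALLOC_SHARE, then set up the local VA allocators:
 * small pages in the lower half of the VA range (minus a one-PDE low
 * hole), big pages in the upper half.
 */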
static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share)
{
        struct gk20a_as *as = as_share->as;
        struct gk20a *g = gk20a_from_as(as);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
        struct mm_gk20a *mm = &g->mm;
        struct vm_gk20a *vm;
        u64 vma_size;
        u32 num_pages, low_hole_pages;
        char name[32];
        int err;

        gk20a_dbg_fn("");

        vm = kzalloc(sizeof(*vm), GFP_KERNEL);
        if (!vm)
                return -ENOMEM;

        as_share->vm = vm;

        vm->mm = mm;
        vm->as_share = as_share;

        vm->big_pages = true;

        vm->va_start = mm->pde_stride; /* create a one pde hole */
        vm->va_limit = mm->channel.size; /* note this means channel.size is
                                            really just the max */

        msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE;
        msg.handle = platform->virt_handle;
        p->size = vm->va_limit;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                return -ENOMEM;

        vm->handle = p->handle;

        /* low-half: alloc small pages */
        /* high-half: alloc big pages */
        vma_size = mm->channel.size >> 1;

        snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
                 gmmu_page_sizes[gmmu_page_size_small] >> 10);
        num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);

        /* num_pages above is without regard to the low-side hole. */
        low_hole_pages = (vm->va_start >>
                          gmmu_page_shifts[gmmu_page_size_small]);

        gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name,
                             low_hole_pages,             /* start */
                             num_pages - low_hole_pages, /* length */
                             1);                         /* align */

        snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
                 gmmu_page_sizes[gmmu_page_size_big] >> 10);

        num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
        gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name,
                             num_pages, /* start */
                             num_pages, /* length */
                             1);        /* align */

        vm->mapped_buffers = RB_ROOT;

        mutex_init(&vm->update_gmmu_lock);
        kref_init(&vm->ref);
        INIT_LIST_HEAD(&vm->reserved_va_list);

        vm->enable_ctag = true;

        return 0;
}

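/* Bind a channel to this address space on the server side. */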
static int vgpu_vm_bind_channel(struct gk20a_as_share *as_share,
                                struct channel_gk20a *ch)
{
        struct vm_gk20a *vm = as_share->vm;
        struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_bind_share_params *p = &msg.params.as_bind_share;
        int err;

        gk20a_dbg_fn("");

        ch->vm = vm;
        msg.cmd = TEGRA_VGPU_CMD_AS_BIND_SHARE;
        msg.handle = platform->virt_handle;
        p->as_handle = vm->handle;
        p->chan_handle = ch->virt_ctx;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

        if (err || msg.ret) {
                ch->vm = NULL;
                err = -ENOMEM;
        }

        return err;
}

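/*
 * Common helper for the cache-maintenance hooks below; FB flush and L2
 * flush/invalidate are each a single-opcode server command.
 */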
static void vgpu_cache_maint(u64 handle, u8 op)
{
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_cache_maint_params *p = &msg.params.cache_maint;
        int err;

        msg.cmd = TEGRA_VGPU_CMD_CACHE_MAINT;
        msg.handle = handle;
        p->op = op;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        WARN_ON(err || msg.ret);
}

static int vgpu_mm_fb_flush(struct gk20a *g)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);

        gk20a_dbg_fn("");

        vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_FB_FLUSH);
        return 0;
}

static void vgpu_mm_l2_invalidate(struct gk20a *g)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);

        gk20a_dbg_fn("");

        vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_L2_MAINT_INV);
}

static void vgpu_mm_l2_flush(struct gk20a *g, bool invalidate)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        u8 op;

        gk20a_dbg_fn("");

        if (invalidate)
                op = TEGRA_VGPU_L2_MAINT_FLUSH_INV;
        else
                op = TEGRA_VGPU_L2_MAINT_FLUSH;

        vgpu_cache_maint(platform->virt_handle, op);
}

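/*
 * TLB invalidation is likewise delegated to the server. The map/unmap
 * paths above set tlb_dirty, so clean address spaces skip the round
 * trip entirely.
 */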
static void vgpu_mm_tlb_invalidate(struct vm_gk20a *vm)
{
        struct gk20a *g = gk20a_from_vm(vm);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_invalidate_params *p = &msg.params.as_invalidate;
        int err;

        gk20a_dbg_fn("");

        /* No need to invalidate if tlb is clean */
        mutex_lock(&vm->update_gmmu_lock);
        if (!vm->tlb_dirty) {
                mutex_unlock(&vm->update_gmmu_lock);
                return;
        }

        msg.cmd = TEGRA_VGPU_CMD_AS_INVALIDATE;
        msg.handle = platform->virt_handle;
        p->handle = vm->handle;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        WARN_ON(err || msg.ret);
        vm->tlb_dirty = false;
        mutex_unlock(&vm->update_gmmu_lock);
}

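/* Plug the vgpu implementations into the common gpu_ops MM hooks. */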
void vgpu_init_mm_ops(struct gpu_ops *gops)
{
        gops->mm.gmmu_map = vgpu_locked_gmmu_map;
        gops->mm.gmmu_unmap = vgpu_locked_gmmu_unmap;
        gops->mm.vm_remove = vgpu_vm_remove_support;
        gops->mm.vm_alloc_share = vgpu_vm_alloc_share;
        gops->mm.vm_bind_channel = vgpu_vm_bind_channel;
        gops->mm.fb_flush = vgpu_mm_fb_flush;
        gops->mm.l2_invalidate = vgpu_mm_l2_invalidate;
        gops->mm.l2_flush = vgpu_mm_l2_flush;
        gops->mm.tlb_invalidate = vgpu_mm_tlb_invalidate;
}