| author | Arto Merilainen <amerilainen@nvidia.com> | 2014-03-19 03:38:25 -0400 |
|---|---|---|
| committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:08:53 -0400 |
| commit | a9785995d5f22aaeb659285f8aeb64d8b56982e0 (patch) | |
| tree | cc75f75bcf43db316a002a7a240b81f299bf6d7f /drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |
| parent | 61efaf843c22b85424036ec98015121c08f5f16c (diff) | |
gpu: nvgpu: Add NVIDIA GPU Driver
This patch moves the NVIDIA GPU driver to a new location.
Bug 1482562
Change-Id: I24293810b9d0f1504fd9be00135e21dad656ccb6
Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-on: http://git-master/r/383722
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 2984 |
1 file changed, 2984 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
new file mode 100644
index 00000000..b22df5e8
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -0,0 +1,2984 @@
1 | /* | ||
2 | * drivers/video/tegra/host/gk20a/mm_gk20a.c | ||
3 | * | ||
4 | * GK20A memory management | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | */ | ||
21 | |||
22 | #include <linux/delay.h> | ||
23 | #include <linux/highmem.h> | ||
24 | #include <linux/log2.h> | ||
25 | #include <linux/nvhost.h> | ||
26 | #include <linux/pm_runtime.h> | ||
27 | #include <linux/scatterlist.h> | ||
28 | #include <linux/nvmap.h> | ||
29 | #include <linux/tegra-soc.h> | ||
30 | #include <linux/vmalloc.h> | ||
31 | #include <linux/dma-buf.h> | ||
32 | #include <asm/cacheflush.h> | ||
33 | |||
34 | #include "gk20a.h" | ||
35 | #include "mm_gk20a.h" | ||
36 | #include "hw_gmmu_gk20a.h" | ||
37 | #include "hw_fb_gk20a.h" | ||
38 | #include "hw_bus_gk20a.h" | ||
39 | #include "hw_ram_gk20a.h" | ||
40 | #include "hw_mc_gk20a.h" | ||
41 | #include "hw_flush_gk20a.h" | ||
42 | #include "hw_ltc_gk20a.h" | ||
43 | |||
44 | #include "kind_gk20a.h" | ||
45 | |||
46 | #ifdef CONFIG_ARM64 | ||
47 | #define outer_flush_range(a, b) | ||
48 | #define __cpuc_flush_dcache_area __flush_dcache_area | ||
49 | #endif | ||
50 | |||
51 | /* | ||
52 | * GPU mapping life cycle | ||
53 | * ====================== | ||
54 | * | ||
55 | * Kernel mappings | ||
56 | * --------------- | ||
57 | * | ||
58 | * Kernel mappings are created through vm.map(..., false): | ||
59 | * | ||
60 | * - Mappings to the same allocations are reused and refcounted. | ||
61 | * - This path does not support deferred unmapping (i.e. kernel must wait for | ||
62 | * all hw operations on the buffer to complete before unmapping). | ||
63 | * - References to dmabuf are owned and managed by the (kernel) clients of | ||
64 | * the gk20a_vm layer. | ||
65 | * | ||
66 | * | ||
67 | * User space mappings | ||
68 | * ------------------- | ||
69 | * | ||
70 | * User space mappings are created through as.map_buffer -> vm.map(..., true): | ||
71 | * | ||
72 | * - Mappings to the same allocations are reused and refcounted. | ||
73 | * - This path supports deferred unmapping (i.e. we delay the actual unmapping | ||
74 | * until all hw operations have completed). | ||
75 | * - References to dmabuf are owned and managed by the vm_gk20a | ||
76 | * layer itself. vm.map acquires these refs, and sets | ||
77 | * mapped_buffer->own_mem_ref to record that we must release the refs when we | ||
78 | * actually unmap. | ||
79 | * | ||
80 | */ | ||
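The semantics above amount to a reference-counted mapping whose teardown runs from a release callback, which is how the code below implements it with a kref (see gk20a_vm_unmap_locked_kref()). A minimal standalone sketch of that pattern, with hypothetical names and no kernel dependencies:

```c
#include <stdio.h>

/* Hypothetical stand-in for a mapped buffer tracked by the VM layer. */
struct mapping {
	unsigned long refcount;   /* kref in the real driver */
	int own_mem_ref;          /* did vm.map take the dmabuf ref? */
};

/* Release callback: runs only when the last reference is dropped,
 * i.e. the actual unmap is deferred until all users are done. */
static void mapping_release(struct mapping *m)
{
	printf("tearing down GPU mapping (own_mem_ref=%d)\n", m->own_mem_ref);
	/* real driver: clear PTEs, free VA, drop the dmabuf ref if owned */
}

static void mapping_get(struct mapping *m) { m->refcount++; }

static void mapping_put(struct mapping *m)
{
	if (--m->refcount == 0)
		mapping_release(m);
}

int main(void)
{
	struct mapping m = { .refcount = 1, .own_mem_ref = 1 }; /* created by map */

	mapping_get(&m);   /* e.g. gk20a_vm_get_buffers() pins it   */
	mapping_put(&m);   /* user-space unmap: nothing happens yet */
	mapping_put(&m);   /* last user done: release runs now      */
	return 0;
}
```

The user-space path simply keeps the mapping alive until the last reference drops, which is what makes the unmap "deferred"; the kernel path drops its only reference synchronously.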
81 | |||
82 | static inline int vm_aspace_id(struct vm_gk20a *vm) | ||
83 | { | ||
84 | /* -1 is bar1 or pmu, etc. */ | ||
85 | return vm->as_share ? vm->as_share->id : -1; | ||
86 | } | ||
87 | static inline u32 hi32(u64 f) | ||
88 | { | ||
89 | return (u32)(f >> 32); | ||
90 | } | ||
91 | static inline u32 lo32(u64 f) | ||
92 | { | ||
93 | return (u32)(f & 0xffffffff); | ||
94 | } | ||
95 | |||
96 | #define FLUSH_CPU_DCACHE(va, pa, size) \ | ||
97 | do { \ | ||
98 | __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \ | ||
99 | outer_flush_range(pa, pa + (size_t)(size)); \ | ||
100 | } while (0) | ||
101 | |||
102 | static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer); | ||
103 | static struct mapped_buffer_node *find_mapped_buffer_locked( | ||
104 | struct rb_root *root, u64 addr); | ||
105 | static struct mapped_buffer_node *find_mapped_buffer_reverse_locked( | ||
106 | struct rb_root *root, struct dma_buf *dmabuf, | ||
107 | u32 kind); | ||
108 | static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | ||
109 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
110 | struct sg_table *sgt, | ||
111 | u64 first_vaddr, u64 last_vaddr, | ||
112 | u8 kind_v, u32 ctag_offset, bool cacheable, | ||
113 | int rw_flag); | ||
114 | static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i); | ||
115 | static void gk20a_vm_remove_support(struct vm_gk20a *vm); | ||
116 | |||
117 | |||
118 | /* note: keep the page sizes sorted lowest to highest here */ | ||
119 | static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K }; | ||
120 | static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 }; | ||
121 | static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL, | ||
122 | 0x1ffffLL }; | ||
123 | static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL }; | ||
124 | |||
125 | struct gk20a_comptags { | ||
126 | u32 offset; | ||
127 | u32 lines; | ||
128 | }; | ||
129 | |||
130 | struct gk20a_dmabuf_priv { | ||
131 | struct mutex lock; | ||
132 | |||
133 | struct gk20a_allocator *comptag_allocator; | ||
134 | struct gk20a_comptags comptags; | ||
135 | |||
136 | struct dma_buf_attachment *attach; | ||
137 | struct sg_table *sgt; | ||
138 | |||
139 | int pin_count; | ||
140 | }; | ||
141 | |||
142 | static void gk20a_mm_delete_priv(void *_priv) | ||
143 | { | ||
144 | struct gk20a_dmabuf_priv *priv = _priv; | ||
145 | if (!priv) | ||
146 | return; | ||
147 | |||
148 | if (priv->comptags.lines) { | ||
149 | BUG_ON(!priv->comptag_allocator); | ||
150 | priv->comptag_allocator->free(priv->comptag_allocator, | ||
151 | priv->comptags.offset, | ||
152 | priv->comptags.lines); | ||
153 | } | ||
154 | |||
155 | kfree(priv); | ||
156 | } | ||
157 | |||
158 | struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf) | ||
159 | { | ||
160 | struct gk20a_dmabuf_priv *priv; | ||
161 | |||
162 | priv = dma_buf_get_drvdata(dmabuf, dev); | ||
163 | if (WARN_ON(!priv)) | ||
164 | return ERR_PTR(-EINVAL); | ||
165 | |||
166 | mutex_lock(&priv->lock); | ||
167 | |||
168 | if (priv->pin_count == 0) { | ||
169 | priv->attach = dma_buf_attach(dmabuf, dev); | ||
170 | if (IS_ERR(priv->attach)) { | ||
171 | mutex_unlock(&priv->lock); | ||
172 | return (struct sg_table *)priv->attach; | ||
173 | } | ||
174 | |||
175 | priv->sgt = dma_buf_map_attachment(priv->attach, | ||
176 | DMA_BIDIRECTIONAL); | ||
177 | if (IS_ERR(priv->sgt)) { | ||
178 | dma_buf_detach(dmabuf, priv->attach); | ||
179 | mutex_unlock(&priv->lock); | ||
180 | return priv->sgt; | ||
181 | } | ||
182 | } | ||
183 | |||
184 | priv->pin_count++; | ||
185 | mutex_unlock(&priv->lock); | ||
186 | return priv->sgt; | ||
187 | } | ||
188 | |||
189 | void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, | ||
190 | struct sg_table *sgt) | ||
191 | { | ||
192 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | ||
193 | dma_addr_t dma_addr; | ||
194 | |||
195 | if (IS_ERR(priv) || !priv) | ||
196 | return; | ||
197 | |||
198 | mutex_lock(&priv->lock); | ||
199 | WARN_ON(priv->sgt != sgt); | ||
200 | priv->pin_count--; | ||
201 | WARN_ON(priv->pin_count < 0); | ||
202 | dma_addr = sg_dma_address(priv->sgt->sgl); | ||
203 | if (priv->pin_count == 0) { | ||
204 | dma_buf_unmap_attachment(priv->attach, priv->sgt, | ||
205 | DMA_BIDIRECTIONAL); | ||
206 | dma_buf_detach(dmabuf, priv->attach); | ||
207 | } | ||
208 | mutex_unlock(&priv->lock); | ||
209 | } | ||
210 | |||
211 | |||
212 | static void gk20a_get_comptags(struct device *dev, | ||
213 | struct dma_buf *dmabuf, | ||
214 | struct gk20a_comptags *comptags) | ||
215 | { | ||
216 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | ||
217 | |||
218 | if (!comptags) | ||
219 | return; | ||
220 | |||
221 | if (!priv) { | ||
222 | comptags->lines = 0; | ||
223 | comptags->offset = 0; | ||
224 | return; | ||
225 | } | ||
226 | |||
227 | *comptags = priv->comptags; | ||
228 | } | ||
229 | |||
230 | static int gk20a_alloc_comptags(struct device *dev, | ||
231 | struct dma_buf *dmabuf, | ||
232 | struct gk20a_allocator *allocator, | ||
233 | int lines) | ||
234 | { | ||
235 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | ||
236 | u32 offset = 0; | ||
237 | int err; | ||
238 | |||
239 | if (!priv) | ||
240 | return -ENOSYS; | ||
241 | |||
242 | if (!lines) | ||
243 | return -EINVAL; | ||
244 | |||
245 | /* store the allocator so we can use it when we free the ctags */ | ||
246 | priv->comptag_allocator = allocator; | ||
247 | err = allocator->alloc(allocator, &offset, lines); | ||
248 | if (!err) { | ||
249 | priv->comptags.lines = lines; | ||
250 | priv->comptags.offset = offset; | ||
251 | } | ||
252 | return err; | ||
253 | } | ||
254 | |||
255 | |||
256 | |||
257 | |||
258 | static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) | ||
259 | { | ||
260 | gk20a_dbg_fn(""); | ||
261 | if (g->ops.fb.reset) | ||
262 | g->ops.fb.reset(g); | ||
263 | |||
264 | if (g->ops.fb.init_fs_state) | ||
265 | g->ops.fb.init_fs_state(g); | ||
266 | |||
267 | return 0; | ||
268 | } | ||
269 | |||
270 | void gk20a_remove_mm_support(struct mm_gk20a *mm) | ||
271 | { | ||
272 | struct gk20a *g = mm->g; | ||
273 | struct device *d = dev_from_gk20a(g); | ||
274 | struct vm_gk20a *vm = &mm->bar1.vm; | ||
275 | struct inst_desc *inst_block = &mm->bar1.inst_block; | ||
276 | |||
277 | gk20a_dbg_fn(""); | ||
278 | |||
279 | if (inst_block->cpuva) | ||
280 | dma_free_coherent(d, inst_block->size, | ||
281 | inst_block->cpuva, inst_block->iova); | ||
282 | inst_block->cpuva = NULL; | ||
283 | inst_block->iova = 0; | ||
284 | |||
285 | gk20a_vm_remove_support(vm); | ||
286 | } | ||
287 | |||
288 | int gk20a_init_mm_setup_sw(struct gk20a *g) | ||
289 | { | ||
290 | struct mm_gk20a *mm = &g->mm; | ||
291 | int i; | ||
292 | |||
293 | gk20a_dbg_fn(""); | ||
294 | |||
295 | if (mm->sw_ready) { | ||
296 | gk20a_dbg_fn("skip init"); | ||
297 | return 0; | ||
298 | } | ||
299 | |||
300 | mm->g = g; | ||
301 | mutex_init(&mm->tlb_lock); | ||
302 | mutex_init(&mm->l2_op_lock); | ||
303 | mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big]; | ||
304 | mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big]; | ||
305 | mm->pde_stride = mm->big_page_size << 10; | ||
306 | mm->pde_stride_shift = ilog2(mm->pde_stride); | ||
307 | BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */ | ||
308 | |||
309 | for (i = 0; i < ARRAY_SIZE(gmmu_page_sizes); i++) { | ||
310 | |||
311 | u32 num_ptes, pte_space, num_pages; | ||
312 | |||
313 | /* assuming "full" page tables */ | ||
314 | num_ptes = mm->pde_stride / gmmu_page_sizes[i]; | ||
315 | |||
316 | pte_space = num_ptes * gmmu_pte__size_v(); | ||
317 | /* allocate whole pages */ | ||
318 | pte_space = roundup(pte_space, PAGE_SIZE); | ||
319 | |||
320 | num_pages = pte_space / PAGE_SIZE; | ||
321 | /* make sure "order" is viable */ | ||
322 | BUG_ON(!is_power_of_2(num_pages)); | ||
323 | |||
324 | mm->page_table_sizing[i].num_ptes = num_ptes; | ||
325 | mm->page_table_sizing[i].order = ilog2(num_pages); | ||
326 | } | ||
327 | |||
328 | /*TBD: make channel vm size configurable */ | ||
329 | mm->channel.size = 1ULL << NV_GMMU_VA_RANGE; | ||
330 | |||
331 | gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20)); | ||
332 | |||
333 | gk20a_dbg_info("small page-size (%dKB) pte array: %dKB", | ||
334 | gmmu_page_sizes[gmmu_page_size_small] >> 10, | ||
335 | (mm->page_table_sizing[gmmu_page_size_small].num_ptes * | ||
336 | gmmu_pte__size_v()) >> 10); | ||
337 | |||
338 | gk20a_dbg_info("big page-size (%dKB) pte array: %dKB", | ||
339 | gmmu_page_sizes[gmmu_page_size_big] >> 10, | ||
340 | (mm->page_table_sizing[gmmu_page_size_big].num_ptes * | ||
341 | gmmu_pte__size_v()) >> 10); | ||
342 | |||
343 | |||
344 | gk20a_init_bar1_vm(mm); | ||
345 | |||
346 | mm->remove_support = gk20a_remove_mm_support; | ||
347 | mm->sw_ready = true; | ||
348 | |||
349 | gk20a_dbg_fn("done"); | ||
350 | return 0; | ||
351 | } | ||
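As a cross-check of the sizing loop above: with the 128 KB big-page size, pde_stride is 128 KB << 10 = 128 MB, and assuming 8-byte PTEs (a later comment notes "ptes are 8B regardless of pagesize"; gmmu_pte__size_v() is defined elsewhere), the per-PDE page tables come out as below. A standalone sketch of the arithmetic, not driver code:

```c
#include <stdio.h>

#define SZ_4K     (4 * 1024)
#define SZ_128K   (128 * 1024)
#define PAGE_SIZE 4096
#define PTE_SIZE  8              /* assumed gmmu_pte__size_v() */

int main(void)
{
	unsigned long pde_stride = (unsigned long)SZ_128K << 10;  /* 128 MB per PDE */
	unsigned long page_sizes[] = { SZ_4K, SZ_128K };
	int i;

	for (i = 0; i < 2; i++) {
		unsigned long num_ptes  = pde_stride / page_sizes[i];
		unsigned long pte_space = num_ptes * PTE_SIZE;
		/* round up to whole pages, as the driver does */
		pte_space = (pte_space + PAGE_SIZE - 1) / PAGE_SIZE * PAGE_SIZE;
		unsigned long num_pages = pte_space / PAGE_SIZE;

		/* order = ilog2(num_pages); GCC builtin used for brevity */
		printf("%3luKB pages: %lu PTEs, %luKB of PTEs, order %d\n",
		       page_sizes[i] >> 10, num_ptes, pte_space >> 10,
		       __builtin_ctzl(num_pages));
	}
	return 0;
}
/* Expected:   4KB pages -> 32768 PTEs, 256KB, order 6
 *           128KB pages ->  1024 PTEs,   8KB, order 1 */
```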
352 | |||
353 | /* make sure gk20a_init_mm_support is called before */ | ||
354 | static int gk20a_init_mm_setup_hw(struct gk20a *g) | ||
355 | { | ||
356 | struct mm_gk20a *mm = &g->mm; | ||
357 | struct inst_desc *inst_block = &mm->bar1.inst_block; | ||
358 | phys_addr_t inst_pa = inst_block->cpu_pa; | ||
359 | |||
360 | gk20a_dbg_fn(""); | ||
361 | |||
362 | /* set large page size in fb | ||
363 | * note this is very early on, can we defer it ? */ | ||
364 | { | ||
365 | u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r()); | ||
366 | |||
367 | if (gmmu_page_sizes[gmmu_page_size_big] == SZ_128K) | ||
368 | fb_mmu_ctrl = (fb_mmu_ctrl & | ||
369 | ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) | | ||
370 | fb_mmu_ctrl_vm_pg_size_128kb_f(); | ||
371 | else | ||
372 | BUG_ON(1); /* no support/testing for larger ones yet */ | ||
373 | |||
374 | gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl); | ||
375 | } | ||
376 | |||
377 | inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a()); | ||
378 | gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa); | ||
379 | |||
380 | /* this is very early in init... can we defer this? */ | ||
381 | { | ||
382 | gk20a_writel(g, bus_bar1_block_r(), | ||
383 | bus_bar1_block_target_vid_mem_f() | | ||
384 | bus_bar1_block_mode_virtual_f() | | ||
385 | bus_bar1_block_ptr_f(inst_pa)); | ||
386 | } | ||
387 | |||
388 | gk20a_dbg_fn("done"); | ||
389 | return 0; | ||
390 | } | ||
391 | |||
392 | int gk20a_init_mm_support(struct gk20a *g) | ||
393 | { | ||
394 | u32 err; | ||
395 | |||
396 | err = gk20a_init_mm_reset_enable_hw(g); | ||
397 | if (err) | ||
398 | return err; | ||
399 | |||
400 | err = gk20a_init_mm_setup_sw(g); | ||
401 | if (err) | ||
402 | return err; | ||
403 | |||
404 | err = gk20a_init_mm_setup_hw(g); | ||
405 | if (err) | ||
406 | return err; | ||
407 | |||
408 | return err; | ||
409 | } | ||
410 | |||
411 | #ifdef CONFIG_GK20A_PHYS_PAGE_TABLES | ||
412 | static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order, | ||
413 | void **handle, | ||
414 | struct sg_table **sgt, | ||
415 | size_t *size) | ||
416 | { | ||
417 | u32 num_pages = 1 << order; | ||
418 | u32 len = num_pages * PAGE_SIZE; | ||
419 | int err; | ||
420 | struct page *pages; | ||
421 | |||
422 | gk20a_dbg_fn(""); | ||
423 | |||
424 | pages = alloc_pages(GFP_KERNEL, order); | ||
425 | if (!pages) { | ||
426 | gk20a_dbg(gpu_dbg_pte, "alloc_pages failed\n"); | ||
427 | goto err_out; | ||
428 | } | ||
429 | *sgt = kzalloc(sizeof(**sgt), GFP_KERNEL); | ||
430 | if (!(*sgt)) { | ||
431 | gk20a_dbg(gpu_dbg_pte, "cannot allocate sg table"); | ||
432 | goto err_alloced; | ||
433 | } | ||
434 | err = sg_alloc_table(*sgt, 1, GFP_KERNEL); | ||
435 | if (err) { | ||
436 | gk20a_dbg(gpu_dbg_pte, "sg_alloc_table failed\n"); | ||
437 | goto err_sg_table; | ||
438 | } | ||
439 | sg_set_page((*sgt)->sgl, pages, len, 0); | ||
440 | *handle = page_address(pages); | ||
441 | memset(*handle, 0, len); | ||
442 | *size = len; | ||
443 | FLUSH_CPU_DCACHE(*handle, sg_phys((*sgt)->sgl), len); | ||
444 | |||
445 | return 0; | ||
446 | |||
447 | err_sg_table: | ||
448 | kfree(*sgt); | ||
449 | err_alloced: | ||
450 | __free_pages(pages, order); | ||
451 | err_out: | ||
452 | return -ENOMEM; | ||
453 | } | ||
454 | |||
455 | static void free_gmmu_pages(struct vm_gk20a *vm, void *handle, | ||
456 | struct sg_table *sgt, u32 order, | ||
457 | size_t size) | ||
458 | { | ||
459 | gk20a_dbg_fn(""); | ||
460 | BUG_ON(sgt == NULL); | ||
461 | free_pages((unsigned long)handle, order); | ||
462 | sg_free_table(sgt); | ||
463 | kfree(sgt); | ||
464 | } | ||
465 | |||
466 | static int map_gmmu_pages(void *handle, struct sg_table *sgt, | ||
467 | void **va, size_t size) | ||
468 | { | ||
469 | FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length); | ||
470 | *va = handle; | ||
471 | return 0; | ||
472 | } | ||
473 | |||
474 | static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va) | ||
475 | { | ||
476 | FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length); | ||
477 | } | ||
478 | #else | ||
479 | static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order, | ||
480 | void **handle, | ||
481 | struct sg_table **sgt, | ||
482 | size_t *size) | ||
483 | { | ||
484 | struct device *d = dev_from_vm(vm); | ||
485 | u32 num_pages = 1 << order; | ||
486 | u32 len = num_pages * PAGE_SIZE; | ||
487 | dma_addr_t iova; | ||
488 | DEFINE_DMA_ATTRS(attrs); | ||
489 | struct page **pages; | ||
490 | int err = 0; | ||
491 | |||
492 | gk20a_dbg_fn(""); | ||
493 | |||
494 | *size = len; | ||
495 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); | ||
496 | pages = dma_alloc_attrs(d, len, &iova, GFP_KERNEL, &attrs); | ||
497 | if (!pages) { | ||
498 | gk20a_err(d, "memory allocation failed\n"); | ||
499 | goto err_out; | ||
500 | } | ||
501 | |||
502 | err = gk20a_get_sgtable_from_pages(d, sgt, pages, | ||
503 | iova, len); | ||
504 | if (err) { | ||
505 | gk20a_err(d, "sgt allocation failed\n"); | ||
506 | goto err_free; | ||
507 | } | ||
508 | |||
509 | *handle = (void *)pages; | ||
510 | |||
511 | return 0; | ||
512 | |||
513 | err_free: | ||
514 | dma_free_attrs(d, len, pages, iova, &attrs); | ||
515 | pages = NULL; | ||
516 | iova = 0; | ||
517 | err_out: | ||
518 | return -ENOMEM; | ||
519 | } | ||
520 | |||
521 | static void free_gmmu_pages(struct vm_gk20a *vm, void *handle, | ||
522 | struct sg_table *sgt, u32 order, | ||
523 | size_t size) | ||
524 | { | ||
525 | struct device *d = dev_from_vm(vm); | ||
526 | u64 iova; | ||
527 | DEFINE_DMA_ATTRS(attrs); | ||
528 | struct page **pages = (struct page **)handle; | ||
529 | |||
530 | gk20a_dbg_fn(""); | ||
531 | BUG_ON(sgt == NULL); | ||
532 | |||
533 | iova = sg_dma_address(sgt->sgl); | ||
534 | |||
535 | gk20a_free_sgtable(&sgt); | ||
536 | |||
537 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); | ||
538 | dma_free_attrs(d, size, pages, iova, &attrs); | ||
539 | pages = NULL; | ||
540 | iova = 0; | ||
541 | } | ||
542 | |||
543 | static int map_gmmu_pages(void *handle, struct sg_table *sgt, | ||
544 | void **kva, size_t size) | ||
545 | { | ||
546 | int count = PAGE_ALIGN(size) >> PAGE_SHIFT; | ||
547 | struct page **pages = (struct page **)handle; | ||
548 | gk20a_dbg_fn(""); | ||
549 | |||
550 | *kva = vmap(pages, count, 0, pgprot_dmacoherent(PAGE_KERNEL)); | ||
551 | if (!(*kva)) | ||
552 | return -ENOMEM; | ||
553 | |||
554 | return 0; | ||
555 | } | ||
556 | |||
557 | static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va) | ||
558 | { | ||
559 | gk20a_dbg_fn(""); | ||
560 | vunmap(va); | ||
561 | } | ||
562 | #endif | ||
563 | |||
564 | /* allocate a phys contig region big enough for a full | ||
565 | * sized gmmu page table for the given gmmu_page_size. | ||
566 | * the whole range is zeroed so it's "invalid"/will fault | ||
567 | */ | ||
568 | |||
569 | static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm, | ||
570 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx, | ||
571 | struct page_table_gk20a *pte) | ||
572 | { | ||
573 | int err; | ||
574 | u32 pte_order; | ||
575 | void *handle = NULL; | ||
576 | struct sg_table *sgt; | ||
577 | size_t size; | ||
578 | |||
579 | gk20a_dbg_fn(""); | ||
580 | |||
581 | /* allocate enough pages for the table */ | ||
582 | pte_order = vm->mm->page_table_sizing[gmmu_pgsz_idx].order; | ||
583 | |||
584 | err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt, &size); | ||
585 | if (err) | ||
586 | return err; | ||
587 | |||
588 | gk20a_dbg(gpu_dbg_pte, "pte = 0x%p, addr=%08llx, size %d", | ||
589 | pte, gk20a_mm_iova_addr(sgt->sgl), pte_order); | ||
590 | |||
591 | pte->ref = handle; | ||
592 | pte->sgt = sgt; | ||
593 | pte->size = size; | ||
594 | |||
595 | return 0; | ||
596 | } | ||
597 | |||
598 | /* given address range (inclusive) determine the pdes crossed */ | ||
599 | static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm, | ||
600 | u64 addr_lo, u64 addr_hi, | ||
601 | u32 *pde_lo, u32 *pde_hi) | ||
602 | { | ||
603 | *pde_lo = (u32)(addr_lo >> vm->mm->pde_stride_shift); | ||
604 | *pde_hi = (u32)(addr_hi >> vm->mm->pde_stride_shift); | ||
605 | gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d", | ||
606 | addr_lo, addr_hi, vm->mm->pde_stride_shift); | ||
607 | gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d", | ||
608 | *pde_lo, *pde_hi); | ||
609 | } | ||
610 | |||
611 | static inline u32 *pde_from_index(struct vm_gk20a *vm, u32 i) | ||
612 | { | ||
613 | return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v()); | ||
614 | } | ||
615 | |||
616 | static inline u32 pte_index_from_vaddr(struct vm_gk20a *vm, | ||
617 | u64 addr, enum gmmu_pgsz_gk20a pgsz_idx) | ||
618 | { | ||
619 | u32 ret; | ||
620 | /* mask off pde part */ | ||
621 | addr = addr & ((((u64)1) << vm->mm->pde_stride_shift) - ((u64)1)); | ||
622 | /* shift over to get pte index. note assumption that pte index | ||
623 | * doesn't leak over into the high 32b */ | ||
624 | ret = (u32)(addr >> gmmu_page_shifts[pgsz_idx]); | ||
625 | |||
626 | gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret); | ||
627 | return ret; | ||
628 | } | ||
629 | |||
630 | static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page, | ||
631 | u32 *pte_offset) | ||
632 | { | ||
633 | /* ptes are 8B regardless of pagesize */ | ||
634 | /* pte space pages are 4KB. so 512 ptes per 4KB page*/ | ||
635 | *pte_page = i >> 9; | ||
636 | |||
637 | /* this offset is a pte offset, not a byte offset */ | ||
638 | *pte_offset = i & ((1<<9)-1); | ||
639 | |||
640 | gk20a_dbg(gpu_dbg_pte, "i=0x%x pte_page=0x%x pte_offset=0x%x", | ||
641 | i, *pte_page, *pte_offset); | ||
642 | } | ||
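The helpers above decompose a GPU virtual address into PDE index, PTE index and page offset. With pde_stride_shift = 27 (ilog2 of the 128 MB stride computed in gk20a_init_mm_setup_sw()) the split looks like this; a standalone sketch using the page shifts from the tables at the top of the file:

```c
#include <stdio.h>
#include <stdint.h>

#define PDE_STRIDE_SHIFT 27   /* ilog2(128 MB), from gk20a_init_mm_setup_sw() */

static void decompose(uint64_t va, unsigned page_shift)
{
	uint32_t pde_i   = (uint32_t)(va >> PDE_STRIDE_SHIFT);
	uint64_t in_pde  = va & ((1ULL << PDE_STRIDE_SHIFT) - 1);
	uint32_t pte_i   = (uint32_t)(in_pde >> page_shift);
	uint64_t offset  = va & ((1ULL << page_shift) - 1);
	/* PTEs are 8 bytes; 512 fit in one 4 KB pte-space page */
	uint32_t pte_page = pte_i >> 9, pte_off = pte_i & 511;

	printf("va=0x%llx -> pde=%u pte=%u (page %u, slot %u) byte-offset=0x%llx\n",
	       (unsigned long long)va, pde_i, pte_i, pte_page, pte_off,
	       (unsigned long long)offset);
}

int main(void)
{
	decompose(0x12345678ULL, 12);   /* small (4 KB) page mapping  */
	decompose(0x12345678ULL, 17);   /* big (128 KB) page mapping  */
	return 0;
}
```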
643 | |||
644 | |||
645 | /* | ||
646 | * given a pde index/page table number, make sure it has | ||
647 | * backing store and, if not, allocate it and | ||
648 | * record it in the appropriate pde | ||
649 | */ | ||
650 | static int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm, | ||
651 | u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx) | ||
652 | { | ||
653 | int err; | ||
654 | struct page_table_gk20a *pte = | ||
655 | vm->pdes.ptes[gmmu_pgsz_idx] + i; | ||
656 | |||
657 | gk20a_dbg_fn(""); | ||
658 | |||
659 | /* if it's already in place it's valid */ | ||
660 | if (pte->ref) | ||
661 | return 0; | ||
662 | |||
663 | gk20a_dbg(gpu_dbg_pte, "alloc %dKB ptes for pde %d", | ||
664 | gmmu_page_sizes[gmmu_pgsz_idx]/1024, i); | ||
665 | |||
666 | err = zalloc_gmmu_page_table_gk20a(vm, gmmu_pgsz_idx, pte); | ||
667 | if (err) | ||
668 | return err; | ||
669 | |||
670 | /* rewrite pde */ | ||
671 | update_gmmu_pde_locked(vm, i); | ||
672 | |||
673 | return 0; | ||
674 | } | ||
675 | |||
676 | static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm, | ||
677 | u64 addr) | ||
678 | { | ||
679 | struct vm_reserved_va_node *va_node; | ||
680 | list_for_each_entry(va_node, &vm->reserved_va_list, reserved_va_list) | ||
681 | if (addr >= va_node->vaddr_start && | ||
682 | addr < (u64)va_node->vaddr_start + (u64)va_node->size) | ||
683 | return va_node; | ||
684 | |||
685 | return NULL; | ||
686 | } | ||
687 | |||
688 | int gk20a_vm_get_buffers(struct vm_gk20a *vm, | ||
689 | struct mapped_buffer_node ***mapped_buffers, | ||
690 | int *num_buffers) | ||
691 | { | ||
692 | struct mapped_buffer_node *mapped_buffer; | ||
693 | struct mapped_buffer_node **buffer_list; | ||
694 | struct rb_node *node; | ||
695 | int i = 0; | ||
696 | |||
697 | mutex_lock(&vm->update_gmmu_lock); | ||
698 | |||
699 | buffer_list = kzalloc(sizeof(*buffer_list) * | ||
700 | vm->num_user_mapped_buffers, GFP_KERNEL); | ||
701 | if (!buffer_list) { | ||
702 | mutex_unlock(&vm->update_gmmu_lock); | ||
703 | return -ENOMEM; | ||
704 | } | ||
705 | |||
706 | node = rb_first(&vm->mapped_buffers); | ||
707 | while (node) { | ||
708 | mapped_buffer = | ||
709 | container_of(node, struct mapped_buffer_node, node); | ||
710 | if (mapped_buffer->user_mapped) { | ||
711 | buffer_list[i] = mapped_buffer; | ||
712 | kref_get(&mapped_buffer->ref); | ||
713 | i++; | ||
714 | } | ||
715 | node = rb_next(&mapped_buffer->node); | ||
716 | } | ||
717 | |||
718 | BUG_ON(i != vm->num_user_mapped_buffers); | ||
719 | |||
720 | *num_buffers = vm->num_user_mapped_buffers; | ||
721 | *mapped_buffers = buffer_list; | ||
722 | |||
723 | mutex_unlock(&vm->update_gmmu_lock); | ||
724 | |||
725 | return 0; | ||
726 | } | ||
727 | |||
728 | static void gk20a_vm_unmap_locked_kref(struct kref *ref) | ||
729 | { | ||
730 | struct mapped_buffer_node *mapped_buffer = | ||
731 | container_of(ref, struct mapped_buffer_node, ref); | ||
732 | gk20a_vm_unmap_locked(mapped_buffer); | ||
733 | } | ||
734 | |||
735 | void gk20a_vm_put_buffers(struct vm_gk20a *vm, | ||
736 | struct mapped_buffer_node **mapped_buffers, | ||
737 | int num_buffers) | ||
738 | { | ||
739 | int i; | ||
740 | |||
741 | mutex_lock(&vm->update_gmmu_lock); | ||
742 | |||
743 | for (i = 0; i < num_buffers; ++i) | ||
744 | kref_put(&mapped_buffers[i]->ref, | ||
745 | gk20a_vm_unmap_locked_kref); | ||
746 | |||
747 | mutex_unlock(&vm->update_gmmu_lock); | ||
748 | |||
749 | kfree(mapped_buffers); | ||
750 | } | ||
751 | |||
752 | static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset) | ||
753 | { | ||
754 | struct device *d = dev_from_vm(vm); | ||
755 | int retries; | ||
756 | struct mapped_buffer_node *mapped_buffer; | ||
757 | |||
758 | mutex_lock(&vm->update_gmmu_lock); | ||
759 | |||
760 | mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset); | ||
761 | if (!mapped_buffer) { | ||
762 | mutex_unlock(&vm->update_gmmu_lock); | ||
763 | gk20a_err(d, "invalid addr to unmap 0x%llx", offset); | ||
764 | return; | ||
765 | } | ||
766 | |||
767 | if (mapped_buffer->flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { | ||
768 | mutex_unlock(&vm->update_gmmu_lock); | ||
769 | |||
770 | retries = 1000; | ||
771 | while (retries) { | ||
772 | if (atomic_read(&mapped_buffer->ref.refcount) == 1) | ||
773 | break; | ||
774 | retries--; | ||
775 | udelay(50); | ||
776 | } | ||
777 | if (!retries) | ||
778 | gk20a_err(d, "sync-unmap failed on 0x%llx", | ||
779 | offset); | ||
780 | mutex_lock(&vm->update_gmmu_lock); | ||
781 | } | ||
782 | |||
783 | mapped_buffer->user_mapped--; | ||
784 | if (mapped_buffer->user_mapped == 0) | ||
785 | vm->num_user_mapped_buffers--; | ||
786 | kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref); | ||
787 | |||
788 | mutex_unlock(&vm->update_gmmu_lock); | ||
789 | } | ||
790 | |||
791 | static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, | ||
792 | u64 size, | ||
793 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx) | ||
794 | |||
795 | { | ||
796 | struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx]; | ||
797 | int err; | ||
798 | u64 offset; | ||
799 | u32 start_page_nr = 0, num_pages; | ||
800 | u64 gmmu_page_size = gmmu_page_sizes[gmmu_pgsz_idx]; | ||
801 | |||
802 | if (gmmu_pgsz_idx >= ARRAY_SIZE(gmmu_page_sizes)) { | ||
803 | dev_warn(dev_from_vm(vm), | ||
804 | "invalid page size requested in gk20a vm alloc"); | ||
805 | return -EINVAL; | ||
806 | } | ||
807 | |||
808 | if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) { | ||
809 | dev_warn(dev_from_vm(vm), | ||
810 | "unsupportd page size requested"); | ||
811 | return -EINVAL; | ||
812 | |||
813 | } | ||
814 | |||
815 | /* be certain we round up to gmmu_page_size if needed */ | ||
816 | /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */ | ||
817 | size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1); | ||
818 | |||
819 | gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, | ||
820 | gmmu_page_sizes[gmmu_pgsz_idx]>>10); | ||
821 | |||
822 | /* The vma allocator represents page accounting. */ | ||
823 | num_pages = size >> gmmu_page_shifts[gmmu_pgsz_idx]; | ||
824 | |||
825 | err = vma->alloc(vma, &start_page_nr, num_pages); | ||
826 | |||
827 | if (err) { | ||
828 | gk20a_err(dev_from_vm(vm), | ||
829 | "%s oom: sz=0x%llx", vma->name, size); | ||
830 | return 0; | ||
831 | } | ||
832 | |||
833 | offset = (u64)start_page_nr << gmmu_page_shifts[gmmu_pgsz_idx]; | ||
834 | gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset); | ||
835 | |||
836 | return offset; | ||
837 | } | ||
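The size round-up in gk20a_vm_alloc_va() uses masking instead of DIV_ROUND_UP (the comment notes the 64-bit division would pull in __aeabi_uldivmod on 32-bit ARM); this only works because the GMMU page sizes are powers of two. A quick standalone check of the identity:

```c
#include <stdio.h>
#include <stdint.h>

/* Round size up to a power-of-two page size without dividing. */
static uint64_t round_up_pow2(uint64_t size, uint64_t pgsz)
{
	return (size + pgsz - 1) & ~(pgsz - 1);
}

int main(void)
{
	printf("%llu\n", (unsigned long long)round_up_pow2(1, 4096));        /* 4096   */
	printf("%llu\n", (unsigned long long)round_up_pow2(4096, 4096));     /* 4096   */
	printf("%llu\n", (unsigned long long)round_up_pow2(130000, 131072)); /* 131072 */
	return 0;
}
```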
838 | |||
839 | static int gk20a_vm_free_va(struct vm_gk20a *vm, | ||
840 | u64 offset, u64 size, | ||
841 | enum gmmu_pgsz_gk20a pgsz_idx) | ||
842 | { | ||
843 | struct gk20a_allocator *vma = &vm->vma[pgsz_idx]; | ||
844 | u32 page_size = gmmu_page_sizes[pgsz_idx]; | ||
845 | u32 page_shift = gmmu_page_shifts[pgsz_idx]; | ||
846 | u32 start_page_nr, num_pages; | ||
847 | int err; | ||
848 | |||
849 | gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", | ||
850 | vma->name, offset, size); | ||
851 | |||
852 | start_page_nr = (u32)(offset >> page_shift); | ||
853 | num_pages = (u32)((size + page_size - 1) >> page_shift); | ||
854 | |||
855 | err = vma->free(vma, start_page_nr, num_pages); | ||
856 | if (err) { | ||
857 | gk20a_err(dev_from_vm(vm), | ||
858 | "not found: offset=0x%llx, sz=0x%llx", | ||
859 | offset, size); | ||
860 | } | ||
861 | |||
862 | return err; | ||
863 | } | ||
864 | |||
865 | static int insert_mapped_buffer(struct rb_root *root, | ||
866 | struct mapped_buffer_node *mapped_buffer) | ||
867 | { | ||
868 | struct rb_node **new_node = &(root->rb_node), *parent = NULL; | ||
869 | |||
870 | /* Figure out where to put new node */ | ||
871 | while (*new_node) { | ||
872 | struct mapped_buffer_node *cmp_with = | ||
873 | container_of(*new_node, struct mapped_buffer_node, | ||
874 | node); | ||
875 | |||
876 | parent = *new_node; | ||
877 | |||
878 | if (cmp_with->addr > mapped_buffer->addr) /* u64 cmp */ | ||
879 | new_node = &((*new_node)->rb_left); | ||
880 | else if (cmp_with->addr != mapped_buffer->addr) /* u64 cmp */ | ||
881 | new_node = &((*new_node)->rb_right); | ||
882 | else | ||
883 | return -EINVAL; /* no fair dup'ing */ | ||
884 | } | ||
885 | |||
886 | /* Add new node and rebalance tree. */ | ||
887 | rb_link_node(&mapped_buffer->node, parent, new_node); | ||
888 | rb_insert_color(&mapped_buffer->node, root); | ||
889 | |||
890 | return 0; | ||
891 | } | ||
892 | |||
893 | static struct mapped_buffer_node *find_mapped_buffer_reverse_locked( | ||
894 | struct rb_root *root, struct dma_buf *dmabuf, | ||
895 | u32 kind) | ||
896 | { | ||
897 | struct rb_node *node = rb_first(root); | ||
898 | while (node) { | ||
899 | struct mapped_buffer_node *mapped_buffer = | ||
900 | container_of(node, struct mapped_buffer_node, node); | ||
901 | if (mapped_buffer->dmabuf == dmabuf && | ||
902 | kind == mapped_buffer->kind) | ||
903 | return mapped_buffer; | ||
904 | node = rb_next(&mapped_buffer->node); | ||
905 | } | ||
906 | return 0; | ||
907 | } | ||
908 | |||
909 | static struct mapped_buffer_node *find_mapped_buffer_locked( | ||
910 | struct rb_root *root, u64 addr) | ||
911 | { | ||
912 | |||
913 | struct rb_node *node = root->rb_node; | ||
914 | while (node) { | ||
915 | struct mapped_buffer_node *mapped_buffer = | ||
916 | container_of(node, struct mapped_buffer_node, node); | ||
917 | if (mapped_buffer->addr > addr) /* u64 cmp */ | ||
918 | node = node->rb_left; | ||
919 | else if (mapped_buffer->addr != addr) /* u64 cmp */ | ||
920 | node = node->rb_right; | ||
921 | else | ||
922 | return mapped_buffer; | ||
923 | } | ||
924 | return 0; | ||
925 | } | ||
926 | |||
927 | static struct mapped_buffer_node *find_mapped_buffer_range_locked( | ||
928 | struct rb_root *root, u64 addr) | ||
929 | { | ||
930 | struct rb_node *node = root->rb_node; | ||
931 | while (node) { | ||
932 | struct mapped_buffer_node *m = | ||
933 | container_of(node, struct mapped_buffer_node, node); | ||
934 | if (m->addr <= addr && m->addr + m->size > addr) | ||
935 | return m; | ||
936 | else if (m->addr > addr) /* u64 cmp */ | ||
937 | node = node->rb_left; | ||
938 | else | ||
939 | node = node->rb_right; | ||
940 | } | ||
941 | return 0; | ||
942 | } | ||
943 | |||
944 | #define BFR_ATTRS (sizeof(nvmap_bfr_param)/sizeof(nvmap_bfr_param[0])) | ||
945 | |||
946 | struct buffer_attrs { | ||
947 | struct sg_table *sgt; | ||
948 | u64 size; | ||
949 | u64 align; | ||
950 | u32 ctag_offset; | ||
951 | u32 ctag_lines; | ||
952 | int pgsz_idx; | ||
953 | u8 kind_v; | ||
954 | u8 uc_kind_v; | ||
955 | }; | ||
956 | |||
957 | static void gmmu_select_page_size(struct buffer_attrs *bfr) | ||
958 | { | ||
959 | int i; | ||
960 | /* choose the biggest first (top->bottom) */ | ||
961 | for (i = (gmmu_nr_page_sizes-1); i >= 0; i--) | ||
962 | if (!(gmmu_page_offset_masks[i] & bfr->align)) { | ||
963 | /* would like to add this too but nvmap returns the | ||
964 | * original requested size not the allocated size. | ||
965 | * (!(gmmu_page_offset_masks[i] & bfr->size)) */ | ||
966 | bfr->pgsz_idx = i; | ||
967 | break; | ||
968 | } | ||
969 | } | ||
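gmmu_select_page_size() walks the sizes from largest to smallest and picks the first one whose offset mask does not intersect the buffer's alignment (gk20a_vm_map() sets bfr.align from the lowest set bit of the DMA address). A standalone sketch of the same decision, using the same masks:

```c
#include <stdio.h>
#include <stdint.h>

static const uint64_t page_offset_masks[] = { 0xfffULL, 0x1ffffULL }; /* 4K, 128K */
static const char *names[] = { "4KB", "128KB" };

/* Return index of the biggest page size compatible with this alignment. */
static int select_page_size(uint64_t dma_addr)
{
	/* alignment = lowest set bit, as with __ffs(); assumes dma_addr != 0 */
	uint64_t align = 1ULL << __builtin_ctzll(dma_addr);
	int i;

	for (i = 1; i >= 0; i--)               /* biggest first */
		if (!(page_offset_masks[i] & align))
			return i;
	return -1;                              /* alignment below 4 KB */
}

int main(void)
{
	int a = select_page_size(0x80020000ULL);   /* 128 KB aligned   */
	int b = select_page_size(0x80021000ULL);   /* only 4 KB aligned */

	printf("0x80020000 -> %s\n", a >= 0 ? names[a] : "unaligned");
	printf("0x80021000 -> %s\n", b >= 0 ? names[b] : "unaligned");
	return 0;
}
```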
970 | |||
971 | static int setup_buffer_kind_and_compression(struct device *d, | ||
972 | u32 flags, | ||
973 | struct buffer_attrs *bfr, | ||
974 | enum gmmu_pgsz_gk20a pgsz_idx) | ||
975 | { | ||
976 | bool kind_compressible; | ||
977 | |||
978 | if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v())) | ||
979 | bfr->kind_v = gmmu_pte_kind_pitch_v(); | ||
980 | |||
981 | if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) { | ||
982 | gk20a_err(d, "kind 0x%x not supported", bfr->kind_v); | ||
983 | return -EINVAL; | ||
984 | } | ||
985 | |||
986 | bfr->uc_kind_v = gmmu_pte_kind_invalid_v(); | ||
987 | /* find a suitable uncompressed kind if it becomes necessary later */ | ||
988 | kind_compressible = gk20a_kind_is_compressible(bfr->kind_v); | ||
989 | if (kind_compressible) { | ||
990 | bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v); | ||
991 | if (unlikely(bfr->uc_kind_v == gmmu_pte_kind_invalid_v())) { | ||
992 | /* shouldn't happen, but it is worth cross-checking */ | ||
993 | gk20a_err(d, "comptag kind 0x%x can't be" | ||
994 | " downgraded to uncompressed kind", | ||
995 | bfr->kind_v); | ||
996 | return -EINVAL; | ||
997 | } | ||
998 | } | ||
999 | /* comptags only supported for suitable kinds, 128KB pagesize */ | ||
1000 | if (unlikely(kind_compressible && | ||
1001 | (gmmu_page_sizes[pgsz_idx] != 128*1024))) { | ||
1002 | /* | ||
1003 | gk20a_warn(d, "comptags specified" | ||
1004 | " but pagesize being used doesn't support it");*/ | ||
1005 | /* it is safe to fall back to uncompressed as | ||
1006 | functionality is not harmed */ | ||
1007 | bfr->kind_v = bfr->uc_kind_v; | ||
1008 | kind_compressible = false; | ||
1009 | } | ||
1010 | if (kind_compressible) | ||
1011 | bfr->ctag_lines = ALIGN(bfr->size, COMP_TAG_LINE_SIZE) >> | ||
1012 | COMP_TAG_LINE_SIZE_SHIFT; | ||
1013 | else | ||
1014 | bfr->ctag_lines = 0; | ||
1015 | |||
1016 | return 0; | ||
1017 | } | ||
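The ctag_lines value sizes the comptag allocation at one tag line per COMP_TAG_LINE_SIZE of buffer, rounded up. Those constants live elsewhere; assuming one line covers 128 KB (consistent with the check above that comptags require the 128 KB page size, but an assumption nonetheless), the estimate works out as in this sketch:

```c
#include <stdio.h>
#include <stdint.h>

#define COMP_TAG_LINE_SIZE_SHIFT 17                      /* assumed: 128 KB per line */
#define COMP_TAG_LINE_SIZE       (1U << COMP_TAG_LINE_SIZE_SHIFT)

static uint32_t ctag_lines(uint64_t size)
{
	/* ALIGN(size, line) >> shift, as in setup_buffer_kind_and_compression() */
	return (uint32_t)(((size + COMP_TAG_LINE_SIZE - 1) &
			   ~(uint64_t)(COMP_TAG_LINE_SIZE - 1)) >> COMP_TAG_LINE_SIZE_SHIFT);
}

int main(void)
{
	printf("%u\n", ctag_lines(128 * 1024));            /* 1 */
	printf("%u\n", ctag_lines(1 * 1024 * 1024));       /* 8 */
	printf("%u\n", ctag_lines(1 * 1024 * 1024 + 1));   /* 9 */
	return 0;
}
```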
1018 | |||
1019 | static int validate_fixed_buffer(struct vm_gk20a *vm, | ||
1020 | struct buffer_attrs *bfr, | ||
1021 | u64 map_offset) | ||
1022 | { | ||
1023 | struct device *dev = dev_from_vm(vm); | ||
1024 | struct vm_reserved_va_node *va_node; | ||
1025 | struct mapped_buffer_node *buffer; | ||
1026 | |||
1027 | if (map_offset & gmmu_page_offset_masks[bfr->pgsz_idx]) { | ||
1028 | gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx", | ||
1029 | map_offset); | ||
1030 | return -EINVAL; | ||
1031 | } | ||
1032 | |||
1033 | /* find the space reservation */ | ||
1034 | va_node = addr_to_reservation(vm, map_offset); | ||
1035 | if (!va_node) { | ||
1036 | gk20a_warn(dev, "fixed offset mapping without space allocation"); | ||
1037 | return -EINVAL; | ||
1038 | } | ||
1039 | |||
1040 | /* check that this mapping does not collide with existing | ||
1041 | * mappings by checking the overlapping area between the current | ||
1042 | * buffer and all other mapped buffers */ | ||
1043 | |||
1044 | list_for_each_entry(buffer, | ||
1045 | &va_node->va_buffers_list, va_buffers_list) { | ||
1046 | s64 begin = max(buffer->addr, map_offset); | ||
1047 | s64 end = min(buffer->addr + | ||
1048 | buffer->size, map_offset + bfr->size); | ||
1049 | if (end - begin > 0) { | ||
1050 | gk20a_warn(dev, "overlapping buffer map requested"); | ||
1051 | return -EINVAL; | ||
1052 | } | ||
1053 | } | ||
1054 | |||
1055 | return 0; | ||
1056 | } | ||
1057 | |||
1058 | static u64 __locked_gmmu_map(struct vm_gk20a *vm, | ||
1059 | u64 map_offset, | ||
1060 | struct sg_table *sgt, | ||
1061 | u64 size, | ||
1062 | int pgsz_idx, | ||
1063 | u8 kind_v, | ||
1064 | u32 ctag_offset, | ||
1065 | u32 flags, | ||
1066 | int rw_flag) | ||
1067 | { | ||
1068 | int err = 0, i = 0; | ||
1069 | u32 pde_lo, pde_hi; | ||
1070 | struct device *d = dev_from_vm(vm); | ||
1071 | |||
1072 | /* Allocate (or validate when map_offset != 0) the virtual address. */ | ||
1073 | if (!map_offset) { | ||
1074 | map_offset = gk20a_vm_alloc_va(vm, size, | ||
1075 | pgsz_idx); | ||
1076 | if (!map_offset) { | ||
1077 | gk20a_err(d, "failed to allocate va space"); | ||
1078 | err = -ENOMEM; | ||
1079 | goto fail; | ||
1080 | } | ||
1081 | } | ||
1082 | |||
1083 | pde_range_from_vaddr_range(vm, | ||
1084 | map_offset, | ||
1085 | map_offset + size - 1, | ||
1086 | &pde_lo, &pde_hi); | ||
1087 | |||
1088 | /* mark the addr range valid (but with 0 phys addr, which will fault) */ | ||
1089 | for (i = pde_lo; i <= pde_hi; i++) { | ||
1090 | err = validate_gmmu_page_table_gk20a_locked(vm, i, | ||
1091 | pgsz_idx); | ||
1092 | if (err) { | ||
1093 | gk20a_err(d, "failed to validate page table %d: %d", | ||
1094 | i, err); | ||
1095 | goto fail; | ||
1096 | } | ||
1097 | } | ||
1098 | |||
1099 | err = update_gmmu_ptes_locked(vm, pgsz_idx, | ||
1100 | sgt, | ||
1101 | map_offset, map_offset + size - 1, | ||
1102 | kind_v, | ||
1103 | ctag_offset, | ||
1104 | flags & | ||
1105 | NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
1106 | rw_flag); | ||
1107 | if (err) { | ||
1108 | gk20a_err(d, "failed to update ptes on map"); | ||
1109 | goto fail; | ||
1110 | } | ||
1111 | |||
1112 | return map_offset; | ||
1113 | fail: | ||
1114 | gk20a_err(d, "%s: failed with err=%d\n", __func__, err); | ||
1115 | return 0; | ||
1116 | } | ||
1117 | |||
1118 | static void __locked_gmmu_unmap(struct vm_gk20a *vm, | ||
1119 | u64 vaddr, | ||
1120 | u64 size, | ||
1121 | int pgsz_idx, | ||
1122 | bool va_allocated, | ||
1123 | int rw_flag) | ||
1124 | { | ||
1125 | int err = 0; | ||
1126 | struct gk20a *g = gk20a_from_vm(vm); | ||
1127 | |||
1128 | if (va_allocated) { | ||
1129 | err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx); | ||
1130 | if (err) { | ||
1131 | dev_err(dev_from_vm(vm), | ||
1132 | "failed to free va"); | ||
1133 | return; | ||
1134 | } | ||
1135 | } | ||
1136 | |||
1137 | /* unmap here needs to know the page size we assigned at mapping */ | ||
1138 | err = update_gmmu_ptes_locked(vm, | ||
1139 | pgsz_idx, | ||
1140 | 0, /* n/a for unmap */ | ||
1141 | vaddr, | ||
1142 | vaddr + size - 1, | ||
1143 | 0, 0, false /* n/a for unmap */, | ||
1144 | rw_flag); | ||
1145 | if (err) | ||
1146 | dev_err(dev_from_vm(vm), | ||
1147 | "failed to update gmmu ptes on unmap"); | ||
1148 | |||
1149 | /* detect which if any pdes/ptes can now be released */ | ||
1150 | |||
1151 | /* flush l2 so any dirty lines are written out *now*. | ||
1152 | * also as we could potentially be switching this buffer | ||
1153 | * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at | ||
1154 | * some point in the future we need to invalidate l2. e.g. switching | ||
1155 | * from a render buffer unmap (here) to later using the same memory | ||
1156 | * for gmmu ptes. note the positioning of this relative to any smmu | ||
1157 | * unmapping (below). */ | ||
1158 | |||
1159 | gk20a_mm_l2_flush(g, true); | ||
1160 | } | ||
1161 | |||
1162 | static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm, | ||
1163 | struct dma_buf *dmabuf, | ||
1164 | u64 offset_align, | ||
1165 | u32 flags, | ||
1166 | int kind, | ||
1167 | struct sg_table **sgt, | ||
1168 | bool user_mapped, | ||
1169 | int rw_flag) | ||
1170 | { | ||
1171 | struct mapped_buffer_node *mapped_buffer = 0; | ||
1172 | |||
1173 | mapped_buffer = | ||
1174 | find_mapped_buffer_reverse_locked(&vm->mapped_buffers, | ||
1175 | dmabuf, kind); | ||
1176 | if (!mapped_buffer) | ||
1177 | return 0; | ||
1178 | |||
1179 | if (mapped_buffer->flags != flags) | ||
1180 | return 0; | ||
1181 | |||
1182 | if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET && | ||
1183 | mapped_buffer->addr != offset_align) | ||
1184 | return 0; | ||
1185 | |||
1186 | BUG_ON(mapped_buffer->vm != vm); | ||
1187 | |||
1188 | /* mark the buffer as used */ | ||
1189 | if (user_mapped) { | ||
1190 | if (mapped_buffer->user_mapped == 0) | ||
1191 | vm->num_user_mapped_buffers++; | ||
1192 | mapped_buffer->user_mapped++; | ||
1193 | |||
1194 | /* If the mapping comes from user space, we own | ||
1195 | * the handle ref. Since we reuse an | ||
1196 | * existing mapping here, we need to give back those | ||
1197 | * refs once in order not to leak. | ||
1198 | */ | ||
1199 | if (mapped_buffer->own_mem_ref) | ||
1200 | dma_buf_put(mapped_buffer->dmabuf); | ||
1201 | else | ||
1202 | mapped_buffer->own_mem_ref = true; | ||
1203 | } | ||
1204 | kref_get(&mapped_buffer->ref); | ||
1205 | |||
1206 | gk20a_dbg(gpu_dbg_map, | ||
1207 | "reusing as=%d pgsz=%d flags=0x%x ctags=%d " | ||
1208 | "start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x " | ||
1209 | "own_mem_ref=%d user_mapped=%d", | ||
1210 | vm_aspace_id(vm), mapped_buffer->pgsz_idx, | ||
1211 | mapped_buffer->flags, | ||
1212 | mapped_buffer->ctag_lines, | ||
1213 | mapped_buffer->ctag_offset, | ||
1214 | hi32(mapped_buffer->addr), lo32(mapped_buffer->addr), | ||
1215 | hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)), | ||
1216 | lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)), | ||
1217 | hi32((u64)sg_phys(mapped_buffer->sgt->sgl)), | ||
1218 | lo32((u64)sg_phys(mapped_buffer->sgt->sgl)), | ||
1219 | mapped_buffer->own_mem_ref, user_mapped); | ||
1220 | |||
1221 | if (sgt) | ||
1222 | *sgt = mapped_buffer->sgt; | ||
1223 | return mapped_buffer->addr; | ||
1224 | } | ||
1225 | |||
1226 | u64 gk20a_vm_map(struct vm_gk20a *vm, | ||
1227 | struct dma_buf *dmabuf, | ||
1228 | u64 offset_align, | ||
1229 | u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/, | ||
1230 | int kind, | ||
1231 | struct sg_table **sgt, | ||
1232 | bool user_mapped, | ||
1233 | int rw_flag) | ||
1234 | { | ||
1235 | struct gk20a *g = gk20a_from_vm(vm); | ||
1236 | struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags; | ||
1237 | struct device *d = dev_from_vm(vm); | ||
1238 | struct mapped_buffer_node *mapped_buffer = 0; | ||
1239 | bool inserted = false, va_allocated = false; | ||
1240 | u32 gmmu_page_size = 0; | ||
1241 | u64 map_offset = 0; | ||
1242 | int err = 0; | ||
1243 | struct buffer_attrs bfr = {0}; | ||
1244 | struct gk20a_comptags comptags; | ||
1245 | |||
1246 | mutex_lock(&vm->update_gmmu_lock); | ||
1247 | |||
1248 | /* check if this buffer is already mapped */ | ||
1249 | map_offset = gk20a_vm_map_duplicate_locked(vm, dmabuf, offset_align, | ||
1250 | flags, kind, sgt, | ||
1251 | user_mapped, rw_flag); | ||
1252 | if (map_offset) { | ||
1253 | mutex_unlock(&vm->update_gmmu_lock); | ||
1254 | return map_offset; | ||
1255 | } | ||
1256 | |||
1257 | /* pin buffer to get phys/iovmm addr */ | ||
1258 | bfr.sgt = gk20a_mm_pin(d, dmabuf); | ||
1259 | if (IS_ERR(bfr.sgt)) { | ||
1260 | /* Falling back to physical is actually possible | ||
1261 | * here in many cases if we use 4K phys pages in the | ||
1262 | * gmmu. However we have some regions which require | ||
1263 | * contig regions to work properly (either phys-contig | ||
1264 | * or contig through smmu io_vaspace). Until we can | ||
1265 | * track the difference between those two cases we have | ||
1266 | * to fail the mapping when we run out of SMMU space. | ||
1267 | */ | ||
1268 | gk20a_warn(d, "oom allocating tracking buffer"); | ||
1269 | goto clean_up; | ||
1270 | } | ||
1271 | |||
1272 | if (sgt) | ||
1273 | *sgt = bfr.sgt; | ||
1274 | |||
1275 | bfr.kind_v = kind; | ||
1276 | bfr.size = dmabuf->size; | ||
1277 | bfr.align = 1 << __ffs((u64)sg_dma_address(bfr.sgt->sgl)); | ||
1278 | bfr.pgsz_idx = -1; | ||
1279 | |||
1280 | /* If FIX_OFFSET is set, pgsz is determined. Otherwise, select | ||
1281 | * page size according to memory alignment */ | ||
1282 | if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { | ||
1283 | bfr.pgsz_idx = NV_GMMU_VA_IS_UPPER(offset_align) ? | ||
1284 | gmmu_page_size_big : gmmu_page_size_small; | ||
1285 | } else { | ||
1286 | gmmu_select_page_size(&bfr); | ||
1287 | } | ||
1288 | |||
1289 | /* validate/adjust bfr attributes */ | ||
1290 | if (unlikely(bfr.pgsz_idx == -1)) { | ||
1291 | gk20a_err(d, "unsupported page size detected"); | ||
1292 | goto clean_up; | ||
1293 | } | ||
1294 | |||
1295 | if (unlikely(bfr.pgsz_idx < gmmu_page_size_small || | ||
1296 | bfr.pgsz_idx > gmmu_page_size_big)) { | ||
1297 | BUG_ON(1); | ||
1298 | err = -EINVAL; | ||
1299 | goto clean_up; | ||
1300 | } | ||
1301 | gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx]; | ||
1302 | |||
1303 | /* Check if we should use a fixed offset for mapping this buffer */ | ||
1304 | if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { | ||
1305 | err = validate_fixed_buffer(vm, &bfr, offset_align); | ||
1306 | if (err) | ||
1307 | goto clean_up; | ||
1308 | |||
1309 | map_offset = offset_align; | ||
1310 | va_allocated = false; | ||
1311 | } else | ||
1312 | va_allocated = true; | ||
1313 | |||
1314 | if (sgt) | ||
1315 | *sgt = bfr.sgt; | ||
1316 | |||
1317 | err = setup_buffer_kind_and_compression(d, flags, &bfr, bfr.pgsz_idx); | ||
1318 | if (unlikely(err)) { | ||
1319 | gk20a_err(d, "failure setting up kind and compression"); | ||
1320 | goto clean_up; | ||
1321 | } | ||
1322 | |||
1323 | /* bar1 and pmu vm don't need ctag */ | ||
1324 | if (!vm->enable_ctag) | ||
1325 | bfr.ctag_lines = 0; | ||
1326 | |||
1327 | gk20a_get_comptags(d, dmabuf, &comptags); | ||
1328 | |||
1329 | if (bfr.ctag_lines && !comptags.lines) { | ||
1330 | /* allocate compression resources if needed */ | ||
1331 | err = gk20a_alloc_comptags(d, dmabuf, ctag_allocator, | ||
1332 | bfr.ctag_lines); | ||
1333 | if (err) { | ||
1334 | /* ok to fall back here if we ran out */ | ||
1335 | /* TBD: we can partially alloc ctags as well... */ | ||
1336 | bfr.ctag_lines = bfr.ctag_offset = 0; | ||
1337 | bfr.kind_v = bfr.uc_kind_v; | ||
1338 | } else { | ||
1339 | gk20a_get_comptags(d, dmabuf, &comptags); | ||
1340 | |||
1341 | /* init/clear the ctag buffer */ | ||
1342 | g->ops.ltc.clear_comptags(g, | ||
1343 | comptags.offset, | ||
1344 | comptags.offset + comptags.lines - 1); | ||
1345 | } | ||
1346 | } | ||
1347 | |||
1348 | /* store the comptag info */ | ||
1349 | bfr.ctag_offset = comptags.offset; | ||
1350 | |||
1351 | /* update gmmu ptes */ | ||
1352 | map_offset = __locked_gmmu_map(vm, map_offset, | ||
1353 | bfr.sgt, | ||
1354 | bfr.size, | ||
1355 | bfr.pgsz_idx, | ||
1356 | bfr.kind_v, | ||
1357 | bfr.ctag_offset, | ||
1358 | flags, rw_flag); | ||
1359 | if (!map_offset) | ||
1360 | goto clean_up; | ||
1361 | |||
1362 | gk20a_dbg(gpu_dbg_map, | ||
1363 | "as=%d pgsz=%d " | ||
1364 | "kind=0x%x kind_uc=0x%x flags=0x%x " | ||
1365 | "ctags=%d start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x", | ||
1366 | vm_aspace_id(vm), gmmu_page_size, | ||
1367 | bfr.kind_v, bfr.uc_kind_v, flags, | ||
1368 | bfr.ctag_lines, bfr.ctag_offset, | ||
1369 | hi32(map_offset), lo32(map_offset), | ||
1370 | hi32((u64)sg_dma_address(bfr.sgt->sgl)), | ||
1371 | lo32((u64)sg_dma_address(bfr.sgt->sgl)), | ||
1372 | hi32((u64)sg_phys(bfr.sgt->sgl)), | ||
1373 | lo32((u64)sg_phys(bfr.sgt->sgl))); | ||
1374 | |||
1375 | #if defined(NVHOST_DEBUG) | ||
1376 | { | ||
1377 | int i; | ||
1378 | struct scatterlist *sg = NULL; | ||
1379 | gk20a_dbg(gpu_dbg_pte, "for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i)"); | ||
1380 | for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i ) { | ||
1381 | u64 da = sg_dma_address(sg); | ||
1382 | u64 pa = sg_phys(sg); | ||
1383 | u64 len = sg->length; | ||
1384 | gk20a_dbg(gpu_dbg_pte, "i=%d pa=0x%x,%08x da=0x%x,%08x len=0x%x,%08x", | ||
1385 | i, hi32(pa), lo32(pa), hi32(da), lo32(da), | ||
1386 | hi32(len), lo32(len)); | ||
1387 | } | ||
1388 | } | ||
1389 | #endif | ||
1390 | |||
1391 | /* keep track of the buffer for unmapping */ | ||
1392 | /* TBD: check for multiple mapping of same buffer */ | ||
1393 | mapped_buffer = kzalloc(sizeof(*mapped_buffer), GFP_KERNEL); | ||
1394 | if (!mapped_buffer) { | ||
1395 | gk20a_warn(d, "oom allocating tracking buffer"); | ||
1396 | goto clean_up; | ||
1397 | } | ||
1398 | mapped_buffer->dmabuf = dmabuf; | ||
1399 | mapped_buffer->sgt = bfr.sgt; | ||
1400 | mapped_buffer->addr = map_offset; | ||
1401 | mapped_buffer->size = bfr.size; | ||
1402 | mapped_buffer->pgsz_idx = bfr.pgsz_idx; | ||
1403 | mapped_buffer->ctag_offset = bfr.ctag_offset; | ||
1404 | mapped_buffer->ctag_lines = bfr.ctag_lines; | ||
1405 | mapped_buffer->vm = vm; | ||
1406 | mapped_buffer->flags = flags; | ||
1407 | mapped_buffer->kind = kind; | ||
1408 | mapped_buffer->va_allocated = va_allocated; | ||
1409 | mapped_buffer->user_mapped = user_mapped ? 1 : 0; | ||
1410 | mapped_buffer->own_mem_ref = user_mapped; | ||
1411 | INIT_LIST_HEAD(&mapped_buffer->unmap_list); | ||
1412 | INIT_LIST_HEAD(&mapped_buffer->va_buffers_list); | ||
1413 | kref_init(&mapped_buffer->ref); | ||
1414 | |||
1415 | err = insert_mapped_buffer(&vm->mapped_buffers, mapped_buffer); | ||
1416 | if (err) { | ||
1417 | gk20a_err(d, "failed to insert into mapped buffer tree"); | ||
1418 | goto clean_up; | ||
1419 | } | ||
1420 | inserted = true; | ||
1421 | if (user_mapped) | ||
1422 | vm->num_user_mapped_buffers++; | ||
1423 | |||
1424 | gk20a_dbg_info("allocated va @ 0x%llx", map_offset); | ||
1425 | |||
1426 | if (!va_allocated) { | ||
1427 | struct vm_reserved_va_node *va_node; | ||
1428 | |||
1429 | /* find the space reservation */ | ||
1430 | va_node = addr_to_reservation(vm, map_offset); | ||
1431 | list_add_tail(&mapped_buffer->va_buffers_list, | ||
1432 | &va_node->va_buffers_list); | ||
1433 | mapped_buffer->va_node = va_node; | ||
1434 | } | ||
1435 | |||
1436 | mutex_unlock(&vm->update_gmmu_lock); | ||
1437 | |||
1438 | /* Invalidate kernel mappings immediately */ | ||
1439 | if (vm_aspace_id(vm) == -1) | ||
1440 | gk20a_mm_tlb_invalidate(vm); | ||
1441 | |||
1442 | return map_offset; | ||
1443 | |||
1444 | clean_up: | ||
1445 | if (inserted) { | ||
1446 | rb_erase(&mapped_buffer->node, &vm->mapped_buffers); | ||
1447 | if (user_mapped) | ||
1448 | vm->num_user_mapped_buffers--; | ||
1449 | } | ||
1450 | kfree(mapped_buffer); | ||
1451 | if (va_allocated) | ||
1452 | gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx); | ||
1453 | if (!IS_ERR(bfr.sgt)) | ||
1454 | gk20a_mm_unpin(d, dmabuf, bfr.sgt); | ||
1455 | |||
1456 | mutex_unlock(&vm->update_gmmu_lock); | ||
1457 | gk20a_dbg_info("err=%d\n", err); | ||
1458 | return 0; | ||
1459 | } | ||
1460 | |||
1461 | u64 gk20a_gmmu_map(struct vm_gk20a *vm, | ||
1462 | struct sg_table **sgt, | ||
1463 | u64 size, | ||
1464 | u32 flags, | ||
1465 | int rw_flag) | ||
1466 | { | ||
1467 | u64 vaddr; | ||
1468 | |||
1469 | mutex_lock(&vm->update_gmmu_lock); | ||
1470 | vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */ | ||
1471 | *sgt, /* sg table */ | ||
1472 | size, | ||
1473 | 0, /* page size index = 0 i.e. SZ_4K */ | ||
1474 | 0, /* kind */ | ||
1475 | 0, /* ctag_offset */ | ||
1476 | flags, rw_flag); | ||
1477 | mutex_unlock(&vm->update_gmmu_lock); | ||
1478 | if (!vaddr) { | ||
1479 | gk20a_err(dev_from_vm(vm), "failed to allocate va space"); | ||
1480 | return 0; | ||
1481 | } | ||
1482 | |||
1483 | /* Invalidate kernel mappings immediately */ | ||
1484 | gk20a_mm_tlb_invalidate(vm); | ||
1485 | |||
1486 | return vaddr; | ||
1487 | } | ||
1488 | |||
1489 | void gk20a_gmmu_unmap(struct vm_gk20a *vm, | ||
1490 | u64 vaddr, | ||
1491 | u64 size, | ||
1492 | int rw_flag) | ||
1493 | { | ||
1494 | mutex_lock(&vm->update_gmmu_lock); | ||
1495 | __locked_gmmu_unmap(vm, | ||
1496 | vaddr, | ||
1497 | size, | ||
1498 | 0, /* page size 4K */ | ||
1499 | true, /*va_allocated */ | ||
1500 | rw_flag); | ||
1501 | mutex_unlock(&vm->update_gmmu_lock); | ||
1502 | } | ||
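gk20a_gmmu_map()/gk20a_gmmu_unmap() are the kernel-internal path described in the life-cycle comment at the top of the file: no deferred unmap, 4 KB pages, and the caller keeps ownership of the underlying buffer. A hedged usage fragment (compiles only inside the driver tree; the example function itself, the flags value 0, and rw_flag 0 meaning "no read/write restriction" are assumptions, not taken from this patch):

```c
#include <linux/dma-mapping.h>
#include "gk20a.h"
#include "mm_gk20a.h"

static int example_map_kernel_buffer(struct gk20a *g, struct vm_gk20a *vm,
				      size_t size)
{
	struct device *d = dev_from_gk20a(g);
	struct sg_table *sgt;
	dma_addr_t iova;
	void *cpuva;
	u64 gpu_va;
	int err;

	cpuva = dma_alloc_coherent(d, size, &iova, GFP_KERNEL);
	if (!cpuva)
		return -ENOMEM;

	/* build an sg_table for the coherent buffer, as the driver does */
	err = gk20a_get_sgtable(d, &sgt, cpuva, iova, size);
	if (err)
		goto free_dma;

	/* flags = 0: plain uncached mapping; rw_flag = 0 assumed to mean
	 * "no read/write restriction" (cf. gk20a_mem_flag_* in this file). */
	gpu_va = gk20a_gmmu_map(vm, &sgt, size, 0, 0);
	if (!gpu_va) {
		err = -ENOMEM;
		goto free_sgt;
	}

	/* ... hand gpu_va to hardware, wait for it to finish using it ... */

	gk20a_gmmu_unmap(vm, gpu_va, size, 0);
free_sgt:
	gk20a_free_sgtable(&sgt);
free_dma:
	dma_free_coherent(d, size, cpuva, iova);
	return err;
}
```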
1503 | |||
1504 | phys_addr_t gk20a_get_phys_from_iova(struct device *d, | ||
1505 | u64 dma_addr) | ||
1506 | { | ||
1507 | phys_addr_t phys; | ||
1508 | u64 iova; | ||
1509 | |||
1510 | struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); | ||
1511 | if (!mapping) | ||
1512 | return dma_addr; | ||
1513 | |||
1514 | iova = dma_addr & PAGE_MASK; | ||
1515 | phys = iommu_iova_to_phys(mapping->domain, iova); | ||
1516 | return phys; | ||
1517 | } | ||
1518 | |||
1519 | /* get sg_table from already allocated buffer */ | ||
1520 | int gk20a_get_sgtable(struct device *d, struct sg_table **sgt, | ||
1521 | void *cpuva, u64 iova, | ||
1522 | size_t size) | ||
1523 | { | ||
1524 | int err = 0; | ||
1525 | *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL); | ||
1526 | if (!(*sgt)) { | ||
1527 | dev_err(d, "failed to allocate memory\n"); | ||
1528 | err = -ENOMEM; | ||
1529 | goto fail; | ||
1530 | } | ||
1531 | err = dma_get_sgtable(d, *sgt, | ||
1532 | cpuva, iova, | ||
1533 | size); | ||
1534 | if (err) { | ||
1535 | dev_err(d, "failed to create sg table\n"); | ||
1536 | goto fail; | ||
1537 | } | ||
1538 | sg_dma_address((*sgt)->sgl) = iova; | ||
1539 | |||
1540 | return 0; | ||
1541 | fail: | ||
1542 | if (*sgt) { | ||
1543 | kfree(*sgt); | ||
1544 | *sgt = NULL; | ||
1545 | } | ||
1546 | return err; | ||
1547 | } | ||
1548 | |||
1549 | int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt, | ||
1550 | struct page **pages, u64 iova, | ||
1551 | size_t size) | ||
1552 | { | ||
1553 | int err = 0; | ||
1554 | *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL); | ||
1555 | if (!(*sgt)) { | ||
1556 | dev_err(d, "failed to allocate memory\n"); | ||
1557 | err = -ENOMEM; | ||
1558 | goto fail; | ||
1559 | } | ||
1560 | err = sg_alloc_table(*sgt, 1, GFP_KERNEL); | ||
1561 | if (err) { | ||
1562 | dev_err(d, "failed to allocate sg_table\n"); | ||
1563 | goto fail; | ||
1564 | } | ||
1565 | sg_set_page((*sgt)->sgl, *pages, size, 0); | ||
1566 | sg_dma_address((*sgt)->sgl) = iova; | ||
1567 | |||
1568 | return 0; | ||
1569 | fail: | ||
1570 | if (*sgt) { | ||
1571 | kfree(*sgt); | ||
1572 | *sgt = NULL; | ||
1573 | } | ||
1574 | return err; | ||
1575 | } | ||
1576 | |||
1577 | void gk20a_free_sgtable(struct sg_table **sgt) | ||
1578 | { | ||
1579 | sg_free_table(*sgt); | ||
1580 | kfree(*sgt); | ||
1581 | *sgt = NULL; | ||
1582 | } | ||
1583 | |||
1584 | u64 gk20a_mm_iova_addr(struct scatterlist *sgl) | ||
1585 | { | ||
1586 | u64 result = sg_phys(sgl); | ||
1587 | #ifdef CONFIG_TEGRA_IOMMU_SMMU | ||
1588 | if (sg_dma_address(sgl) == DMA_ERROR_CODE) | ||
1589 | result = 0; | ||
1590 | else if (sg_dma_address(sgl)) { | ||
1591 | result = sg_dma_address(sgl) | | ||
1592 | 1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT; | ||
1593 | } | ||
1594 | #endif | ||
1595 | return result; | ||
1596 | } | ||
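For SMMU-backed buffers, gk20a_mm_iova_addr() returns the SMMU IOVA with NV_MC_SMMU_VADDR_TRANSLATION_BIT set, so the resulting GMMU PTE routes through the SMMU instead of pointing at raw physical memory. The bit position is defined elsewhere; bit 34 is assumed in this tiny standalone illustration:

```c
#include <stdio.h>
#include <stdint.h>

#define NV_MC_SMMU_VADDR_TRANSLATION_BIT 34   /* assumed value */

int main(void)
{
	uint64_t smmu_iova = 0x80020000ULL;               /* SMMU-mapped DMA addr */
	uint64_t gmmu_addr = smmu_iova |
			     (1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT);

	/* prints 0x480020000: same offset, routed through the SMMU */
	printf("0x%llx\n", (unsigned long long)gmmu_addr);
	return 0;
}
```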
1597 | |||
1598 | static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | ||
1599 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
1600 | struct sg_table *sgt, | ||
1601 | u64 first_vaddr, u64 last_vaddr, | ||
1602 | u8 kind_v, u32 ctag_offset, | ||
1603 | bool cacheable, | ||
1604 | int rw_flag) | ||
1605 | { | ||
1606 | int err; | ||
1607 | u32 pde_lo, pde_hi, pde_i; | ||
1608 | struct scatterlist *cur_chunk; | ||
1609 | unsigned int cur_offset; | ||
1610 | u32 pte_w[2] = {0, 0}; /* invalid pte */ | ||
1611 | u32 ctag = ctag_offset; | ||
1612 | u32 ctag_incr; | ||
1613 | u32 page_size = gmmu_page_sizes[pgsz_idx]; | ||
1614 | u64 addr = 0; | ||
1615 | |||
1616 | pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr, | ||
1617 | &pde_lo, &pde_hi); | ||
1618 | |||
1619 | gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d", | ||
1620 | pgsz_idx, pde_lo, pde_hi); | ||
1621 | |||
1622 | /* If ctag_offset != 0, add 1; else add 0. The idea is to avoid a branch | ||
1623 | * below (per-pte). Note: this only works when the page size (with | ||
1624 | * comptags active) is 128KB. We have checks elsewhere for that. */ | ||
1625 | ctag_incr = !!ctag_offset; | ||
1626 | |||
1627 | if (sgt) | ||
1628 | cur_chunk = sgt->sgl; | ||
1629 | else | ||
1630 | cur_chunk = NULL; | ||
1631 | |||
1632 | cur_offset = 0; | ||
1633 | |||
1634 | for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) { | ||
1635 | u32 pte_lo, pte_hi; | ||
1636 | u32 pte_cur; | ||
1637 | void *pte_kv_cur; | ||
1638 | |||
1639 | struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i; | ||
1640 | |||
1641 | if (pde_i == pde_lo) | ||
1642 | pte_lo = pte_index_from_vaddr(vm, first_vaddr, | ||
1643 | pgsz_idx); | ||
1644 | else | ||
1645 | pte_lo = 0; | ||
1646 | |||
1647 | if ((pde_i != pde_hi) && (pde_hi != pde_lo)) | ||
1648 | pte_hi = vm->mm->page_table_sizing[pgsz_idx].num_ptes-1; | ||
1649 | else | ||
1650 | pte_hi = pte_index_from_vaddr(vm, last_vaddr, | ||
1651 | pgsz_idx); | ||
1652 | |||
1653 | /* get cpu access to the ptes */ | ||
1654 | err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur, | ||
1655 | pte->size); | ||
1656 | if (err) { | ||
1657 | gk20a_err(dev_from_vm(vm), | ||
1658 | "couldn't map ptes for update as=%d pte_ref_cnt=%d", | ||
1659 | vm_aspace_id(vm), pte->ref_cnt); | ||
1660 | goto clean_up; | ||
1661 | } | ||
1662 | |||
1663 | gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); | ||
1664 | for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { | ||
1665 | |||
1666 | if (likely(sgt)) { | ||
1667 | u64 new_addr = gk20a_mm_iova_addr(cur_chunk); | ||
1668 | if (new_addr) { | ||
1669 | addr = new_addr; | ||
1670 | addr += cur_offset; | ||
1671 | } | ||
1672 | |||
1673 | pte_w[0] = gmmu_pte_valid_true_f() | | ||
1674 | gmmu_pte_address_sys_f(addr | ||
1675 | >> gmmu_pte_address_shift_v()); | ||
1676 | pte_w[1] = gmmu_pte_aperture_video_memory_f() | | ||
1677 | gmmu_pte_kind_f(kind_v) | | ||
1678 | gmmu_pte_comptagline_f(ctag); | ||
1679 | |||
1680 | if (rw_flag == gk20a_mem_flag_read_only) { | ||
1681 | pte_w[0] |= gmmu_pte_read_only_true_f(); | ||
1682 | pte_w[1] |= | ||
1683 | gmmu_pte_write_disable_true_f(); | ||
1684 | } else if (rw_flag == | ||
1685 | gk20a_mem_flag_write_only) { | ||
1686 | pte_w[1] |= | ||
1687 | gmmu_pte_read_disable_true_f(); | ||
1688 | } | ||
1689 | |||
1690 | if (!cacheable) | ||
1691 | pte_w[1] |= gmmu_pte_vol_true_f(); | ||
1692 | |||
1693 | pte->ref_cnt++; | ||
1694 | |||
1695 | gk20a_dbg(gpu_dbg_pte, | ||
1696 | "pte_cur=%d addr=0x%x,%08x kind=%d" | ||
1697 | " ctag=%d vol=%d refs=%d" | ||
1698 | " [0x%08x,0x%08x]", | ||
1699 | pte_cur, hi32(addr), lo32(addr), | ||
1700 | kind_v, ctag, !cacheable, | ||
1701 | pte->ref_cnt, pte_w[1], pte_w[0]); | ||
1702 | |||
1703 | ctag += ctag_incr; | ||
1704 | cur_offset += page_size; | ||
1705 | addr += page_size; | ||
1706 | while (cur_chunk && | ||
1707 | cur_offset >= cur_chunk->length) { | ||
1708 | cur_offset -= cur_chunk->length; | ||
1709 | cur_chunk = sg_next(cur_chunk); | ||
1710 | } | ||
1711 | |||
1712 | } else { | ||
1713 | pte->ref_cnt--; | ||
1714 | gk20a_dbg(gpu_dbg_pte, | ||
1715 | "pte_cur=%d ref=%d [0x0,0x0]", | ||
1716 | pte_cur, pte->ref_cnt); | ||
1717 | } | ||
1718 | |||
1719 | gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]); | ||
1720 | gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]); | ||
1721 | } | ||
1722 | |||
1723 | unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur); | ||
1724 | |||
1725 | if (pte->ref_cnt == 0) { | ||
1726 | /* It can make sense to keep around one page table for | ||
1727 | * each flavor (empty)... in case a new map is coming | ||
1728 | * right back to alloc (and fill it in) again. | ||
1729 | * But: deferring unmapping should help with pathological | ||
1730 | * unmap/map/unmap/map cases where we'd trigger pte | ||
1731 | * free/alloc/free/alloc. | ||
1732 | */ | ||
1733 | free_gmmu_pages(vm, pte->ref, pte->sgt, | ||
1734 | vm->mm->page_table_sizing[pgsz_idx].order, | ||
1735 | pte->size); | ||
1736 | pte->ref = NULL; | ||
1737 | |||
1738 | /* rewrite pde */ | ||
1739 | update_gmmu_pde_locked(vm, pde_i); | ||
1740 | } | ||
1741 | |||
1742 | } | ||
1743 | |||
1744 | smp_mb(); | ||
1745 | vm->tlb_dirty = true; | ||
1746 | gk20a_dbg_fn("set tlb dirty"); | ||
1747 | |||
1748 | return 0; | ||
1749 | |||
1750 | clean_up: | ||
1751 | /* TBD: potentially rewrite the above to pre-map everything it needs, | ||
1752 | * as that's the only way it can fail */ | ||
1753 | return err; | ||
1754 | |||
1755 | } | ||
1756 | |||
1757 | |||
1758 | /* for gk20a the "video memory" apertures here are misnomers. */ | ||
1759 | static inline u32 big_valid_pde0_bits(u64 pte_addr) | ||
1760 | { | ||
1761 | u32 pde0_bits = | ||
1762 | gmmu_pde_aperture_big_video_memory_f() | | ||
1763 | gmmu_pde_address_big_sys_f( | ||
1764 | (u32)(pte_addr >> gmmu_pde_address_shift_v())); | ||
1765 | return pde0_bits; | ||
1766 | } | ||
1767 | static inline u32 small_valid_pde1_bits(u64 pte_addr) | ||
1768 | { | ||
1769 | u32 pde1_bits = | ||
1770 | gmmu_pde_aperture_small_video_memory_f() | | ||
1771 | gmmu_pde_vol_small_true_f() | /* tbd: why? */ | ||
1772 | gmmu_pde_address_small_sys_f( | ||
1773 | (u32)(pte_addr >> gmmu_pde_address_shift_v())); | ||
1774 | return pde1_bits; | ||
1775 | } | ||
1776 | |||
1777 | /* Given the current state of the ptes associated with a pde, | ||
1778 | determine its value and write it out. There's no checking | ||
1779 | here to determine whether or not a change was actually | ||
1780 | made. So, superfluous updates will cause unnecessary | ||
1781 | pde invalidations. | ||
1782 | */ | ||
1783 | static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i) | ||
1784 | { | ||
1785 | bool small_valid, big_valid; | ||
1786 | u64 pte_addr[2] = {0, 0}; | ||
1787 | struct page_table_gk20a *small_pte = | ||
1788 | vm->pdes.ptes[gmmu_page_size_small] + i; | ||
1789 | struct page_table_gk20a *big_pte = | ||
1790 | vm->pdes.ptes[gmmu_page_size_big] + i; | ||
1791 | u32 pde_v[2] = {0, 0}; | ||
1792 | u32 *pde; | ||
1793 | |||
1794 | small_valid = small_pte && small_pte->ref; | ||
1795 | big_valid = big_pte && big_pte->ref; | ||
1796 | |||
1797 | if (small_valid) | ||
1798 | pte_addr[gmmu_page_size_small] = | ||
1799 | gk20a_mm_iova_addr(small_pte->sgt->sgl); | ||
1800 | if (big_valid) | ||
1801 | pte_addr[gmmu_page_size_big] = | ||
1802 | gk20a_mm_iova_addr(big_pte->sgt->sgl); | ||
1803 | |||
1804 | pde_v[0] = gmmu_pde_size_full_f(); | ||
1805 | pde_v[0] |= big_valid ? | ||
1806 | big_valid_pde0_bits(pte_addr[gmmu_page_size_big]) | ||
1807 | : | ||
1808 | (gmmu_pde_aperture_big_invalid_f()); | ||
1809 | |||
1810 | pde_v[1] |= (small_valid ? | ||
1811 | small_valid_pde1_bits(pte_addr[gmmu_page_size_small]) | ||
1812 | : | ||
1813 | (gmmu_pde_aperture_small_invalid_f() | | ||
1814 | gmmu_pde_vol_small_false_f()) | ||
1815 | ) | ||
1816 | | | ||
1817 | (big_valid ? (gmmu_pde_vol_big_true_f()) : | ||
1818 | gmmu_pde_vol_big_false_f()); | ||
1819 | |||
1820 | pde = pde_from_index(vm, i); | ||
1821 | |||
1822 | gk20a_mem_wr32(pde, 0, pde_v[0]); | ||
1823 | gk20a_mem_wr32(pde, 1, pde_v[1]); | ||
1824 | |||
1825 | smp_mb(); | ||
1826 | |||
1827 | FLUSH_CPU_DCACHE(pde, | ||
1828 | sg_phys(vm->pdes.sgt->sgl) + (i*gmmu_pde__size_v()), | ||
1829 | sizeof(u32)*2); | ||
1830 | |||
1831 | gk20a_mm_l2_invalidate(vm->mm->g); | ||
1832 | |||
1833 | gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]); | ||
1834 | |||
1835 | vm->tlb_dirty = true; | ||
1836 | } | ||
1837 | |||
1838 | |||
1839 | static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr, | ||
1840 | u32 num_pages, u32 pgsz_idx) | ||
1841 | { | ||
1842 | struct mm_gk20a *mm = vm->mm; | ||
1843 | struct gk20a *g = mm->g; | ||
1844 | u32 pgsz = gmmu_page_sizes[pgsz_idx]; | ||
1845 | u32 i; | ||
1846 | dma_addr_t iova; | ||
1847 | |||
1848 | /* allocate the zero page if the va does not already have one */ | ||
1849 | if (!vm->zero_page_cpuva) { | ||
1850 | int err = 0; | ||
1851 | vm->zero_page_cpuva = dma_alloc_coherent(&g->dev->dev, | ||
1852 | mm->big_page_size, | ||
1853 | &iova, | ||
1854 | GFP_KERNEL); | ||
1855 | if (!vm->zero_page_cpuva) { | ||
1856 | dev_err(&g->dev->dev, "failed to allocate zero page\n"); | ||
1857 | return -ENOMEM; | ||
1858 | } | ||
1859 | |||
1860 | vm->zero_page_iova = iova; | ||
1861 | err = gk20a_get_sgtable(&g->dev->dev, &vm->zero_page_sgt, | ||
1862 | vm->zero_page_cpuva, vm->zero_page_iova, | ||
1863 | mm->big_page_size); | ||
1864 | if (err) { | ||
1865 | dma_free_coherent(&g->dev->dev, mm->big_page_size, | ||
1866 | vm->zero_page_cpuva, | ||
1867 | vm->zero_page_iova); | ||
1868 | vm->zero_page_iova = 0; | ||
1869 | vm->zero_page_cpuva = NULL; | ||
1870 | |||
1871 | dev_err(&g->dev->dev, "failed to create sg table for zero page\n"); | ||
1872 | return -ENOMEM; | ||
1873 | } | ||
1874 | } | ||
1875 | |||
1876 | for (i = 0; i < num_pages; i++) { | ||
1877 | u64 page_vaddr = __locked_gmmu_map(vm, vaddr, | ||
1878 | vm->zero_page_sgt, pgsz, pgsz_idx, 0, 0, | ||
1879 | NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET, | ||
1880 | gk20a_mem_flag_none); | ||
1881 | |||
1882 | if (!page_vaddr) { | ||
1883 | gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!"); | ||
1884 | goto err_unmap; | ||
1885 | } | ||
1886 | vaddr += pgsz; | ||
1887 | } | ||
1888 | |||
1889 | gk20a_mm_l2_flush(mm->g, true); | ||
1890 | |||
1891 | return 0; | ||
1892 | |||
1893 | err_unmap: | ||
1894 | |||
1895 | WARN_ON(1); | ||
1896 | /* something went wrong. unmap pages */ | ||
1897 | while (i--) { | ||
1898 | vaddr -= pgsz; | ||
1899 | __locked_gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0, | ||
1900 | gk20a_mem_flag_none); | ||
1901 | } | ||
1902 | |||
1903 | return -EINVAL; | ||
1904 | } | ||
1905 | |||
1906 | /* NOTE! mapped_buffers lock must be held */ | ||
1907 | static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) | ||
1908 | { | ||
1909 | struct vm_gk20a *vm = mapped_buffer->vm; | ||
1910 | |||
1911 | if (mapped_buffer->va_node && | ||
1912 | mapped_buffer->va_node->sparse) { | ||
1913 | u64 vaddr = mapped_buffer->addr; | ||
1914 | u32 pgsz_idx = mapped_buffer->pgsz_idx; | ||
1915 | u32 num_pages = mapped_buffer->size >> | ||
1916 | gmmu_page_shifts[pgsz_idx]; | ||
1917 | |||
1918 | /* there is little we can do if this fails... */ | ||
1919 | gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx); | ||
1920 | |||
1921 | } else | ||
1922 | __locked_gmmu_unmap(vm, | ||
1923 | mapped_buffer->addr, | ||
1924 | mapped_buffer->size, | ||
1925 | mapped_buffer->pgsz_idx, | ||
1926 | mapped_buffer->va_allocated, | ||
1927 | gk20a_mem_flag_none); | ||
1928 | |||
1929 | gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d", | ||
1930 | vm_aspace_id(vm), gmmu_page_sizes[mapped_buffer->pgsz_idx], | ||
1931 | hi32(mapped_buffer->addr), lo32(mapped_buffer->addr), | ||
1932 | mapped_buffer->own_mem_ref); | ||
1933 | |||
1934 | gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf, | ||
1935 | mapped_buffer->sgt); | ||
1936 | |||
1937 | /* remove from mapped buffer tree and remove list, free */ | ||
1938 | rb_erase(&mapped_buffer->node, &vm->mapped_buffers); | ||
1939 | if (!list_empty(&mapped_buffer->va_buffers_list)) | ||
1940 | list_del(&mapped_buffer->va_buffers_list); | ||
1941 | |||
1942 | /* keep track of mapped buffers */ | ||
1943 | if (mapped_buffer->user_mapped) | ||
1944 | vm->num_user_mapped_buffers--; | ||
1945 | |||
1946 | if (mapped_buffer->own_mem_ref) | ||
1947 | dma_buf_put(mapped_buffer->dmabuf); | ||
1948 | |||
1949 | kfree(mapped_buffer); | ||
1950 | |||
1951 | return; | ||
1952 | } | ||
1953 | |||
1954 | void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset) | ||
1955 | { | ||
1956 | struct device *d = dev_from_vm(vm); | ||
1957 | struct mapped_buffer_node *mapped_buffer; | ||
1958 | |||
1959 | mutex_lock(&vm->update_gmmu_lock); | ||
1960 | mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset); | ||
1961 | if (!mapped_buffer) { | ||
1962 | mutex_unlock(&vm->update_gmmu_lock); | ||
1963 | gk20a_err(d, "invalid addr to unmap 0x%llx", offset); | ||
1964 | return; | ||
1965 | } | ||
1966 | kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref); | ||
1967 | mutex_unlock(&vm->update_gmmu_lock); | ||
1968 | } | ||
1969 | |||
1970 | static void gk20a_vm_remove_support(struct vm_gk20a *vm) | ||
1971 | { | ||
1972 | struct gk20a *g = vm->mm->g; | ||
1973 | struct mapped_buffer_node *mapped_buffer; | ||
1974 | struct vm_reserved_va_node *va_node, *va_node_tmp; | ||
1975 | struct rb_node *node; | ||
1976 | |||
1977 | gk20a_dbg_fn(""); | ||
1978 | mutex_lock(&vm->update_gmmu_lock); | ||
1979 | |||
1980 | /* TBD: add a flag here for the unmap code to recognize teardown | ||
1981 | * and short-circuit any otherwise expensive operations. */ | ||
1982 | |||
1983 | node = rb_first(&vm->mapped_buffers); | ||
1984 | while (node) { | ||
1985 | mapped_buffer = | ||
1986 | container_of(node, struct mapped_buffer_node, node); | ||
1987 | gk20a_vm_unmap_locked(mapped_buffer); | ||
1988 | node = rb_first(&vm->mapped_buffers); | ||
1989 | } | ||
1990 | |||
1991 | /* destroy remaining reserved memory areas */ | ||
1992 | list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list, | ||
1993 | reserved_va_list) { | ||
1994 | list_del(&va_node->reserved_va_list); | ||
1995 | kfree(va_node); | ||
1996 | } | ||
1997 | |||
1998 | /* TBD: unmapping all buffers above may not actually free | ||
1999 | * all vm ptes. jettison them here for certain... */ | ||
2000 | |||
2001 | unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv); | ||
2002 | free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, vm->pdes.size); | ||
2003 | |||
2004 | kfree(vm->pdes.ptes[gmmu_page_size_small]); | ||
2005 | kfree(vm->pdes.ptes[gmmu_page_size_big]); | ||
2006 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | ||
2007 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); | ||
2008 | |||
2009 | mutex_unlock(&vm->update_gmmu_lock); | ||
2010 | |||
2011 | /* release zero page if used */ | ||
2012 | if (vm->zero_page_cpuva) | ||
2013 | dma_free_coherent(&g->dev->dev, vm->mm->big_page_size, | ||
2014 | vm->zero_page_cpuva, vm->zero_page_iova); | ||
2015 | |||
2016 | /* vm is not used anymore. release it. */ | ||
2017 | kfree(vm); | ||
2018 | } | ||
2019 | |||
2020 | static void gk20a_vm_remove_support_kref(struct kref *ref) | ||
2021 | { | ||
2022 | struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref); | ||
2023 | gk20a_vm_remove_support(vm); | ||
2024 | } | ||
2025 | |||
2026 | void gk20a_vm_get(struct vm_gk20a *vm) | ||
2027 | { | ||
2028 | kref_get(&vm->ref); | ||
2029 | } | ||
2030 | |||
2031 | void gk20a_vm_put(struct vm_gk20a *vm) | ||
2032 | { | ||
2033 | kref_put(&vm->ref, gk20a_vm_remove_support_kref); | ||
2034 | } | ||
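gk20a_vm_get()/gk20a_vm_put() are thin kref wrappers; the final put tears the VM down through gk20a_vm_remove_support(). A short sketch of the intended pairing (illustrative only; the example_ function is not part of the driver):

    static void example_hold_vm(struct vm_gk20a *vm)
    {
            gk20a_vm_get(vm);       /* take a reference before deferring work */
            /* ... hand vm to a worker or fence callback ... */
            gk20a_vm_put(vm);       /* final put frees the VM via
                                     * gk20a_vm_remove_support() */
    }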
2035 | |||
2036 | /* address space interfaces for the gk20a module */ | ||
2037 | int gk20a_vm_alloc_share(struct gk20a_as_share *as_share) | ||
2038 | { | ||
2039 | struct gk20a_as *as = as_share->as; | ||
2040 | struct gk20a *g = gk20a_from_as(as); | ||
2041 | struct mm_gk20a *mm = &g->mm; | ||
2042 | struct vm_gk20a *vm; | ||
2043 | u64 vma_size; | ||
2044 | u32 num_pages, low_hole_pages; | ||
2045 | char name[32]; | ||
2046 | int err; | ||
2047 | |||
2048 | gk20a_dbg_fn(""); | ||
2049 | |||
2050 | vm = kzalloc(sizeof(*vm), GFP_KERNEL); | ||
2051 | if (!vm) | ||
2052 | return -ENOMEM; | ||
2053 | |||
2054 | as_share->vm = vm; | ||
2055 | |||
2056 | vm->mm = mm; | ||
2057 | vm->as_share = as_share; | ||
2058 | |||
2059 | vm->big_pages = true; | ||
2060 | |||
2061 | vm->va_start = mm->pde_stride; /* create a one pde hole */ | ||
2062 | vm->va_limit = mm->channel.size; /* note this means channel.size is | ||
2063 | really just the max */ | ||
2064 | { | ||
2065 | u32 pde_lo, pde_hi; | ||
2066 | pde_range_from_vaddr_range(vm, | ||
2067 | 0, vm->va_limit-1, | ||
2068 | &pde_lo, &pde_hi); | ||
2069 | vm->pdes.num_pdes = pde_hi + 1; | ||
2070 | } | ||
2071 | |||
2072 | vm->pdes.ptes[gmmu_page_size_small] = | ||
2073 | kzalloc(sizeof(struct page_table_gk20a) * | ||
2074 | vm->pdes.num_pdes, GFP_KERNEL); | ||
2075 | |||
2076 | vm->pdes.ptes[gmmu_page_size_big] = | ||
2077 | kzalloc(sizeof(struct page_table_gk20a) * | ||
2078 | vm->pdes.num_pdes, GFP_KERNEL); | ||
2079 | |||
2080 | if (!(vm->pdes.ptes[gmmu_page_size_small] && | ||
2081 | vm->pdes.ptes[gmmu_page_size_big])) | ||
2082 | return -ENOMEM; | ||
2083 | |||
2084 | gk20a_dbg_info("init space for va_limit=0x%llx num_pdes=%d", | ||
2085 | vm->va_limit, vm->pdes.num_pdes); | ||
2086 | |||
2087 | /* allocate the page table directory */ | ||
2088 | err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref, | ||
2089 | &vm->pdes.sgt, &vm->pdes.size); | ||
2090 | if (err) | ||
2091 | return -ENOMEM; | ||
2092 | |||
2093 | err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv, | ||
2094 | vm->pdes.size); | ||
2095 | if (err) { | ||
2096 | free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, | ||
2097 | vm->pdes.size); | ||
2098 | return -ENOMEM; | ||
2099 | } | ||
2100 | gk20a_dbg(gpu_dbg_pte, "pdes.kv = 0x%p, pdes.phys = 0x%llx", | ||
2101 | vm->pdes.kv, | ||
2102 | gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); | ||
2103 | /* we could release vm->pdes.kv but it's only one page... */ | ||
2104 | |||
2105 | |||
2106 | /* low-half: alloc small pages */ | ||
2107 | /* high-half: alloc big pages */ | ||
2108 | vma_size = mm->channel.size >> 1; | ||
2109 | |||
2110 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, | ||
2111 | gmmu_page_sizes[gmmu_page_size_small]>>10); | ||
2112 | num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]); | ||
2113 | |||
2114 | /* num_pages above is without regard to the low-side hole. */ | ||
2115 | low_hole_pages = (vm->va_start >> | ||
2116 | gmmu_page_shifts[gmmu_page_size_small]); | ||
2117 | |||
2118 | gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name, | ||
2119 | low_hole_pages, /* start */ | ||
2120 | num_pages - low_hole_pages, /* length */ | ||
2121 | 1); /* align */ | ||
2122 | |||
2123 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, | ||
2124 | gmmu_page_sizes[gmmu_page_size_big]>>10); | ||
2125 | |||
2126 | num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]); | ||
2127 | gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name, | ||
2128 | num_pages, /* start */ | ||
2129 | num_pages, /* length */ | ||
2130 | 1); /* align */ | ||
2131 | |||
2132 | vm->mapped_buffers = RB_ROOT; | ||
2133 | |||
2134 | mutex_init(&vm->update_gmmu_lock); | ||
2135 | kref_init(&vm->ref); | ||
2136 | INIT_LIST_HEAD(&vm->reserved_va_list); | ||
2137 | |||
2138 | vm->enable_ctag = true; | ||
2139 | |||
2140 | return 0; | ||
2141 | } | ||
2142 | |||
2143 | |||
2144 | int gk20a_vm_release_share(struct gk20a_as_share *as_share) | ||
2145 | { | ||
2146 | struct vm_gk20a *vm = as_share->vm; | ||
2147 | |||
2148 | gk20a_dbg_fn(""); | ||
2149 | |||
2150 | vm->as_share = NULL; | ||
2151 | |||
2152 | /* put as reference to vm */ | ||
2153 | gk20a_vm_put(vm); | ||
2154 | |||
2155 | as_share->vm = NULL; | ||
2156 | |||
2157 | return 0; | ||
2158 | } | ||
2159 | |||
2160 | |||
2161 | int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | ||
2162 | struct nvhost_as_alloc_space_args *args) | ||
2163 | { | ||
2164 | int err = -ENOMEM; | ||
2165 | int pgsz_idx; | ||
2166 | u32 start_page_nr; | ||
2167 | struct gk20a_allocator *vma; | ||
2168 | struct vm_gk20a *vm = as_share->vm; | ||
2169 | struct vm_reserved_va_node *va_node; | ||
2170 | u64 vaddr_start = 0; | ||
2171 | |||
2172 | gk20a_dbg_fn("flags=0x%x pgsz=0x%x nr_pages=0x%x o/a=0x%llx", | ||
2173 | args->flags, args->page_size, args->pages, | ||
2174 | args->o_a.offset); | ||
2175 | |||
2176 | /* determine pagesz idx */ | ||
2177 | for (pgsz_idx = gmmu_page_size_small; | ||
2178 | pgsz_idx < gmmu_nr_page_sizes; | ||
2179 | pgsz_idx++) { | ||
2180 | if (gmmu_page_sizes[pgsz_idx] == args->page_size) | ||
2181 | break; | ||
2182 | } | ||
2183 | |||
2184 | if (pgsz_idx >= gmmu_nr_page_sizes) { | ||
2185 | err = -EINVAL; | ||
2186 | goto clean_up; | ||
2187 | } | ||
2188 | |||
2189 | va_node = kzalloc(sizeof(*va_node), GFP_KERNEL); | ||
2190 | if (!va_node) { | ||
2191 | err = -ENOMEM; | ||
2192 | goto clean_up; | ||
2193 | } | ||
2194 | |||
2195 | if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE && | ||
2196 | pgsz_idx != gmmu_page_size_big) { | ||
2197 | err = -ENOSYS; | ||
2198 | kfree(va_node); | ||
2199 | goto clean_up; | ||
2200 | } | ||
2201 | |||
2202 | start_page_nr = 0; | ||
2203 | if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) | ||
2204 | start_page_nr = (u32)(args->o_a.offset >> | ||
2205 | gmmu_page_shifts[pgsz_idx]); | ||
2206 | |||
2207 | vma = &vm->vma[pgsz_idx]; | ||
2208 | err = vma->alloc(vma, &start_page_nr, args->pages); | ||
2209 | if (err) { | ||
2210 | kfree(va_node); | ||
2211 | goto clean_up; | ||
2212 | } | ||
2213 | |||
2214 | vaddr_start = (u64)start_page_nr << gmmu_page_shifts[pgsz_idx]; | ||
2215 | |||
2216 | va_node->vaddr_start = vaddr_start; | ||
2217 | va_node->size = (u64)args->page_size * (u64)args->pages; | ||
2218 | va_node->pgsz_idx = pgsz_idx; | ||
2219 | INIT_LIST_HEAD(&va_node->va_buffers_list); | ||
2220 | INIT_LIST_HEAD(&va_node->reserved_va_list); | ||
2221 | |||
2222 | mutex_lock(&vm->update_gmmu_lock); | ||
2223 | |||
2224 | /* mark that we need to use sparse mappings here */ | ||
2225 | if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) { | ||
2226 | err = gk20a_vm_put_empty(vm, vaddr_start, args->pages, | ||
2227 | pgsz_idx); | ||
2228 | if (err) { | ||
2229 | mutex_unlock(&vm->update_gmmu_lock); | ||
2230 | vma->free(vma, start_page_nr, args->pages); | ||
2231 | kfree(va_node); | ||
2232 | goto clean_up; | ||
2233 | } | ||
2234 | |||
2235 | va_node->sparse = true; | ||
2236 | } | ||
2237 | |||
2238 | list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list); | ||
2239 | |||
2240 | mutex_unlock(&vm->update_gmmu_lock); | ||
2241 | |||
2242 | args->o_a.offset = vaddr_start; | ||
2243 | |||
2244 | clean_up: | ||
2245 | return err; | ||
2246 | } | ||
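For reference, a sketch of the arguments a caller would pass to reserve a fixed-offset sparse region. The offset, page count and the example_ wrapper are illustrative assumptions; note that the code above accepts the SPARSE flag only together with the big page size (128 KB on gk20a):

    static int example_reserve_sparse(struct gk20a_as_share *as_share)
    {
            struct nvhost_as_alloc_space_args args = {
                    .flags      = NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE |
                                  NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
                    .page_size  = SZ_128K,          /* big page size */
                    .pages      = 128,              /* 128 * 128 KB = 16 MB */
                    .o_a.offset = 0x100000000ULL,   /* example fixed GPU VA */
            };

            return gk20a_vm_alloc_space(as_share, &args);
    }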
2247 | |||
2248 | int gk20a_vm_free_space(struct gk20a_as_share *as_share, | ||
2249 | struct nvhost_as_free_space_args *args) | ||
2250 | { | ||
2251 | int err = -ENOMEM; | ||
2252 | int pgsz_idx; | ||
2253 | u32 start_page_nr; | ||
2254 | struct gk20a_allocator *vma; | ||
2255 | struct vm_gk20a *vm = as_share->vm; | ||
2256 | struct vm_reserved_va_node *va_node; | ||
2257 | |||
2258 | gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size, | ||
2259 | args->pages, args->offset); | ||
2260 | |||
2261 | /* determine pagesz idx */ | ||
2262 | for (pgsz_idx = gmmu_page_size_small; | ||
2263 | pgsz_idx < gmmu_nr_page_sizes; | ||
2264 | pgsz_idx++) { | ||
2265 | if (gmmu_page_sizes[pgsz_idx] == args->page_size) | ||
2266 | break; | ||
2267 | } | ||
2268 | |||
2269 | if (pgsz_idx >= gmmu_nr_page_sizes) { | ||
2270 | err = -EINVAL; | ||
2271 | goto clean_up; | ||
2272 | } | ||
2273 | |||
2274 | start_page_nr = (u32)(args->offset >> | ||
2275 | gmmu_page_shifts[pgsz_idx]); | ||
2276 | |||
2277 | vma = &vm->vma[pgsz_idx]; | ||
2278 | err = vma->free(vma, start_page_nr, args->pages); | ||
2279 | |||
2280 | if (err) | ||
2281 | goto clean_up; | ||
2282 | |||
2283 | mutex_lock(&vm->update_gmmu_lock); | ||
2284 | va_node = addr_to_reservation(vm, args->offset); | ||
2285 | if (va_node) { | ||
2286 | struct mapped_buffer_node *buffer; | ||
2287 | |||
2288 | /* there is no need to deallocate the buffers in the VA range. Just | ||
2289 | * convert them into normal buffers */ | ||
2290 | |||
2291 | list_for_each_entry(buffer, | ||
2292 | &va_node->va_buffers_list, va_buffers_list) | ||
2293 | list_del_init(&buffer->va_buffers_list); | ||
2294 | |||
2295 | list_del(&va_node->reserved_va_list); | ||
2296 | |||
2297 | /* if this was a sparse mapping, free the va */ | ||
2298 | if (va_node->sparse) | ||
2299 | __locked_gmmu_unmap(vm, | ||
2300 | va_node->vaddr_start, | ||
2301 | va_node->size, | ||
2302 | va_node->pgsz_idx, | ||
2303 | false, | ||
2304 | gk20a_mem_flag_none); | ||
2305 | kfree(va_node); | ||
2306 | } | ||
2307 | mutex_unlock(&vm->update_gmmu_lock); | ||
2308 | |||
2309 | clean_up: | ||
2310 | return err; | ||
2311 | } | ||
2312 | |||
2313 | int gk20a_vm_bind_channel(struct gk20a_as_share *as_share, | ||
2314 | struct channel_gk20a *ch) | ||
2315 | { | ||
2316 | int err = 0; | ||
2317 | struct vm_gk20a *vm = as_share->vm; | ||
2318 | |||
2319 | gk20a_dbg_fn(""); | ||
2320 | |||
2321 | ch->vm = vm; | ||
2322 | err = channel_gk20a_commit_va(ch); | ||
2323 | if (err) | ||
2324 | ch->vm = NULL; | ||
2325 | |||
2326 | return err; | ||
2327 | } | ||
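Taken together with gk20a_vm_alloc_share() above, the usual ordering at the address-space layer looks roughly like the sketch below. It is illustrative only; the as_share and channel objects come from the AS and channel code outside this file:

    static int example_setup_as(struct gk20a_as_share *as_share,
                                struct channel_gk20a *ch)
    {
            int err = gk20a_vm_alloc_share(as_share);  /* create and init the VM */
            if (err)
                    return err;

            err = gk20a_vm_bind_channel(as_share, ch); /* commit the VA to the channel */
            if (err)
                    gk20a_vm_release_share(as_share);  /* drops the VM reference */

            return err;
    }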
2328 | |||
2329 | int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev) | ||
2330 | { | ||
2331 | struct gk20a_dmabuf_priv *priv; | ||
2332 | static DEFINE_MUTEX(priv_lock); | ||
2333 | |||
2334 | priv = dma_buf_get_drvdata(dmabuf, dev); | ||
2335 | if (likely(priv)) | ||
2336 | return 0; | ||
2337 | |||
2338 | mutex_lock(&priv_lock); | ||
2339 | priv = dma_buf_get_drvdata(dmabuf, dev); | ||
2340 | if (priv) | ||
2341 | goto priv_exist_or_err; | ||
2342 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
2343 | if (!priv) { | ||
2344 | priv = ERR_PTR(-ENOMEM); | ||
2345 | goto priv_exist_or_err; | ||
2346 | } | ||
2347 | mutex_init(&priv->lock); | ||
2348 | dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv); | ||
2349 | priv_exist_or_err: | ||
2350 | mutex_unlock(&priv_lock); | ||
2351 | if (IS_ERR(priv)) | ||
2352 | return -ENOMEM; | ||
2353 | |||
2354 | return 0; | ||
2355 | } | ||
2356 | |||
2357 | |||
2358 | static int gk20a_dmabuf_get_kind(struct dma_buf *dmabuf) | ||
2359 | { | ||
2360 | int kind = 0; | ||
2361 | #ifdef CONFIG_TEGRA_NVMAP | ||
2362 | int err; | ||
2363 | u64 nvmap_param; | ||
2364 | |||
2365 | err = nvmap_get_dmabuf_param(dmabuf, NVMAP_HANDLE_PARAM_KIND, | ||
2366 | &nvmap_param); | ||
2367 | kind = err ? kind : nvmap_param; | ||
2368 | #endif | ||
2369 | return kind; | ||
2370 | } | ||
2371 | |||
2372 | int gk20a_vm_map_buffer(struct gk20a_as_share *as_share, | ||
2373 | int dmabuf_fd, | ||
2374 | u64 *offset_align, | ||
2375 | u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/ | ||
2376 | int kind) | ||
2377 | { | ||
2378 | int err = 0; | ||
2379 | struct vm_gk20a *vm = as_share->vm; | ||
2380 | struct dma_buf *dmabuf; | ||
2381 | u64 ret_va; | ||
2382 | |||
2383 | gk20a_dbg_fn(""); | ||
2384 | |||
2385 | /* get ref to the mem handle (released on unmap_locked) */ | ||
2386 | dmabuf = dma_buf_get(dmabuf_fd); | ||
2387 | if (IS_ERR(dmabuf)) | ||
2388 | return PTR_ERR(dmabuf); | ||
2389 | |||
2390 | err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm)); | ||
2391 | if (err) { | ||
2392 | dma_buf_put(dmabuf); | ||
2393 | return err; | ||
2394 | } | ||
2395 | |||
2396 | if (kind == -1) | ||
2397 | kind = gk20a_dmabuf_get_kind(dmabuf); | ||
2398 | |||
2399 | ret_va = gk20a_vm_map(vm, dmabuf, *offset_align, | ||
2400 | flags, kind, NULL, true, | ||
2401 | gk20a_mem_flag_none); | ||
2402 | *offset_align = ret_va; | ||
2403 | if (!ret_va) { | ||
2404 | dma_buf_put(dmabuf); | ||
2405 | err = -EINVAL; | ||
2406 | } | ||
2407 | |||
2408 | return err; | ||
2409 | } | ||
2410 | |||
2411 | int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset) | ||
2412 | { | ||
2413 | struct vm_gk20a *vm = as_share->vm; | ||
2414 | |||
2415 | gk20a_dbg_fn(""); | ||
2416 | |||
2417 | gk20a_vm_unmap_user(vm, offset); | ||
2418 | return 0; | ||
2419 | } | ||
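A matching sketch for the buffer path: map a dmabuf fd into the shared VM and unmap it again by GPU VA. offset_align is in/out; on input it carries the requested fixed offset or alignment (interpreted by gk20a_vm_map(), earlier in this file), on output the resulting GPU VA. The fd, flags and example_ wrapper are illustrative:

    static int example_map_unmap(struct gk20a_as_share *as_share, int dmabuf_fd)
    {
            u64 gpu_va = 0;         /* no fixed offset requested */
            int err;

            err = gk20a_vm_map_buffer(as_share, dmabuf_fd, &gpu_va,
                                      0 /* flags */, -1 /* kind: ask nvmap */);
            if (err)
                    return err;

            /* ... use the mapping at gpu_va ... */

            return gk20a_vm_unmap_buffer(as_share, gpu_va);
    }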
2420 | |||
2421 | int gk20a_init_bar1_vm(struct mm_gk20a *mm) | ||
2422 | { | ||
2423 | int err; | ||
2424 | phys_addr_t inst_pa; | ||
2425 | void *inst_ptr; | ||
2426 | struct vm_gk20a *vm = &mm->bar1.vm; | ||
2427 | struct gk20a *g = gk20a_from_mm(mm); | ||
2428 | struct device *d = dev_from_gk20a(g); | ||
2429 | struct inst_desc *inst_block = &mm->bar1.inst_block; | ||
2430 | u64 pde_addr; | ||
2431 | u32 pde_addr_lo; | ||
2432 | u32 pde_addr_hi; | ||
2433 | dma_addr_t iova; | ||
2434 | |||
2435 | vm->mm = mm; | ||
2436 | |||
2437 | mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; | ||
2438 | |||
2439 | gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); | ||
2440 | |||
2441 | vm->va_start = mm->pde_stride * 1; | ||
2442 | vm->va_limit = mm->bar1.aperture_size; | ||
2443 | |||
2444 | { | ||
2445 | u32 pde_lo, pde_hi; | ||
2446 | pde_range_from_vaddr_range(vm, | ||
2447 | 0, vm->va_limit-1, | ||
2448 | &pde_lo, &pde_hi); | ||
2449 | vm->pdes.num_pdes = pde_hi + 1; | ||
2450 | } | ||
2451 | |||
2452 | /* bar1 is likely only to ever use/need small page sizes. */ | ||
2453 | /* But just in case, for now... arrange for both. */ | ||
2454 | vm->pdes.ptes[gmmu_page_size_small] = | ||
2455 | kzalloc(sizeof(struct page_table_gk20a) * | ||
2456 | vm->pdes.num_pdes, GFP_KERNEL); | ||
2457 | |||
2458 | vm->pdes.ptes[gmmu_page_size_big] = | ||
2459 | kzalloc(sizeof(struct page_table_gk20a) * | ||
2460 | vm->pdes.num_pdes, GFP_KERNEL); | ||
2461 | |||
2462 | if (!(vm->pdes.ptes[gmmu_page_size_small] && | ||
2463 | vm->pdes.ptes[gmmu_page_size_big])) | ||
2464 | return -ENOMEM; | ||
2465 | |||
2466 | gk20a_dbg_info("init space for bar1 va_limit=0x%llx num_pdes=%d", | ||
2467 | vm->va_limit, vm->pdes.num_pdes); | ||
2468 | |||
2469 | |||
2470 | /* allocate the page table directory */ | ||
2471 | err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref, | ||
2472 | &vm->pdes.sgt, &vm->pdes.size); | ||
2473 | if (err) | ||
2474 | goto clean_up; | ||
2475 | |||
2476 | err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv, | ||
2477 | vm->pdes.size); | ||
2478 | if (err) { | ||
2479 | free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, | ||
2480 | vm->pdes.size); | ||
2481 | goto clean_up; | ||
2482 | } | ||
2483 | gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx", | ||
2484 | vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); | ||
2485 | /* we could release vm->pdes.kv but it's only one page... */ | ||
2486 | |||
2487 | pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl); | ||
2488 | pde_addr_lo = u64_lo32(pde_addr >> 12); | ||
2489 | pde_addr_hi = u64_hi32(pde_addr); | ||
2490 | |||
2491 | gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x", | ||
2492 | (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl), | ||
2493 | pde_addr_lo, pde_addr_hi); | ||
2494 | |||
2495 | /* allocate instance mem for bar1 */ | ||
2496 | inst_block->size = ram_in_alloc_size_v(); | ||
2497 | inst_block->cpuva = dma_alloc_coherent(d, inst_block->size, | ||
2498 | &iova, GFP_KERNEL); | ||
2499 | if (!inst_block->cpuva) { | ||
2500 | gk20a_err(d, "%s: memory allocation failed\n", __func__); | ||
2501 | err = -ENOMEM; | ||
2502 | goto clean_up; | ||
2503 | } | ||
2504 | |||
2505 | inst_block->iova = iova; | ||
2506 | inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova); | ||
2507 | if (!inst_block->cpu_pa) { | ||
2508 | gk20a_err(d, "%s: failed to get phys address\n", __func__); | ||
2509 | err = -ENOMEM; | ||
2510 | goto clean_up; | ||
2511 | } | ||
2512 | |||
2513 | inst_pa = inst_block->cpu_pa; | ||
2514 | inst_ptr = inst_block->cpuva; | ||
2515 | |||
2516 | gk20a_dbg_info("bar1 inst block physical phys = 0x%llx, kv = 0x%p", | ||
2517 | (u64)inst_pa, inst_ptr); | ||
2518 | |||
2519 | memset(inst_ptr, 0, ram_fc_size_val_v()); | ||
2520 | |||
2521 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), | ||
2522 | ram_in_page_dir_base_target_vid_mem_f() | | ||
2523 | ram_in_page_dir_base_vol_true_f() | | ||
2524 | ram_in_page_dir_base_lo_f(pde_addr_lo)); | ||
2525 | |||
2526 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), | ||
2527 | ram_in_page_dir_base_hi_f(pde_addr_hi)); | ||
2528 | |||
2529 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), | ||
2530 | u64_lo32(vm->va_limit) | 0xFFF); | ||
2531 | |||
2532 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), | ||
2533 | ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); | ||
2534 | |||
2535 | gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa); | ||
2536 | gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_bar1", | ||
2537 | 1,/*start*/ | ||
2538 | (vm->va_limit >> 12) - 1 /* length*/, | ||
2539 | 1); /* align */ | ||
2540 | /* initialize just in case we try to use it anyway */ | ||
2541 | gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_bar1-unused", | ||
2542 | 0x0badc0de, /* start */ | ||
2543 | 1, /* length */ | ||
2544 | 1); /* align */ | ||
2545 | |||
2546 | vm->mapped_buffers = RB_ROOT; | ||
2547 | |||
2548 | mutex_init(&vm->update_gmmu_lock); | ||
2549 | kref_init(&vm->ref); | ||
2550 | INIT_LIST_HEAD(&vm->reserved_va_list); | ||
2551 | |||
2552 | return 0; | ||
2553 | |||
2554 | clean_up: | ||
2555 | /* free, etc */ | ||
2556 | if (inst_block->cpuva) | ||
2557 | dma_free_coherent(d, inst_block->size, | ||
2558 | inst_block->cpuva, inst_block->iova); | ||
2559 | inst_block->cpuva = NULL; | ||
2560 | inst_block->iova = 0; | ||
2561 | return err; | ||
2562 | } | ||
2563 | |||
2564 | /* pmu vm, share channel_vm interfaces */ | ||
2565 | int gk20a_init_pmu_vm(struct mm_gk20a *mm) | ||
2566 | { | ||
2567 | int err; | ||
2568 | phys_addr_t inst_pa; | ||
2569 | void *inst_ptr; | ||
2570 | struct vm_gk20a *vm = &mm->pmu.vm; | ||
2571 | struct gk20a *g = gk20a_from_mm(mm); | ||
2572 | struct device *d = dev_from_gk20a(g); | ||
2573 | struct inst_desc *inst_block = &mm->pmu.inst_block; | ||
2574 | u64 pde_addr; | ||
2575 | u32 pde_addr_lo; | ||
2576 | u32 pde_addr_hi; | ||
2577 | dma_addr_t iova; | ||
2578 | |||
2579 | vm->mm = mm; | ||
2580 | |||
2581 | mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; | ||
2582 | |||
2583 | gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); | ||
2584 | |||
2585 | vm->va_start = GK20A_PMU_VA_START; | ||
2586 | vm->va_limit = vm->va_start + mm->pmu.aperture_size; | ||
2587 | |||
2588 | { | ||
2589 | u32 pde_lo, pde_hi; | ||
2590 | pde_range_from_vaddr_range(vm, | ||
2591 | 0, vm->va_limit-1, | ||
2592 | &pde_lo, &pde_hi); | ||
2593 | vm->pdes.num_pdes = pde_hi + 1; | ||
2594 | } | ||
2595 | |||
2596 | /* The pmu is likely only to ever use/need small page sizes. */ | ||
2597 | /* But just in case, for now... arrange for both. */ | ||
2598 | vm->pdes.ptes[gmmu_page_size_small] = | ||
2599 | kzalloc(sizeof(struct page_table_gk20a) * | ||
2600 | vm->pdes.num_pdes, GFP_KERNEL); | ||
2601 | |||
2602 | vm->pdes.ptes[gmmu_page_size_big] = | ||
2603 | kzalloc(sizeof(struct page_table_gk20a) * | ||
2604 | vm->pdes.num_pdes, GFP_KERNEL); | ||
2605 | |||
2606 | if (!(vm->pdes.ptes[gmmu_page_size_small] && | ||
2607 | vm->pdes.ptes[gmmu_page_size_big])) | ||
2608 | return -ENOMEM; | ||
2609 | |||
2610 | gk20a_dbg_info("init space for pmu va_limit=0x%llx num_pdes=%d", | ||
2611 | vm->va_limit, vm->pdes.num_pdes); | ||
2612 | |||
2613 | /* allocate the page table directory */ | ||
2614 | err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref, | ||
2615 | &vm->pdes.sgt, &vm->pdes.size); | ||
2616 | if (err) | ||
2617 | goto clean_up; | ||
2618 | |||
2619 | err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv, | ||
2620 | vm->pdes.size); | ||
2621 | if (err) { | ||
2622 | free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, | ||
2623 | vm->pdes.size); | ||
2624 | goto clean_up; | ||
2625 | } | ||
2626 | gk20a_dbg_info("pmu pdes phys @ 0x%llx", | ||
2627 | (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); | ||
2628 | /* we could release vm->pdes.kv but it's only one page... */ | ||
2629 | |||
2630 | pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl); | ||
2631 | pde_addr_lo = u64_lo32(pde_addr >> 12); | ||
2632 | pde_addr_hi = u64_hi32(pde_addr); | ||
2633 | |||
2634 | gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x", | ||
2635 | (u64)pde_addr, pde_addr_lo, pde_addr_hi); | ||
2636 | |||
2637 | /* allocate instance mem for pmu */ | ||
2638 | inst_block->size = GK20A_PMU_INST_SIZE; | ||
2639 | inst_block->cpuva = dma_alloc_coherent(d, inst_block->size, | ||
2640 | &iova, GFP_KERNEL); | ||
2641 | if (!inst_block->cpuva) { | ||
2642 | gk20a_err(d, "%s: memory allocation failed\n", __func__); | ||
2643 | err = -ENOMEM; | ||
2644 | goto clean_up; | ||
2645 | } | ||
2646 | |||
2647 | inst_block->iova = iova; | ||
2648 | inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova); | ||
2649 | if (!inst_block->cpu_pa) { | ||
2650 | gk20a_err(d, "%s: failed to get phys address\n", __func__); | ||
2651 | err = -ENOMEM; | ||
2652 | goto clean_up; | ||
2653 | } | ||
2654 | |||
2655 | inst_pa = inst_block->cpu_pa; | ||
2656 | inst_ptr = inst_block->cpuva; | ||
2657 | |||
2658 | gk20a_dbg_info("pmu inst block physical addr: 0x%llx", (u64)inst_pa); | ||
2659 | |||
2660 | memset(inst_ptr, 0, GK20A_PMU_INST_SIZE); | ||
2661 | |||
2662 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), | ||
2663 | ram_in_page_dir_base_target_vid_mem_f() | | ||
2664 | ram_in_page_dir_base_vol_true_f() | | ||
2665 | ram_in_page_dir_base_lo_f(pde_addr_lo)); | ||
2666 | |||
2667 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), | ||
2668 | ram_in_page_dir_base_hi_f(pde_addr_hi)); | ||
2669 | |||
2670 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), | ||
2671 | u64_lo32(vm->va_limit) | 0xFFF); | ||
2672 | |||
2673 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), | ||
2674 | ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); | ||
2675 | |||
2676 | gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_pmu", | ||
2677 | (vm->va_start >> 12), /* start */ | ||
2678 | (vm->va_limit - vm->va_start) >> 12, /*length*/ | ||
2679 | 1); /* align */ | ||
2680 | /* initialize just in case we try to use it anyway */ | ||
2681 | gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_pmu-unused", | ||
2682 | 0x0badc0de, /* start */ | ||
2683 | 1, /* length */ | ||
2684 | 1); /* align */ | ||
2685 | |||
2686 | |||
2687 | vm->mapped_buffers = RB_ROOT; | ||
2688 | |||
2689 | mutex_init(&vm->update_gmmu_lock); | ||
2690 | kref_init(&vm->ref); | ||
2691 | INIT_LIST_HEAD(&vm->reserved_va_list); | ||
2692 | |||
2693 | return 0; | ||
2694 | |||
2695 | clean_up: | ||
2696 | /* free, etc */ | ||
2697 | if (inst_block->cpuva) | ||
2698 | dma_free_coherent(d, inst_block->size, | ||
2699 | inst_block->cpuva, inst_block->iova); | ||
2700 | inst_block->cpuva = NULL; | ||
2701 | inst_block->iova = 0; | ||
2702 | return err; | ||
2703 | } | ||
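Both the BAR1 and PMU instance blocks are programmed the same way: the page directory base is split into a low word shifted down by 12 bits and a high word. A worked example of that split (the address is illustrative): for a page directory at IOVA 0x123456000,

    pde_addr_lo = u64_lo32(0x123456000 >> 12) = 0x00123456
    pde_addr_hi = u64_hi32(0x123456000)       = 0x00000001

which is exactly what the ram_in_page_dir_base_lo/hi writes above consume.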
2704 | |||
2705 | void gk20a_mm_fb_flush(struct gk20a *g) | ||
2706 | { | ||
2707 | struct mm_gk20a *mm = &g->mm; | ||
2708 | u32 data; | ||
2709 | s32 retry = 100; | ||
2710 | |||
2711 | gk20a_dbg_fn(""); | ||
2712 | |||
2713 | mutex_lock(&mm->l2_op_lock); | ||
2714 | |||
2715 | g->ops.ltc.elpg_flush(g); | ||
2716 | |||
2717 | /* Make sure all previous writes are committed to the L2. There's no | ||
2718 | guarantee that the writes have reached DRAM. This will be a sysmembar internal | ||
2719 | to the L2. */ | ||
2720 | gk20a_writel(g, flush_fb_flush_r(), | ||
2721 | flush_fb_flush_pending_busy_f()); | ||
2722 | |||
2723 | do { | ||
2724 | data = gk20a_readl(g, flush_fb_flush_r()); | ||
2725 | |||
2726 | if (flush_fb_flush_outstanding_v(data) == | ||
2727 | flush_fb_flush_outstanding_true_v() || | ||
2728 | flush_fb_flush_pending_v(data) == | ||
2729 | flush_fb_flush_pending_busy_v()) { | ||
2730 | gk20a_dbg_info("fb_flush 0x%x", data); | ||
2731 | retry--; | ||
2732 | usleep_range(20, 40); | ||
2733 | } else | ||
2734 | break; | ||
2735 | } while (retry >= 0 || !tegra_platform_is_silicon()); | ||
2736 | |||
2737 | if (retry < 0) | ||
2738 | gk20a_warn(dev_from_gk20a(g), | ||
2739 | "fb_flush too many retries"); | ||
2740 | |||
2741 | mutex_unlock(&mm->l2_op_lock); | ||
2742 | } | ||
2743 | |||
2744 | static void gk20a_mm_l2_invalidate_locked(struct gk20a *g) | ||
2745 | { | ||
2746 | u32 data; | ||
2747 | s32 retry = 200; | ||
2748 | |||
2749 | /* Invalidate any clean lines from the L2 so subsequent reads go to | ||
2750 | DRAM. Dirty lines are not affected by this operation. */ | ||
2751 | gk20a_writel(g, flush_l2_system_invalidate_r(), | ||
2752 | flush_l2_system_invalidate_pending_busy_f()); | ||
2753 | |||
2754 | do { | ||
2755 | data = gk20a_readl(g, flush_l2_system_invalidate_r()); | ||
2756 | |||
2757 | if (flush_l2_system_invalidate_outstanding_v(data) == | ||
2758 | flush_l2_system_invalidate_outstanding_true_v() || | ||
2759 | flush_l2_system_invalidate_pending_v(data) == | ||
2760 | flush_l2_system_invalidate_pending_busy_v()) { | ||
2761 | gk20a_dbg_info("l2_system_invalidate 0x%x", | ||
2762 | data); | ||
2763 | retry--; | ||
2764 | usleep_range(20, 40); | ||
2765 | } else | ||
2766 | break; | ||
2767 | } while (retry >= 0 || !tegra_platform_is_silicon()); | ||
2768 | |||
2769 | if (retry < 0) | ||
2770 | gk20a_warn(dev_from_gk20a(g), | ||
2771 | "l2_system_invalidate too many retries"); | ||
2772 | } | ||
2773 | |||
2774 | void gk20a_mm_l2_invalidate(struct gk20a *g) | ||
2775 | { | ||
2776 | struct mm_gk20a *mm = &g->mm; | ||
2777 | mutex_lock(&mm->l2_op_lock); | ||
2778 | gk20a_mm_l2_invalidate_locked(g); | ||
2779 | mutex_unlock(&mm->l2_op_lock); | ||
2780 | } | ||
2781 | |||
2782 | void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate) | ||
2783 | { | ||
2784 | struct mm_gk20a *mm = &g->mm; | ||
2785 | u32 data; | ||
2786 | s32 retry = 200; | ||
2787 | |||
2788 | gk20a_dbg_fn(""); | ||
2789 | |||
2790 | mutex_lock(&mm->l2_op_lock); | ||
2791 | |||
2792 | /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2 | ||
2793 | as clean, so subsequent reads might hit in the L2. */ | ||
2794 | gk20a_writel(g, flush_l2_flush_dirty_r(), | ||
2795 | flush_l2_flush_dirty_pending_busy_f()); | ||
2796 | |||
2797 | do { | ||
2798 | data = gk20a_readl(g, flush_l2_flush_dirty_r()); | ||
2799 | |||
2800 | if (flush_l2_flush_dirty_outstanding_v(data) == | ||
2801 | flush_l2_flush_dirty_outstanding_true_v() || | ||
2802 | flush_l2_flush_dirty_pending_v(data) == | ||
2803 | flush_l2_flush_dirty_pending_busy_v()) { | ||
2804 | gk20a_dbg_info("l2_flush_dirty 0x%x", data); | ||
2805 | retry--; | ||
2806 | usleep_range(20, 40); | ||
2807 | } else | ||
2808 | break; | ||
2809 | } while (retry >= 0 || !tegra_platform_is_silicon()); | ||
2810 | |||
2811 | if (retry < 0) | ||
2812 | gk20a_warn(dev_from_gk20a(g), | ||
2813 | "l2_flush_dirty too many retries"); | ||
2814 | |||
2815 | if (invalidate) | ||
2816 | gk20a_mm_l2_invalidate_locked(g); | ||
2817 | |||
2818 | mutex_unlock(&mm->l2_op_lock); | ||
2819 | } | ||
2820 | |||
2821 | |||
2822 | int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, | ||
2823 | struct dma_buf **dmabuf, | ||
2824 | u64 *offset) | ||
2825 | { | ||
2826 | struct mapped_buffer_node *mapped_buffer; | ||
2827 | |||
2828 | gk20a_dbg_fn("gpu_va=0x%llx", gpu_va); | ||
2829 | |||
2830 | mutex_lock(&vm->update_gmmu_lock); | ||
2831 | |||
2832 | mapped_buffer = find_mapped_buffer_range_locked(&vm->mapped_buffers, | ||
2833 | gpu_va); | ||
2834 | if (!mapped_buffer) { | ||
2835 | mutex_unlock(&vm->update_gmmu_lock); | ||
2836 | return -EINVAL; | ||
2837 | } | ||
2838 | |||
2839 | *dmabuf = mapped_buffer->dmabuf; | ||
2840 | *offset = gpu_va - mapped_buffer->addr; | ||
2841 | |||
2842 | mutex_unlock(&vm->update_gmmu_lock); | ||
2843 | |||
2844 | return 0; | ||
2845 | } | ||
2846 | |||
2847 | void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) | ||
2848 | { | ||
2849 | struct mm_gk20a *mm = vm->mm; | ||
2850 | struct gk20a *g = gk20a_from_vm(vm); | ||
2851 | u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12); | ||
2852 | u32 data; | ||
2853 | s32 retry = 200; | ||
2854 | |||
2855 | gk20a_dbg_fn(""); | ||
2856 | |||
2857 | /* Page tables are considered SW state and are preserved across | ||
2858 | prepare_poweroff. When gk20a deinit releases those page tables, | ||
2859 | common code in the vm unmap path calls tlb invalidate, which touches | ||
2860 | hw. Use the power_on flag to skip tlb invalidation when gpu | ||
2861 | power is turned off. */ | ||
2862 | |||
2863 | if (!g->power_on) | ||
2864 | return; | ||
2865 | |||
2866 | /* No need to invalidate if tlb is clean */ | ||
2867 | mutex_lock(&vm->update_gmmu_lock); | ||
2868 | if (!vm->tlb_dirty) { | ||
2869 | mutex_unlock(&vm->update_gmmu_lock); | ||
2870 | return; | ||
2871 | } | ||
2872 | vm->tlb_dirty = false; | ||
2873 | mutex_unlock(&vm->update_gmmu_lock); | ||
2874 | |||
2875 | mutex_lock(&mm->tlb_lock); | ||
2876 | do { | ||
2877 | data = gk20a_readl(g, fb_mmu_ctrl_r()); | ||
2878 | if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0) | ||
2879 | break; | ||
2880 | usleep_range(20, 40); | ||
2881 | retry--; | ||
2882 | } while (retry >= 0 || !tegra_platform_is_silicon()); | ||
2883 | |||
2884 | if (retry < 0) | ||
2885 | gk20a_warn(dev_from_gk20a(g), | ||
2886 | "wait mmu fifo space too many retries"); | ||
2887 | |||
2888 | gk20a_writel(g, fb_mmu_invalidate_pdb_r(), | ||
2889 | fb_mmu_invalidate_pdb_addr_f(addr_lo) | | ||
2890 | fb_mmu_invalidate_pdb_aperture_vid_mem_f()); | ||
2891 | |||
2892 | /* this is a sledgehammer, it would seem */ | ||
2893 | gk20a_writel(g, fb_mmu_invalidate_r(), | ||
2894 | fb_mmu_invalidate_all_pdb_true_f() | | ||
2895 | fb_mmu_invalidate_all_va_true_f() | | ||
2896 | fb_mmu_invalidate_trigger_true_f()); | ||
2897 | |||
2898 | do { | ||
2899 | data = gk20a_readl(g, fb_mmu_ctrl_r()); | ||
2900 | if (fb_mmu_ctrl_pri_fifo_empty_v(data) != | ||
2901 | fb_mmu_ctrl_pri_fifo_empty_false_f()) | ||
2902 | break; | ||
2903 | retry--; | ||
2904 | usleep_range(20, 40); | ||
2905 | } while (retry >= 0 || !tegra_platform_is_silicon()); | ||
2906 | |||
2907 | if (retry < 0) | ||
2908 | gk20a_warn(dev_from_gk20a(g), | ||
2909 | "mmu invalidate too many retries"); | ||
2910 | |||
2911 | mutex_unlock(&mm->tlb_lock); | ||
2912 | } | ||
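The tlb_dirty check above is one half of a simple protocol: the PTE/PDE update paths earlier in this file set vm->tlb_dirty after touching page tables, and any path that makes those mappings visible to the GPU (job submission, for instance, which lives outside this file and is an assumption here) is expected to call the invalidate. A minimal sketch of that calling side:

    /* after update_gmmu_ptes_locked() has marked the VM dirty ... */
    gk20a_mm_tlb_invalidate(vm);    /* returns early if the TLB is already
                                     * clean or the GPU is powered off */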
2913 | |||
2914 | int gk20a_mm_suspend(struct gk20a *g) | ||
2915 | { | ||
2916 | gk20a_dbg_fn(""); | ||
2917 | |||
2918 | gk20a_mm_fb_flush(g); | ||
2919 | gk20a_mm_l2_flush(g, true); | ||
2920 | |||
2921 | gk20a_dbg_fn("done"); | ||
2922 | return 0; | ||
2923 | } | ||
2924 | |||
2925 | void gk20a_mm_ltc_isr(struct gk20a *g) | ||
2926 | { | ||
2927 | u32 intr; | ||
2928 | |||
2929 | intr = gk20a_readl(g, ltc_ltc0_ltss_intr_r()); | ||
2930 | gk20a_err(dev_from_gk20a(g), "ltc: %08x\n", intr); | ||
2931 | gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr); | ||
2932 | } | ||
2933 | |||
2934 | bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g) | ||
2935 | { | ||
2936 | u32 debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r()); | ||
2937 | return fb_mmu_debug_ctrl_debug_v(debug_ctrl) == | ||
2938 | fb_mmu_debug_ctrl_debug_enabled_v(); | ||
2939 | } | ||
2940 | |||
2941 | static int gk20a_mm_mmu_vpr_info_fetch_wait(struct gk20a *g, | ||
2942 | const unsigned int msec) | ||
2943 | { | ||
2944 | unsigned long timeout; | ||
2945 | |||
2946 | timeout = jiffies + msecs_to_jiffies(msec); | ||
2947 | while (1) { | ||
2948 | u32 val; | ||
2949 | |||
2950 | val = gk20a_readl(g, fb_mmu_vpr_info_r()); | ||
2951 | if (fb_mmu_vpr_info_fetch_v(val) == | ||
2952 | fb_mmu_vpr_info_fetch_false_v()) | ||
2953 | break; | ||
2954 | |||
2955 | if (tegra_platform_is_silicon() && | ||
2956 | WARN_ON(time_after(jiffies, timeout))) | ||
2957 | return -ETIME; | ||
2958 | } | ||
2959 | |||
2960 | return 0; | ||
2961 | } | ||
2962 | |||
2963 | int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g) | ||
2964 | { | ||
2965 | int ret = 0; | ||
2966 | |||
2967 | gk20a_busy_noresume(g->dev); | ||
2968 | if (!pm_runtime_active(&g->dev->dev)) | ||
2969 | goto fail; | ||
2970 | |||
2971 | if (gk20a_mm_mmu_vpr_info_fetch_wait(g, 5)) { | ||
2972 | ret = -ETIME; | ||
2973 | goto fail; | ||
2974 | } | ||
2975 | |||
2976 | gk20a_writel(g, fb_mmu_vpr_info_r(), | ||
2977 | fb_mmu_vpr_info_fetch_true_v()); | ||
2978 | |||
2979 | ret = gk20a_mm_mmu_vpr_info_fetch_wait(g, 5); | ||
2980 | |||
2981 | fail: | ||
2982 | gk20a_idle(g->dev); | ||
2983 | return ret; | ||
2984 | } | ||