path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
author	Arto Merilainen <amerilainen@nvidia.com>	2014-03-19 03:38:25 -0400
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-03-18 15:08:53 -0400
commit	a9785995d5f22aaeb659285f8aeb64d8b56982e0 (patch)
tree	cc75f75bcf43db316a002a7a240b81f299bf6d7f	/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent	61efaf843c22b85424036ec98015121c08f5f16c (diff)
gpu: nvgpu: Add NVIDIA GPU Driver
This patch moves the NVIDIA GPU driver to a new location.

Bug 1482562

Change-Id: I24293810b9d0f1504fd9be00135e21dad656ccb6
Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-on: http://git-master/r/383722
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	2984
1 files changed, 2984 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
new file mode 100644
index 00000000..b22df5e8
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -0,0 +1,2984 @@
1/*
2 * drivers/video/tegra/host/gk20a/mm_gk20a.c
3 *
4 * GK20A memory management
5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#include <linux/delay.h>
23#include <linux/highmem.h>
24#include <linux/log2.h>
25#include <linux/nvhost.h>
26#include <linux/pm_runtime.h>
27#include <linux/scatterlist.h>
28#include <linux/nvmap.h>
29#include <linux/tegra-soc.h>
30#include <linux/vmalloc.h>
31#include <linux/dma-buf.h>
32#include <asm/cacheflush.h>
33
34#include "gk20a.h"
35#include "mm_gk20a.h"
36#include "hw_gmmu_gk20a.h"
37#include "hw_fb_gk20a.h"
38#include "hw_bus_gk20a.h"
39#include "hw_ram_gk20a.h"
40#include "hw_mc_gk20a.h"
41#include "hw_flush_gk20a.h"
42#include "hw_ltc_gk20a.h"
43
44#include "kind_gk20a.h"
45
46#ifdef CONFIG_ARM64
47#define outer_flush_range(a, b)
48#define __cpuc_flush_dcache_area __flush_dcache_area
49#endif
50
51/*
52 * GPU mapping life cycle
53 * ======================
54 *
55 * Kernel mappings
56 * ---------------
57 *
58 * Kernel mappings are created through vm.map(..., false):
59 *
60 * - Mappings to the same allocations are reused and refcounted.
61 * - This path does not support deferred unmapping (i.e. kernel must wait for
62 * all hw operations on the buffer to complete before unmapping).
63 * - References to dmabuf are owned and managed by the (kernel) clients of
64 * the gk20a_vm layer.
65 *
66 *
67 * User space mappings
68 * -------------------
69 *
70 * User space mappings are created through as.map_buffer -> vm.map(..., true):
71 *
72 * - Mappings to the same allocations are reused and refcounted.
73 * - This path supports deferred unmapping (i.e. we delay the actual unmapping
74 * until all hw operations have completed).
75 * - References to dmabuf are owned and managed by the vm_gk20a
76 * layer itself. vm.map acquires these refs, and sets
77 * mapped_buffer->own_mem_ref to record that we must release the refs when we
78 * actually unmap.
79 *
80 */
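/*
 * Minimal usage sketch of the kernel path described above (illustration
 * only, not part of this file; the 0 flag/kind values are assumptions for
 * a plain pitch mapping, and error handling is omitted):
 *
 *	struct sg_table *sgt;
 *	u64 gpu_va;
 *
 *	gpu_va = gk20a_vm_map(vm, dmabuf,
 *			      0,      (offset_align: let the vm pick the va)
 *			      0,      (flags: no NVHOST_AS_MAP_BUFFER_FLAGS_*)
 *			      0,      (kind)
 *			      &sgt,
 *			      false,  (kernel mapping, i.e. not user_mapped)
 *			      gk20a_mem_flag_none);
 *
 *	... wait for all hw work on the buffer (no deferred unmap here) ...
 *
 *	gk20a_vm_unmap(vm, gpu_va);
 */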
81
82static inline int vm_aspace_id(struct vm_gk20a *vm)
83{
84 /* -1 is bar1 or pmu, etc. */
85 return vm->as_share ? vm->as_share->id : -1;
86}
87static inline u32 hi32(u64 f)
88{
89 return (u32)(f >> 32);
90}
91static inline u32 lo32(u64 f)
92{
93 return (u32)(f & 0xffffffff);
94}
95
96#define FLUSH_CPU_DCACHE(va, pa, size) \
97 do { \
98 __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \
99 outer_flush_range(pa, pa + (size_t)(size)); \
100 } while (0)
101
102static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
103static struct mapped_buffer_node *find_mapped_buffer_locked(
104 struct rb_root *root, u64 addr);
105static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
106 struct rb_root *root, struct dma_buf *dmabuf,
107 u32 kind);
108static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
109 enum gmmu_pgsz_gk20a pgsz_idx,
110 struct sg_table *sgt,
111 u64 first_vaddr, u64 last_vaddr,
112 u8 kind_v, u32 ctag_offset, bool cacheable,
113 int rw_flag);
114static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
115static void gk20a_vm_remove_support(struct vm_gk20a *vm);
116
117
118/* note: keep the page sizes sorted lowest to highest here */
119static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
120static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
121static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
122 0x1ffffLL };
123static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
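/*
 * e.g. for the big-page entry: 1 << 17 = SZ_128K, offset mask
 * (1 << 17) - 1 = 0x1ffff, page mask = ~0x1ffff; the 4K entry follows the
 * same shift/mask relationship with shift 12.
 */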
124
125struct gk20a_comptags {
126 u32 offset;
127 u32 lines;
128};
129
130struct gk20a_dmabuf_priv {
131 struct mutex lock;
132
133 struct gk20a_allocator *comptag_allocator;
134 struct gk20a_comptags comptags;
135
136 struct dma_buf_attachment *attach;
137 struct sg_table *sgt;
138
139 int pin_count;
140};
141
142static void gk20a_mm_delete_priv(void *_priv)
143{
144 struct gk20a_dmabuf_priv *priv = _priv;
145 if (!priv)
146 return;
147
148 if (priv->comptags.lines) {
149 BUG_ON(!priv->comptag_allocator);
150 priv->comptag_allocator->free(priv->comptag_allocator,
151 priv->comptags.offset,
152 priv->comptags.lines);
153 }
154
155 kfree(priv);
156}
157
158struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf)
159{
160 struct gk20a_dmabuf_priv *priv;
161
162 priv = dma_buf_get_drvdata(dmabuf, dev);
163 if (WARN_ON(!priv))
164 return ERR_PTR(-EINVAL);
165
166 mutex_lock(&priv->lock);
167
168 if (priv->pin_count == 0) {
169 priv->attach = dma_buf_attach(dmabuf, dev);
170 if (IS_ERR(priv->attach)) {
171 mutex_unlock(&priv->lock);
172 return (struct sg_table *)priv->attach;
173 }
174
175 priv->sgt = dma_buf_map_attachment(priv->attach,
176 DMA_BIDIRECTIONAL);
177 if (IS_ERR(priv->sgt)) {
178 dma_buf_detach(dmabuf, priv->attach);
179 mutex_unlock(&priv->lock);
180 return priv->sgt;
181 }
182 }
183
184 priv->pin_count++;
185 mutex_unlock(&priv->lock);
186 return priv->sgt;
187}
188
189void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
190 struct sg_table *sgt)
191{
192 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
193 dma_addr_t dma_addr;
194
195 if (IS_ERR(priv) || !priv)
196 return;
197
198 mutex_lock(&priv->lock);
199 WARN_ON(priv->sgt != sgt);
200 priv->pin_count--;
201 WARN_ON(priv->pin_count < 0);
202 dma_addr = sg_dma_address(priv->sgt->sgl);
203 if (priv->pin_count == 0) {
204 dma_buf_unmap_attachment(priv->attach, priv->sgt,
205 DMA_BIDIRECTIONAL);
206 dma_buf_detach(dmabuf, priv->attach);
207 }
208 mutex_unlock(&priv->lock);
209}
210
211
212static void gk20a_get_comptags(struct device *dev,
213 struct dma_buf *dmabuf,
214 struct gk20a_comptags *comptags)
215{
216 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
217
218 if (!comptags)
219 return;
220
221 if (!priv) {
222 comptags->lines = 0;
223 comptags->offset = 0;
224 return;
225 }
226
227 *comptags = priv->comptags;
228}
229
230static int gk20a_alloc_comptags(struct device *dev,
231 struct dma_buf *dmabuf,
232 struct gk20a_allocator *allocator,
233 int lines)
234{
235 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
236 u32 offset = 0;
237 int err;
238
239 if (!priv)
240 return -ENOSYS;
241
242 if (!lines)
243 return -EINVAL;
244
245 /* store the allocator so we can use it when we free the ctags */
246 priv->comptag_allocator = allocator;
247 err = allocator->alloc(allocator, &offset, lines);
248 if (!err) {
249 priv->comptags.lines = lines;
250 priv->comptags.offset = offset;
251 }
252 return err;
253}
254
255
256
257
258static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
259{
260 gk20a_dbg_fn("");
261 if (g->ops.fb.reset)
262 g->ops.fb.reset(g);
263
264 if (g->ops.fb.init_fs_state)
265 g->ops.fb.init_fs_state(g);
266
267 return 0;
268}
269
270void gk20a_remove_mm_support(struct mm_gk20a *mm)
271{
272 struct gk20a *g = mm->g;
273 struct device *d = dev_from_gk20a(g);
274 struct vm_gk20a *vm = &mm->bar1.vm;
275 struct inst_desc *inst_block = &mm->bar1.inst_block;
276
277 gk20a_dbg_fn("");
278
279 if (inst_block->cpuva)
280 dma_free_coherent(d, inst_block->size,
281 inst_block->cpuva, inst_block->iova);
282 inst_block->cpuva = NULL;
283 inst_block->iova = 0;
284
285 gk20a_vm_remove_support(vm);
286}
287
288int gk20a_init_mm_setup_sw(struct gk20a *g)
289{
290 struct mm_gk20a *mm = &g->mm;
291 int i;
292
293 gk20a_dbg_fn("");
294
295 if (mm->sw_ready) {
296 gk20a_dbg_fn("skip init");
297 return 0;
298 }
299
300 mm->g = g;
301 mutex_init(&mm->tlb_lock);
302 mutex_init(&mm->l2_op_lock);
303 mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
304 mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
305 mm->pde_stride = mm->big_page_size << 10;
306 mm->pde_stride_shift = ilog2(mm->pde_stride);
307 BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */
308
309 for (i = 0; i < ARRAY_SIZE(gmmu_page_sizes); i++) {
310
311 u32 num_ptes, pte_space, num_pages;
312
313 /* assuming "full" page tables */
314 num_ptes = mm->pde_stride / gmmu_page_sizes[i];
315
316 pte_space = num_ptes * gmmu_pte__size_v();
317 /* allocate whole pages */
318 pte_space = roundup(pte_space, PAGE_SIZE);
319
320 num_pages = pte_space / PAGE_SIZE;
321 /* make sure "order" is viable */
322 BUG_ON(!is_power_of_2(num_pages));
323
324 mm->page_table_sizing[i].num_ptes = num_ptes;
325 mm->page_table_sizing[i].order = ilog2(num_pages);
326 }
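	/*
	 * Worked example of the sizing above (taking the 8-byte pte noted in
	 * pte_space_page_offset_from_index() below): with a 128KB big page,
	 * pde_stride = 128KB << 10 = 128MB, so
	 *   4KB ptes:   num_ptes = 128MB/4KB   = 32768 -> 256KB of ptes
	 *                                        = 64 pages -> order 6
	 *   128KB ptes: num_ptes = 128MB/128KB = 1024  -> 8KB of ptes
	 *                                        = 2 pages  -> order 1
	 */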
327
328 /*TBD: make channel vm size configurable */
329 mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;
330
331 gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));
332
333 gk20a_dbg_info("small page-size (%dKB) pte array: %dKB",
334 gmmu_page_sizes[gmmu_page_size_small] >> 10,
335 (mm->page_table_sizing[gmmu_page_size_small].num_ptes *
336 gmmu_pte__size_v()) >> 10);
337
338 gk20a_dbg_info("big page-size (%dKB) pte array: %dKB",
339 gmmu_page_sizes[gmmu_page_size_big] >> 10,
340 (mm->page_table_sizing[gmmu_page_size_big].num_ptes *
341 gmmu_pte__size_v()) >> 10);
342
343
344 gk20a_init_bar1_vm(mm);
345
346 mm->remove_support = gk20a_remove_mm_support;
347 mm->sw_ready = true;
348
349 gk20a_dbg_fn("done");
350 return 0;
351}
352
353/* make sure gk20a_init_mm_support is called before */
354static int gk20a_init_mm_setup_hw(struct gk20a *g)
355{
356 struct mm_gk20a *mm = &g->mm;
357 struct inst_desc *inst_block = &mm->bar1.inst_block;
358 phys_addr_t inst_pa = inst_block->cpu_pa;
359
360 gk20a_dbg_fn("");
361
362 /* set large page size in fb
363 * note this is very early on, can we defer it ? */
364 {
365 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
366
367 if (gmmu_page_sizes[gmmu_page_size_big] == SZ_128K)
368 fb_mmu_ctrl = (fb_mmu_ctrl &
369 ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
370 fb_mmu_ctrl_vm_pg_size_128kb_f();
371 else
372 BUG_ON(1); /* no support/testing for larger ones yet */
373
374 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
375 }
376
377 inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a());
378 gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa);
379
380 /* this is very early in init... can we defer this? */
381 {
382 gk20a_writel(g, bus_bar1_block_r(),
383 bus_bar1_block_target_vid_mem_f() |
384 bus_bar1_block_mode_virtual_f() |
385 bus_bar1_block_ptr_f(inst_pa));
386 }
387
388 gk20a_dbg_fn("done");
389 return 0;
390}
391
392int gk20a_init_mm_support(struct gk20a *g)
393{
394 u32 err;
395
396 err = gk20a_init_mm_reset_enable_hw(g);
397 if (err)
398 return err;
399
400 err = gk20a_init_mm_setup_sw(g);
401 if (err)
402 return err;
403
404 err = gk20a_init_mm_setup_hw(g);
405 if (err)
406 return err;
407
408 return err;
409}
410
411#ifdef CONFIG_GK20A_PHYS_PAGE_TABLES
412static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
413 void **handle,
414 struct sg_table **sgt,
415 size_t *size)
416{
417 u32 num_pages = 1 << order;
418 u32 len = num_pages * PAGE_SIZE;
419 int err;
420 struct page *pages;
421
422 gk20a_dbg_fn("");
423
424 pages = alloc_pages(GFP_KERNEL, order);
425 if (!pages) {
426 gk20a_dbg(gpu_dbg_pte, "alloc_pages failed\n");
427 goto err_out;
428 }
429 *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
430 if (!(*sgt)) {
431 gk20a_dbg(gpu_dbg_pte, "cannot allocate sg table");
432 goto err_alloced;
433 }
434 err = sg_alloc_table(*sgt, 1, GFP_KERNEL);
435 if (err) {
436 gk20a_dbg(gpu_dbg_pte, "sg_alloc_table failed\n");
437 goto err_sg_table;
438 }
439 sg_set_page((*sgt)->sgl, pages, len, 0);
440 *handle = page_address(pages);
441 memset(*handle, 0, len);
442 *size = len;
443 FLUSH_CPU_DCACHE(*handle, sg_phys((*sgt)->sgl), len);
444
445 return 0;
446
447err_sg_table:
448 kfree(*sgt);
449err_alloced:
450 __free_pages(pages, order);
451err_out:
452 return -ENOMEM;
453}
454
455static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
456 struct sg_table *sgt, u32 order,
457 size_t size)
458{
459 gk20a_dbg_fn("");
460 BUG_ON(sgt == NULL);
461 free_pages((unsigned long)handle, order);
462 sg_free_table(sgt);
463 kfree(sgt);
464}
465
466static int map_gmmu_pages(void *handle, struct sg_table *sgt,
467 void **va, size_t size)
468{
469 FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
470 *va = handle;
471 return 0;
472}
473
474static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
475{
476 FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
477}
478#else
479static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
480 void **handle,
481 struct sg_table **sgt,
482 size_t *size)
483{
484 struct device *d = dev_from_vm(vm);
485 u32 num_pages = 1 << order;
486 u32 len = num_pages * PAGE_SIZE;
487 dma_addr_t iova;
488 DEFINE_DMA_ATTRS(attrs);
489 struct page **pages;
490 int err = 0;
491
492 gk20a_dbg_fn("");
493
494 *size = len;
495 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
496 pages = dma_alloc_attrs(d, len, &iova, GFP_KERNEL, &attrs);
497 if (!pages) {
498 gk20a_err(d, "memory allocation failed\n");
499 goto err_out;
500 }
501
502 err = gk20a_get_sgtable_from_pages(d, sgt, pages,
503 iova, len);
504 if (err) {
505 gk20a_err(d, "sgt allocation failed\n");
506 goto err_free;
507 }
508
509 *handle = (void *)pages;
510
511 return 0;
512
513err_free:
514 dma_free_attrs(d, len, pages, iova, &attrs);
515 pages = NULL;
516 iova = 0;
517err_out:
518 return -ENOMEM;
519}
520
521static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
522 struct sg_table *sgt, u32 order,
523 size_t size)
524{
525 struct device *d = dev_from_vm(vm);
526 u64 iova;
527 DEFINE_DMA_ATTRS(attrs);
528 struct page **pages = (struct page **)handle;
529
530 gk20a_dbg_fn("");
531 BUG_ON(sgt == NULL);
532
533 iova = sg_dma_address(sgt->sgl);
534
535 gk20a_free_sgtable(&sgt);
536
537 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
538 dma_free_attrs(d, size, pages, iova, &attrs);
539 pages = NULL;
540 iova = 0;
541}
542
543static int map_gmmu_pages(void *handle, struct sg_table *sgt,
544 void **kva, size_t size)
545{
546 int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
547 struct page **pages = (struct page **)handle;
548 gk20a_dbg_fn("");
549
550 *kva = vmap(pages, count, 0, pgprot_dmacoherent(PAGE_KERNEL));
551 if (!(*kva))
552 return -ENOMEM;
553
554 return 0;
555}
556
557static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
558{
559 gk20a_dbg_fn("");
560 vunmap(va);
561}
562#endif
563
564/* allocate a phys contig region big enough for a full
565 * sized gmmu page table for the given gmmu_page_size.
566 * the whole range is zeroed so it's "invalid"/will fault
567 */
568
569static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
570 enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
571 struct page_table_gk20a *pte)
572{
573 int err;
574 u32 pte_order;
575 void *handle = NULL;
576 struct sg_table *sgt;
577 size_t size;
578
579 gk20a_dbg_fn("");
580
581 /* allocate enough pages for the table */
582 pte_order = vm->mm->page_table_sizing[gmmu_pgsz_idx].order;
583
584 err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt, &size);
585 if (err)
586 return err;
587
588 gk20a_dbg(gpu_dbg_pte, "pte = 0x%p, addr=%08llx, size %d",
589 pte, gk20a_mm_iova_addr(sgt->sgl), pte_order);
590
591 pte->ref = handle;
592 pte->sgt = sgt;
593 pte->size = size;
594
595 return 0;
596}
597
598/* given address range (inclusive) determine the pdes crossed */
599static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm,
600 u64 addr_lo, u64 addr_hi,
601 u32 *pde_lo, u32 *pde_hi)
602{
603 *pde_lo = (u32)(addr_lo >> vm->mm->pde_stride_shift);
604 *pde_hi = (u32)(addr_hi >> vm->mm->pde_stride_shift);
605 gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d",
606 addr_lo, addr_hi, vm->mm->pde_stride_shift);
607 gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d",
608 *pde_lo, *pde_hi);
609}
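/*
 * e.g. with the 128MB pde stride configured in gk20a_init_mm_setup_sw()
 * (pde_stride_shift = 27): addr_lo = 0x0 and addr_hi = 0x0fffffff
 * (256MB - 1) give pde_lo = 0 and pde_hi = 1, i.e. the range crosses
 * two pdes.
 */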
610
611static inline u32 *pde_from_index(struct vm_gk20a *vm, u32 i)
612{
613 return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v());
614}
615
616static inline u32 pte_index_from_vaddr(struct vm_gk20a *vm,
617 u64 addr, enum gmmu_pgsz_gk20a pgsz_idx)
618{
619 u32 ret;
620 /* mask off pde part */
621 addr = addr & ((((u64)1) << vm->mm->pde_stride_shift) - ((u64)1));
622 /* shift over to get pte index. note assumption that pte index
623 * doesn't leak over into the high 32b */
624 ret = (u32)(addr >> gmmu_page_shifts[pgsz_idx]);
625
626 gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret);
627 return ret;
628}
629
630static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page,
631 u32 *pte_offset)
632{
633 /* ptes are 8B regardless of pagesize */
634 /* pte space pages are 4KB. so 512 ptes per 4KB page*/
635 *pte_page = i >> 9;
636
637 /* this offset is a pte offset, not a byte offset */
638 *pte_offset = i & ((1<<9)-1);
639
640 gk20a_dbg(gpu_dbg_pte, "i=0x%x pte_page=0x%x pte_offset=0x%x",
641 i, *pte_page, *pte_offset);
642}
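/*
 * e.g. pte index i = 1000: pte_page = 1000 >> 9 = 1,
 * pte_offset = 1000 & 511 = 488.
 */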
643
644
645/*
646 * given a pde index/page table number, make sure it has
647 * backing store and, if not, allocate it and
648 * record it in the appropriate pde
649 */
650static int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
651 u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
652{
653 int err;
654 struct page_table_gk20a *pte =
655 vm->pdes.ptes[gmmu_pgsz_idx] + i;
656
657 gk20a_dbg_fn("");
658
659 /* if it's already in place it's valid */
660 if (pte->ref)
661 return 0;
662
663 gk20a_dbg(gpu_dbg_pte, "alloc %dKB ptes for pde %d",
664 gmmu_page_sizes[gmmu_pgsz_idx]/1024, i);
665
666 err = zalloc_gmmu_page_table_gk20a(vm, gmmu_pgsz_idx, pte);
667 if (err)
668 return err;
669
670 /* rewrite pde */
671 update_gmmu_pde_locked(vm, i);
672
673 return 0;
674}
675
676static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm,
677 u64 addr)
678{
679 struct vm_reserved_va_node *va_node;
680 list_for_each_entry(va_node, &vm->reserved_va_list, reserved_va_list)
681 if (addr >= va_node->vaddr_start &&
682 addr < (u64)va_node->vaddr_start + (u64)va_node->size)
683 return va_node;
684
685 return NULL;
686}
687
688int gk20a_vm_get_buffers(struct vm_gk20a *vm,
689 struct mapped_buffer_node ***mapped_buffers,
690 int *num_buffers)
691{
692 struct mapped_buffer_node *mapped_buffer;
693 struct mapped_buffer_node **buffer_list;
694 struct rb_node *node;
695 int i = 0;
696
697 mutex_lock(&vm->update_gmmu_lock);
698
699 buffer_list = kzalloc(sizeof(*buffer_list) *
700 vm->num_user_mapped_buffers, GFP_KERNEL);
701 if (!buffer_list) {
702 mutex_unlock(&vm->update_gmmu_lock);
703 return -ENOMEM;
704 }
705
706 node = rb_first(&vm->mapped_buffers);
707 while (node) {
708 mapped_buffer =
709 container_of(node, struct mapped_buffer_node, node);
710 if (mapped_buffer->user_mapped) {
711 buffer_list[i] = mapped_buffer;
712 kref_get(&mapped_buffer->ref);
713 i++;
714 }
715 node = rb_next(&mapped_buffer->node);
716 }
717
718 BUG_ON(i != vm->num_user_mapped_buffers);
719
720 *num_buffers = vm->num_user_mapped_buffers;
721 *mapped_buffers = buffer_list;
722
723 mutex_unlock(&vm->update_gmmu_lock);
724
725 return 0;
726}
727
728static void gk20a_vm_unmap_locked_kref(struct kref *ref)
729{
730 struct mapped_buffer_node *mapped_buffer =
731 container_of(ref, struct mapped_buffer_node, ref);
732 gk20a_vm_unmap_locked(mapped_buffer);
733}
734
735void gk20a_vm_put_buffers(struct vm_gk20a *vm,
736 struct mapped_buffer_node **mapped_buffers,
737 int num_buffers)
738{
739 int i;
740
741 mutex_lock(&vm->update_gmmu_lock);
742
743 for (i = 0; i < num_buffers; ++i)
744 kref_put(&mapped_buffers[i]->ref,
745 gk20a_vm_unmap_locked_kref);
746
747 mutex_unlock(&vm->update_gmmu_lock);
748
749 kfree(mapped_buffers);
750}
751
752static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
753{
754 struct device *d = dev_from_vm(vm);
755 int retries;
756 struct mapped_buffer_node *mapped_buffer;
757
758 mutex_lock(&vm->update_gmmu_lock);
759
760 mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
761 if (!mapped_buffer) {
762 mutex_unlock(&vm->update_gmmu_lock);
763 gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
764 return;
765 }
766
767 if (mapped_buffer->flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
768 mutex_unlock(&vm->update_gmmu_lock);
769
770 retries = 1000;
771 while (retries) {
772 if (atomic_read(&mapped_buffer->ref.refcount) == 1)
773 break;
774 retries--;
775 udelay(50);
776 }
777 if (!retries)
778 gk20a_err(d, "sync-unmap failed on 0x%llx",
779 offset);
780 mutex_lock(&vm->update_gmmu_lock);
781 }
782
783 mapped_buffer->user_mapped--;
784 if (mapped_buffer->user_mapped == 0)
785 vm->num_user_mapped_buffers--;
786 kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
787
788 mutex_unlock(&vm->update_gmmu_lock);
789}
790
791static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
792 u64 size,
793 enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
794
795{
796 struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
797 int err;
798 u64 offset;
799 u32 start_page_nr = 0, num_pages;
800 u64 gmmu_page_size = gmmu_page_sizes[gmmu_pgsz_idx];
801
802 if (gmmu_pgsz_idx >= ARRAY_SIZE(gmmu_page_sizes)) {
803 dev_warn(dev_from_vm(vm),
804 "invalid page size requested in gk20a vm alloc");
805 return -EINVAL;
806 }
807
808 if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
809 dev_warn(dev_from_vm(vm),
810 "unsupported page size requested");
811 return -EINVAL;
812
813 }
814
815 /* be certain we round up to gmmu_page_size if needed */
816 /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
817 size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
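	/* e.g. size = 0x12345 rounds up to 0x13000 with 4KB pages and to
	 * 0x20000 with 128KB pages. */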
818
819 gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
820 gmmu_page_sizes[gmmu_pgsz_idx]>>10);
821
822 /* The vma allocator represents page accounting. */
823 num_pages = size >> gmmu_page_shifts[gmmu_pgsz_idx];
824
825 err = vma->alloc(vma, &start_page_nr, num_pages);
826
827 if (err) {
828 gk20a_err(dev_from_vm(vm),
829 "%s oom: sz=0x%llx", vma->name, size);
830 return 0;
831 }
832
833 offset = (u64)start_page_nr << gmmu_page_shifts[gmmu_pgsz_idx];
834 gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
835
836 return offset;
837}
838
839static int gk20a_vm_free_va(struct vm_gk20a *vm,
840 u64 offset, u64 size,
841 enum gmmu_pgsz_gk20a pgsz_idx)
842{
843 struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
844 u32 page_size = gmmu_page_sizes[pgsz_idx];
845 u32 page_shift = gmmu_page_shifts[pgsz_idx];
846 u32 start_page_nr, num_pages;
847 int err;
848
849 gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
850 vma->name, offset, size);
851
852 start_page_nr = (u32)(offset >> page_shift);
853 num_pages = (u32)((size + page_size - 1) >> page_shift);
854
855 err = vma->free(vma, start_page_nr, num_pages);
856 if (err) {
857 gk20a_err(dev_from_vm(vm),
858 "not found: offset=0x%llx, sz=0x%llx",
859 offset, size);
860 }
861
862 return err;
863}
864
865static int insert_mapped_buffer(struct rb_root *root,
866 struct mapped_buffer_node *mapped_buffer)
867{
868 struct rb_node **new_node = &(root->rb_node), *parent = NULL;
869
870 /* Figure out where to put new node */
871 while (*new_node) {
872 struct mapped_buffer_node *cmp_with =
873 container_of(*new_node, struct mapped_buffer_node,
874 node);
875
876 parent = *new_node;
877
878 if (cmp_with->addr > mapped_buffer->addr) /* u64 cmp */
879 new_node = &((*new_node)->rb_left);
880 else if (cmp_with->addr != mapped_buffer->addr) /* u64 cmp */
881 new_node = &((*new_node)->rb_right);
882 else
883 return -EINVAL; /* no fair dup'ing */
884 }
885
886 /* Add new node and rebalance tree. */
887 rb_link_node(&mapped_buffer->node, parent, new_node);
888 rb_insert_color(&mapped_buffer->node, root);
889
890 return 0;
891}
892
893static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
894 struct rb_root *root, struct dma_buf *dmabuf,
895 u32 kind)
896{
897 struct rb_node *node = rb_first(root);
898 while (node) {
899 struct mapped_buffer_node *mapped_buffer =
900 container_of(node, struct mapped_buffer_node, node);
901 if (mapped_buffer->dmabuf == dmabuf &&
902 kind == mapped_buffer->kind)
903 return mapped_buffer;
904 node = rb_next(&mapped_buffer->node);
905 }
906 return 0;
907}
908
909static struct mapped_buffer_node *find_mapped_buffer_locked(
910 struct rb_root *root, u64 addr)
911{
912
913 struct rb_node *node = root->rb_node;
914 while (node) {
915 struct mapped_buffer_node *mapped_buffer =
916 container_of(node, struct mapped_buffer_node, node);
917 if (mapped_buffer->addr > addr) /* u64 cmp */
918 node = node->rb_left;
919 else if (mapped_buffer->addr != addr) /* u64 cmp */
920 node = node->rb_right;
921 else
922 return mapped_buffer;
923 }
924 return 0;
925}
926
927static struct mapped_buffer_node *find_mapped_buffer_range_locked(
928 struct rb_root *root, u64 addr)
929{
930 struct rb_node *node = root->rb_node;
931 while (node) {
932 struct mapped_buffer_node *m =
933 container_of(node, struct mapped_buffer_node, node);
934 if (m->addr <= addr && m->addr + m->size > addr)
935 return m;
936 else if (m->addr > addr) /* u64 cmp */
937 node = node->rb_left;
938 else
939 node = node->rb_right;
940 }
941 return 0;
942}
943
944#define BFR_ATTRS (sizeof(nvmap_bfr_param)/sizeof(nvmap_bfr_param[0]))
945
946struct buffer_attrs {
947 struct sg_table *sgt;
948 u64 size;
949 u64 align;
950 u32 ctag_offset;
951 u32 ctag_lines;
952 int pgsz_idx;
953 u8 kind_v;
954 u8 uc_kind_v;
955};
956
957static void gmmu_select_page_size(struct buffer_attrs *bfr)
958{
959 int i;
960 /* choose the biggest first (top->bottom) */
961 for (i = (gmmu_nr_page_sizes-1); i >= 0; i--)
962 if (!(gmmu_page_offset_masks[i] & bfr->align)) {
963 /* would like to add this too but nvmap returns the
964 * original requested size not the allocated size.
965 * (!(gmmu_page_offset_masks[i] & bfr->size)) */
966 bfr->pgsz_idx = i;
967 break;
968 }
969}
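/*
 * e.g. a buffer whose IOVA is 128KB-aligned has bfr->align >= 0x20000, so
 * (0x1ffff & align) == 0 and the big (128KB) size is picked on the first
 * iteration; a buffer that is only 4KB-aligned (align == 0x1000) intersects
 * 0x1ffff and falls through to the 4KB entry.
 */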
970
971static int setup_buffer_kind_and_compression(struct device *d,
972 u32 flags,
973 struct buffer_attrs *bfr,
974 enum gmmu_pgsz_gk20a pgsz_idx)
975{
976 bool kind_compressible;
977
978 if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
979 bfr->kind_v = gmmu_pte_kind_pitch_v();
980
981 if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) {
982 gk20a_err(d, "kind 0x%x not supported", bfr->kind_v);
983 return -EINVAL;
984 }
985
986 bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
987 /* find a suitable uncompressed kind if it becomes necessary later */
988 kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
989 if (kind_compressible) {
990 bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
991 if (unlikely(bfr->uc_kind_v == gmmu_pte_kind_invalid_v())) {
992 /* shouldn't happen, but it is worth cross-checking */
993 gk20a_err(d, "comptag kind 0x%x can't be"
994 " downgraded to uncompressed kind",
995 bfr->kind_v);
996 return -EINVAL;
997 }
998 }
999 /* comptags only supported for suitable kinds, 128KB pagesize */
1000 if (unlikely(kind_compressible &&
1001 (gmmu_page_sizes[pgsz_idx] != 128*1024))) {
1002 /*
1003 gk20a_warn(d, "comptags specified"
1004 " but pagesize being used doesn't support it");*/
1005 /* it is safe to fall back to uncompressed as
1006 functionality is not harmed */
1007 bfr->kind_v = bfr->uc_kind_v;
1008 kind_compressible = false;
1009 }
1010 if (kind_compressible)
1011 bfr->ctag_lines = ALIGN(bfr->size, COMP_TAG_LINE_SIZE) >>
1012 COMP_TAG_LINE_SIZE_SHIFT;
1013 else
1014 bfr->ctag_lines = 0;
1015
1016 return 0;
1017}
1018
1019static int validate_fixed_buffer(struct vm_gk20a *vm,
1020 struct buffer_attrs *bfr,
1021 u64 map_offset)
1022{
1023 struct device *dev = dev_from_vm(vm);
1024 struct vm_reserved_va_node *va_node;
1025 struct mapped_buffer_node *buffer;
1026
1027 if (map_offset & gmmu_page_offset_masks[bfr->pgsz_idx]) {
1028 gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
1029 map_offset);
1030 return -EINVAL;
1031 }
1032
1033 /* find the space reservation */
1034 va_node = addr_to_reservation(vm, map_offset);
1035 if (!va_node) {
1036 gk20a_warn(dev, "fixed offset mapping without space allocation");
1037 return -EINVAL;
1038 }
1039
1040 /* check that this mapping does not collide with existing
1041 * mappings by checking the overlapping area between the current
1042 * buffer and all other mapped buffers */
1043
1044 list_for_each_entry(buffer,
1045 &va_node->va_buffers_list, va_buffers_list) {
1046 s64 begin = max(buffer->addr, map_offset);
1047 s64 end = min(buffer->addr +
1048 buffer->size, map_offset + bfr->size);
1049 if (end - begin > 0) {
1050 gk20a_warn(dev, "overlapping buffer map requested");
1051 return -EINVAL;
1052 }
1053 }
1054
1055 return 0;
1056}
1057
1058static u64 __locked_gmmu_map(struct vm_gk20a *vm,
1059 u64 map_offset,
1060 struct sg_table *sgt,
1061 u64 size,
1062 int pgsz_idx,
1063 u8 kind_v,
1064 u32 ctag_offset,
1065 u32 flags,
1066 int rw_flag)
1067{
1068 int err = 0, i = 0;
1069 u32 pde_lo, pde_hi;
1070 struct device *d = dev_from_vm(vm);
1071
1072 /* Allocate (or validate when map_offset != 0) the virtual address. */
1073 if (!map_offset) {
1074 map_offset = gk20a_vm_alloc_va(vm, size,
1075 pgsz_idx);
1076 if (!map_offset) {
1077 gk20a_err(d, "failed to allocate va space");
1078 err = -ENOMEM;
1079 goto fail;
1080 }
1081 }
1082
1083 pde_range_from_vaddr_range(vm,
1084 map_offset,
1085 map_offset + size - 1,
1086 &pde_lo, &pde_hi);
1087
1088 /* mark the addr range valid (but with 0 phys addr, which will fault) */
1089 for (i = pde_lo; i <= pde_hi; i++) {
1090 err = validate_gmmu_page_table_gk20a_locked(vm, i,
1091 pgsz_idx);
1092 if (err) {
1093 gk20a_err(d, "failed to validate page table %d: %d",
1094 i, err);
1095 goto fail;
1096 }
1097 }
1098
1099 err = update_gmmu_ptes_locked(vm, pgsz_idx,
1100 sgt,
1101 map_offset, map_offset + size - 1,
1102 kind_v,
1103 ctag_offset,
1104 flags &
1105 NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1106 rw_flag);
1107 if (err) {
1108 gk20a_err(d, "failed to update ptes on map");
1109 goto fail;
1110 }
1111
1112 return map_offset;
1113 fail:
1114 gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
1115 return 0;
1116}
1117
1118static void __locked_gmmu_unmap(struct vm_gk20a *vm,
1119 u64 vaddr,
1120 u64 size,
1121 int pgsz_idx,
1122 bool va_allocated,
1123 int rw_flag)
1124{
1125 int err = 0;
1126 struct gk20a *g = gk20a_from_vm(vm);
1127
1128 if (va_allocated) {
1129 err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
1130 if (err) {
1131 dev_err(dev_from_vm(vm),
1132 "failed to free va");
1133 return;
1134 }
1135 }
1136
1137 /* unmap here needs to know the page size we assigned at mapping */
1138 err = update_gmmu_ptes_locked(vm,
1139 pgsz_idx,
1140 0, /* n/a for unmap */
1141 vaddr,
1142 vaddr + size - 1,
1143 0, 0, false /* n/a for unmap */,
1144 rw_flag);
1145 if (err)
1146 dev_err(dev_from_vm(vm),
1147 "failed to update gmmu ptes on unmap");
1148
1149 /* detect which if any pdes/ptes can now be released */
1150
1151 /* flush l2 so any dirty lines are written out *now*.
1152 * also as we could potentially be switching this buffer
1153 * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
1154 * some point in the future we need to invalidate l2. e.g. switching
1155 * from a render buffer unmap (here) to later using the same memory
1156 * for gmmu ptes. note the positioning of this relative to any smmu
1157 * unmapping (below). */
1158
1159 gk20a_mm_l2_flush(g, true);
1160}
1161
1162static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
1163 struct dma_buf *dmabuf,
1164 u64 offset_align,
1165 u32 flags,
1166 int kind,
1167 struct sg_table **sgt,
1168 bool user_mapped,
1169 int rw_flag)
1170{
1171 struct mapped_buffer_node *mapped_buffer = 0;
1172
1173 mapped_buffer =
1174 find_mapped_buffer_reverse_locked(&vm->mapped_buffers,
1175 dmabuf, kind);
1176 if (!mapped_buffer)
1177 return 0;
1178
1179 if (mapped_buffer->flags != flags)
1180 return 0;
1181
1182 if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET &&
1183 mapped_buffer->addr != offset_align)
1184 return 0;
1185
1186 BUG_ON(mapped_buffer->vm != vm);
1187
1188 /* mark the buffer as used */
1189 if (user_mapped) {
1190 if (mapped_buffer->user_mapped == 0)
1191 vm->num_user_mapped_buffers++;
1192 mapped_buffer->user_mapped++;
1193
1194 /* If the mapping comes from user space, we own
1195 * the handle ref. Since we reuse an
1196 * existing mapping here, we need to give back those
1197 * refs once in order not to leak.
1198 */
1199 if (mapped_buffer->own_mem_ref)
1200 dma_buf_put(mapped_buffer->dmabuf);
1201 else
1202 mapped_buffer->own_mem_ref = true;
1203 }
1204 kref_get(&mapped_buffer->ref);
1205
1206 gk20a_dbg(gpu_dbg_map,
1207 "reusing as=%d pgsz=%d flags=0x%x ctags=%d "
1208 "start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x "
1209 "own_mem_ref=%d user_mapped=%d",
1210 vm_aspace_id(vm), mapped_buffer->pgsz_idx,
1211 mapped_buffer->flags,
1212 mapped_buffer->ctag_lines,
1213 mapped_buffer->ctag_offset,
1214 hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
1215 hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
1216 lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
1217 hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
1218 lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
1219 mapped_buffer->own_mem_ref, user_mapped);
1220
1221 if (sgt)
1222 *sgt = mapped_buffer->sgt;
1223 return mapped_buffer->addr;
1224}
1225
1226u64 gk20a_vm_map(struct vm_gk20a *vm,
1227 struct dma_buf *dmabuf,
1228 u64 offset_align,
1229 u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/,
1230 int kind,
1231 struct sg_table **sgt,
1232 bool user_mapped,
1233 int rw_flag)
1234{
1235 struct gk20a *g = gk20a_from_vm(vm);
1236 struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags;
1237 struct device *d = dev_from_vm(vm);
1238 struct mapped_buffer_node *mapped_buffer = 0;
1239 bool inserted = false, va_allocated = false;
1240 u32 gmmu_page_size = 0;
1241 u64 map_offset = 0;
1242 int err = 0;
1243 struct buffer_attrs bfr = {0};
1244 struct gk20a_comptags comptags;
1245
1246 mutex_lock(&vm->update_gmmu_lock);
1247
1248 /* check if this buffer is already mapped */
1249 map_offset = gk20a_vm_map_duplicate_locked(vm, dmabuf, offset_align,
1250 flags, kind, sgt,
1251 user_mapped, rw_flag);
1252 if (map_offset) {
1253 mutex_unlock(&vm->update_gmmu_lock);
1254 return map_offset;
1255 }
1256
1257 /* pin buffer to get phys/iovmm addr */
1258 bfr.sgt = gk20a_mm_pin(d, dmabuf);
1259 if (IS_ERR(bfr.sgt)) {
1260 /* Falling back to physical is actually possible
1261 * here in many cases if we use 4K phys pages in the
1262 * gmmu. However we have some regions which require
1263 * contig regions to work properly (either phys-contig
1264 * or contig through smmu io_vaspace). Until we can
1265 * track the difference between those two cases we have
1266 * to fail the mapping when we run out of SMMU space.
1267 */
1268 gk20a_warn(d, "oom allocating tracking buffer");
1269 goto clean_up;
1270 }
1271
1272 if (sgt)
1273 *sgt = bfr.sgt;
1274
1275 bfr.kind_v = kind;
1276 bfr.size = dmabuf->size;
1277 bfr.align = 1 << __ffs((u64)sg_dma_address(bfr.sgt->sgl));
1278 bfr.pgsz_idx = -1;
1279
1280 /* If FIXED_OFFSET is set, pgsz is determined. Otherwise, select
1281 * page size according to memory alignment */
1282 if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
1283 bfr.pgsz_idx = NV_GMMU_VA_IS_UPPER(offset_align) ?
1284 gmmu_page_size_big : gmmu_page_size_small;
1285 } else {
1286 gmmu_select_page_size(&bfr);
1287 }
1288
1289 /* validate/adjust bfr attributes */
1290 if (unlikely(bfr.pgsz_idx == -1)) {
1291 gk20a_err(d, "unsupported page size detected");
1292 goto clean_up;
1293 }
1294
1295 if (unlikely(bfr.pgsz_idx < gmmu_page_size_small ||
1296 bfr.pgsz_idx > gmmu_page_size_big)) {
1297 BUG_ON(1);
1298 err = -EINVAL;
1299 goto clean_up;
1300 }
1301 gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx];
1302
1303 /* Check if we should use a fixed offset for mapping this buffer */
1304 if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
1305 err = validate_fixed_buffer(vm, &bfr, offset_align);
1306 if (err)
1307 goto clean_up;
1308
1309 map_offset = offset_align;
1310 va_allocated = false;
1311 } else
1312 va_allocated = true;
1313
1314 if (sgt)
1315 *sgt = bfr.sgt;
1316
1317 err = setup_buffer_kind_and_compression(d, flags, &bfr, bfr.pgsz_idx);
1318 if (unlikely(err)) {
1319 gk20a_err(d, "failure setting up kind and compression");
1320 goto clean_up;
1321 }
1322
1323 /* bar1 and pmu vm don't need ctag */
1324 if (!vm->enable_ctag)
1325 bfr.ctag_lines = 0;
1326
1327 gk20a_get_comptags(d, dmabuf, &comptags);
1328
1329 if (bfr.ctag_lines && !comptags.lines) {
1330 /* allocate compression resources if needed */
1331 err = gk20a_alloc_comptags(d, dmabuf, ctag_allocator,
1332 bfr.ctag_lines);
1333 if (err) {
1334 /* ok to fall back here if we ran out */
1335 /* TBD: we can partially alloc ctags as well... */
1336 bfr.ctag_lines = bfr.ctag_offset = 0;
1337 bfr.kind_v = bfr.uc_kind_v;
1338 } else {
1339 gk20a_get_comptags(d, dmabuf, &comptags);
1340
1341 /* init/clear the ctag buffer */
1342 g->ops.ltc.clear_comptags(g,
1343 comptags.offset,
1344 comptags.offset + comptags.lines - 1);
1345 }
1346 }
1347
1348 /* store the comptag info */
1349 bfr.ctag_offset = comptags.offset;
1350
1351 /* update gmmu ptes */
1352 map_offset = __locked_gmmu_map(vm, map_offset,
1353 bfr.sgt,
1354 bfr.size,
1355 bfr.pgsz_idx,
1356 bfr.kind_v,
1357 bfr.ctag_offset,
1358 flags, rw_flag);
1359 if (!map_offset)
1360 goto clean_up;
1361
1362 gk20a_dbg(gpu_dbg_map,
1363 "as=%d pgsz=%d "
1364 "kind=0x%x kind_uc=0x%x flags=0x%x "
1365 "ctags=%d start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x",
1366 vm_aspace_id(vm), gmmu_page_size,
1367 bfr.kind_v, bfr.uc_kind_v, flags,
1368 bfr.ctag_lines, bfr.ctag_offset,
1369 hi32(map_offset), lo32(map_offset),
1370 hi32((u64)sg_dma_address(bfr.sgt->sgl)),
1371 lo32((u64)sg_dma_address(bfr.sgt->sgl)),
1372 hi32((u64)sg_phys(bfr.sgt->sgl)),
1373 lo32((u64)sg_phys(bfr.sgt->sgl)));
1374
1375#if defined(NVHOST_DEBUG)
1376 {
1377 int i;
1378 struct scatterlist *sg = NULL;
1379 gk20a_dbg(gpu_dbg_pte, "for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i)");
1380 for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i ) {
1381 u64 da = sg_dma_address(sg);
1382 u64 pa = sg_phys(sg);
1383 u64 len = sg->length;
1384 gk20a_dbg(gpu_dbg_pte, "i=%d pa=0x%x,%08x da=0x%x,%08x len=0x%x,%08x",
1385 i, hi32(pa), lo32(pa), hi32(da), lo32(da),
1386 hi32(len), lo32(len));
1387 }
1388 }
1389#endif
1390
1391 /* keep track of the buffer for unmapping */
1392 /* TBD: check for multiple mapping of same buffer */
1393 mapped_buffer = kzalloc(sizeof(*mapped_buffer), GFP_KERNEL);
1394 if (!mapped_buffer) {
1395 gk20a_warn(d, "oom allocating tracking buffer");
1396 goto clean_up;
1397 }
1398 mapped_buffer->dmabuf = dmabuf;
1399 mapped_buffer->sgt = bfr.sgt;
1400 mapped_buffer->addr = map_offset;
1401 mapped_buffer->size = bfr.size;
1402 mapped_buffer->pgsz_idx = bfr.pgsz_idx;
1403 mapped_buffer->ctag_offset = bfr.ctag_offset;
1404 mapped_buffer->ctag_lines = bfr.ctag_lines;
1405 mapped_buffer->vm = vm;
1406 mapped_buffer->flags = flags;
1407 mapped_buffer->kind = kind;
1408 mapped_buffer->va_allocated = va_allocated;
1409 mapped_buffer->user_mapped = user_mapped ? 1 : 0;
1410 mapped_buffer->own_mem_ref = user_mapped;
1411 INIT_LIST_HEAD(&mapped_buffer->unmap_list);
1412 INIT_LIST_HEAD(&mapped_buffer->va_buffers_list);
1413 kref_init(&mapped_buffer->ref);
1414
1415 err = insert_mapped_buffer(&vm->mapped_buffers, mapped_buffer);
1416 if (err) {
1417 gk20a_err(d, "failed to insert into mapped buffer tree");
1418 goto clean_up;
1419 }
1420 inserted = true;
1421 if (user_mapped)
1422 vm->num_user_mapped_buffers++;
1423
1424 gk20a_dbg_info("allocated va @ 0x%llx", map_offset);
1425
1426 if (!va_allocated) {
1427 struct vm_reserved_va_node *va_node;
1428
1429 /* find the space reservation */
1430 va_node = addr_to_reservation(vm, map_offset);
1431 list_add_tail(&mapped_buffer->va_buffers_list,
1432 &va_node->va_buffers_list);
1433 mapped_buffer->va_node = va_node;
1434 }
1435
1436 mutex_unlock(&vm->update_gmmu_lock);
1437
1438 /* Invalidate kernel mappings immediately */
1439 if (vm_aspace_id(vm) == -1)
1440 gk20a_mm_tlb_invalidate(vm);
1441
1442 return map_offset;
1443
1444clean_up:
1445 if (inserted) {
1446 rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
1447 if (user_mapped)
1448 vm->num_user_mapped_buffers--;
1449 }
1450 kfree(mapped_buffer);
1451 if (va_allocated)
1452 gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
1453 if (!IS_ERR(bfr.sgt))
1454 gk20a_mm_unpin(d, dmabuf, bfr.sgt);
1455
1456 mutex_unlock(&vm->update_gmmu_lock);
1457 gk20a_dbg_info("err=%d\n", err);
1458 return 0;
1459}
1460
1461u64 gk20a_gmmu_map(struct vm_gk20a *vm,
1462 struct sg_table **sgt,
1463 u64 size,
1464 u32 flags,
1465 int rw_flag)
1466{
1467 u64 vaddr;
1468
1469 mutex_lock(&vm->update_gmmu_lock);
1470 vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */
1471 *sgt, /* sg table */
1472 size,
1473 0, /* page size index = 0 i.e. SZ_4K */
1474 0, /* kind */
1475 0, /* ctag_offset */
1476 flags, rw_flag);
1477 mutex_unlock(&vm->update_gmmu_lock);
1478 if (!vaddr) {
1479 gk20a_err(dev_from_vm(vm), "failed to allocate va space");
1480 return 0;
1481 }
1482
1483 /* Invalidate kernel mappings immediately */
1484 gk20a_mm_tlb_invalidate(vm);
1485
1486 return vaddr;
1487}
1488
1489void gk20a_gmmu_unmap(struct vm_gk20a *vm,
1490 u64 vaddr,
1491 u64 size,
1492 int rw_flag)
1493{
1494 mutex_lock(&vm->update_gmmu_lock);
1495 __locked_gmmu_unmap(vm,
1496 vaddr,
1497 size,
1498 0, /* page size 4K */
1499 true, /*va_allocated */
1500 rw_flag);
1501 mutex_unlock(&vm->update_gmmu_lock);
1502}
1503
1504phys_addr_t gk20a_get_phys_from_iova(struct device *d,
1505 u64 dma_addr)
1506{
1507 phys_addr_t phys;
1508 u64 iova;
1509
1510 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
1511 if (!mapping)
1512 return dma_addr;
1513
1514 iova = dma_addr & PAGE_MASK;
1515 phys = iommu_iova_to_phys(mapping->domain, iova);
1516 return phys;
1517}
1518
1519/* get sg_table from already allocated buffer */
1520int gk20a_get_sgtable(struct device *d, struct sg_table **sgt,
1521 void *cpuva, u64 iova,
1522 size_t size)
1523{
1524 int err = 0;
1525 *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1526 if (!(*sgt)) {
1527 dev_err(d, "failed to allocate memory\n");
1528 err = -ENOMEM;
1529 goto fail;
1530 }
1531 err = dma_get_sgtable(d, *sgt,
1532 cpuva, iova,
1533 size);
1534 if (err) {
1535 dev_err(d, "failed to create sg table\n");
1536 goto fail;
1537 }
1538 sg_dma_address((*sgt)->sgl) = iova;
1539
1540 return 0;
1541 fail:
1542 if (*sgt) {
1543 kfree(*sgt);
1544 *sgt = NULL;
1545 }
1546 return err;
1547}
1548
1549int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt,
1550 struct page **pages, u64 iova,
1551 size_t size)
1552{
1553 int err = 0;
1554 *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1555 if (!(*sgt)) {
1556 dev_err(d, "failed to allocate memory\n");
1557 err = -ENOMEM;
1558 goto fail;
1559 }
1560 err = sg_alloc_table(*sgt, 1, GFP_KERNEL);
1561 if (err) {
1562 dev_err(d, "failed to allocate sg_table\n");
1563 goto fail;
1564 }
1565 sg_set_page((*sgt)->sgl, *pages, size, 0);
1566 sg_dma_address((*sgt)->sgl) = iova;
1567
1568 return 0;
1569 fail:
1570 if (*sgt) {
1571 kfree(*sgt);
1572 *sgt = NULL;
1573 }
1574 return err;
1575}
1576
1577void gk20a_free_sgtable(struct sg_table **sgt)
1578{
1579 sg_free_table(*sgt);
1580 kfree(*sgt);
1581 *sgt = NULL;
1582}
1583
1584u64 gk20a_mm_iova_addr(struct scatterlist *sgl)
1585{
1586 u64 result = sg_phys(sgl);
1587#ifdef CONFIG_TEGRA_IOMMU_SMMU
1588 if (sg_dma_address(sgl) == DMA_ERROR_CODE)
1589 result = 0;
1590 else if (sg_dma_address(sgl)) {
1591 result = sg_dma_address(sgl) |
1592 1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT;
1593 }
1594#endif
1595 return result;
1596}
1597
1598static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
1599 enum gmmu_pgsz_gk20a pgsz_idx,
1600 struct sg_table *sgt,
1601 u64 first_vaddr, u64 last_vaddr,
1602 u8 kind_v, u32 ctag_offset,
1603 bool cacheable,
1604 int rw_flag)
1605{
1606 int err;
1607 u32 pde_lo, pde_hi, pde_i;
1608 struct scatterlist *cur_chunk;
1609 unsigned int cur_offset;
1610 u32 pte_w[2] = {0, 0}; /* invalid pte */
1611 u32 ctag = ctag_offset;
1612 u32 ctag_incr;
1613 u32 page_size = gmmu_page_sizes[pgsz_idx];
1614 u64 addr = 0;
1615
1616 pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
1617 &pde_lo, &pde_hi);
1618
1619 gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
1620 pgsz_idx, pde_lo, pde_hi);
1621
1622 /* If ctag_offset !=0 add 1 else add 0. The idea is to avoid a branch
1623 * below (per-pte). Note: this doesn't work unless page size (when
1624 * comptags are active) is 128KB. We have checks elsewhere for that. */
1625 ctag_incr = !!ctag_offset;
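	/*
	 * e.g. ctag_offset = 16 gives ctag_incr = 1, so successive 128KB
	 * ptes get comptaglines 16, 17, 18, ...; ctag_offset = 0 gives
	 * ctag_incr = 0 and every pte keeps comptagline 0.
	 */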
1626
1627 if (sgt)
1628 cur_chunk = sgt->sgl;
1629 else
1630 cur_chunk = NULL;
1631
1632 cur_offset = 0;
1633
1634 for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
1635 u32 pte_lo, pte_hi;
1636 u32 pte_cur;
1637 void *pte_kv_cur;
1638
1639 struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
1640
1641 if (pde_i == pde_lo)
1642 pte_lo = pte_index_from_vaddr(vm, first_vaddr,
1643 pgsz_idx);
1644 else
1645 pte_lo = 0;
1646
1647 if ((pde_i != pde_hi) && (pde_hi != pde_lo))
1648 pte_hi = vm->mm->page_table_sizing[pgsz_idx].num_ptes-1;
1649 else
1650 pte_hi = pte_index_from_vaddr(vm, last_vaddr,
1651 pgsz_idx);
1652
1653 /* get cpu access to the ptes */
1654 err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur,
1655 pte->size);
1656 if (err) {
1657 gk20a_err(dev_from_vm(vm),
1658 "couldn't map ptes for update as=%d pte_ref_cnt=%d",
1659 vm_aspace_id(vm), pte->ref_cnt);
1660 goto clean_up;
1661 }
1662
1663 gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
1664 for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
1665
1666 if (likely(sgt)) {
1667 u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
1668 if (new_addr) {
1669 addr = new_addr;
1670 addr += cur_offset;
1671 }
1672
1673 pte_w[0] = gmmu_pte_valid_true_f() |
1674 gmmu_pte_address_sys_f(addr
1675 >> gmmu_pte_address_shift_v());
1676 pte_w[1] = gmmu_pte_aperture_video_memory_f() |
1677 gmmu_pte_kind_f(kind_v) |
1678 gmmu_pte_comptagline_f(ctag);
1679
1680 if (rw_flag == gk20a_mem_flag_read_only) {
1681 pte_w[0] |= gmmu_pte_read_only_true_f();
1682 pte_w[1] |=
1683 gmmu_pte_write_disable_true_f();
1684 } else if (rw_flag ==
1685 gk20a_mem_flag_write_only) {
1686 pte_w[1] |=
1687 gmmu_pte_read_disable_true_f();
1688 }
1689
1690 if (!cacheable)
1691 pte_w[1] |= gmmu_pte_vol_true_f();
1692
1693 pte->ref_cnt++;
1694
1695 gk20a_dbg(gpu_dbg_pte,
1696 "pte_cur=%d addr=0x%x,%08x kind=%d"
1697 " ctag=%d vol=%d refs=%d"
1698 " [0x%08x,0x%08x]",
1699 pte_cur, hi32(addr), lo32(addr),
1700 kind_v, ctag, !cacheable,
1701 pte->ref_cnt, pte_w[1], pte_w[0]);
1702
1703 ctag += ctag_incr;
1704 cur_offset += page_size;
1705 addr += page_size;
1706 while (cur_chunk &&
1707 cur_offset >= cur_chunk->length) {
1708 cur_offset -= cur_chunk->length;
1709 cur_chunk = sg_next(cur_chunk);
1710 }
1711
1712 } else {
1713 pte->ref_cnt--;
1714 gk20a_dbg(gpu_dbg_pte,
1715 "pte_cur=%d ref=%d [0x0,0x0]",
1716 pte_cur, pte->ref_cnt);
1717 }
1718
1719 gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
1720 gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
1721 }
1722
1723 unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
1724
1725 if (pte->ref_cnt == 0) {
1726 /* It can make sense to keep around one page table for
1727 * each flavor (empty)... in case a new map is coming
1728 * right back to alloc (and fill it in) again.
1729 * But: deferring unmapping should help with pathologic
1730 * unmap/map/unmap/map cases where we'd trigger pte
1731 * free/alloc/free/alloc.
1732 */
1733 free_gmmu_pages(vm, pte->ref, pte->sgt,
1734 vm->mm->page_table_sizing[pgsz_idx].order,
1735 pte->size);
1736 pte->ref = NULL;
1737
1738 /* rewrite pde */
1739 update_gmmu_pde_locked(vm, pde_i);
1740 }
1741
1742 }
1743
1744 smp_mb();
1745 vm->tlb_dirty = true;
1746 gk20a_dbg_fn("set tlb dirty");
1747
1748 return 0;
1749
1750clean_up:
1751 /*TBD: potentially rewrite above to pre-map everything it needs to
1752 * as that's the only way it can fail */
1753 return err;
1754
1755}
1756
1757
1758/* for gk20a the "video memory" apertures here are misnomers. */
1759static inline u32 big_valid_pde0_bits(u64 pte_addr)
1760{
1761 u32 pde0_bits =
1762 gmmu_pde_aperture_big_video_memory_f() |
1763 gmmu_pde_address_big_sys_f(
1764 (u32)(pte_addr >> gmmu_pde_address_shift_v()));
1765 return pde0_bits;
1766}
1767static inline u32 small_valid_pde1_bits(u64 pte_addr)
1768{
1769 u32 pde1_bits =
1770 gmmu_pde_aperture_small_video_memory_f() |
1771 gmmu_pde_vol_small_true_f() | /* tbd: why? */
1772 gmmu_pde_address_small_sys_f(
1773 (u32)(pte_addr >> gmmu_pde_address_shift_v()));
1774 return pde1_bits;
1775}
1776
1777/* Given the current state of the ptes associated with a pde,
1778 determine value and write it out. There's no checking
1779 here to determine whether or not a change was actually
1780 made. So, superfluous updates will cause unnecessary
1781 pde invalidations.
1782*/
1783static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
1784{
1785 bool small_valid, big_valid;
1786 u64 pte_addr[2] = {0, 0};
1787 struct page_table_gk20a *small_pte =
1788 vm->pdes.ptes[gmmu_page_size_small] + i;
1789 struct page_table_gk20a *big_pte =
1790 vm->pdes.ptes[gmmu_page_size_big] + i;
1791 u32 pde_v[2] = {0, 0};
1792 u32 *pde;
1793
1794 small_valid = small_pte && small_pte->ref;
1795 big_valid = big_pte && big_pte->ref;
1796
1797 if (small_valid)
1798 pte_addr[gmmu_page_size_small] =
1799 gk20a_mm_iova_addr(small_pte->sgt->sgl);
1800 if (big_valid)
1801 pte_addr[gmmu_page_size_big] =
1802 gk20a_mm_iova_addr(big_pte->sgt->sgl);
1803
1804 pde_v[0] = gmmu_pde_size_full_f();
1805 pde_v[0] |= big_valid ?
1806 big_valid_pde0_bits(pte_addr[gmmu_page_size_big])
1807 :
1808 (gmmu_pde_aperture_big_invalid_f());
1809
1810 pde_v[1] |= (small_valid ?
1811 small_valid_pde1_bits(pte_addr[gmmu_page_size_small])
1812 :
1813 (gmmu_pde_aperture_small_invalid_f() |
1814 gmmu_pde_vol_small_false_f())
1815 )
1816 |
1817 (big_valid ? (gmmu_pde_vol_big_true_f()) :
1818 gmmu_pde_vol_big_false_f());
1819
1820 pde = pde_from_index(vm, i);
1821
1822 gk20a_mem_wr32(pde, 0, pde_v[0]);
1823 gk20a_mem_wr32(pde, 1, pde_v[1]);
1824
1825 smp_mb();
1826
1827 FLUSH_CPU_DCACHE(pde,
1828 sg_phys(vm->pdes.sgt->sgl) + (i*gmmu_pde__size_v()),
1829 sizeof(u32)*2);
1830
1831 gk20a_mm_l2_invalidate(vm->mm->g);
1832
1833 gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);
1834
1835 vm->tlb_dirty = true;
1836}
1837
1838
1839static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
1840 u32 num_pages, u32 pgsz_idx)
1841{
1842 struct mm_gk20a *mm = vm->mm;
1843 struct gk20a *g = mm->g;
1844 u32 pgsz = gmmu_page_sizes[pgsz_idx];
1845 u32 i;
1846 dma_addr_t iova;
1847
1848 /* allocate the zero page if the va does not already have one */
1849 if (!vm->zero_page_cpuva) {
1850 int err = 0;
1851 vm->zero_page_cpuva = dma_alloc_coherent(&g->dev->dev,
1852 mm->big_page_size,
1853 &iova,
1854 GFP_KERNEL);
1855 if (!vm->zero_page_cpuva) {
1856 dev_err(&g->dev->dev, "failed to allocate zero page\n");
1857 return -ENOMEM;
1858 }
1859
1860 vm->zero_page_iova = iova;
1861 err = gk20a_get_sgtable(&g->dev->dev, &vm->zero_page_sgt,
1862 vm->zero_page_cpuva, vm->zero_page_iova,
1863 mm->big_page_size);
1864 if (err) {
1865 dma_free_coherent(&g->dev->dev, mm->big_page_size,
1866 vm->zero_page_cpuva,
1867 vm->zero_page_iova);
1868 vm->zero_page_iova = 0;
1869 vm->zero_page_cpuva = NULL;
1870
1871 dev_err(&g->dev->dev, "failed to create sg table for zero page\n");
1872 return -ENOMEM;
1873 }
1874 }
1875
1876 for (i = 0; i < num_pages; i++) {
1877 u64 page_vaddr = __locked_gmmu_map(vm, vaddr,
1878 vm->zero_page_sgt, pgsz, pgsz_idx, 0, 0,
1879 NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
1880 gk20a_mem_flag_none);
1881
1882 if (!page_vaddr) {
1883 gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!");
1884 goto err_unmap;
1885 }
1886 vaddr += pgsz;
1887 }
1888
1889 gk20a_mm_l2_flush(mm->g, true);
1890
1891 return 0;
1892
1893err_unmap:
1894
1895 WARN_ON(1);
1896 /* something went wrong. unmap pages */
1897 while (i--) {
1898 vaddr -= pgsz;
1899 __locked_gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
1900 gk20a_mem_flag_none);
1901 }
1902
1903 return -EINVAL;
1904}
1905
1906/* NOTE! mapped_buffers lock must be held */
1907static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
1908{
1909 struct vm_gk20a *vm = mapped_buffer->vm;
1910
1911 if (mapped_buffer->va_node &&
1912 mapped_buffer->va_node->sparse) {
1913 u64 vaddr = mapped_buffer->addr;
1914 u32 pgsz_idx = mapped_buffer->pgsz_idx;
1915 u32 num_pages = mapped_buffer->size >>
1916 gmmu_page_shifts[pgsz_idx];
1917
1918 /* there is little we can do if this fails... */
1919 gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
1920
1921 } else
1922 __locked_gmmu_unmap(vm,
1923 mapped_buffer->addr,
1924 mapped_buffer->size,
1925 mapped_buffer->pgsz_idx,
1926 mapped_buffer->va_allocated,
1927 gk20a_mem_flag_none);
1928
1929 gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d",
1930 vm_aspace_id(vm), gmmu_page_sizes[mapped_buffer->pgsz_idx],
1931 hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
1932 mapped_buffer->own_mem_ref);
1933
1934 gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
1935 mapped_buffer->sgt);
1936
1937 /* remove from mapped buffer tree and remove list, free */
1938 rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
1939 if (!list_empty(&mapped_buffer->va_buffers_list))
1940 list_del(&mapped_buffer->va_buffers_list);
1941
1942 /* keep track of mapped buffers */
1943 if (mapped_buffer->user_mapped)
1944 vm->num_user_mapped_buffers--;
1945
1946 if (mapped_buffer->own_mem_ref)
1947 dma_buf_put(mapped_buffer->dmabuf);
1948
1949 kfree(mapped_buffer);
1950
1951 return;
1952}
1953
1954void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
1955{
1956 struct device *d = dev_from_vm(vm);
1957 struct mapped_buffer_node *mapped_buffer;
1958
1959 mutex_lock(&vm->update_gmmu_lock);
1960 mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
1961 if (!mapped_buffer) {
1962 mutex_unlock(&vm->update_gmmu_lock);
1963 gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
1964 return;
1965 }
1966 kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
1967 mutex_unlock(&vm->update_gmmu_lock);
1968}
1969
1970static void gk20a_vm_remove_support(struct vm_gk20a *vm)
1971{
1972 struct gk20a *g = vm->mm->g;
1973 struct mapped_buffer_node *mapped_buffer;
1974 struct vm_reserved_va_node *va_node, *va_node_tmp;
1975 struct rb_node *node;
1976
1977 gk20a_dbg_fn("");
1978 mutex_lock(&vm->update_gmmu_lock);
1979
1980 /* TBD: add a flag here for the unmap code to recognize teardown
1981 * and short-circuit any otherwise expensive operations. */
1982
1983 node = rb_first(&vm->mapped_buffers);
1984 while (node) {
1985 mapped_buffer =
1986 container_of(node, struct mapped_buffer_node, node);
1987 gk20a_vm_unmap_locked(mapped_buffer);
1988 node = rb_first(&vm->mapped_buffers);
1989 }
1990
1991 /* destroy remaining reserved memory areas */
1992 list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
1993 reserved_va_list) {
1994 list_del(&va_node->reserved_va_list);
1995 kfree(va_node);
1996 }
1997
1998 /* TBD: unmapping all buffers above may not actually free
1999 * all vm ptes. jettison them here for certain... */
2000
2001 unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
2002 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, vm->pdes.size);
2003
2004 kfree(vm->pdes.ptes[gmmu_page_size_small]);
2005 kfree(vm->pdes.ptes[gmmu_page_size_big]);
2006 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
2007 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
2008
2009 mutex_unlock(&vm->update_gmmu_lock);
2010
2011 /* release zero page if used */
2012 if (vm->zero_page_cpuva)
2013 dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
2014 vm->zero_page_cpuva, vm->zero_page_iova);
2015
2016 /* vm is not used anymore. release it. */
2017 kfree(vm);
2018}
2019
2020static void gk20a_vm_remove_support_kref(struct kref *ref)
2021{
2022 struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
2023 gk20a_vm_remove_support(vm);
2024}
2025
2026void gk20a_vm_get(struct vm_gk20a *vm)
2027{
2028 kref_get(&vm->ref);
2029}
2030
2031void gk20a_vm_put(struct vm_gk20a *vm)
2032{
2033 kref_put(&vm->ref, gk20a_vm_remove_support_kref);
2034}
2035
2036/* address space interfaces for the gk20a module */
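/*
 * gk20a_vm_alloc_share() builds a fresh vm for an address-space share: it
 * sizes the page directory from the channel VA limit, allocates and maps the
 * page directory itself, then splits the VA range between a small-page
 * allocator (lower half, minus the one-PDE hole at the bottom) and a
 * big-page allocator (upper half). gk20a_vm_release_share() drops the
 * share's reference again.
 */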
2037int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
2038{
2039 struct gk20a_as *as = as_share->as;
2040 struct gk20a *g = gk20a_from_as(as);
2041 struct mm_gk20a *mm = &g->mm;
2042 struct vm_gk20a *vm;
2043 u64 vma_size;
2044 u32 num_pages, low_hole_pages;
2045 char name[32];
2046 int err;
2047
2048 gk20a_dbg_fn("");
2049
2050 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
2051 if (!vm)
2052 return -ENOMEM;
2053
2054 as_share->vm = vm;
2055
2056 vm->mm = mm;
2057 vm->as_share = as_share;
2058
2059 vm->big_pages = true;
2060
2061 vm->va_start = mm->pde_stride; /* create a one pde hole */
2062 vm->va_limit = mm->channel.size; /* note this means channel.size is
2063 really just the max */
2064 {
2065 u32 pde_lo, pde_hi;
2066 pde_range_from_vaddr_range(vm,
2067 0, vm->va_limit-1,
2068 &pde_lo, &pde_hi);
2069 vm->pdes.num_pdes = pde_hi + 1;
2070 }
2071
2072 vm->pdes.ptes[gmmu_page_size_small] =
2073 kzalloc(sizeof(struct page_table_gk20a) *
2074 vm->pdes.num_pdes, GFP_KERNEL);
2075
2076 vm->pdes.ptes[gmmu_page_size_big] =
2077 kzalloc(sizeof(struct page_table_gk20a) *
2078 vm->pdes.num_pdes, GFP_KERNEL);
2079
2080 if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2081 vm->pdes.ptes[gmmu_page_size_big]))
2082 return -ENOMEM;
2083
2084 gk20a_dbg_info("init space for va_limit=0x%llx num_pdes=%d",
2085 vm->va_limit, vm->pdes.num_pdes);
2086
2087 /* allocate the page table directory */
2088 err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2089 &vm->pdes.sgt, &vm->pdes.size);
2090 if (err)
2091 return -ENOMEM;
2092
2093 err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
2094 vm->pdes.size);
2095 if (err) {
2096 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2097 vm->pdes.size);
2098 return -ENOMEM;
2099 }
2100 gk20a_dbg(gpu_dbg_pte, "pdes.kv = 0x%p, pdes.phys = 0x%llx",
2101 vm->pdes.kv,
2102 gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2103 /* we could release vm->pdes.kv but it's only one page... */
2104
2105
2106 /* low-half: alloc small pages */
2107 /* high-half: alloc big pages */
2108 vma_size = mm->channel.size >> 1;
2109
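	/*
	 * Both allocators below work in units of pages, not bytes: the
	 * small-page allocator spans the lower half of the channel VA
	 * (starting right after the low hole), while the big-page allocator
	 * spans the upper half; its start index equals its length, i.e. the
	 * halfway point expressed in big-page units.
	 */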
2110 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
2111 gmmu_page_sizes[gmmu_page_size_small]>>10);
2112 num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);
2113
2114 /* num_pages above is without regard to the low-side hole. */
2115 low_hole_pages = (vm->va_start >>
2116 gmmu_page_shifts[gmmu_page_size_small]);
2117
2118 gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name,
2119 low_hole_pages, /* start */
2120 num_pages - low_hole_pages, /* length */
2121 1); /* align */
2122
2123 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
2124 gmmu_page_sizes[gmmu_page_size_big]>>10);
2125
2126 num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
2127 gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name,
2128 num_pages, /* start */
2129 num_pages, /* length */
2130 1); /* align */
2131
2132 vm->mapped_buffers = RB_ROOT;
2133
2134 mutex_init(&vm->update_gmmu_lock);
2135 kref_init(&vm->ref);
2136 INIT_LIST_HEAD(&vm->reserved_va_list);
2137
2138 vm->enable_ctag = true;
2139
2140 return 0;
2141}
2142
2143
2144int gk20a_vm_release_share(struct gk20a_as_share *as_share)
2145{
2146 struct vm_gk20a *vm = as_share->vm;
2147
2148 gk20a_dbg_fn("");
2149
2150 vm->as_share = NULL;
2151
2152 /* put as reference to vm */
2153 gk20a_vm_put(vm);
2154
2155 as_share->vm = NULL;
2156
2157 return 0;
2158}
2159
2160
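/*
 * Reserve a range of GPU VA on behalf of userspace (AS ioctl). The requested
 * page size selects which allocator to carve from, and FIXED_OFFSET requests
 * start at the supplied offset. SPARSE reservations (big pages only) are
 * immediately backed with zero-page mappings via gk20a_vm_put_empty().
 */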
2161int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
2162 struct nvhost_as_alloc_space_args *args)
2163{
2164	int err = -ENOMEM;
2165	int pgsz_idx;
2166 u32 start_page_nr;
2167 struct gk20a_allocator *vma;
2168 struct vm_gk20a *vm = as_share->vm;
2169 struct vm_reserved_va_node *va_node;
2170 u64 vaddr_start = 0;
2171
2172 gk20a_dbg_fn("flags=0x%x pgsz=0x%x nr_pages=0x%x o/a=0x%llx",
2173 args->flags, args->page_size, args->pages,
2174 args->o_a.offset);
2175
2176 /* determine pagesz idx */
2177 for (pgsz_idx = gmmu_page_size_small;
2178 pgsz_idx < gmmu_nr_page_sizes;
2179 pgsz_idx++) {
2180 if (gmmu_page_sizes[pgsz_idx] == args->page_size)
2181 break;
2182 }
2183
2184 if (pgsz_idx >= gmmu_nr_page_sizes) {
2185 err = -EINVAL;
2186 goto clean_up;
2187 }
2188
2189 va_node = kzalloc(sizeof(*va_node), GFP_KERNEL);
2190 if (!va_node) {
2191 err = -ENOMEM;
2192 goto clean_up;
2193 }
2194
2195 if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE &&
2196 pgsz_idx != gmmu_page_size_big) {
2197 err = -ENOSYS;
2198 kfree(va_node);
2199 goto clean_up;
2200 }
2201
2202 start_page_nr = 0;
2203 if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
2204 start_page_nr = (u32)(args->o_a.offset >>
2205 gmmu_page_shifts[pgsz_idx]);
2206
2207 vma = &vm->vma[pgsz_idx];
2208 err = vma->alloc(vma, &start_page_nr, args->pages);
2209 if (err) {
2210 kfree(va_node);
2211 goto clean_up;
2212 }
2213
2214 vaddr_start = (u64)start_page_nr << gmmu_page_shifts[pgsz_idx];
2215
2216 va_node->vaddr_start = vaddr_start;
2217 va_node->size = (u64)args->page_size * (u64)args->pages;
2218	va_node->pgsz_idx = pgsz_idx;
2219 INIT_LIST_HEAD(&va_node->va_buffers_list);
2220 INIT_LIST_HEAD(&va_node->reserved_va_list);
2221
2222 mutex_lock(&vm->update_gmmu_lock);
2223
2224 /* mark that we need to use sparse mappings here */
2225 if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) {
2226 err = gk20a_vm_put_empty(vm, vaddr_start, args->pages,
2227 pgsz_idx);
2228 if (err) {
2229 mutex_unlock(&vm->update_gmmu_lock);
2230 vma->free(vma, start_page_nr, args->pages);
2231 kfree(va_node);
2232 goto clean_up;
2233 }
2234
2235 va_node->sparse = true;
2236 }
2237
2238 list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list);
2239
2240 mutex_unlock(&vm->update_gmmu_lock);
2241
2242 args->o_a.offset = vaddr_start;
2243
2244clean_up:
2245 return err;
2246}
2247
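/*
 * Release a VA reservation made by gk20a_vm_alloc_space(). Buffers still
 * mapped inside the range are detached from the reservation but keep their
 * mappings; if the reservation was sparse, its zero-page backing is unmapped
 * here as well.
 */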
2248int gk20a_vm_free_space(struct gk20a_as_share *as_share,
2249 struct nvhost_as_free_space_args *args)
2250{
2251 int err = -ENOMEM;
2252 int pgsz_idx;
2253 u32 start_page_nr;
2254 struct gk20a_allocator *vma;
2255 struct vm_gk20a *vm = as_share->vm;
2256 struct vm_reserved_va_node *va_node;
2257
2258 gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,
2259 args->pages, args->offset);
2260
2261 /* determine pagesz idx */
2262 for (pgsz_idx = gmmu_page_size_small;
2263 pgsz_idx < gmmu_nr_page_sizes;
2264 pgsz_idx++) {
2265 if (gmmu_page_sizes[pgsz_idx] == args->page_size)
2266 break;
2267 }
2268
2269 if (pgsz_idx >= gmmu_nr_page_sizes) {
2270 err = -EINVAL;
2271 goto clean_up;
2272 }
2273
2274 start_page_nr = (u32)(args->offset >>
2275 gmmu_page_shifts[pgsz_idx]);
2276
2277 vma = &vm->vma[pgsz_idx];
2278 err = vma->free(vma, start_page_nr, args->pages);
2279
2280 if (err)
2281 goto clean_up;
2282
2283 mutex_lock(&vm->update_gmmu_lock);
2284 va_node = addr_to_reservation(vm, args->offset);
2285 if (va_node) {
2286		struct mapped_buffer_node *buffer, *n;
2287
2288		/* there is no need to unmap the buffers in the reserved
2289		 * range; just detach them so they become normal buffers
2290		 * again */
2291		list_for_each_entry_safe(buffer, n,
2292			&va_node->va_buffers_list, va_buffers_list)
2293			list_del_init(&buffer->va_buffers_list);
2294
2295 list_del(&va_node->reserved_va_list);
2296
2297 /* if this was a sparse mapping, free the va */
2298 if (va_node->sparse)
2299 __locked_gmmu_unmap(vm,
2300 va_node->vaddr_start,
2301 va_node->size,
2302 va_node->pgsz_idx,
2303 false,
2304 gk20a_mem_flag_none);
2305 kfree(va_node);
2306 }
2307 mutex_unlock(&vm->update_gmmu_lock);
2308
2309clean_up:
2310 return err;
2311}
2312
2313int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
2314 struct channel_gk20a *ch)
2315{
2316 int err = 0;
2317 struct vm_gk20a *vm = as_share->vm;
2318
2319 gk20a_dbg_fn("");
2320
2321 ch->vm = vm;
2322 err = channel_gk20a_commit_va(ch);
2323 if (err)
2324		ch->vm = NULL;
2325
2326 return err;
2327}
2328
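/*
 * Attach gk20a's per-dmabuf private data if it is not there yet. The double
 * check under a file-local mutex keeps two concurrent mappers of the same
 * dmabuf from each allocating a priv structure.
 */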
2329int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
2330{
2331 struct gk20a_dmabuf_priv *priv;
2332 static DEFINE_MUTEX(priv_lock);
2333
2334 priv = dma_buf_get_drvdata(dmabuf, dev);
2335 if (likely(priv))
2336 return 0;
2337
2338 mutex_lock(&priv_lock);
2339 priv = dma_buf_get_drvdata(dmabuf, dev);
2340 if (priv)
2341 goto priv_exist_or_err;
2342 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
2343 if (!priv) {
2344 priv = ERR_PTR(-ENOMEM);
2345 goto priv_exist_or_err;
2346 }
2347 mutex_init(&priv->lock);
2348 dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv);
2349priv_exist_or_err:
2350 mutex_unlock(&priv_lock);
2351 if (IS_ERR(priv))
2352 return -ENOMEM;
2353
2354 return 0;
2355}
2356
2357
2358static int gk20a_dmabuf_get_kind(struct dma_buf *dmabuf)
2359{
2360 int kind = 0;
2361#ifdef CONFIG_TEGRA_NVMAP
2362 int err;
2363 u64 nvmap_param;
2364
2365 err = nvmap_get_dmabuf_param(dmabuf, NVMAP_HANDLE_PARAM_KIND,
2366 &nvmap_param);
2367 kind = err ? kind : nvmap_param;
2368#endif
2369 return kind;
2370}
2371
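/*
 * Map a userspace-supplied dmabuf fd into this address space and return the
 * resulting GPU VA through *offset_align. A kind of -1 means "query the
 * buffer itself" (via nvmap, when available). A minimal usage sketch
 * follows; as_share, dmabuf_fd and the flag/kind values are hypothetical and
 * error handling is omitted.
 */
#if 0	/* illustrative only, not built */
	u64 gpu_va = 0;	/* no fixed offset requested */
	err = gk20a_vm_map_buffer(as_share, dmabuf_fd, &gpu_va,
				  0 /* flags */, -1 /* kind */);
	/* ... use gpu_va on a channel bound to this vm ... */
	gk20a_vm_unmap_buffer(as_share, gpu_va);
#endif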
2372int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
2373 int dmabuf_fd,
2374 u64 *offset_align,
2375 u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/
2376 int kind)
2377{
2378 int err = 0;
2379 struct vm_gk20a *vm = as_share->vm;
2380 struct dma_buf *dmabuf;
2381 u64 ret_va;
2382
2383 gk20a_dbg_fn("");
2384
2385 /* get ref to the mem handle (released on unmap_locked) */
2386 dmabuf = dma_buf_get(dmabuf_fd);
2387	if (IS_ERR(dmabuf))
2388		return PTR_ERR(dmabuf);
2389
2390 err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
2391 if (err) {
2392 dma_buf_put(dmabuf);
2393 return err;
2394 }
2395
2396 if (kind == -1)
2397 kind = gk20a_dmabuf_get_kind(dmabuf);
2398
2399 ret_va = gk20a_vm_map(vm, dmabuf, *offset_align,
2400 flags, kind, NULL, true,
2401 gk20a_mem_flag_none);
2402 *offset_align = ret_va;
2403 if (!ret_va) {
2404 dma_buf_put(dmabuf);
2405 err = -EINVAL;
2406 }
2407
2408 return err;
2409}
2410
2411int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset)
2412{
2413 struct vm_gk20a *vm = as_share->vm;
2414
2415 gk20a_dbg_fn("");
2416
2417 gk20a_vm_unmap_user(vm, offset);
2418 return 0;
2419}
2420
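/*
 * Set up the BAR1 vm used by the kernel: size the page directory for the
 * BAR1 aperture, allocate and map it, then build an instance block whose
 * RAMIN words point the hardware at that page directory and at the VA
 * limit. Only the small-page allocator is expected to be used; the big-page
 * one is initialized with throwaway values just in case.
 */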
2421int gk20a_init_bar1_vm(struct mm_gk20a *mm)
2422{
2423 int err;
2424 phys_addr_t inst_pa;
2425 void *inst_ptr;
2426 struct vm_gk20a *vm = &mm->bar1.vm;
2427 struct gk20a *g = gk20a_from_mm(mm);
2428 struct device *d = dev_from_gk20a(g);
2429 struct inst_desc *inst_block = &mm->bar1.inst_block;
2430 u64 pde_addr;
2431 u32 pde_addr_lo;
2432 u32 pde_addr_hi;
2433 dma_addr_t iova;
2434
2435 vm->mm = mm;
2436
2437 mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
2438
2439 gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
2440
2441 vm->va_start = mm->pde_stride * 1;
2442 vm->va_limit = mm->bar1.aperture_size;
2443
2444 {
2445 u32 pde_lo, pde_hi;
2446 pde_range_from_vaddr_range(vm,
2447 0, vm->va_limit-1,
2448 &pde_lo, &pde_hi);
2449 vm->pdes.num_pdes = pde_hi + 1;
2450 }
2451
2452 /* bar1 is likely only to ever use/need small page sizes. */
2453 /* But just in case, for now... arrange for both.*/
2454 vm->pdes.ptes[gmmu_page_size_small] =
2455 kzalloc(sizeof(struct page_table_gk20a) *
2456 vm->pdes.num_pdes, GFP_KERNEL);
2457
2458 vm->pdes.ptes[gmmu_page_size_big] =
2459 kzalloc(sizeof(struct page_table_gk20a) *
2460 vm->pdes.num_pdes, GFP_KERNEL);
2461
2462 if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2463 vm->pdes.ptes[gmmu_page_size_big]))
2464 return -ENOMEM;
2465
2466 gk20a_dbg_info("init space for bar1 va_limit=0x%llx num_pdes=%d",
2467 vm->va_limit, vm->pdes.num_pdes);
2468
2469
2470 /* allocate the page table directory */
2471 err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2472 &vm->pdes.sgt, &vm->pdes.size);
2473 if (err)
2474 goto clean_up;
2475
2476 err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
2477 vm->pdes.size);
2478 if (err) {
2479 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2480 vm->pdes.size);
2481 goto clean_up;
2482 }
2483 gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx",
2484 vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2485 /* we could release vm->pdes.kv but it's only one page... */
2486
2487 pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
2488 pde_addr_lo = u64_lo32(pde_addr >> 12);
2489 pde_addr_hi = u64_hi32(pde_addr);
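	/*
	 * Worked example with an illustrative address: a page directory at
	 * IOVA 0x0000000123456000 yields pde_addr_lo = 0x123456 (the address
	 * shifted right by 12) and pde_addr_hi = 0x1; these two values are
	 * what get programmed into the instance block below.
	 */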
2490
2491 gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
2492 (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl),
2493 pde_addr_lo, pde_addr_hi);
2494
2495 /* allocate instance mem for bar1 */
2496 inst_block->size = ram_in_alloc_size_v();
2497 inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
2498 &iova, GFP_KERNEL);
2499 if (!inst_block->cpuva) {
2500 gk20a_err(d, "%s: memory allocation failed\n", __func__);
2501 err = -ENOMEM;
2502 goto clean_up;
2503 }
2504
2505 inst_block->iova = iova;
2506 inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova);
2507 if (!inst_block->cpu_pa) {
2508 gk20a_err(d, "%s: failed to get phys address\n", __func__);
2509 err = -ENOMEM;
2510 goto clean_up;
2511 }
2512
2513 inst_pa = inst_block->cpu_pa;
2514 inst_ptr = inst_block->cpuva;
2515
2516 gk20a_dbg_info("bar1 inst block physical phys = 0x%llx, kv = 0x%p",
2517 (u64)inst_pa, inst_ptr);
2518
2519 memset(inst_ptr, 0, ram_fc_size_val_v());
2520
2521 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
2522 ram_in_page_dir_base_target_vid_mem_f() |
2523 ram_in_page_dir_base_vol_true_f() |
2524 ram_in_page_dir_base_lo_f(pde_addr_lo));
2525
2526 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
2527 ram_in_page_dir_base_hi_f(pde_addr_hi));
2528
2529 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
2530 u64_lo32(vm->va_limit) | 0xFFF);
2531
2532 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
2533 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
2534
2535 gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa);
2536 gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_bar1",
2537 1,/*start*/
2538 (vm->va_limit >> 12) - 1 /* length*/,
2539 1); /* align */
2540 /* initialize just in case we try to use it anyway */
2541 gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_bar1-unused",
2542 0x0badc0de, /* start */
2543 1, /* length */
2544 1); /* align */
2545
2546 vm->mapped_buffers = RB_ROOT;
2547
2548 mutex_init(&vm->update_gmmu_lock);
2549 kref_init(&vm->ref);
2550 INIT_LIST_HEAD(&vm->reserved_va_list);
2551
2552 return 0;
2553
2554clean_up:
2555 /* free, etc */
2556 if (inst_block->cpuva)
2557 dma_free_coherent(d, inst_block->size,
2558 inst_block->cpuva, inst_block->iova);
2559 inst_block->cpuva = NULL;
2560 inst_block->iova = 0;
2561 return err;
2562}
2563
2564/* pmu vm, share channel_vm interfaces */
2565int gk20a_init_pmu_vm(struct mm_gk20a *mm)
2566{
2567 int err;
2568 phys_addr_t inst_pa;
2569 void *inst_ptr;
2570 struct vm_gk20a *vm = &mm->pmu.vm;
2571 struct gk20a *g = gk20a_from_mm(mm);
2572 struct device *d = dev_from_gk20a(g);
2573 struct inst_desc *inst_block = &mm->pmu.inst_block;
2574 u64 pde_addr;
2575 u32 pde_addr_lo;
2576 u32 pde_addr_hi;
2577 dma_addr_t iova;
2578
2579 vm->mm = mm;
2580
2581 mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
2582
2583 gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
2584
2585 vm->va_start = GK20A_PMU_VA_START;
2586 vm->va_limit = vm->va_start + mm->pmu.aperture_size;
2587
2588 {
2589 u32 pde_lo, pde_hi;
2590 pde_range_from_vaddr_range(vm,
2591 0, vm->va_limit-1,
2592 &pde_lo, &pde_hi);
2593 vm->pdes.num_pdes = pde_hi + 1;
2594 }
2595
2596 /* The pmu is likely only to ever use/need small page sizes. */
2597 /* But just in case, for now... arrange for both.*/
2598 vm->pdes.ptes[gmmu_page_size_small] =
2599 kzalloc(sizeof(struct page_table_gk20a) *
2600 vm->pdes.num_pdes, GFP_KERNEL);
2601
2602 vm->pdes.ptes[gmmu_page_size_big] =
2603 kzalloc(sizeof(struct page_table_gk20a) *
2604 vm->pdes.num_pdes, GFP_KERNEL);
2605
2606 if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2607 vm->pdes.ptes[gmmu_page_size_big]))
2608 return -ENOMEM;
2609
2610 gk20a_dbg_info("init space for pmu va_limit=0x%llx num_pdes=%d",
2611 vm->va_limit, vm->pdes.num_pdes);
2612
2613 /* allocate the page table directory */
2614 err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2615 &vm->pdes.sgt, &vm->pdes.size);
2616 if (err)
2617 goto clean_up;
2618
2619 err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
2620 vm->pdes.size);
2621 if (err) {
2622 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2623 vm->pdes.size);
2624 goto clean_up;
2625 }
2626 gk20a_dbg_info("pmu pdes phys @ 0x%llx",
2627 (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2628 /* we could release vm->pdes.kv but it's only one page... */
2629
2630 pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
2631 pde_addr_lo = u64_lo32(pde_addr >> 12);
2632 pde_addr_hi = u64_hi32(pde_addr);
2633
2634 gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
2635 (u64)pde_addr, pde_addr_lo, pde_addr_hi);
2636
2637 /* allocate instance mem for pmu */
2638 inst_block->size = GK20A_PMU_INST_SIZE;
2639 inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
2640 &iova, GFP_KERNEL);
2641 if (!inst_block->cpuva) {
2642 gk20a_err(d, "%s: memory allocation failed\n", __func__);
2643 err = -ENOMEM;
2644 goto clean_up;
2645 }
2646
2647 inst_block->iova = iova;
2648 inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova);
2649 if (!inst_block->cpu_pa) {
2650 gk20a_err(d, "%s: failed to get phys address\n", __func__);
2651 err = -ENOMEM;
2652 goto clean_up;
2653 }
2654
2655 inst_pa = inst_block->cpu_pa;
2656 inst_ptr = inst_block->cpuva;
2657
2658 gk20a_dbg_info("pmu inst block physical addr: 0x%llx", (u64)inst_pa);
2659
2660 memset(inst_ptr, 0, GK20A_PMU_INST_SIZE);
2661
2662 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
2663 ram_in_page_dir_base_target_vid_mem_f() |
2664 ram_in_page_dir_base_vol_true_f() |
2665 ram_in_page_dir_base_lo_f(pde_addr_lo));
2666
2667 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
2668 ram_in_page_dir_base_hi_f(pde_addr_hi));
2669
2670 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
2671 u64_lo32(vm->va_limit) | 0xFFF);
2672
2673 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
2674 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
2675
2676 gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_pmu",
2677 (vm->va_start >> 12), /* start */
2678 (vm->va_limit - vm->va_start) >> 12, /*length*/
2679 1); /* align */
2680 /* initialize just in case we try to use it anyway */
2681 gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_pmu-unused",
2682 0x0badc0de, /* start */
2683 1, /* length */
2684 1); /* align */
2685
2686
2687 vm->mapped_buffers = RB_ROOT;
2688
2689 mutex_init(&vm->update_gmmu_lock);
2690 kref_init(&vm->ref);
2691 INIT_LIST_HEAD(&vm->reserved_va_list);
2692
2693 return 0;
2694
2695clean_up:
2696 /* free, etc */
2697 if (inst_block->cpuva)
2698 dma_free_coherent(d, inst_block->size,
2699 inst_block->cpuva, inst_block->iova);
2700 inst_block->cpuva = NULL;
2701 inst_block->iova = 0;
2702 return err;
2703}
2704
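/*
 * The flush/invalidate helpers below share one polling pattern: kick the
 * operation, then poll its status register with a 20-40us back-off between
 * reads. The retry budget only applies on silicon; pre-silicon platforms are
 * allowed to poll until the operation completes.
 */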
2705void gk20a_mm_fb_flush(struct gk20a *g)
2706{
2707 struct mm_gk20a *mm = &g->mm;
2708 u32 data;
2709 s32 retry = 100;
2710
2711 gk20a_dbg_fn("");
2712
2713 mutex_lock(&mm->l2_op_lock);
2714
2715 g->ops.ltc.elpg_flush(g);
2716
2717 /* Make sure all previous writes are committed to the L2. There's no
2718 guarantee that writes are to DRAM. This will be a sysmembar internal
2719 to the L2. */
2720 gk20a_writel(g, flush_fb_flush_r(),
2721 flush_fb_flush_pending_busy_f());
2722
2723 do {
2724 data = gk20a_readl(g, flush_fb_flush_r());
2725
2726 if (flush_fb_flush_outstanding_v(data) ==
2727 flush_fb_flush_outstanding_true_v() ||
2728 flush_fb_flush_pending_v(data) ==
2729 flush_fb_flush_pending_busy_v()) {
2730 gk20a_dbg_info("fb_flush 0x%x", data);
2731 retry--;
2732 usleep_range(20, 40);
2733 } else
2734 break;
2735 } while (retry >= 0 || !tegra_platform_is_silicon());
2736
2737 if (retry < 0)
2738 gk20a_warn(dev_from_gk20a(g),
2739 "fb_flush too many retries");
2740
2741 mutex_unlock(&mm->l2_op_lock);
2742}
2743
2744static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
2745{
2746 u32 data;
2747 s32 retry = 200;
2748
2749 /* Invalidate any clean lines from the L2 so subsequent reads go to
2750 DRAM. Dirty lines are not affected by this operation. */
2751 gk20a_writel(g, flush_l2_system_invalidate_r(),
2752 flush_l2_system_invalidate_pending_busy_f());
2753
2754 do {
2755 data = gk20a_readl(g, flush_l2_system_invalidate_r());
2756
2757 if (flush_l2_system_invalidate_outstanding_v(data) ==
2758 flush_l2_system_invalidate_outstanding_true_v() ||
2759 flush_l2_system_invalidate_pending_v(data) ==
2760 flush_l2_system_invalidate_pending_busy_v()) {
2761 gk20a_dbg_info("l2_system_invalidate 0x%x",
2762 data);
2763 retry--;
2764 usleep_range(20, 40);
2765 } else
2766 break;
2767 } while (retry >= 0 || !tegra_platform_is_silicon());
2768
2769 if (retry < 0)
2770 gk20a_warn(dev_from_gk20a(g),
2771 "l2_system_invalidate too many retries");
2772}
2773
2774void gk20a_mm_l2_invalidate(struct gk20a *g)
2775{
2776 struct mm_gk20a *mm = &g->mm;
2777 mutex_lock(&mm->l2_op_lock);
2778 gk20a_mm_l2_invalidate_locked(g);
2779 mutex_unlock(&mm->l2_op_lock);
2780}
2781
2782void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
2783{
2784 struct mm_gk20a *mm = &g->mm;
2785 u32 data;
2786 s32 retry = 200;
2787
2788 gk20a_dbg_fn("");
2789
2790 mutex_lock(&mm->l2_op_lock);
2791
2792 /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
2793 as clean, so subsequent reads might hit in the L2. */
2794 gk20a_writel(g, flush_l2_flush_dirty_r(),
2795 flush_l2_flush_dirty_pending_busy_f());
2796
2797 do {
2798 data = gk20a_readl(g, flush_l2_flush_dirty_r());
2799
2800 if (flush_l2_flush_dirty_outstanding_v(data) ==
2801 flush_l2_flush_dirty_outstanding_true_v() ||
2802 flush_l2_flush_dirty_pending_v(data) ==
2803 flush_l2_flush_dirty_pending_busy_v()) {
2804 gk20a_dbg_info("l2_flush_dirty 0x%x", data);
2805 retry--;
2806 usleep_range(20, 40);
2807 } else
2808 break;
2809 } while (retry >= 0 || !tegra_platform_is_silicon());
2810
2811 if (retry < 0)
2812 gk20a_warn(dev_from_gk20a(g),
2813 "l2_flush_dirty too many retries");
2814
2815 if (invalidate)
2816 gk20a_mm_l2_invalidate_locked(g);
2817
2818 mutex_unlock(&mm->l2_op_lock);
2819}
2820
2821
2822int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
2823 struct dma_buf **dmabuf,
2824 u64 *offset)
2825{
2826 struct mapped_buffer_node *mapped_buffer;
2827
2828 gk20a_dbg_fn("gpu_va=0x%llx", gpu_va);
2829
2830 mutex_lock(&vm->update_gmmu_lock);
2831
2832 mapped_buffer = find_mapped_buffer_range_locked(&vm->mapped_buffers,
2833 gpu_va);
2834 if (!mapped_buffer) {
2835 mutex_unlock(&vm->update_gmmu_lock);
2836 return -EINVAL;
2837 }
2838
2839 *dmabuf = mapped_buffer->dmabuf;
2840 *offset = gpu_va - mapped_buffer->addr;
2841
2842 mutex_unlock(&vm->update_gmmu_lock);
2843
2844 return 0;
2845}
2846
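/*
 * TLB invalidate for one vm: skipped when the GPU is powered off or when the
 * vm's page tables have not been dirtied since the last invalidate.
 * Otherwise wait for space in the MMU's PRI fifo, point the hardware at this
 * vm's page directory, trigger an invalidate of all PDBs/VAs, and wait for
 * the fifo to drain.
 */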
2847void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
2848{
2849 struct mm_gk20a *mm = vm->mm;
2850 struct gk20a *g = gk20a_from_vm(vm);
2851 u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12);
2852 u32 data;
2853 s32 retry = 200;
2854
2855 gk20a_dbg_fn("");
2856
2857 /* pagetables are considered sw states which are preserved after
2858 prepare_poweroff. When gk20a deinit releases those pagetables,
2859 common code in vm unmap path calls tlb invalidate that touches
2860 hw. Use the power_on flag to skip tlb invalidation when gpu
2861 power is turned off */
2862
2863 if (!g->power_on)
2864 return;
2865
2866 /* No need to invalidate if tlb is clean */
2867 mutex_lock(&vm->update_gmmu_lock);
2868 if (!vm->tlb_dirty) {
2869 mutex_unlock(&vm->update_gmmu_lock);
2870 return;
2871 }
2872 vm->tlb_dirty = false;
2873 mutex_unlock(&vm->update_gmmu_lock);
2874
2875 mutex_lock(&mm->tlb_lock);
2876 do {
2877 data = gk20a_readl(g, fb_mmu_ctrl_r());
2878 if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0)
2879 break;
2880 usleep_range(20, 40);
2881 retry--;
2882 } while (retry >= 0 || !tegra_platform_is_silicon());
2883
2884 if (retry < 0)
2885 gk20a_warn(dev_from_gk20a(g),
2886 "wait mmu fifo space too many retries");
2887
2888 gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
2889 fb_mmu_invalidate_pdb_addr_f(addr_lo) |
2890 fb_mmu_invalidate_pdb_aperture_vid_mem_f());
2891
2892 /* this is a sledgehammer, it would seem */
2893 gk20a_writel(g, fb_mmu_invalidate_r(),
2894 fb_mmu_invalidate_all_pdb_true_f() |
2895 fb_mmu_invalidate_all_va_true_f() |
2896 fb_mmu_invalidate_trigger_true_f());
2897
2898 do {
2899 data = gk20a_readl(g, fb_mmu_ctrl_r());
2900 if (fb_mmu_ctrl_pri_fifo_empty_v(data) !=
2901 fb_mmu_ctrl_pri_fifo_empty_false_f())
2902 break;
2903 retry--;
2904 usleep_range(20, 40);
2905 } while (retry >= 0 || !tegra_platform_is_silicon());
2906
2907 if (retry < 0)
2908 gk20a_warn(dev_from_gk20a(g),
2909 "mmu invalidate too many retries");
2910
2911 mutex_unlock(&mm->tlb_lock);
2912}
2913
2914int gk20a_mm_suspend(struct gk20a *g)
2915{
2916 gk20a_dbg_fn("");
2917
2918 gk20a_mm_fb_flush(g);
2919 gk20a_mm_l2_flush(g, true);
2920
2921 gk20a_dbg_fn("done");
2922 return 0;
2923}
2924
2925void gk20a_mm_ltc_isr(struct gk20a *g)
2926{
2927 u32 intr;
2928
2929 intr = gk20a_readl(g, ltc_ltc0_ltss_intr_r());
2930 gk20a_err(dev_from_gk20a(g), "ltc: %08x\n", intr);
2931 gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
2932}
2933
2934bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
2935{
2936 u32 debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
2937 return fb_mmu_debug_ctrl_debug_v(debug_ctrl) ==
2938 fb_mmu_debug_ctrl_debug_enabled_v();
2939}
2940
2941static int gk20a_mm_mmu_vpr_info_fetch_wait(struct gk20a *g,
2942 const unsigned int msec)
2943{
2944 unsigned long timeout;
2945
2946 timeout = jiffies + msecs_to_jiffies(msec);
2947 while (1) {
2948 u32 val;
2949
2950 val = gk20a_readl(g, fb_mmu_vpr_info_r());
2951 if (fb_mmu_vpr_info_fetch_v(val) ==
2952 fb_mmu_vpr_info_fetch_false_v())
2953 break;
2954
2955 if (tegra_platform_is_silicon() &&
2956 WARN_ON(time_after(jiffies, timeout)))
2957 return -ETIME;
2958 }
2959
2960 return 0;
2961}
2962
2963int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g)
2964{
2965 int ret = 0;
2966
2967 gk20a_busy_noresume(g->dev);
2968 if (!pm_runtime_active(&g->dev->dev))
2969 goto fail;
2970
2971 if (gk20a_mm_mmu_vpr_info_fetch_wait(g, 5)) {
2972 ret = -ETIME;
2973 goto fail;
2974 }
2975
2976 gk20a_writel(g, fb_mmu_vpr_info_r(),
2977 fb_mmu_vpr_info_fetch_true_v());
2978
2979 ret = gk20a_mm_mmu_vpr_info_fetch_wait(g, 5);
2980
2981 fail:
2982 gk20a_idle(g->dev);
2983 return ret;
2984}