path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
author		Konsta Holtta <kholtta@nvidia.com>	2016-05-12 02:32:05 -0400
committer	Ken Adams <kadams@nvidia.com>	2016-05-13 10:11:33 -0400
commit		6eebc87d99f9f04b2b68e0bc0142c161ab3e669d (patch)
tree		08e437890869d76072f291ea66f709f05ea07c8a /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent		14ef0dacc94077bc3dae4c942ff8c279cc4c92ba (diff)
gpu: nvgpu: refactor gk20a_mem_{wr,rd} for vidmem
To support vidmem, pass g and mem_desc to the buffer memory accessor functions. This allows the functions to select the memory access method based on the buffer aperture instead of using the cpu pointer directly (as was done until now). The selection and aperture support will come in another patch; this patch only refactors these accessors, keeping the underlying functionality as-is.

gk20a_mem_{rd,wr}32() work as previously; add also gk20a_mem_{rd,wr}() for byte-indexed accesses, gk20a_mem_{rd,wr}_n() for memcpy()-like functionality, and gk20a_memset() for filling buffers with a constant. The 8 and 16 bit accessor functions are removed.

vmap()/vunmap() pairs are abstracted to gk20a_mem_{begin,end}() to support other types of mappings or conditions where mapping the buffer is unnecessary or different.

Several function arguments that would access these buffers are also changed to take a mem_desc instead of a plain cpu pointer. Some relevant places are changed to use the accessor functions (e.g. memcpying to and from), but the majority of direct accesses will be adjusted later, when the buffers are moved to support vidmem.

JIRA DNVGPU-23

Change-Id: I3dd22e14290c4ab742d42e2dd327ebeb5cd3f25a
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1121143
Reviewed-by: Ken Adams <kadams@nvidia.com>
Tested-by: Ken Adams <kadams@nvidia.com>
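For context, a minimal usage sketch of the refactored accessors (not part of the patch): the mem_desc-based helpers gk20a_mem_{rd,wr}() and gk20a_memset() take byte offsets, while gk20a_mem_{rd,wr}32() take word indices, and all accesses must be bracketed by gk20a_mem_begin()/gk20a_mem_end(). The function name example_fill_and_check() and the values written are illustrative; the buffer is assumed to have been allocated elsewhere in the driver (e.g. via gk20a_gmmu_alloc()).

	/*
	 * Sketch only, assuming a sysmem-backed mem_desc whose size is a
	 * multiple of 4 bytes and that was allocated/mapped elsewhere.
	 */
	static int example_fill_and_check(struct gk20a *g, struct mem_desc *mem)
	{
		u32 word;
		int err;

		err = gk20a_mem_begin(g, mem);		/* vmap()s the buffer today */
		if (err)
			return err;

		gk20a_memset(g, mem, 0, 0, mem->size);	/* byte offset, byte size */
		gk20a_mem_wr32(g, mem, 0, 0xcafe);	/* word index 0 */
		gk20a_mem_wr(g, mem, 4, 0xbeef);	/* byte offset 4 == word 1 */
		word = gk20a_mem_rd(g, mem, 0);		/* reads back 0xcafe */

		gk20a_mem_end(g, mem);			/* vunmap() */
		return word == 0xcafe ? 0 : -EINVAL;
	}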
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	144
1 file changed, 127 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 6f6734b4..13382416 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -44,6 +44,112 @@
 #include "kind_gk20a.h"
 #include "semaphore_gk20a.h"
 
+int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem)
+{
+	void *cpu_va;
+
+	if (WARN_ON(mem->cpu_va)) {
+		gk20a_warn(dev_from_gk20a(g), "nested %s", __func__);
+		return -EBUSY;
+	}
+
+	cpu_va = vmap(mem->pages,
+			PAGE_ALIGN(mem->size) >> PAGE_SHIFT,
+			0, pgprot_writecombine(PAGE_KERNEL));
+
+	if (WARN_ON(!cpu_va))
+		return -ENOMEM;
+
+	mem->cpu_va = cpu_va;
+	return 0;
+}
+
+void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
+{
+	vunmap(mem->cpu_va);
+	mem->cpu_va = NULL;
+}
+
+u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
+{
+	u32 *ptr = mem->cpu_va;
+	u32 data;
+
+	WARN_ON(!ptr);
+	data = ptr[w];
+#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
+	gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
+#endif
+	return data;
+}
+
+u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset)
+{
+	WARN_ON(offset & 3);
+	return gk20a_mem_rd32(g, mem, offset / sizeof(u32));
+}
+
+void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem,
+		u32 offset, void *dest, u32 size)
+{
+	u32 i;
+	u32 *dest_u32 = dest;
+
+	WARN_ON(offset & 3);
+	WARN_ON(size & 3);
+	offset /= sizeof(u32);
+	size /= sizeof(u32);
+
+	for (i = 0; i < size; i++)
+		dest_u32[i] = gk20a_mem_rd32(g, mem, offset + i);
+}
+
+void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
+{
+	u32 *ptr = mem->cpu_va;
+
+	WARN_ON(!ptr);
+#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
+	gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
+#endif
+	ptr[w] = data;
+}
+
+void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data)
+{
+	WARN_ON(offset & 3);
+	gk20a_mem_wr32(g, mem, offset / sizeof(u32), data);
+}
+
+void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
+		void *src, u32 size)
+{
+	u32 i;
+	u32 *src_u32 = src;
+
+	WARN_ON(offset & 3);
+	WARN_ON(size & 3);
+	offset /= sizeof(u32);
+	size /= sizeof(u32);
+
+	for (i = 0; i < size; i++)
+		gk20a_mem_wr32(g, mem, offset + i, src_u32[i]);
+}
+
+void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset,
+		u32 value, u32 size)
+{
+	u32 i;
+
+	WARN_ON(offset & 3);
+	WARN_ON(size & 3);
+	offset /= sizeof(u32);
+	size /= sizeof(u32);
+
+	for (i = 0; i < size; i++)
+		gk20a_mem_wr32(g, mem, offset + i, value);
+}
+
 /*
  * GPU mapping life cycle
  * ======================
@@ -780,9 +886,14 @@ void pde_range_from_vaddr_range(struct vm_gk20a *vm,
 		*pde_lo, *pde_hi);
 }
 
-u32 *pde_from_index(struct vm_gk20a *vm, u32 i)
+static u32 pde_from_index(u32 i)
+{
+	return i * gmmu_pde__size_v() / sizeof(u32);
+}
+
+static u32 pte_from_index(u32 i)
 {
-	return (u32 *) (((u8 *)vm->pdb.mem.cpu_va) + i*gmmu_pde__size_v());
+	return i * gmmu_pte__size_v() / sizeof(u32);
 }
 
 u32 pte_index_from_vaddr(struct vm_gk20a *vm,
@@ -2323,7 +2434,7 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 	u64 pte_addr_small = 0, pte_addr_big = 0;
 	struct gk20a_mm_entry *entry = vm->pdb.entries + i;
 	u32 pde_v[2] = {0, 0};
-	u32 *pde;
+	u32 pde;
 
 	gk20a_dbg_fn("");
 
@@ -2348,10 +2459,10 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 		(big_valid ? (gmmu_pde_vol_big_true_f()) :
 		 gmmu_pde_vol_big_false_f());
 
-	pde = pde_from_index(vm, i);
+	pde = pde_from_index(i);
 
-	gk20a_mem_wr32(pde, 0, pde_v[0]);
-	gk20a_mem_wr32(pde, 1, pde_v[1]);
+	gk20a_mem_wr32(g, &vm->pdb.mem, pde + 0, pde_v[0]);
+	gk20a_mem_wr32(g, &vm->pdb.mem, pde + 1, pde_v[1]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
 		  i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
@@ -2432,8 +2543,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
 	}
 
-	gk20a_mem_wr32(pte->mem.cpu_va + i*8, 0, pte_w[0]);
-	gk20a_mem_wr32(pte->mem.cpu_va + i*8, 1, pte_w[1]);
+	gk20a_mem_wr32(g, &pte->mem, pte_from_index(i) + 0, pte_w[0]);
+	gk20a_mem_wr32(g, &pte->mem, pte_from_index(i) + 1, pte_w[1]);
 
 	if (*iova) {
 		*iova += page_size;
@@ -3489,19 +3600,19 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
 			false, false, "cde");
 }
 
-void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
+void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr)
 {
 	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
 	u32 pdb_addr_hi = u64_hi32(pdb_addr);
 
-	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
+	gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(),
 		(g->mm.vidmem_is_vidmem ?
 		 ram_in_page_dir_base_target_sys_mem_ncoh_f() :
 		 ram_in_page_dir_base_target_vid_mem_f()) |
 		ram_in_page_dir_base_vol_true_f() |
 		ram_in_page_dir_base_lo_f(pdb_addr_lo));
 
-	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
+	gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(),
 		ram_in_page_dir_base_hi_f(pdb_addr_hi));
 }
 
@@ -3510,23 +3621,22 @@ void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm,
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
-	void *inst_ptr = inst_block->cpu_va;
 
 	gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
-		gk20a_mm_inst_block_addr(g, inst_block), inst_ptr);
+		gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va);
 
 	gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr);
 
-	g->ops.mm.init_pdb(g, inst_ptr, pde_addr);
+	g->ops.mm.init_pdb(g, inst_block, pde_addr);
 
-	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
+	gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(),
 		u64_lo32(vm->va_limit - 1) & ~0xfff);
 
-	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
+	gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_hi_w(),
 		ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit - 1)));
 
 	if (big_page_size && g->ops.mm.set_big_page_size)
-		g->ops.mm.set_big_page_size(g, inst_ptr, big_page_size);
+		g->ops.mm.set_big_page_size(g, inst_block, big_page_size);
 }
 
 int gk20a_mm_fb_flush(struct gk20a *g)