diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 144 |
1 files changed, 127 insertions, 17 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 6f6734b4..13382416 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -44,6 +44,112 @@ | |||
44 | #include "kind_gk20a.h" | 44 | #include "kind_gk20a.h" |
45 | #include "semaphore_gk20a.h" | 45 | #include "semaphore_gk20a.h" |
46 | 46 | ||
47 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) | ||
48 | { | ||
49 | void *cpu_va; | ||
50 | |||
51 | if (WARN_ON(mem->cpu_va)) { | ||
52 | gk20a_warn(dev_from_gk20a(g), "nested %s", __func__); | ||
53 | return -EBUSY; | ||
54 | } | ||
55 | |||
56 | cpu_va = vmap(mem->pages, | ||
57 | PAGE_ALIGN(mem->size) >> PAGE_SHIFT, | ||
58 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
59 | |||
60 | if (WARN_ON(!cpu_va)) | ||
61 | return -ENOMEM; | ||
62 | |||
63 | mem->cpu_va = cpu_va; | ||
64 | return 0; | ||
65 | } | ||
66 | |||
67 | void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem) | ||
68 | { | ||
69 | vunmap(mem->cpu_va); | ||
70 | mem->cpu_va = NULL; | ||
71 | } | ||
72 | |||
73 | u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) | ||
74 | { | ||
75 | u32 *ptr = mem->cpu_va; | ||
76 | u32 data; | ||
77 | |||
78 | WARN_ON(!ptr); | ||
79 | data = ptr[w]; | ||
80 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
81 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); | ||
82 | #endif | ||
83 | return data; | ||
84 | } | ||
85 | |||
86 | u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset) | ||
87 | { | ||
88 | WARN_ON(offset & 3); | ||
89 | return gk20a_mem_rd32(g, mem, offset / sizeof(u32)); | ||
90 | } | ||
91 | |||
92 | void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, | ||
93 | u32 offset, void *dest, u32 size) | ||
94 | { | ||
95 | u32 i; | ||
96 | u32 *dest_u32 = dest; | ||
97 | |||
98 | WARN_ON(offset & 3); | ||
99 | WARN_ON(size & 3); | ||
100 | offset /= sizeof(u32); | ||
101 | size /= sizeof(u32); | ||
102 | |||
103 | for (i = 0; i < size; i++) | ||
104 | dest_u32[i] = gk20a_mem_rd32(g, mem, offset + i); | ||
105 | } | ||
106 | |||
107 | void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data) | ||
108 | { | ||
109 | u32 *ptr = mem->cpu_va; | ||
110 | |||
111 | WARN_ON(!ptr); | ||
112 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
113 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); | ||
114 | #endif | ||
115 | ptr[w] = data; | ||
116 | } | ||
117 | |||
118 | void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data) | ||
119 | { | ||
120 | WARN_ON(offset & 3); | ||
121 | gk20a_mem_wr32(g, mem, offset / sizeof(u32), data); | ||
122 | } | ||
123 | |||
124 | void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset, | ||
125 | void *src, u32 size) | ||
126 | { | ||
127 | u32 i; | ||
128 | u32 *src_u32 = src; | ||
129 | |||
130 | WARN_ON(offset & 3); | ||
131 | WARN_ON(size & 3); | ||
132 | offset /= sizeof(u32); | ||
133 | size /= sizeof(u32); | ||
134 | |||
135 | for (i = 0; i < size; i++) | ||
136 | gk20a_mem_wr32(g, mem, offset + i, src_u32[i]); | ||
137 | } | ||
138 | |||
139 | void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset, | ||
140 | u32 value, u32 size) | ||
141 | { | ||
142 | u32 i; | ||
143 | |||
144 | WARN_ON(offset & 3); | ||
145 | WARN_ON(size & 3); | ||
146 | offset /= sizeof(u32); | ||
147 | size /= sizeof(u32); | ||
148 | |||
149 | for (i = 0; i < size; i++) | ||
150 | gk20a_mem_wr32(g, mem, offset + i, value); | ||
151 | } | ||
152 | |||
47 | /* | 153 | /* |
48 | * GPU mapping life cycle | 154 | * GPU mapping life cycle |
49 | * ====================== | 155 | * ====================== |
@@ -780,9 +886,14 @@ void pde_range_from_vaddr_range(struct vm_gk20a *vm, | |||
780 | *pde_lo, *pde_hi); | 886 | *pde_lo, *pde_hi); |
781 | } | 887 | } |
782 | 888 | ||
783 | u32 *pde_from_index(struct vm_gk20a *vm, u32 i) | 889 | static u32 pde_from_index(u32 i) |
890 | { | ||
891 | return i * gmmu_pde__size_v() / sizeof(u32); | ||
892 | } | ||
893 | |||
894 | static u32 pte_from_index(u32 i) | ||
784 | { | 895 | { |
785 | return (u32 *) (((u8 *)vm->pdb.mem.cpu_va) + i*gmmu_pde__size_v()); | 896 | return i * gmmu_pte__size_v() / sizeof(u32); |
786 | } | 897 | } |
787 | 898 | ||
788 | u32 pte_index_from_vaddr(struct vm_gk20a *vm, | 899 | u32 pte_index_from_vaddr(struct vm_gk20a *vm, |
@@ -2323,7 +2434,7 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm, | |||
2323 | u64 pte_addr_small = 0, pte_addr_big = 0; | 2434 | u64 pte_addr_small = 0, pte_addr_big = 0; |
2324 | struct gk20a_mm_entry *entry = vm->pdb.entries + i; | 2435 | struct gk20a_mm_entry *entry = vm->pdb.entries + i; |
2325 | u32 pde_v[2] = {0, 0}; | 2436 | u32 pde_v[2] = {0, 0}; |
2326 | u32 *pde; | 2437 | u32 pde; |
2327 | 2438 | ||
2328 | gk20a_dbg_fn(""); | 2439 | gk20a_dbg_fn(""); |
2329 | 2440 | ||
@@ -2348,10 +2459,10 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm, | |||
2348 | (big_valid ? (gmmu_pde_vol_big_true_f()) : | 2459 | (big_valid ? (gmmu_pde_vol_big_true_f()) : |
2349 | gmmu_pde_vol_big_false_f()); | 2460 | gmmu_pde_vol_big_false_f()); |
2350 | 2461 | ||
2351 | pde = pde_from_index(vm, i); | 2462 | pde = pde_from_index(i); |
2352 | 2463 | ||
2353 | gk20a_mem_wr32(pde, 0, pde_v[0]); | 2464 | gk20a_mem_wr32(g, &vm->pdb.mem, pde + 0, pde_v[0]); |
2354 | gk20a_mem_wr32(pde, 1, pde_v[1]); | 2465 | gk20a_mem_wr32(g, &vm->pdb.mem, pde + 1, pde_v[1]); |
2355 | 2466 | ||
2356 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x", | 2467 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x", |
2357 | i, gmmu_pgsz_idx, pde_v[1], pde_v[0]); | 2468 | i, gmmu_pgsz_idx, pde_v[1], pde_v[0]); |
@@ -2432,8 +2543,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
2432 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); | 2543 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); |
2433 | } | 2544 | } |
2434 | 2545 | ||
2435 | gk20a_mem_wr32(pte->mem.cpu_va + i*8, 0, pte_w[0]); | 2546 | gk20a_mem_wr32(g, &pte->mem, pte_from_index(i) + 0, pte_w[0]); |
2436 | gk20a_mem_wr32(pte->mem.cpu_va + i*8, 1, pte_w[1]); | 2547 | gk20a_mem_wr32(g, &pte->mem, pte_from_index(i) + 1, pte_w[1]); |
2437 | 2548 | ||
2438 | if (*iova) { | 2549 | if (*iova) { |
2439 | *iova += page_size; | 2550 | *iova += page_size; |
@@ -3489,19 +3600,19 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm) | |||
3489 | false, false, "cde"); | 3600 | false, false, "cde"); |
3490 | } | 3601 | } |
3491 | 3602 | ||
3492 | void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr) | 3603 | void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr) |
3493 | { | 3604 | { |
3494 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); | 3605 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); |
3495 | u32 pdb_addr_hi = u64_hi32(pdb_addr); | 3606 | u32 pdb_addr_hi = u64_hi32(pdb_addr); |
3496 | 3607 | ||
3497 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), | 3608 | gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(), |
3498 | (g->mm.vidmem_is_vidmem ? | 3609 | (g->mm.vidmem_is_vidmem ? |
3499 | ram_in_page_dir_base_target_sys_mem_ncoh_f() : | 3610 | ram_in_page_dir_base_target_sys_mem_ncoh_f() : |
3500 | ram_in_page_dir_base_target_vid_mem_f()) | | 3611 | ram_in_page_dir_base_target_vid_mem_f()) | |
3501 | ram_in_page_dir_base_vol_true_f() | | 3612 | ram_in_page_dir_base_vol_true_f() | |
3502 | ram_in_page_dir_base_lo_f(pdb_addr_lo)); | 3613 | ram_in_page_dir_base_lo_f(pdb_addr_lo)); |
3503 | 3614 | ||
3504 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), | 3615 | gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(), |
3505 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); | 3616 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); |
3506 | } | 3617 | } |
3507 | 3618 | ||
@@ -3510,23 +3621,22 @@ void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm, | |||
3510 | { | 3621 | { |
3511 | struct gk20a *g = gk20a_from_vm(vm); | 3622 | struct gk20a *g = gk20a_from_vm(vm); |
3512 | u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0); | 3623 | u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0); |
3513 | void *inst_ptr = inst_block->cpu_va; | ||
3514 | 3624 | ||
3515 | gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", | 3625 | gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", |
3516 | gk20a_mm_inst_block_addr(g, inst_block), inst_ptr); | 3626 | gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va); |
3517 | 3627 | ||
3518 | gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr); | 3628 | gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr); |
3519 | 3629 | ||
3520 | g->ops.mm.init_pdb(g, inst_ptr, pde_addr); | 3630 | g->ops.mm.init_pdb(g, inst_block, pde_addr); |
3521 | 3631 | ||
3522 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), | 3632 | gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(), |
3523 | u64_lo32(vm->va_limit - 1) & ~0xfff); | 3633 | u64_lo32(vm->va_limit - 1) & ~0xfff); |
3524 | 3634 | ||
3525 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), | 3635 | gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_hi_w(), |
3526 | ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit - 1))); | 3636 | ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit - 1))); |
3527 | 3637 | ||
3528 | if (big_page_size && g->ops.mm.set_big_page_size) | 3638 | if (big_page_size && g->ops.mm.set_big_page_size) |
3529 | g->ops.mm.set_big_page_size(g, inst_ptr, big_page_size); | 3639 | g->ops.mm.set_big_page_size(g, inst_block, big_page_size); |
3530 | } | 3640 | } |
3531 | 3641 | ||
3532 | int gk20a_mm_fb_flush(struct gk20a *g) | 3642 | int gk20a_mm_fb_flush(struct gk20a *g) |