author		Konsta Holtta <kholtta@nvidia.com>	2016-05-12 02:31:30 -0400
committer	Deepak Nibade <dnibade@nvidia.com>	2016-12-27 04:56:15 -0500
commit		18a017865946617fd63256858a0d2300160643f4 (patch)
tree		310822095e53bb0bb377f4955df7e2cc0f62fc7d /drivers/gpu/nvgpu/gp10b/mm_gp10b.c
parent		e746a16f7abbaacba89e390c692620941fc1b34c (diff)
gpu: nvgpu: refactor gk20a_mem_{wr,rd} for vidmem
To support vidmem, pass g and mem_desc to the buffer memory accessor
functions. This allows the functions to select the memory access method
based on the buffer aperture instead of using the CPU pointer directly,
as was done until now. The selection and aperture support will come in
another patch; this patch only refactors the accessors and keeps the
underlying functionality as-is.

JIRA DNVGPU-23

Change-Id: I21d4a54827b0e2741012dfde7952c0555a583435
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1121914
GVS: Gerrit_Virtual_Submit
Reviewed-by: Ken Adams <kadams@nvidia.com>
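For orientation, here is a minimal sketch of the interface change, using
simplified stand-ins for the driver types rather than the real
definitions. Per the old hunk lines below, the accessor used to take a
raw CPU pointer plus a word index; after the refactor it takes the
device and the buffer descriptor, with the offset still expressed in
32-bit words:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the driver types; illustration only. */
struct gk20a { int dummy; };
struct mem_desc {
	uint32_t *cpu_va;	/* aperture info arrives in a later patch */
};

/* Before: the accessor wrote through a raw CPU pointer. After: the
 * device and buffer descriptor are passed in, so a follow-up patch can
 * pick the access method from the buffer's aperture. This patch keeps
 * the CPU access path as-is. */
static void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem,
			   uint32_t w, uint32_t data)
{
	(void)g;		/* needed once aperture selection lands */
	mem->cpu_va[w] = data;	/* w is an offset in 32-bit words */
}

int main(void)
{
	uint32_t backing[4] = {0};
	struct gk20a g = {0};
	struct mem_desc mem = { .cpu_va = backing };

	gk20a_mem_wr32(&g, &mem, 1, 0xdeadbeef);
	printf("word 1 = 0x%08x\n", backing[1]);
	return 0;
}

Callers correspondingly stop doing the pointer arithmetic themselves
and compute word offsets through small helpers (pde3_from_index() and
friends), as the hunks below show.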
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/mm_gp10b.c')
-rw-r--r--	drivers/gpu/nvgpu/gp10b/mm_gp10b.c	44
1 file changed, 25 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 0e1f3c4b..0b693f7c 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -145,9 +145,14 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
 	return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
 }
 
-static u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i)
+static u32 pde3_from_index(u32 i)
 {
-	return (u32 *) (((u8 *)entry->mem.cpu_va) + i*gmmu_new_pde__size_v());
+	return i * gmmu_new_pde__size_v() / sizeof(u32);
+}
+
+static u32 pte3_from_index(u32 i)
+{
+	return i * gmmu_new_pte__size_v() / sizeof(u32);
 }
 
 static u64 entry_addr(struct gk20a *g, struct gk20a_mm_entry *entry)
@@ -176,7 +181,7 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	u64 pde_addr = 0;
 	struct gk20a_mm_entry *pte = parent->entries + i;
 	u32 pde_v[2] = {0, 0};
-	u32 *pde;
+	u32 pde;
 
 	gk20a_dbg_fn("");
 
@@ -189,10 +194,10 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
 	pde_v[0] |= gmmu_new_pde_vol_true_f();
 	pde_v[1] |= pte_addr >> 24;
-	pde = pde3_from_index(parent, i);
+	pde = pde3_from_index(i);
 
-	gk20a_mem_wr32(pde, 0, pde_v[0]);
-	gk20a_mem_wr32(pde, 1, pde_v[1]);
+	gk20a_mem_wr32(g, &parent->mem, pde + 0, pde_v[0]);
+	gk20a_mem_wr32(g, &parent->mem, pde + 1, pde_v[1]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
 		  i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
@@ -200,9 +205,9 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	return 0;
 }
 
-static u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i)
+static u32 pde0_from_index(u32 i)
 {
-	return (u32 *) (((u8 *)entry->mem.cpu_va) + i*gmmu_new_dual_pde__size_v());
+	return i * gmmu_new_dual_pde__size_v() / sizeof(u32);
 }
 
 static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
@@ -220,7 +225,7 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	u32 pte_addr_small = 0, pte_addr_big = 0;
 	struct gk20a_mm_entry *entry = pte->entries + i;
 	u32 pde_v[4] = {0, 0, 0, 0};
-	u32 *pde;
+	u32 pde;
 
 	gk20a_dbg_fn("");
 
@@ -254,12 +259,12 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 		pde_v[1] |= pte_addr_big >> 28;
 	}
 
-	pde = pde0_from_index(pte, i);
+	pde = pde0_from_index(i);
 
-	gk20a_mem_wr32(pde, 0, pde_v[0]);
-	gk20a_mem_wr32(pde, 1, pde_v[1]);
-	gk20a_mem_wr32(pde, 2, pde_v[2]);
-	gk20a_mem_wr32(pde, 3, pde_v[3]);
+	gk20a_mem_wr32(g, &pte->mem, pde + 0, pde_v[0]);
+	gk20a_mem_wr32(g, &pte->mem, pde + 1, pde_v[1]);
+	gk20a_mem_wr32(g, &pte->mem, pde + 2, pde_v[2]);
+	gk20a_mem_wr32(g, &pte->mem, pde + 3, pde_v[3]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]",
 		  i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
@@ -323,8 +328,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
 	}
 
-	gk20a_mem_wr32(pte->mem.cpu_va + i*8, 0, pte_w[0]);
-	gk20a_mem_wr32(pte->mem.cpu_va + i*8, 1, pte_w[1]);
+	gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 0, pte_w[0]);
+	gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 1, pte_w[1]);
 
 	if (*iova) {
 		*iova += page_size;
@@ -376,12 +381,13 @@ static const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g,
 	return gp10b_mm_levels;
 }
 
-static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
+static void gp10b_mm_init_pdb(struct gk20a *g, struct mem_desc *mem,
+			      u64 pdb_addr)
 {
 	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
 	u32 pdb_addr_hi = u64_hi32(pdb_addr);
 
-	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
+	gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(),
 		(g->mm.vidmem_is_vidmem ?
 		 ram_in_page_dir_base_target_sys_mem_ncoh_f() :
 		 ram_in_page_dir_base_target_vid_mem_f()) |
@@ -389,7 +395,7 @@ static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
 		ram_in_page_dir_base_lo_f(pdb_addr_lo) |
 		1 << 10);
 
-	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
+	gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(),
 		ram_in_page_dir_base_hi_f(pdb_addr_hi));
 }
 
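A note on the index helpers: they now return offsets in 32-bit words
rather than CPU pointers. From the writes in the hunks above (two words
per PDE3 entry, four per dual PDE0 entry) the entry sizes are evidently
8 and 16 bytes; the sketch below hard-codes those as assumed constants,
standing in for the hardware-generated accessors, purely to show the
arithmetic.

#include <stdint.h>
#include <stdio.h>

/* Assumed entry sizes, inferred from the two- and four-word writes in
 * the diff; stand-ins for the generated gmmu_* accessors. */
static uint32_t gmmu_new_pde__size_v(void)      { return 8; }
static uint32_t gmmu_new_dual_pde__size_v(void) { return 16; }

/* Word-index helpers as introduced by the patch: table-entry index in,
 * offset in 32-bit words out. */
static uint32_t pde3_from_index(uint32_t i)
{
	return i * gmmu_new_pde__size_v() / sizeof(uint32_t);
}

static uint32_t pde0_from_index(uint32_t i)
{
	return i * gmmu_new_dual_pde__size_v() / sizeof(uint32_t);
}

int main(void)
{
	/* Entry 3 of a PDE3 table starts at word 6 (byte 24); entry 3
	 * of a dual PDE0 table starts at word 12 (byte 48). */
	printf("pde3[3] -> word %u\n", pde3_from_index(3));
	printf("pde0[3] -> word %u\n", pde0_from_index(3));
	return 0;
}

Keeping the offsets in words matches the accessor's unit, so call sites
like gk20a_mem_wr32(g, &parent->mem, pde + 1, ...) read as "write the
second word of this entry" with no byte/word conversion at the caller.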