diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2016-05-12 02:31:30 -0400 |
---|---|---|
committer | Deepak Nibade <dnibade@nvidia.com> | 2016-12-27 04:56:15 -0500 |
commit | 18a017865946617fd63256858a0d2300160643f4 (patch) | |
tree | 310822095e53bb0bb377f4955df7e2cc0f62fc7d /drivers/gpu/nvgpu/gp10b/mm_gp10b.c | |
parent | e746a16f7abbaacba89e390c692620941fc1b34c (diff) |
gpu: nvgpu: refactor gk20a_mem_{wr,rd} for vidmem
To support vidmem, pass g and mem_desc to the buffer memory accessor
functions. This allows the functions to select the memory access method
based on the buffer aperture instead of using the cpu pointer directly
(as has been done until now). The selection and aperture support will come in a
separate patch; this patch only refactors these accessors and keeps the
underlying functionality as-is.
JIRA DNVGPU-23
Change-Id: I21d4a54827b0e2741012dfde7952c0555a583435
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1121914
GVS: Gerrit_Virtual_Submit
Reviewed-by: Ken Adams <kadams@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/mm_gp10b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 44 |
1 files changed, 25 insertions, 19 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 0e1f3c4b..0b693f7c 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c | |||
@@ -145,9 +145,14 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl, | |||
145 | return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl)); | 145 | return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl)); |
146 | } | 146 | } |
147 | 147 | ||
148 | static u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i) | 148 | static u32 pde3_from_index(u32 i) |
149 | { | 149 | { |
150 | return (u32 *) (((u8 *)entry->mem.cpu_va) + i*gmmu_new_pde__size_v()); | 150 | return i * gmmu_new_pde__size_v() / sizeof(u32); |
151 | } | ||
152 | |||
153 | static u32 pte3_from_index(u32 i) | ||
154 | { | ||
155 | return i * gmmu_new_pte__size_v() / sizeof(u32); | ||
151 | } | 156 | } |
152 | 157 | ||
153 | static u64 entry_addr(struct gk20a *g, struct gk20a_mm_entry *entry) | 158 | static u64 entry_addr(struct gk20a *g, struct gk20a_mm_entry *entry) |
@@ -176,7 +181,7 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm, | |||
176 | u64 pde_addr = 0; | 181 | u64 pde_addr = 0; |
177 | struct gk20a_mm_entry *pte = parent->entries + i; | 182 | struct gk20a_mm_entry *pte = parent->entries + i; |
178 | u32 pde_v[2] = {0, 0}; | 183 | u32 pde_v[2] = {0, 0}; |
179 | u32 *pde; | 184 | u32 pde; |
180 | 185 | ||
181 | gk20a_dbg_fn(""); | 186 | gk20a_dbg_fn(""); |
182 | 187 | ||
@@ -189,10 +194,10 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm, | |||
189 | pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr)); | 194 | pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr)); |
190 | pde_v[0] |= gmmu_new_pde_vol_true_f(); | 195 | pde_v[0] |= gmmu_new_pde_vol_true_f(); |
191 | pde_v[1] |= pte_addr >> 24; | 196 | pde_v[1] |= pte_addr >> 24; |
192 | pde = pde3_from_index(parent, i); | 197 | pde = pde3_from_index(i); |
193 | 198 | ||
194 | gk20a_mem_wr32(pde, 0, pde_v[0]); | 199 | gk20a_mem_wr32(g, &parent->mem, pde + 0, pde_v[0]); |
195 | gk20a_mem_wr32(pde, 1, pde_v[1]); | 200 | gk20a_mem_wr32(g, &parent->mem, pde + 1, pde_v[1]); |
196 | 201 | ||
197 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x", | 202 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x", |
198 | i, gmmu_pgsz_idx, pde_v[1], pde_v[0]); | 203 | i, gmmu_pgsz_idx, pde_v[1], pde_v[0]); |
@@ -200,9 +205,9 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm, | |||
200 | return 0; | 205 | return 0; |
201 | } | 206 | } |
202 | 207 | ||
203 | static u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i) | 208 | static u32 pde0_from_index(u32 i) |
204 | { | 209 | { |
205 | return (u32 *) (((u8 *)entry->mem.cpu_va) + i*gmmu_new_dual_pde__size_v()); | 210 | return i * gmmu_new_dual_pde__size_v() / sizeof(u32); |
206 | } | 211 | } |
207 | 212 | ||
208 | static int update_gmmu_pde0_locked(struct vm_gk20a *vm, | 213 | static int update_gmmu_pde0_locked(struct vm_gk20a *vm, |
@@ -220,7 +225,7 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm, | |||
220 | u32 pte_addr_small = 0, pte_addr_big = 0; | 225 | u32 pte_addr_small = 0, pte_addr_big = 0; |
221 | struct gk20a_mm_entry *entry = pte->entries + i; | 226 | struct gk20a_mm_entry *entry = pte->entries + i; |
222 | u32 pde_v[4] = {0, 0, 0, 0}; | 227 | u32 pde_v[4] = {0, 0, 0, 0}; |
223 | u32 *pde; | 228 | u32 pde; |
224 | 229 | ||
225 | gk20a_dbg_fn(""); | 230 | gk20a_dbg_fn(""); |
226 | 231 | ||
@@ -254,12 +259,12 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm, | |||
254 | pde_v[1] |= pte_addr_big >> 28; | 259 | pde_v[1] |= pte_addr_big >> 28; |
255 | } | 260 | } |
256 | 261 | ||
257 | pde = pde0_from_index(pte, i); | 262 | pde = pde0_from_index(i); |
258 | 263 | ||
259 | gk20a_mem_wr32(pde, 0, pde_v[0]); | 264 | gk20a_mem_wr32(g, &pte->mem, pde + 0, pde_v[0]); |
260 | gk20a_mem_wr32(pde, 1, pde_v[1]); | 265 | gk20a_mem_wr32(g, &pte->mem, pde + 1, pde_v[1]); |
261 | gk20a_mem_wr32(pde, 2, pde_v[2]); | 266 | gk20a_mem_wr32(g, &pte->mem, pde + 2, pde_v[2]); |
262 | gk20a_mem_wr32(pde, 3, pde_v[3]); | 267 | gk20a_mem_wr32(g, &pte->mem, pde + 3, pde_v[3]); |
263 | 268 | ||
264 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]", | 269 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]", |
265 | i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]); | 270 | i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]); |
@@ -323,8 +328,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
323 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); | 328 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); |
324 | } | 329 | } |
325 | 330 | ||
326 | gk20a_mem_wr32(pte->mem.cpu_va + i*8, 0, pte_w[0]); | 331 | gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 0, pte_w[0]); |
327 | gk20a_mem_wr32(pte->mem.cpu_va + i*8, 1, pte_w[1]); | 332 | gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 1, pte_w[1]); |
328 | 333 | ||
329 | if (*iova) { | 334 | if (*iova) { |
330 | *iova += page_size; | 335 | *iova += page_size; |
@@ -376,12 +381,13 @@ static const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g, | |||
376 | return gp10b_mm_levels; | 381 | return gp10b_mm_levels; |
377 | } | 382 | } |
378 | 383 | ||
379 | static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr) | 384 | static void gp10b_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, |
385 | u64 pdb_addr) | ||
380 | { | 386 | { |
381 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); | 387 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); |
382 | u32 pdb_addr_hi = u64_hi32(pdb_addr); | 388 | u32 pdb_addr_hi = u64_hi32(pdb_addr); |
383 | 389 | ||
384 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), | 390 | gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(), |
385 | (g->mm.vidmem_is_vidmem ? | 391 | (g->mm.vidmem_is_vidmem ? |
386 | ram_in_page_dir_base_target_sys_mem_ncoh_f() : | 392 | ram_in_page_dir_base_target_sys_mem_ncoh_f() : |
387 | ram_in_page_dir_base_target_vid_mem_f()) | | 393 | ram_in_page_dir_base_target_vid_mem_f()) | |
@@ -389,7 +395,7 @@ static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr) | |||
389 | ram_in_page_dir_base_lo_f(pdb_addr_lo) | | 395 | ram_in_page_dir_base_lo_f(pdb_addr_lo) | |
390 | 1 << 10); | 396 | 1 << 10); |
391 | 397 | ||
392 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), | 398 | gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(), |
393 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); | 399 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); |
394 | } | 400 | } |
395 | 401 | ||