author     Terje Bergstrom <tbergstrom@nvidia.com>    2015-01-29 14:29:56 -0500
committer  Deepak Nibade <dnibade@nvidia.com>         2016-12-27 04:52:05 -0500
commit     ac0cd782ab539d3a89ac2cc50955f80b8be456d1 (patch)
tree       e7feacdff08a9798db8ff9bf343f6895eb6d3035 /drivers/gpu/nvgpu/gp10b/mm_gp10b.c
parent     1fcd7fd547daac5374993f243fad77a822a5a048 (diff)
gpu: nvgpu: gp10b: Implement new page table format
Implement the 5-level Pascal page table format. It is enabled only for
simulation.

Change-Id: I6767fac8b52fe0f6a2e2f86312de5fc93af6518e
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/682114
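For reference, below is a minimal standalone sketch (not part of this patch) of how a GPU virtual address would be split across the five levels defined by the gp10b_mm_levels table added in this change, assuming the small-page walk so the PTE index occupies bits 20..12. Only the bit ranges come from the patch; the va_bits() helper and the example address are hypothetical.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical illustration only: extract the inclusive bit field va[hi:lo]. */
static unsigned int va_bits(uint64_t va, unsigned int hi, unsigned int lo)
{
        return (unsigned int)((va >> lo) & ((1ULL << (hi - lo + 1)) - 1));
}

int main(void)
{
        uint64_t va = 0x123456789000ULL;        /* arbitrary example GPU VA */

        /* Bit ranges mirror gp10b_mm_levels[] from this patch (small pages). */
        printf("PD3 index: %u\n", va_bits(va, 48, 47));  /* 2 bits -> 4 entries   */
        printf("PD2 index: %u\n", va_bits(va, 46, 38));  /* 9 bits -> 512 entries */
        printf("PD1 index: %u\n", va_bits(va, 37, 29));  /* 9 bits -> 512 entries */
        printf("PD0 index: %u\n", va_bits(va, 28, 21));  /* 8 bits -> 256 dual PDEs */
        printf("PTE index: %u\n", va_bits(va, 20, 12));  /* 9 bits -> 512 PTEs, 4 KB pages */
        return 0;
}

For big pages the last level instead uses bits 20..16, which is why the lowest gp10b_mm_levels entry lists two lo_bit values and why PD0 uses the 16-byte dual PDE written by update_gmmu_pde0_locked().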
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/mm_gp10b.c')
-rw-r--r--  drivers/gpu/nvgpu/gp10b/mm_gp10b.c  197
1 file changed, 197 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index e3e2c173..1608b176 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -20,6 +20,8 @@
 #include "rpfb_gp10b.h"
 #include "hw_ram_gp10b.h"
 #include "hw_bus_gp10b.h"
+#include "hw_gmmu_gp10b.h"
+#include "gk20a/semaphore_gk20a.h"
 
 static u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g)
 {
@@ -138,6 +140,197 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
 	return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
 }
 
+u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i)
+{
+	return (u32 *) (((u8 *)entry->cpu_va) + i*gmmu_new_pde__size_v());
+}
+
+static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
+		struct gk20a_mm_entry *parent,
+		u32 i, u32 gmmu_pgsz_idx,
+		u64 iova,
+		u32 kind_v, u32 *ctag,
+		bool cacheable, bool unmapped_pte,
+		int rw_flag, bool sparse, u32 flags)
+{
+	u64 pte_addr = 0;
+	u64 pde_addr = 0;
+	struct gk20a_mm_entry *pte = parent->entries + i;
+	u32 pde_v[2] = {0, 0};
+	u32 *pde;
+	struct gk20a *g = vm->mm->g;
+
+	gk20a_dbg_fn("");
+
+	pte_addr = g->ops.mm.get_iova_addr(g, pte->sgt->sgl, 0)
+		>> gmmu_new_pde_address_shift_v();
+	pde_addr = g->ops.mm.get_iova_addr(g, parent->sgt->sgl, 0);
+
+	pde_v[0] |= gmmu_new_pde_aperture_video_memory_f();
+	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
+
+	pde = pde3_from_index(parent, i);
+
+	gk20a_mem_wr32(pde, 0, pde_v[0]);
+	gk20a_mem_wr32(pde, 1, pde_v[1]);
+
+	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
+		  i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
+u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i)
+{
+	return (u32 *) (((u8 *)entry->cpu_va) + i*gmmu_new_dual_pde__size_v());
+}
+
+static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
+		struct gk20a_mm_entry *pte,
+		u32 i, u32 gmmu_pgsz_idx,
+		u64 iova,
+		u32 kind_v, u32 *ctag,
+		bool cacheable, bool unmapped_pte,
+		int rw_flag, bool sparse, u32 flags)
+{
+	bool small_valid, big_valid;
+	u32 pte_addr_small = 0, pte_addr_big = 0;
+	struct gk20a_mm_entry *entry = pte->entries + i;
+	u32 pde_v[4] = {0, 0, 0, 0};
+	u32 *pde;
+	struct gk20a *g = vm->mm->g;
+
+	gk20a_dbg_fn("");
+
+	small_valid = entry->size && entry->pgsz == gmmu_page_size_small;
+	big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
+
+	if (small_valid)
+		pte_addr_small = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0)
+			>> gmmu_new_dual_pde_address_shift_v();
+
+	if (big_valid)
+		pte_addr_big = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0)
+			>> gmmu_new_dual_pde_address_big_shift_v();
+
+	if (small_valid) {
+		pde_v[2] |= gmmu_new_dual_pde_address_small_sys_f(pte_addr_small);
+		pde_v[2] |= gmmu_new_dual_pde_aperture_small_video_memory_f();
+		pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
+	}
+
+	if (big_valid) {
+		pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(pte_addr_big);
+		pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
+		pde_v[0] |= gmmu_new_dual_pde_aperture_big_video_memory_f();
+	}
+
+	pde = pde0_from_index(pte, i);
+
+	gk20a_mem_wr32(pde, 0, pde_v[0]);
+	gk20a_mem_wr32(pde, 1, pde_v[1]);
+	gk20a_mem_wr32(pde, 2, pde_v[2]);
+	gk20a_mem_wr32(pde, 3, pde_v[3]);
+
+	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]",
+		  i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
+static int update_gmmu_pte_locked(struct vm_gk20a *vm,
+		struct gk20a_mm_entry *pte,
+		u32 i, u32 gmmu_pgsz_idx,
+		u64 iova,
+		u32 kind_v, u32 *ctag,
+		bool cacheable, bool unmapped_pte,
+		int rw_flag, bool sparse, u32 flags)
+{
+	u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
+	u32 pte_w[2] = {0, 0}; /* invalid pte */
+
+	gk20a_dbg_fn("");
+
+	if (iova) {
+		pte_w[0] = gmmu_new_pte_valid_true_f() |
+			gmmu_new_pte_address_sys_f(iova
+				>> gmmu_new_pte_address_shift_v());
+		pte_w[1] = gmmu_new_pte_aperture_video_memory_f() |
+			gmmu_new_pte_kind_f(kind_v) |
+			gmmu_new_pte_comptagline_f(*ctag / SZ_128K);
+
+		if (rw_flag == gk20a_mem_flag_read_only)
+			pte_w[0] |= gmmu_new_pte_read_only_true_f();
+		if (!cacheable)
+			pte_w[1] |= gmmu_new_pte_vol_true_f();
+
+		gk20a_dbg(gpu_dbg_pte, "pte=%d iova=0x%llx kind=%d"
+			  " ctag=%d vol=%d"
+			  " [0x%08x, 0x%08x]",
+			  i, iova,
+			  kind_v, *ctag, !cacheable,
+			  pte_w[1], pte_w[0]);
+
+		if (*ctag)
+			*ctag += page_size;
+	} else if (sparse) {
+		pte_w[0] = gmmu_new_pte_valid_false_f();
+		pte_w[1] |= gmmu_new_pte_vol_true_f();
+	} else {
+		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
+	}
+
+	gk20a_mem_wr32(pte->cpu_va + i*8, 0, pte_w[0]);
+	gk20a_mem_wr32(pte->cpu_va + i*8, 1, pte_w[1]);
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
+const struct gk20a_mmu_level gp10b_mm_levels[] = {
+	{.hi_bit = {48, 48},
+	 .lo_bit = {47, 47},
+	 .update_entry = update_gmmu_pde3_locked,
+	 .entry_size = 8},
+	{.hi_bit = {46, 46},
+	 .lo_bit = {38, 38},
+	 .update_entry = update_gmmu_pde3_locked,
+	 .entry_size = 8},
+	{.hi_bit = {37, 37},
+	 .lo_bit = {29, 29},
+	 .update_entry = update_gmmu_pde3_locked,
+	 .entry_size = 8},
+	{.hi_bit = {28, 28},
+	 .lo_bit = {21, 21},
+	 .update_entry = update_gmmu_pde0_locked,
+	 .entry_size = 16},
+	{.hi_bit = {20, 20},
+	 .lo_bit = {12, 16},
+	 .update_entry = update_gmmu_pte_locked,
+	 .entry_size = 8},
+	{.update_entry = NULL}
+};
+
+const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g, u32 big_page_size)
+{
+	return gp10b_mm_levels;
+}
+
+static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
+{
+	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
+	u32 pdb_addr_hi = u64_hi32(pdb_addr);
+
+	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
+		ram_in_page_dir_base_target_vid_mem_f() |
+		ram_in_page_dir_base_vol_true_f() |
+		ram_in_page_dir_base_lo_f(pdb_addr_lo) |
+		1 << 10);
+
+	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
+		ram_in_page_dir_base_hi_f(pdb_addr_hi));
+}
+
 void gp10b_init_mm(struct gpu_ops *gops)
 {
 	gm20b_init_mm(gops);
@@ -146,4 +339,8 @@ void gp10b_init_mm(struct gpu_ops *gops)
 	gops->mm.init_bar2_vm = gb10b_init_bar2_vm;
 	gops->mm.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup;
 	gops->mm.get_iova_addr = gp10b_mm_iova_addr;
+	if (tegra_platform_is_linsim()) {
+		gops->mm.get_mmu_levels = gp10b_mm_get_mmu_levels;
+		gops->mm.init_pdb = gp10b_mm_init_pdb;
+	}
 }