From f3a920cb01d1517db5432c8062b660d6b60eb4de Mon Sep 17 00:00:00 2001
From: Terje Bergstrom
Date: Wed, 14 Jan 2015 08:54:26 -0800
Subject: gpu: nvgpu: Refactor page mapping code

Always pass the directory structure to mm functions instead of
pointers to its members. Also split update_gmmu_ptes_locked() into
smaller functions, and turn the hard-coded MMU levels (PDE, PTE) into
run-time parameters.

Change-Id: I315ef7aebbea1e61156705361f2e2a63b5fb7bf1
Signed-off-by: Deepak Nibade
Signed-off-by: Terje Bergstrom
Reviewed-on: http://git-master/r/672485
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gk20a/gk20a.c    |   7 +-
 drivers/gpu/nvgpu/gk20a/gk20a.h    |  12 +-
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 658 +++++++++++++++++++------------------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h |  37 ++-
 drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 155 +--------
 drivers/gpu/nvgpu/vgpu/mm_vgpu.c   |  10 +-
 6 files changed, 382 insertions(+), 497 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 57d5f09a..76237fae 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -1,7 +1,7 @@ /* * GK20A Graphics * - * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -1789,7 +1789,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) gpu->big_page_size = g->mm.pmu.vm.big_page_size; gpu->compression_page_size = g->ops.fb.compression_page_size(g); - gpu->pde_coverage_bit_count = g->mm.pmu.vm.pde_stride_shift; + gpu->pde_coverage_bit_count = + gk20a_mm_pde_coverage_bit_count(&g->mm.pmu.vm); gpu->available_big_page_sizes = gpu->big_page_size; if (g->ops.mm.get_big_page_sizes) @@ -1798,7 +1799,7 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS | NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS; - if (g->ops.mm.set_sparse) + if (g->ops.mm.support_sparse && g->ops.mm.support_sparse(g)) gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS; if (IS_ENABLED(CONFIG_TEGRA_GK20A) && diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index fa80f010..ef8068e5 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -296,8 +296,7 @@ struct gpu_ops { bool (*is_fw_defined)(void); } gr_ctx; struct { - int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr, - u32 num_pages, u32 pgsz_idx, bool refplus); + bool (*support_sparse)(struct gk20a *g); bool (*is_debug_mode_enabled)(struct gk20a *g); u64 (*gmmu_map)(struct vm_gk20a *vm, u64 map_offset, @@ -309,13 +308,15 @@ struct gpu_ops { u32 ctag_offset, u32 flags, int rw_flag, - bool clear_ctags); + bool clear_ctags, + bool sparse); void (*gmmu_unmap)(struct vm_gk20a *vm, u64 vaddr, u64 size, int pgsz_idx, bool va_allocated, - int rw_flag); + int rw_flag, + bool sparse); void (*vm_remove)(struct vm_gk20a *vm); int (*vm_alloc_share)(struct gk20a_as_share *as_share, u32 flags); @@ -331,6 +332,9 @@ struct gpu_ops { u32 (*get_physical_addr_bits)(struct gk20a *g); int (*init_bar2_vm)(struct gk20a *g); int (*init_bar2_mm_hw_setup)(struct gk20a *g); + const struct gk20a_mmu_level * + (*get_mmu_levels)(struct gk20a *g, u32 big_page_size); + void (*init_pdb)(struct gk20a *g, void *inst_ptr, u64 pdb_addr); } mm; struct { int (*prepare_ucode)(struct gk20a *g); diff
--git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 80c766b6..d8bd3e70 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -98,7 +98,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, struct sg_table *sgt, u64 buffer_offset, u64 first_vaddr, u64 last_vaddr, u8 kind_v, u32 ctag_offset, bool cacheable, - int rw_flag); + int rw_flag, + bool sparse); static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm); static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm); @@ -605,34 +606,46 @@ void unmap_gmmu_pages(struct gk20a_mm_entry *entry) static int gk20a_zalloc_gmmu_page_table(struct vm_gk20a *vm, enum gmmu_pgsz_gk20a pgsz_idx, + const struct gk20a_mmu_level *l, struct gk20a_mm_entry *entry) { int err; - u32 pte_order; + int order; gk20a_dbg_fn(""); /* allocate enough pages for the table */ - pte_order = vm->page_table_sizing[pgsz_idx].order; + order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1; + order += ilog2(l->entry_size); + order -= PAGE_SHIFT; + order = max(0, order); - err = alloc_gmmu_pages(vm, pte_order, entry); + err = alloc_gmmu_pages(vm, order, entry); gk20a_dbg(gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d", - entry, gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl), - pte_order); + entry, gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl), order); + if (err) + return err; entry->pgsz = pgsz_idx; return err; } +int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm) +{ + return vm->mmu_levels[0].lo_bit[0]; +} + /* given address range (inclusive) determine the pdes crossed */ void pde_range_from_vaddr_range(struct vm_gk20a *vm, u64 addr_lo, u64 addr_hi, u32 *pde_lo, u32 *pde_hi) { - *pde_lo = (u32)(addr_lo >> vm->pde_stride_shift); - *pde_hi = (u32)(addr_hi >> vm->pde_stride_shift); + int pde_shift = gk20a_mm_pde_coverage_bit_count(vm); + + *pde_lo = (u32)(addr_lo >> pde_shift); + *pde_hi = (u32)(addr_hi >> pde_shift); gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d", - addr_lo, addr_hi, vm->pde_stride_shift); + addr_lo, addr_hi, pde_shift); gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d", *pde_lo, *pde_hi); } @@ -647,7 +660,7 @@ u32 pte_index_from_vaddr(struct vm_gk20a *vm, { u32 ret; /* mask off pde part */ - addr = addr & ((((u64)1) << vm->pde_stride_shift) - ((u64)1)); + addr = addr & ((1ULL << gk20a_mm_pde_coverage_bit_count(vm)) - 1ULL); /* shift over to get pte index. note assumption that pte index * doesn't leak over into the high 32b */ @@ -657,57 +670,6 @@ u32 pte_index_from_vaddr(struct vm_gk20a *vm, return ret; } -static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page, - u32 *pte_offset) -{ - /* ptes are 8B regardless of pagesize */ - /* pte space pages are 4KB. 
so 512 ptes per 4KB page*/ - *pte_page = i >> 9; - - /* this offset is a pte offset, not a byte offset */ - *pte_offset = i & ((1<<9)-1); - - gk20a_dbg(gpu_dbg_pte, "i=0x%x pte_page=0x%x pte_offset=0x%x", - i, *pte_page, *pte_offset); -} - - -/* - * given a pde index/page table number make sure it has - * backing store and if not go ahead allocate it and - * record it in the appropriate pde - */ -int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm, - u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx) -{ - int err; - struct gk20a_mm_entry *entry = vm->pdb.entries + i; - - gk20a_dbg_fn(""); - - /* if it's already in place it's valid */ - if (entry->size) - return 0; - - gk20a_dbg(gpu_dbg_pte, "alloc %dKB ptes for pde %d", - vm->gmmu_page_sizes[gmmu_pgsz_idx]/1024, i); - - err = gk20a_zalloc_gmmu_page_table(vm, gmmu_pgsz_idx, entry); - if (err) - return err; - - /* rewrite pde */ - err = map_gmmu_pages(&vm->pdb); - if (err) - return err; - - update_gmmu_pde_locked(vm, i); - - unmap_gmmu_pages(&vm->pdb); - - return 0; -} - static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm, u64 addr) { @@ -1117,11 +1079,11 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, u32 ctag_offset, u32 flags, int rw_flag, - bool clear_ctags) + bool clear_ctags, + bool sparse) { - int err = 0, i = 0; + int err = 0; bool allocated = false; - u32 pde_lo, pde_hi; struct device *d = dev_from_vm(vm); struct gk20a *g = gk20a_from_vm(vm); int ctag_granularity = g->ops.fb.compression_page_size(g); @@ -1146,31 +1108,16 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, allocated = true; } - pde_range_from_vaddr_range(vm, - map_offset, - map_offset + size - 1, - &pde_lo, &pde_hi); - - /* mark the addr range valid (but with 0 phys addr, which will fault) */ - for (i = pde_lo; i <= pde_hi; i++) { - err = validate_gmmu_page_table_gk20a_locked(vm, i, - pgsz_idx); - if (err) { - gk20a_err(d, "failed to validate page table %d: %d", - i, err); - goto fail_validate; - } - } - err = update_gmmu_ptes_locked(vm, pgsz_idx, sgt, buffer_offset, - map_offset, map_offset + size - 1, + map_offset, map_offset + size, kind_v, ctag_offset, flags & NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, - rw_flag); + rw_flag, + sparse); if (err) { gk20a_err(d, "failed to update ptes on map"); goto fail_validate; @@ -1192,7 +1139,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, u64 size, int pgsz_idx, bool va_allocated, - int rw_flag) + int rw_flag, + bool sparse) { int err = 0; struct gk20a *g = gk20a_from_vm(vm); @@ -1212,9 +1160,10 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, NULL, /* n/a for unmap */ 0, vaddr, - vaddr + size - 1, + vaddr + size, 0, 0, false /* n/a for unmap */, - rw_flag); + rw_flag, + sparse); if (err) dev_err(dev_from_vm(vm), "failed to update gmmu ptes on unmap"); @@ -1439,7 +1388,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, bfr.kind_v, bfr.ctag_offset, flags, rw_flag, - clear_ctags); + clear_ctags, + false); if (!map_offset) goto clean_up; @@ -1555,7 +1505,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm, 0, /* page size index = 0 i.e. 
SZ_4K */ 0, /* kind */ 0, /* ctag_offset */ - flags, rw_flag, false); + flags, rw_flag, false, false); mutex_unlock(&vm->update_gmmu_lock); if (!vaddr) { gk20a_err(dev_from_vm(vm), "failed to allocate va space"); @@ -1642,7 +1592,8 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm, size, 0, /* page size 4K */ true, /*va_allocated */ - rw_flag); + rw_flag, + false); mutex_unlock(&vm->update_gmmu_lock); } @@ -1748,157 +1699,6 @@ u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl) return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl)); } -static int update_gmmu_ptes_locked(struct vm_gk20a *vm, - enum gmmu_pgsz_gk20a pgsz_idx, - struct sg_table *sgt, - u64 buffer_offset, - u64 first_vaddr, u64 last_vaddr, - u8 kind_v, u32 ctag_offset, - bool cacheable, - int rw_flag) -{ - int err; - u32 pde_lo, pde_hi, pde_i; - struct scatterlist *cur_chunk; - unsigned int cur_offset; - u32 pte_w[2] = {0, 0}; /* invalid pte */ - struct gk20a *g = gk20a_from_vm(vm); - u32 ctag_granularity = g->ops.fb.compression_page_size(g); - u32 ctag = ctag_offset * ctag_granularity; - u32 ctag_incr; - u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; - u64 addr = 0; - u64 space_to_skip = buffer_offset; - - pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr, - &pde_lo, &pde_hi); - - gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d", - pgsz_idx, pde_lo, pde_hi); - - ctag_incr = ctag_offset ? page_size : 0; - - cur_offset = 0; - if (sgt) { - cur_chunk = sgt->sgl; - /* space_to_skip must be page aligned */ - BUG_ON(space_to_skip & (page_size - 1)); - - while (space_to_skip > 0 && cur_chunk) { - u64 new_addr = gk20a_mm_iova_addr(vm->mm->g, cur_chunk); - if (new_addr) { - addr = new_addr; - addr += cur_offset; - } - cur_offset += page_size; - addr += page_size; - while (cur_chunk && - cur_offset >= cur_chunk->length) { - cur_offset -= cur_chunk->length; - cur_chunk = sg_next(cur_chunk); - } - space_to_skip -= page_size; - } - } - else - cur_chunk = NULL; - - for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) { - u32 pte_lo, pte_hi; - u32 pte_cur; - - struct gk20a_mm_entry *entry = vm->pdb.entries + pde_i; - - if (pde_i == pde_lo) - pte_lo = pte_index_from_vaddr(vm, first_vaddr, - pgsz_idx); - else - pte_lo = 0; - - if ((pde_i != pde_hi) && (pde_hi != pde_lo)) - pte_hi = vm->page_table_sizing[pgsz_idx].num_ptes-1; - else - pte_hi = pte_index_from_vaddr(vm, last_vaddr, - pgsz_idx); - - /* get cpu access to the ptes */ - err = map_gmmu_pages(entry); - if (err) { - gk20a_err(dev_from_vm(vm), - "couldn't map ptes for update as=%d", - vm_aspace_id(vm)); - goto clean_up; - } - - gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); - for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { - if (likely(sgt)) { - u64 new_addr = gk20a_mm_iova_addr(vm->mm->g, - cur_chunk); - if (new_addr) { - addr = new_addr; - addr += cur_offset; - } - pte_w[0] = gmmu_pte_valid_true_f() | - gmmu_pte_address_sys_f(addr - >> gmmu_pte_address_shift_v()); - pte_w[1] = gmmu_pte_aperture_video_memory_f() | - gmmu_pte_kind_f(kind_v) | - gmmu_pte_comptagline_f(ctag - / ctag_granularity); - - if (rw_flag == gk20a_mem_flag_read_only) { - pte_w[0] |= gmmu_pte_read_only_true_f(); - pte_w[1] |= - gmmu_pte_write_disable_true_f(); - } else if (rw_flag == - gk20a_mem_flag_write_only) { - pte_w[1] |= - gmmu_pte_read_disable_true_f(); - } - if (!cacheable) - pte_w[1] |= gmmu_pte_vol_true_f(); - - gk20a_dbg(gpu_dbg_pte, "pte_cur=%d addr=0x%x,%08x kind=%d" - " ctag=%d vol=%d" - " [0x%08x,0x%08x]", - pte_cur, hi32(addr), lo32(addr), - kind_v, ctag, 
!cacheable, - pte_w[1], pte_w[0]); - ctag += ctag_incr; - cur_offset += page_size; - addr += page_size; - while (cur_chunk && - cur_offset >= cur_chunk->length) { - cur_offset -= cur_chunk->length; - cur_chunk = sg_next(cur_chunk); - } - - } else { - gk20a_dbg(gpu_dbg_pte, - "pte_cur=%d [0x0,0x0]", - pte_cur); - } - - gk20a_mem_wr32(entry->cpu_va + pte_cur*8, 0, pte_w[0]); - gk20a_mem_wr32(entry->cpu_va + pte_cur*8, 1, pte_w[1]); - } - - unmap_gmmu_pages(entry); - } - - smp_mb(); - - return 0; - -clean_up: - /*TBD: potentially rewrite above to pre-map everything it needs to - * as that's the only way it can fail */ - return err; - -} - - /* for gk20a the "video memory" apertures here are misnomers. */ static inline u32 big_valid_pde0_bits(u64 pte_addr) { @@ -1908,6 +1708,7 @@ static inline u32 big_valid_pde0_bits(u64 pte_addr) (u32)(pte_addr >> gmmu_pde_address_shift_v())); return pde0_bits; } + static inline u32 small_valid_pde1_bits(u64 pte_addr) { u32 pde1_bits = @@ -1924,10 +1725,15 @@ static inline u32 small_valid_pde1_bits(u64 pte_addr) made. So, superfluous updates will cause unnecessary pde invalidations. */ -void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i) +int update_gmmu_pde_locked(struct vm_gk20a *vm, + struct gk20a_mm_entry *pte, + u32 i, u32 gmmu_pgsz_idx, + u64 iova, + u32 kind_v, u32 *ctag, + bool cacheable, int rw_flag, bool sparse) { bool small_valid, big_valid; - u64 pte_addr[2] = {0, 0}; + u64 pte_addr_small = 0, pte_addr_big = 0; struct gk20a_mm_entry *entry = vm->pdb.entries + i; u32 pde_v[2] = {0, 0}; u32 *pde; @@ -1938,44 +1744,227 @@ void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i) big_valid = entry->size && entry->pgsz == gmmu_page_size_big; if (small_valid) - pte_addr[gmmu_page_size_small] = - gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl); + pte_addr_small = gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl); if (big_valid) - pte_addr[gmmu_page_size_big] = - gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl); + pte_addr_big = gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl); pde_v[0] = gmmu_pde_size_full_f(); - pde_v[0] |= big_valid ? - big_valid_pde0_bits(pte_addr[gmmu_page_size_big]) - : + pde_v[0] |= big_valid ? big_valid_pde0_bits(pte_addr_big) : (gmmu_pde_aperture_big_invalid_f()); pde_v[1] |= (small_valid ? - small_valid_pde1_bits(pte_addr[gmmu_page_size_small]) - : + small_valid_pde1_bits(pte_addr_small) : (gmmu_pde_aperture_small_invalid_f() | - gmmu_pde_vol_small_false_f()) - ) - | - (big_valid ? (gmmu_pde_vol_big_true_f()) : - gmmu_pde_vol_big_false_f()); + gmmu_pde_vol_small_false_f())) + | + (big_valid ? 
(gmmu_pde_vol_big_true_f()) : + gmmu_pde_vol_big_false_f()); pde = pde_from_index(vm, i); gk20a_mem_wr32(pde, 0, pde_v[0]); gk20a_mem_wr32(pde, 1, pde_v[1]); - smp_mb(); + gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x", + i, gmmu_pgsz_idx, pde_v[1], pde_v[0]); + return 0; +} + +int update_gmmu_pte_locked(struct vm_gk20a *vm, + struct gk20a_mm_entry *pte, + u32 i, u32 gmmu_pgsz_idx, + u64 iova, + u32 kind_v, u32 *ctag, + bool cacheable, int rw_flag, bool sparse) +{ + struct gk20a *g = gk20a_from_vm(vm); + u32 ctag_granularity = g->ops.fb.compression_page_size(g); + u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; + u32 pte_w[2] = {0, 0}; /* invalid pte */ + + if (iova) { + pte_w[0] = gmmu_pte_valid_true_f() | + gmmu_pte_address_sys_f(iova + >> gmmu_pte_address_shift_v()); + pte_w[1] = gmmu_pte_aperture_video_memory_f() | + gmmu_pte_kind_f(kind_v) | + gmmu_pte_comptagline_f(*ctag / ctag_granularity); + + if (rw_flag == gk20a_mem_flag_read_only) { + pte_w[0] |= gmmu_pte_read_only_true_f(); + pte_w[1] |= + gmmu_pte_write_disable_true_f(); + } else if (rw_flag == + gk20a_mem_flag_write_only) { + pte_w[1] |= + gmmu_pte_read_disable_true_f(); + } + if (!cacheable) + pte_w[1] |= gmmu_pte_vol_true_f(); + + gk20a_dbg(gpu_dbg_pte, + "pte=%d iova=0x%llx kind=%d ctag=%d vol=%d [0x%08x, 0x%08x]", + i, iova, + kind_v, *ctag, !cacheable, + pte_w[1], pte_w[0]); + + if (*ctag) + *ctag += page_size; + } else if (sparse) { + pte_w[0] = gmmu_pte_valid_false_f(); + pte_w[1] |= gmmu_pte_vol_true_f(); + } else { + gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); + } + + gk20a_mem_wr32(pte->cpu_va + i*8, 0, pte_w[0]); + gk20a_mem_wr32(pte->cpu_va + i*8, 1, pte_w[1]); + + return 0; +} + +static int update_gmmu_level_locked(struct vm_gk20a *vm, + struct gk20a_mm_entry *pte, + enum gmmu_pgsz_gk20a pgsz_idx, + u64 iova, + u64 gpu_va, u64 gpu_end, + u8 kind_v, u32 *ctag, + bool cacheable, + int rw_flag, + bool sparse, + int lvl) +{ + const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl]; + const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1]; + int err = 0; + u32 pde_i; + u64 pde_size = 1ULL << (u64)l->lo_bit[pgsz_idx]; + + gk20a_dbg_fn(""); + + pde_i = (gpu_va & ((1ULL << ((u64)l->hi_bit[pgsz_idx]+1)) - 1ULL)) + >> (u64)l->lo_bit[pgsz_idx]; + + gk20a_dbg(gpu_dbg_pte, "size_idx=%d, l: %d, [%llx,%llx], iova=%llx", + pgsz_idx, lvl, gpu_va, gpu_end-1, iova); + + while (gpu_va < gpu_end) { + struct gk20a_mm_entry *next_pte = NULL; + u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end); + + /* Allocate next level */ + if (next_l->update_entry) { + if (!pte->entries) { + int num_entries = + 1 << + (l->hi_bit[pgsz_idx] + - l->lo_bit[pgsz_idx]); + pte->entries = + kzalloc(sizeof(struct gk20a_mm_entry) * + num_entries, GFP_KERNEL); + pte->pgsz = pgsz_idx; + if (!pte->entries) + return -ENOMEM; + } + next_pte = pte->entries + pde_i; + + if (!next_pte->size) { + err = gk20a_zalloc_gmmu_page_table(vm, + pgsz_idx, next_l, next_pte); + if (err) + return err; + } + } + + err = l->update_entry(vm, pte, pde_i, pgsz_idx, + iova, kind_v, ctag, cacheable, + rw_flag, sparse); + if (err) + return err; + + if (next_l->update_entry) { + /* get cpu access to the ptes */ + err = map_gmmu_pages(next_pte); + if (err) { + gk20a_err(dev_from_vm(vm), + "couldn't map ptes for update as=%d", + vm_aspace_id(vm)); + return err; + } + err = update_gmmu_level_locked(vm, next_pte, + pgsz_idx, + iova, + gpu_va, + next, + kind_v, ctag, + cacheable, rw_flag, sparse, lvl+1); + unmap_gmmu_pages(next_pte); + + if (err) + return err; + 
} + + if (iova) + iova += next - gpu_va; + pde_i++; + gpu_va = next; + } + + gk20a_dbg_fn("done"); + + return 0; +} + +static int update_gmmu_ptes_locked(struct vm_gk20a *vm, + enum gmmu_pgsz_gk20a pgsz_idx, + struct sg_table *sgt, + u64 buffer_offset, + u64 gpu_va, u64 gpu_end, + u8 kind_v, u32 ctag_offset, + bool cacheable, + int rw_flag, + bool sparse) +{ + struct gk20a *g = gk20a_from_vm(vm); + int ctag_granularity = g->ops.fb.compression_page_size(g); + u32 ctag = ctag_offset * ctag_granularity; + u64 iova = 0; + u64 space_to_skip = buffer_offset; + u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; + int err; + + gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx", + pgsz_idx, + sgt ? gk20a_mm_iova_addr(vm->mm->g, sgt->sgl) : 0ULL); - FLUSH_CPU_DCACHE(pde, - sg_phys(vm->pdb.sgt->sgl) + (i*gmmu_pde__size_v()), - sizeof(u32)*2); + if (space_to_skip & (page_size - 1)) + return -EINVAL; + + if (sgt) + iova = gk20a_mm_iova_addr(vm->mm->g, sgt->sgl) + space_to_skip; - gk20a_mm_l2_invalidate(vm->mm->g); + gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx", + pgsz_idx, gpu_va, gpu_end-1, iova); + err = map_gmmu_pages(&vm->pdb); + if (err) { + gk20a_err(dev_from_vm(vm), + "couldn't map ptes for update as=%d", + vm_aspace_id(vm)); + return err; + } + err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx, + iova, + gpu_va, gpu_end, + kind_v, &ctag, + cacheable, rw_flag, sparse, 0); + unmap_gmmu_pages(&vm->pdb); + + smp_mb(); - gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]); gk20a_dbg_fn("done"); + + return err; } /* NOTE! mapped_buffers lock must be held */ @@ -1984,29 +1973,14 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) struct vm_gk20a *vm = mapped_buffer->vm; struct gk20a *g = vm->mm->g; - if (mapped_buffer->va_node && - mapped_buffer->va_node->sparse) { - u64 vaddr = mapped_buffer->addr; - u32 pgsz_idx = mapped_buffer->pgsz_idx; - u32 num_pages = mapped_buffer->size >> - ilog2(vm->gmmu_page_sizes[pgsz_idx]); - - /* there is little we can do if this fails... */ - g->ops.mm.gmmu_unmap(vm, - mapped_buffer->addr, - mapped_buffer->size, - mapped_buffer->pgsz_idx, - mapped_buffer->va_allocated, - gk20a_mem_flag_none); - g->ops.mm.set_sparse(vm, vaddr, - num_pages, pgsz_idx, false); - } else - g->ops.mm.gmmu_unmap(vm, - mapped_buffer->addr, - mapped_buffer->size, - mapped_buffer->pgsz_idx, - mapped_buffer->va_allocated, - gk20a_mem_flag_none); + g->ops.mm.gmmu_unmap(vm, + mapped_buffer->addr, + mapped_buffer->size, + mapped_buffer->pgsz_idx, + mapped_buffer->va_allocated, + gk20a_mem_flag_none, + mapped_buffer->va_node ? + mapped_buffer->va_node->sparse : false); gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d", vm_aspace_id(vm), @@ -2057,7 +2031,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm) struct vm_reserved_va_node *va_node, *va_node_tmp; struct rb_node *node; int i; - u32 pde_lo, pde_hi; + u32 pde_lo = 0, pde_hi = 0; gk20a_dbg_fn(""); mutex_lock(&vm->update_gmmu_lock); @@ -2082,7 +2056,8 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm) /* unmapping all buffers above may not actually free * all vm ptes. jettison them here for certain... 
*/ - pde_range_from_vaddr_range(vm, 0, vm->va_limit-1, + pde_range_from_vaddr_range(vm, + 0, vm->va_limit-1, &pde_lo, &pde_hi); for (i = 0; i < pde_hi + 1; i++) { struct gk20a_mm_entry *entry = &vm->pdb.entries[i]; @@ -2125,6 +2100,30 @@ void gk20a_vm_put(struct vm_gk20a *vm) kref_put(&vm->ref, gk20a_vm_remove_support_kref); } +const struct gk20a_mmu_level gk20a_mm_levels_64k[] = { + {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, + .lo_bit = {26, 26}, + .update_entry = update_gmmu_pde_locked, + .entry_size = 8}, + {.hi_bit = {25, 25}, + .lo_bit = {12, 16}, + .update_entry = update_gmmu_pte_locked, + .entry_size = 8}, + {.update_entry = NULL} +}; + +const struct gk20a_mmu_level gk20a_mm_levels_128k[] = { + {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, + .lo_bit = {27, 27}, + .update_entry = update_gmmu_pde_locked, + .entry_size = 8}, + {.hi_bit = {26, 26}, + .lo_bit = {12, 17}, + .update_entry = update_gmmu_pte_locked, + .entry_size = 8}, + {.update_entry = NULL} +}; + int gk20a_init_vm(struct mm_gk20a *mm, struct vm_gk20a *vm, u32 big_page_size, @@ -2149,38 +2148,18 @@ int gk20a_init_vm(struct mm_gk20a *mm, vm->big_pages = big_pages; vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big]; - vm->pde_stride = vm->big_page_size << 10; - vm->pde_stride_shift = ilog2(vm->pde_stride); - for (i = 0; i < gmmu_nr_page_sizes; i++) { - u32 num_ptes, pte_space, num_pages; + vm->mmu_levels = vm->mm->g->ops.mm.get_mmu_levels(vm->mm->g, + vm->big_page_size); + for (i = 0; i < gmmu_nr_page_sizes; i++) vm->gmmu_page_sizes[i] = gmmu_page_sizes[i]; - /* assuming "full" page tables */ - num_ptes = vm->pde_stride / gmmu_page_sizes[i]; - - pte_space = num_ptes * gmmu_pte__size_v(); - /* allocate whole pages */ - pte_space = roundup(pte_space, PAGE_SIZE); + gk20a_dbg_info("small page-size (%dKB)", + vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); - num_pages = pte_space / PAGE_SIZE; - /* make sure "order" is viable */ - BUG_ON(!is_power_of_2(num_pages)); - - vm->page_table_sizing[i].num_ptes = num_ptes; - vm->page_table_sizing[i].order = ilog2(num_pages); - } - - gk20a_dbg_info("small page-size (%dKB) pte array: %dKB", - vm->gmmu_page_sizes[gmmu_page_size_small] >> 10, - (vm->page_table_sizing[gmmu_page_size_small].num_ptes * - gmmu_pte__size_v()) >> 10); - - gk20a_dbg_info("big page-size (%dKB) pte array: %dKB", - vm->gmmu_page_sizes[gmmu_page_size_big] >> 10, - (vm->page_table_sizing[gmmu_page_size_big].num_ptes * - gmmu_pte__size_v()) >> 10); + gk20a_dbg_info("big page-size (%dKB)", + vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); pde_range_from_vaddr_range(vm, 0, vm->va_limit-1, @@ -2197,7 +2176,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, name, vm->va_limit, pde_hi + 1); /* allocate the page table directory */ - err = gk20a_zalloc_gmmu_page_table(vm, 0, &vm->pdb); + err = gk20a_zalloc_gmmu_page_table(vm, 0, &vm->mmu_levels[0], &vm->pdb); if (err) goto clean_up_ptes; @@ -2382,9 +2361,18 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, /* mark that we need to use sparse mappings here */ if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) { - err = g->ops.mm.set_sparse(vm, vaddr_start, args->pages, - pgsz_idx, true); - if (err) { + u64 map_offset = g->ops.mm.gmmu_map(vm, vaddr_start, + NULL, + 0, + va_node->size, + pgsz_idx, + 0, + 0, + args->flags, + gk20a_mem_flag_none, + false, + true); + if (!map_offset) { mutex_unlock(&vm->update_gmmu_lock); vma->free(vma, start_page_nr, args->pages, 1); kfree(va_node); @@ -2462,7 +2450,8 @@ int gk20a_vm_free_space(struct gk20a_as_share 
*as_share, va_node->size, va_node->pgsz_idx, true, - gk20a_mem_flag_none); + gk20a_mem_flag_none, + true); kfree(va_node); } mutex_unlock(&vm->update_gmmu_lock); @@ -2741,13 +2730,25 @@ static int gk20a_init_hwpm(struct mm_gk20a *mm) return 0; } +void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr) +{ + u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); + u32 pdb_addr_hi = u64_hi32(pdb_addr); + + gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), + ram_in_page_dir_base_target_vid_mem_f() | + ram_in_page_dir_base_vol_true_f() | + ram_in_page_dir_base_lo_f(pdb_addr_lo)); + + gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), + ram_in_page_dir_base_hi_f(pdb_addr_hi)); +} + void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm, u32 big_page_size) { struct gk20a *g = gk20a_from_vm(vm); u64 pde_addr = gk20a_mm_iova_addr(g, vm->pdb.sgt->sgl); - u32 pde_addr_lo = u64_lo32(pde_addr >> ram_in_base_shift_v()); - u32 pde_addr_hi = u64_hi32(pde_addr); phys_addr_t inst_pa = inst_block->cpu_pa; void *inst_ptr = inst_block->cpuva; @@ -2756,13 +2757,7 @@ void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm, gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr); - gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), - ram_in_page_dir_base_target_vid_mem_f() | - ram_in_page_dir_base_vol_true_f() | - ram_in_page_dir_base_lo_f(pde_addr_lo)); - - gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), - ram_in_page_dir_base_hi_f(pde_addr_hi)); + g->ops.mm.init_pdb(g, inst_ptr, pde_addr); gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), u64_lo32(vm->va_limit) | 0xFFF); @@ -3030,6 +3025,13 @@ u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g) return 34; } +const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, + u32 big_page_size) +{ + return (big_page_size == SZ_64K) ? 
+ gk20a_mm_levels_64k : gk20a_mm_levels_128k; +} + void gk20a_init_mm(struct gpu_ops *gops) { gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled; @@ -3043,5 +3045,7 @@ void gk20a_init_mm(struct gpu_ops *gops) gops->mm.l2_flush = gk20a_mm_l2_flush; gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate; gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; + gops->mm.get_mmu_levels = gk20a_mm_get_mmu_levels; + gops->mm.init_pdb = gk20a_mm_init_pdb; } diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 7b355436..42c164be 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -268,6 +268,18 @@ struct vm_reserved_va_node { bool sparse; }; +struct gk20a_mmu_level { + int hi_bit[2]; + int lo_bit[2]; + int (*update_entry)(struct vm_gk20a *vm, + struct gk20a_mm_entry *pte, + u32 i, u32 gmmu_pgsz_idx, + u64 iova, + u32 kind_v, u32 *ctag, + bool cacheable, int rw_flag, bool sparse); + size_t entry_size; +}; + struct vm_gk20a { struct mm_gk20a *mm; struct gk20a_as_share *as_share; /* as_share this represents */ @@ -282,13 +294,8 @@ struct vm_gk20a { bool mapped; u32 big_page_size; - u32 pde_stride; - u32 pde_stride_shift; - struct { - u32 order; - u32 num_ptes; - } page_table_sizing[gmmu_nr_page_sizes]; + const struct gk20a_mmu_level *mmu_levels; struct kref ref; @@ -450,7 +457,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, u32 ctag_offset, u32 flags, int rw_flag, - bool clear_ctags); + bool clear_ctags, + bool sparse); void gk20a_gmmu_unmap(struct vm_gk20a *vm, u64 vaddr, @@ -462,7 +470,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, u64 size, int pgsz_idx, bool va_allocated, - int rw_flag); + int rw_flag, + bool sparse); struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf); void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, @@ -557,13 +566,10 @@ void unmap_gmmu_pages(struct gk20a_mm_entry *entry); void pde_range_from_vaddr_range(struct vm_gk20a *vm, u64 addr_lo, u64 addr_hi, u32 *pde_lo, u32 *pde_hi); +int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm); u32 *pde_from_index(struct vm_gk20a *vm, u32 i); u32 pte_index_from_vaddr(struct vm_gk20a *vm, u64 addr, enum gmmu_pgsz_gk20a pgsz_idx); -int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm, - u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx); - -void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i); void free_gmmu_pages(struct vm_gk20a *vm, struct gk20a_mm_entry *entry); @@ -571,4 +577,11 @@ u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g); struct gpu_ops; void gk20a_init_mm(struct gpu_ops *gops); +const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, + u32 big_page_size); +void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr); + +extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; +extern const struct gk20a_mmu_level gk20a_mm_levels_128k[]; + #endif /* MM_GK20A_H */ diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index 37ab70fa..f85a1718 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c @@ -21,152 +21,6 @@ #include "hw_gr_gm20b.h" #include "hw_ram_gm20b.h" -static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, - enum gmmu_pgsz_gk20a pgsz_idx, - u64 first_vaddr, u64 last_vaddr, - bool clear, bool refplus) -{ - int err; - u32 pte_lo, pte_hi; - u32 pde_lo, pde_hi; - u32 pte_w[2] = {0, 0}; /* invalid pte */ - u64 addr = 0; - u32 pte_cur; - struct gk20a_mm_entry *entry; - struct 
gk20a *g = gk20a_from_vm(vm); - - gk20a_dbg_fn(""); - - pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr, - &pde_lo, &pde_hi); - - gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d", - pgsz_idx, pde_lo, pde_hi); - - /* Expect ptes of the same pde */ - BUG_ON(pde_lo != pde_hi); - - entry = vm->pdb.entries + pde_lo; - - pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx); - pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx); - - /* get cpu access to the ptes */ - err = map_gmmu_pages(entry); - if (err) - goto fail; - - gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); - for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { - pte_w[0] = gmmu_pte_valid_false_f(); - pte_w[1] = clear ? 0 : gmmu_pte_vol_true_f(); - - gk20a_dbg(gpu_dbg_pte, - "pte_cur=%d addr=%llx" - " [0x%08x,0x%08x]", - pte_cur, addr, - pte_w[1], pte_w[0]); - - gk20a_mem_wr32(entry->cpu_va + pte_cur*8, 0, pte_w[0]); - gk20a_mem_wr32(entry->cpu_va + pte_cur*8, 1, pte_w[1]); - } - - unmap_gmmu_pages(entry); - - smp_mb(); - g->ops.mm.tlb_invalidate(vm); - - return 0; -fail: - return err; - -} - -static bool gm20b_vm_is_pde_in_range(struct vm_gk20a *vm, u64 vaddr_lo, - u64 vaddr_hi, u32 pde) -{ - u64 pde_vaddr_lo, pde_vaddr_hi; - - gk20a_dbg_fn(""); - - pde_vaddr_lo = (u64)pde << vm->pde_stride_shift; - pde_vaddr_hi = pde_vaddr_lo | - ((0x1UL << (vm->pde_stride_shift)) - 1); - - return ((vaddr_lo <= pde_vaddr_lo) && (vaddr_hi) >= pde_vaddr_hi); -} - -static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr, - u32 num_pages, u32 pgsz_idx, bool refplus) -{ - struct mm_gk20a *mm = vm->mm; - u32 pgsz = vm->gmmu_page_sizes[pgsz_idx]; - u32 pde_shift = vm->pde_stride_shift; - u64 vaddr_hi; - u64 vaddr_pde_start; - u32 i; - u32 pde_lo, pde_hi; - int err; - - gk20a_dbg_fn(""); - - vaddr_hi = vaddr + pgsz * (u64)num_pages - 1; - pde_range_from_vaddr_range(vm, - vaddr, - vaddr_hi, - &pde_lo, &pde_hi); - - gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, " - "pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d", - vaddr, vaddr_hi, pde_lo, pde_hi, pgsz, - vm->pde_stride_shift); - - for (i = pde_lo; i <= pde_hi; i++) { - /* Mark all ptes as sparse. */ - err = validate_gmmu_page_table_gk20a_locked(vm, i, - pgsz_idx); - if (err) { - gk20a_err(dev_from_vm(vm), - "failed to validate page table %d: %d", - i, err); - goto fail; - } - - if (gm20b_vm_is_pde_in_range(vm, vaddr, vaddr_hi, i)) { - /* entire pde is marked as sparse */ - vaddr_pde_start = (u64)i << pde_shift; - allocate_gmmu_ptes_sparse(vm, pgsz_idx, - vaddr_pde_start, - PDE_ADDR_END(vaddr_pde_start, - pde_shift), false, refplus); - } else { - /* Check leading and trailing spaces which doesn't fit - * into entire pde. 
*/ - if (pde_lo == pde_hi) - allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, - vaddr_hi, false, refplus); - else if (i == pde_lo) - allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, - PDE_ADDR_END(vaddr, pde_shift), false, - refplus); - else - allocate_gmmu_ptes_sparse(vm, pgsz_idx, - PDE_ADDR_START(vaddr_hi, pde_shift), - vaddr_hi, false, - refplus); - } - } - - gk20a_mm_l2_flush(mm->g, true); - - return 0; - -fail: - WARN_ON(1); - - return err; -} - static int gm20b_mm_mmu_vpr_info_fetch_wait(struct gk20a *g, const unsigned int msec) { @@ -249,9 +103,14 @@ static u32 gm20b_mm_get_big_page_sizes(void) return SZ_64K | SZ_128K; } +static bool gm20b_mm_support_sparse(struct gk20a *g) +{ + return true; +} + void gm20b_init_mm(struct gpu_ops *gops) { - gops->mm.set_sparse = gm20b_vm_put_sparse; + gops->mm.support_sparse = gm20b_mm_support_sparse; gops->mm.is_debug_mode_enabled = gm20b_mm_mmu_debug_mode_enabled; gops->mm.gmmu_map = gk20a_locked_gmmu_map; gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap; @@ -265,4 +124,6 @@ void gm20b_init_mm(struct gpu_ops *gops) gops->mm.set_big_page_size = gm20b_mm_set_big_page_size; gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes; gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; + gops->mm.get_mmu_levels = gk20a_mm_get_mmu_levels; + gops->mm.init_pdb = gk20a_mm_init_pdb; } diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index 57814f1b..9b7c7dbd 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c @@ -40,8 +40,8 @@ static int vgpu_init_mm_setup_sw(struct gk20a *g) /* gk20a_init_gpu_characteristics expects this to be populated */ vm->big_page_size = big_page_size; - vm->pde_stride = vm->big_page_size << 10; - vm->pde_stride_shift = ilog2(vm->pde_stride); + vm->mmu_levels = (vm->big_page_size == SZ_64K) ? + gk20a_mm_levels_64k : gk20a_mm_levels_128k; mm->sw_ready = true; @@ -65,7 +65,8 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, u32 ctag_offset, u32 flags, int rw_flag, - bool clear_ctags) + bool clear_ctags, + bool sparse) { int err = 0; struct device *d = dev_from_vm(vm); @@ -128,7 +129,8 @@ static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm, u64 size, int pgsz_idx, bool va_allocated, - int rw_flag) + int rw_flag, + bool sparse) { struct gk20a *g = gk20a_from_vm(vm); struct gk20a_platform *platform = gk20a_get_platform(g->dev);
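
To make the new scheme concrete: after this change the two-level GMMU layout is described by an array of struct gk20a_mmu_level (hi_bit/lo_bit per page-size index, an entry_size, and an update_entry callback), and update_gmmu_level_locked() walks that array recursively instead of open-coding PDE and PTE handling. The standalone sketch below mirrors only the indexing arithmetic; the bit ranges come from gk20a_mm_levels_64k above, but NV_GMMU_VA_RANGE is defined outside these hunks (38 is assumed here), and the walker is illustrative rather than driver code, printing entry indices instead of allocating or writing page tables.

/*
 * Standalone sketch of the table-driven GMMU walk this patch introduces.
 * Bit ranges follow gk20a_mm_levels_64k; the value of NV_GMMU_VA_RANGE is
 * not visible in these hunks, so 38 is an assumption. The real
 * update_gmmu_level_locked() also allocates the next level on demand and
 * calls the per-level update_entry() callback. Build with: cc -o walk walk.c
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define NV_GMMU_VA_RANGE 38	/* assumed; defined outside this patch */

struct mmu_level {
	int hi_bit[2];		/* highest VA bit decoded, per page-size index */
	int lo_bit[2];		/* lowest VA bit decoded, per page-size index */
	size_t entry_size;	/* bytes per entry in this level's table */
};

/* Two-level layout for 64K big pages, mirroring gk20a_mm_levels_64k */
static const struct mmu_level levels_64k[] = {
	{ .hi_bit = { NV_GMMU_VA_RANGE - 1, NV_GMMU_VA_RANGE - 1 },
	  .lo_bit = { 26, 26 }, .entry_size = 8 },	/* PDE level */
	{ .hi_bit = { 25, 25 },
	  .lo_bit = { 12, 16 }, .entry_size = 8 },	/* PTE level */
};

/* Same index computation used by update_gmmu_level_locked() */
static uint32_t entry_index(const struct mmu_level *l, uint64_t va, int pgsz)
{
	uint64_t mask = (1ULL << (l->hi_bit[pgsz] + 1)) - 1ULL;

	return (uint32_t)((va & mask) >> l->lo_bit[pgsz]);
}

int main(void)
{
	uint64_t va = 0x104005000ULL;	/* arbitrary example GPU VA */
	int pgsz = 0;			/* page-size index 0 = small (4K) */
	size_t lvl;

	for (lvl = 0; lvl < 2; lvl++) {
		const struct mmu_level *l = &levels_64k[lvl];
		size_t entries =
			(size_t)1 << (l->hi_bit[pgsz] - l->lo_bit[pgsz] + 1);

		printf("level %zu: VA bits [%d:%d], %zu-byte table, entry %u\n",
		       lvl, l->hi_bit[pgsz], l->lo_bit[pgsz],
		       entries * l->entry_size,
		       (unsigned)entry_index(l, va, pgsz));
	}
	return 0;
}

Note that the per-level table size printed here is what gk20a_zalloc_gmmu_page_table() now derives at run time from hi_bit, lo_bit and entry_size, replacing the precomputed page_table_sizing[] fields that this patch removes.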
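A quick consistency check on the constants: the removed fields computed pde_stride = big_page_size << 10 and pde_stride_shift = ilog2(pde_stride), while the new gk20a_mm_pde_coverage_bit_count() returns mmu_levels[0].lo_bit[0]. The two agree, since 64K << 10 = 2^26 and 128K << 10 = 2^27, matching the lo_bit values 26 and 27 in gk20a_mm_levels_64k and gk20a_mm_levels_128k. The trivial program below just asserts that arithmetic; it is not driver code.

/*
 * Plain arithmetic check: the new per-level lo_bit values reproduce the
 * old pde_stride = big_page_size << 10 coverage (64K -> 2^26, 128K -> 2^27).
 */
#include <assert.h>

int main(void)
{
	assert((64u * 1024) << 10 == 1u << 26);		/* gk20a_mm_levels_64k */
	assert((128u * 1024) << 10 == 1u << 27);	/* gk20a_mm_levels_128k */
	return 0;
}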