From fbafc7eba41ba7654dfdadf51a53acf1638e9fa1 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Mon, 1 May 2017 16:12:16 -0700 Subject: gpu: nvgpu: Refactor VM init/cleanup Refactor the API for initializing and cleaning up VMs. This also involved moving a bunch of GMMU code out into the gmmu code since part of initializing a VM involves initializing the page tables for the VM. JIRA NVGPU-12 JIRA NVGPU-30 Change-Id: I4710f08c26a6e39806f0762a35f6db5c94b64c50 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1477746 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/common/mm/gmmu.c | 138 +++++++++ drivers/gpu/nvgpu/common/mm/vm.c | 344 +++++++++++++++++++++- drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 4 +- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 485 +------------------------------- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 2 - drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 2 +- drivers/gpu/nvgpu/include/nvgpu/gmmu.h | 6 + drivers/gpu/nvgpu/include/nvgpu/vm.h | 6 +- drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 4 +- 9 files changed, 493 insertions(+), 498 deletions(-) diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index a2ed3f3a..695347bc 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c @@ -15,12 +15,150 @@ */ #include +#include #include #include #include "gk20a/gk20a.h" #include "gk20a/mm_gk20a.h" +static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order, + struct gk20a_mm_entry *entry) +{ + u32 num_pages = 1 << order; + u32 len = num_pages * PAGE_SIZE; + int err; + struct page *pages; + struct gk20a *g = vm->mm->g; + + /* note: mem_desc slightly abused (wrt. alloc_gmmu_pages) */ + + pages = alloc_pages(GFP_KERNEL, order); + if (!pages) { + nvgpu_log(g, gpu_dbg_pte, "alloc_pages failed"); + goto err_out; + } + entry->mem.priv.sgt = nvgpu_kzalloc(g, sizeof(*entry->mem.priv.sgt)); + if (!entry->mem.priv.sgt) { + nvgpu_log(g, gpu_dbg_pte, "cannot allocate sg table"); + goto err_alloced; + } + err = sg_alloc_table(entry->mem.priv.sgt, 1, GFP_KERNEL); + if (err) { + nvgpu_log(g, gpu_dbg_pte, "sg_alloc_table failed"); + goto err_sg_table; + } + sg_set_page(entry->mem.priv.sgt->sgl, pages, len, 0); + entry->mem.cpu_va = page_address(pages); + memset(entry->mem.cpu_va, 0, len); + entry->mem.size = len; + entry->mem.aperture = APERTURE_SYSMEM; + FLUSH_CPU_DCACHE(entry->mem.cpu_va, + sg_phys(entry->mem.priv.sgt->sgl), len); + + return 0; + +err_sg_table: + nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt); +err_alloced: + __free_pages(pages, order); +err_out: + return -ENOMEM; +} + +static int nvgpu_alloc_gmmu_pages(struct vm_gk20a *vm, u32 order, + struct gk20a_mm_entry *entry) +{ + struct gk20a *g = gk20a_from_vm(vm); + u32 num_pages = 1 << order; + u32 len = num_pages * PAGE_SIZE; + int err; + + if (g->is_fmodel) + return alloc_gmmu_phys_pages(vm, order, entry); + + /* + * On arm32 we're limited by vmalloc space, so we do not map pages by + * default. + */ + if (IS_ENABLED(CONFIG_ARM64)) + err = nvgpu_dma_alloc(g, len, &entry->mem); + else + err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING, + len, &entry->mem); + + + if (err) { + nvgpu_err(g, "memory allocation failed"); + return -ENOMEM; + } + + return 0; +} + +/* + * Allocate a phys contig region big enough for a full + * sized gmmu page table for the given gmmu_page_size. + * the whole range is zeroed so it's "invalid"/will fault. 
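+ * The table size is derived from the level's PDE/PTE bit range and
+ * entry size, so a single allocation covers every entry at this level.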
+ * + * If a previous entry is supplied, its memory will be used for + * suballocation for this next entry too, if there is space. + */ +int nvgpu_zalloc_gmmu_page_table(struct vm_gk20a *vm, + enum gmmu_pgsz_gk20a pgsz_idx, + const struct gk20a_mmu_level *l, + struct gk20a_mm_entry *entry, + struct gk20a_mm_entry *prev_entry) +{ + int err = -ENOMEM; + int order; + struct gk20a *g = gk20a_from_vm(vm); + u32 bytes; + + /* allocate enough pages for the table */ + order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1; + order += ilog2(l->entry_size); + bytes = 1 << order; + order -= PAGE_SHIFT; + if (order < 0 && prev_entry) { + /* try to suballocate from previous chunk */ + u32 capacity = prev_entry->mem.size / bytes; + u32 prev = prev_entry->woffset * sizeof(u32) / bytes; + u32 free = capacity - prev - 1; + + nvgpu_log(g, gpu_dbg_pte, "cap %d prev %d free %d bytes %d", + capacity, prev, free, bytes); + + if (free) { + memcpy(&entry->mem, &prev_entry->mem, + sizeof(entry->mem)); + entry->woffset = prev_entry->woffset + + bytes / sizeof(u32); + err = 0; + } + } + + if (err) { + /* no suballoc space */ + order = max(0, order); + err = nvgpu_alloc_gmmu_pages(vm, order, entry); + entry->woffset = 0; + } + + nvgpu_log(g, gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d, woff %x", + entry, + (entry->mem.priv.sgt && + entry->mem.aperture == APERTURE_SYSMEM) ? + g->ops.mm.get_iova_addr(g, entry->mem.priv.sgt->sgl, 0) : 0, + order, entry->woffset); + if (err) + return err; + entry->pgsz = pgsz_idx; + entry->mem.skip_wmb = true; + + return err; +} + /* * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU * VA will be allocated for you. If addr is non-zero then the buffer will be diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index 3b3b7a10..e42c7c5a 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c @@ -14,6 +14,8 @@ * along with this program. If not, see . */ +#include +#include #include #include #include @@ -23,6 +25,7 @@ #include "gk20a/gk20a.h" #include "gk20a/mm_gk20a.h" +#include "gk20a/platform_gk20a.h" int vm_aspace_id(struct vm_gk20a *vm) { @@ -104,6 +107,341 @@ void nvgpu_vm_mapping_batch_finish(struct vm_gk20a *vm, nvgpu_mutex_release(&vm->update_gmmu_lock); } +static int nvgpu_vm_init_page_tables(struct vm_gk20a *vm) +{ + u32 pde_lo, pde_hi; + int err; + + pde_range_from_vaddr_range(vm, + 0, vm->va_limit-1, + &pde_lo, &pde_hi); + vm->pdb.entries = nvgpu_vzalloc(vm->mm->g, + sizeof(struct gk20a_mm_entry) * + (pde_hi + 1)); + vm->pdb.num_entries = pde_hi + 1; + + if (!vm->pdb.entries) + return -ENOMEM; + + err = nvgpu_zalloc_gmmu_page_table(vm, 0, &vm->mmu_levels[0], + &vm->pdb, NULL); + if (err) { + nvgpu_vfree(vm->mm->g, vm->pdb.entries); + return err; + } + + return 0; +} + +/* + * Determine if the passed address space can support big pages or not. + */ +int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size) +{ + u64 mask = ((u64)vm->big_page_size << 10) - 1; + + if (base & mask || size & mask) + return 0; + return 1; +} + +/* + * Initialize a semaphore pool. Just return successfully if we do not need + * semaphores (i.e when sync-pts are active). + */ +static int nvgpu_init_sema_pool(struct vm_gk20a *vm) +{ + struct nvgpu_semaphore_sea *sema_sea; + struct mm_gk20a *mm = vm->mm; + struct gk20a *g = mm->g; + int err; + + /* + * Don't waste the memory on semaphores if we don't need them. 
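+ * When syncpoints are available they are used for synchronization
+ * instead, so the pool is only created on platforms without them.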
+ */ + if (g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS) + return 0; + + if (vm->sema_pool) + return 0; + + sema_sea = nvgpu_semaphore_sea_create(g); + if (!sema_sea) + return -ENOMEM; + + vm->sema_pool = nvgpu_semaphore_pool_alloc(sema_sea); + if (!vm->sema_pool) + return -ENOMEM; + + /* + * Allocate a chunk of GPU VA space for mapping the semaphores. We will + * do a fixed alloc in the kernel VM so that all channels have the same + * RO address range for the semaphores. + * + * !!! TODO: cleanup. + */ + sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel, + vm->va_limit - + mm->channel.kernel_size, + 512 * PAGE_SIZE, + SZ_4K); + if (!sema_sea->gpu_va) { + nvgpu_free(&vm->kernel, sema_sea->gpu_va); + nvgpu_vm_put(vm); + return -ENOMEM; + } + + err = nvgpu_semaphore_pool_map(vm->sema_pool, vm); + if (err) { + nvgpu_semaphore_pool_unmap(vm->sema_pool, vm); + nvgpu_free(vm->vma[gmmu_page_size_small], + vm->sema_pool->gpu_va); + return err; + } + + return 0; +} + +/** + * nvgpu_init_vm() - Initialize an address space. + * + * @mm - Parent MM. + * @vm - The VM to init. + * @big_page_size - Size of big pages associated with this VM. + * @low_hole - The size of the low hole (unaddressable memory at the bottom of + * the address space. + * @kernel_reserved - Space reserved for kernel only allocations. + * @aperture_size - Total size of the aperture. + * @big_pages - Ignored. Will be set based on other passed params. + * @name - Name of the address space. + * + * This function initializes an address space according to the following map: + * + * +--+ 0x0 + * | | + * +--+ @low_hole + * | | + * ~ ~ This is the "user" section. + * | | + * +--+ @aperture_size - @kernel_reserved + * | | + * ~ ~ This is the "kernel" section. + * | | + * +--+ @aperture_size + * + * The user section is therefor what ever is left over after the @low_hole and + * @kernel_reserved memory have been portioned out. The @kernel_reserved is + * always persent at the top of the memory space and the @low_hole is always at + * the bottom. + * + * For certain address spaces a "user" section makes no sense (bar1, etc) so in + * such cases the @kernel_reserved and @low_hole should sum to exactly + * @aperture_size. + */ +int nvgpu_init_vm(struct mm_gk20a *mm, + struct vm_gk20a *vm, + u32 big_page_size, + u64 low_hole, + u64 kernel_reserved, + u64 aperture_size, + bool big_pages, + bool userspace_managed, + char *name) +{ + int err; + char alloc_name[32]; + u64 kernel_vma_flags; + u64 user_vma_start, user_vma_limit; + u64 user_lp_vma_start, user_lp_vma_limit; + u64 kernel_vma_start, kernel_vma_limit; + struct gk20a *g = mm->g; + struct gk20a_platform *p = gk20a_get_platform(g->dev); + + if (WARN_ON(kernel_reserved + low_hole > aperture_size)) + return -ENOMEM; + + nvgpu_log_info(g, "Init space for %s: valimit=0x%llx, " + "LP size=0x%x lowhole=0x%llx", + name, aperture_size, + (unsigned int)big_page_size, low_hole); + + vm->mm = mm; + + vm->gmmu_page_sizes[gmmu_page_size_small] = SZ_4K; + vm->gmmu_page_sizes[gmmu_page_size_big] = big_page_size; + vm->gmmu_page_sizes[gmmu_page_size_kernel] = SZ_4K; + + /* Set up vma pointers. 
*/ + vm->vma[gmmu_page_size_small] = &vm->user; + vm->vma[gmmu_page_size_big] = &vm->user; + vm->vma[gmmu_page_size_kernel] = &vm->kernel; + if (!p->unify_address_spaces) + vm->vma[gmmu_page_size_big] = &vm->user_lp; + + vm->va_start = low_hole; + vm->va_limit = aperture_size; + vm->big_pages = big_pages; + + vm->big_page_size = vm->gmmu_page_sizes[gmmu_page_size_big]; + vm->userspace_managed = userspace_managed; + vm->mmu_levels = g->ops.mm.get_mmu_levels(g, vm->big_page_size); + + /* Initialize the page table data structures. */ + err = nvgpu_vm_init_page_tables(vm); + if (err) + return err; + + /* Setup vma limits. */ + if (kernel_reserved + low_hole < aperture_size) { + if (p->unify_address_spaces) { + user_vma_start = low_hole; + user_vma_limit = vm->va_limit - kernel_reserved; + user_lp_vma_start = user_vma_limit; + user_lp_vma_limit = user_vma_limit; + } else { + user_vma_start = low_hole; + user_vma_limit = __nv_gmmu_va_small_page_limit(); + user_lp_vma_start = __nv_gmmu_va_small_page_limit(); + user_lp_vma_limit = vm->va_limit - kernel_reserved; + } + } else { + user_vma_start = 0; + user_vma_limit = 0; + user_lp_vma_start = 0; + user_lp_vma_limit = 0; + } + kernel_vma_start = vm->va_limit - kernel_reserved; + kernel_vma_limit = vm->va_limit; + + nvgpu_log_info(g, "user_vma [0x%llx,0x%llx)", + user_vma_start, user_vma_limit); + nvgpu_log_info(g, "user_lp_vma [0x%llx,0x%llx)", + user_lp_vma_start, user_lp_vma_limit); + nvgpu_log_info(g, "kernel_vma [0x%llx,0x%llx)", + kernel_vma_start, kernel_vma_limit); + + if (WARN_ON(user_vma_start > user_vma_limit) || + WARN_ON(user_lp_vma_start > user_lp_vma_limit) || + WARN_ON(kernel_vma_start >= kernel_vma_limit)) { + err = -EINVAL; + goto clean_up_page_tables; + } + + kernel_vma_flags = (kernel_reserved + low_hole) == aperture_size ? + 0 : GPU_ALLOC_GVA_SPACE; + + /* + * A "user" area only makes sense for the GVA spaces. For VMs where + * there is no "user" area user_vma_start will be equal to + * user_vma_limit (i.e a 0 sized space). In such a situation the kernel + * area must be non-zero in length. + */ + if (user_vma_start >= user_vma_limit && + kernel_vma_start >= kernel_vma_limit) { + err = -EINVAL; + goto clean_up_page_tables; + } + + /* + * Determine if big pages are possible in this VM. If a split address + * space is used then check the user_lp vma instead of the user vma. + */ + if (p->unify_address_spaces) + vm->big_pages = nvgpu_big_pages_possible(vm, user_vma_start, + user_vma_limit - user_vma_start); + else + vm->big_pages = nvgpu_big_pages_possible(vm, user_lp_vma_start, + user_lp_vma_limit - user_lp_vma_start); + + /* + * User VMA. + */ + if (user_vma_start < user_vma_limit) { + snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name); + err = __nvgpu_buddy_allocator_init(g, &vm->user, + vm, alloc_name, + user_vma_start, + user_vma_limit - + user_vma_start, + SZ_4K, + GPU_BALLOC_MAX_ORDER, + GPU_ALLOC_GVA_SPACE); + if (err) + goto clean_up_page_tables; + } else { + /* + * Make these allocator pointers point to the kernel allocator + * since we still use the legacy notion of page size to choose + * the allocator. + */ + vm->vma[0] = &vm->kernel; + vm->vma[1] = &vm->kernel; + } + + /* + * User VMA for large pages when a split address range is used. 
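+ * When the address space is unified, user_lp_vma_start equals
+ * user_lp_vma_limit and this allocator is simply skipped.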
+ */ + if (user_lp_vma_start < user_lp_vma_limit) { + snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s_lp", name); + err = __nvgpu_buddy_allocator_init(g, &vm->user_lp, + vm, alloc_name, + user_lp_vma_start, + user_lp_vma_limit - + user_lp_vma_start, + vm->big_page_size, + GPU_BALLOC_MAX_ORDER, + GPU_ALLOC_GVA_SPACE); + if (err) + goto clean_up_allocators; + } + + /* + * Kernel VMA. Must always exist for an address space. + */ + snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name); + err = __nvgpu_buddy_allocator_init(g, &vm->kernel, + vm, alloc_name, + kernel_vma_start, + kernel_vma_limit - kernel_vma_start, + SZ_4K, + GPU_BALLOC_MAX_ORDER, + kernel_vma_flags); + if (err) + goto clean_up_allocators; + + vm->mapped_buffers = NULL; + + nvgpu_mutex_init(&vm->update_gmmu_lock); + kref_init(&vm->ref); + nvgpu_init_list_node(&vm->vm_area_list); + + /* + * This is only necessary for channel address spaces. The best way to + * distinguish channel address spaces from other address spaces is by + * size - if the address space is 4GB or less, it's not a channel. + */ + if (vm->va_limit > SZ_4G) { + err = nvgpu_init_sema_pool(vm); + if (err) + goto clean_up_allocators; + } + + return 0; + +clean_up_allocators: + if (nvgpu_alloc_initialized(&vm->kernel)) + nvgpu_alloc_destroy(&vm->kernel); + if (nvgpu_alloc_initialized(&vm->user)) + nvgpu_alloc_destroy(&vm->user); + if (nvgpu_alloc_initialized(&vm->user_lp)) + nvgpu_alloc_destroy(&vm->user_lp); +clean_up_page_tables: + /* Cleans up nvgpu_vm_init_page_tables() */ + nvgpu_vfree(g, vm->pdb.entries); + free_gmmu_pages(vm, &vm->pdb); + return err; +} + void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm) { struct nvgpu_mapped_buf *mapped_buffer; @@ -111,8 +449,6 @@ void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm) struct nvgpu_rbtree_node *node = NULL; struct gk20a *g = vm->mm->g; - gk20a_dbg_fn(""); - /* * Do this outside of the update_gmmu_lock since unmapping the semaphore * pool involves unmapping a GMMU mapping which means aquiring the @@ -172,12 +508,10 @@ void nvgpu_vm_put(struct vm_gk20a *vm) kref_put(&vm->ref, nvgpu_vm_remove_support_kref); } -void nvgpu_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block) +void nvgpu_vm_remove(struct vm_gk20a *vm, struct nvgpu_mem *inst_block) { struct gk20a *g = vm->mm->g; - gk20a_dbg_fn(""); - gk20a_free_inst_block(g, inst_block); nvgpu_vm_remove_support_nofree(vm); } diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 54317195..82bf7b3e 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -1924,7 +1924,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, err_unmap: nvgpu_vm_unmap_buffer(vm, args->offset, NULL); err_remove_vm: - nvgpu_remove_vm(vm, &mm->perfbuf.inst_block); + nvgpu_vm_remove(vm, &mm->perfbuf.inst_block); nvgpu_mutex_release(&g->dbg_sessions_lock); return err; } @@ -1962,7 +1962,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset) err = gk20a_perfbuf_disable_locked(g); nvgpu_vm_unmap_buffer(vm, offset, NULL); - nvgpu_remove_vm(vm, &mm->perfbuf.inst_block); + nvgpu_vm_remove(vm, &mm->perfbuf.inst_block); g->perfbuf.owner = NULL; g->perfbuf.offset = 0; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 2642a0b1..17f1622f 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -476,9 +476,9 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm) 
g->ops.mm.remove_bar2_vm(g); if (g->ops.mm.is_bar1_supported(g)) - nvgpu_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block); + nvgpu_vm_remove(&mm->bar1.vm, &mm->bar1.inst_block); - nvgpu_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block); + nvgpu_vm_remove(&mm->pmu.vm, &mm->pmu.inst_block); gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block); nvgpu_vm_remove_support_nofree(&mm->cde.vm); @@ -779,52 +779,6 @@ void gk20a_init_mm_ce_context(struct gk20a *g) #endif } -static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order, - struct gk20a_mm_entry *entry) -{ - u32 num_pages = 1 << order; - u32 len = num_pages * PAGE_SIZE; - int err; - struct page *pages; - struct gk20a *g = vm->mm->g; - - gk20a_dbg_fn(""); - - /* note: mem_desc slightly abused (wrt. alloc_gmmu_pages) */ - - pages = alloc_pages(GFP_KERNEL, order); - if (!pages) { - gk20a_dbg(gpu_dbg_pte, "alloc_pages failed"); - goto err_out; - } - entry->mem.priv.sgt = nvgpu_kzalloc(g, sizeof(*entry->mem.priv.sgt)); - if (!entry->mem.priv.sgt) { - gk20a_dbg(gpu_dbg_pte, "cannot allocate sg table"); - goto err_alloced; - } - err = sg_alloc_table(entry->mem.priv.sgt, 1, GFP_KERNEL); - if (err) { - gk20a_dbg(gpu_dbg_pte, "sg_alloc_table failed"); - goto err_sg_table; - } - sg_set_page(entry->mem.priv.sgt->sgl, pages, len, 0); - entry->mem.cpu_va = page_address(pages); - memset(entry->mem.cpu_va, 0, len); - entry->mem.size = len; - entry->mem.aperture = APERTURE_SYSMEM; - FLUSH_CPU_DCACHE(entry->mem.cpu_va, - sg_phys(entry->mem.priv.sgt->sgl), len); - - return 0; - -err_sg_table: - nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt); -err_alloced: - __free_pages(pages, order); -err_out: - return -ENOMEM; -} - static void free_gmmu_phys_pages(struct vm_gk20a *vm, struct gk20a_mm_entry *entry) { @@ -857,38 +811,6 @@ static void unmap_gmmu_phys_pages(struct gk20a_mm_entry *entry) entry->mem.priv.sgt->sgl->length); } -static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order, - struct gk20a_mm_entry *entry) -{ - struct gk20a *g = gk20a_from_vm(vm); - u32 num_pages = 1 << order; - u32 len = num_pages * PAGE_SIZE; - int err; - - gk20a_dbg_fn(""); - - if (g->is_fmodel) - return alloc_gmmu_phys_pages(vm, order, entry); - - /* - * On arm32 we're limited by vmalloc space, so we do not map pages by - * default. - */ - if (IS_ENABLED(CONFIG_ARM64)) - err = nvgpu_dma_alloc(g, len, &entry->mem); - else - err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING, - len, &entry->mem); - - - if (err) { - nvgpu_err(g, "memory allocation failed"); - return -ENOMEM; - } - - return 0; -} - void free_gmmu_pages(struct vm_gk20a *vm, struct gk20a_mm_entry *entry) { @@ -955,72 +877,6 @@ void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry) } } -/* - * Allocate a phys contig region big enough for a full - * sized gmmu page table for the given gmmu_page_size. - * the whole range is zeroed so it's "invalid"/will fault. - * - * If a previous entry is supplied, its memory will be used for - * suballocation for this next entry too, if there is space. 
- */ - -static int gk20a_zalloc_gmmu_page_table(struct vm_gk20a *vm, - enum gmmu_pgsz_gk20a pgsz_idx, - const struct gk20a_mmu_level *l, - struct gk20a_mm_entry *entry, - struct gk20a_mm_entry *prev_entry) -{ - int err = -ENOMEM; - int order; - struct gk20a *g = gk20a_from_vm(vm); - u32 bytes; - - gk20a_dbg_fn(""); - - /* allocate enough pages for the table */ - order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1; - order += ilog2(l->entry_size); - bytes = 1 << order; - order -= PAGE_SHIFT; - if (order < 0 && prev_entry) { - /* try to suballocate from previous chunk */ - u32 capacity = prev_entry->mem.size / bytes; - u32 prev = prev_entry->woffset * sizeof(u32) / bytes; - u32 free = capacity - prev - 1; - - gk20a_dbg(gpu_dbg_pte, "cap %d prev %d free %d bytes %d", - capacity, prev, free, bytes); - - if (free) { - memcpy(&entry->mem, &prev_entry->mem, - sizeof(entry->mem)); - entry->woffset = prev_entry->woffset - + bytes / sizeof(u32); - err = 0; - } - } - - if (err) { - /* no suballoc space */ - order = max(0, order); - err = alloc_gmmu_pages(vm, order, entry); - entry->woffset = 0; - } - - gk20a_dbg(gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d, woff %x", - entry, - (entry->mem.priv.sgt && - entry->mem.aperture == APERTURE_SYSMEM) ? - g->ops.mm.get_iova_addr(g, entry->mem.priv.sgt->sgl, 0) : 0, - order, entry->woffset); - if (err) - return err; - entry->pgsz = pgsz_idx; - entry->mem.skip_wmb = true; - - return err; -} - int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm) { return vm->mmu_levels[0].lo_bit[0]; @@ -2230,7 +2086,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm, next_pte = pte->entries + pde_i; if (!next_pte->mem.size) { - err = gk20a_zalloc_gmmu_page_table(vm, + err = nvgpu_zalloc_gmmu_page_table(vm, pgsz_idx, next_l, next_pte, prev_pte); if (err) return err; @@ -2522,75 +2378,6 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = { {.update_entry = NULL} }; -/* - * Initialize a semaphore pool. Just return successfully if we do not need - * semaphores (i.e when sync-pts are active). - */ -static int gk20a_init_sema_pool(struct vm_gk20a *vm) -{ - struct nvgpu_semaphore_sea *sema_sea; - struct mm_gk20a *mm = vm->mm; - struct gk20a *g = mm->g; - int err; - - /* - * Don't waste the memory on semaphores if we don't need them. - */ - if (g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS) - return 0; - - if (vm->sema_pool) - return 0; - - sema_sea = nvgpu_semaphore_sea_create(g); - if (!sema_sea) - return -ENOMEM; - - vm->sema_pool = nvgpu_semaphore_pool_alloc(sema_sea); - if (!vm->sema_pool) - return -ENOMEM; - - /* - * Allocate a chunk of GPU VA space for mapping the semaphores. We will - * do a fixed alloc in the kernel VM so that all channels have the same - * RO address range for the semaphores. - * - * !!! TODO: cleanup. - */ - sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel, - vm->va_limit - - mm->channel.kernel_size, - 512 * PAGE_SIZE, - SZ_4K); - if (!sema_sea->gpu_va) { - nvgpu_free(&vm->kernel, sema_sea->gpu_va); - nvgpu_vm_put(vm); - return -ENOMEM; - } - - err = nvgpu_semaphore_pool_map(vm->sema_pool, vm); - if (err) { - nvgpu_semaphore_pool_unmap(vm->sema_pool, vm); - nvgpu_free(vm->vma[gmmu_page_size_small], - vm->sema_pool->gpu_va); - return err; - } - - return 0; -} - -/* - * Determine if the passed address space can support big pages or not. 
- */ -int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size) -{ - u64 mask = ((u64)vm->big_page_size << 10) - 1; - - if (base & mask || size & mask) - return 0; - return 1; -} - /* * Attempt to find a reserved memory area to determine PTE size for the passed * mapping. If no reserved area can be found use small pages. @@ -2661,272 +2448,6 @@ enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size) return gmmu_page_size_small; } -static int init_vm_page_tables(struct vm_gk20a *vm) -{ - u32 pde_lo, pde_hi; - int err; - - pde_range_from_vaddr_range(vm, - 0, vm->va_limit-1, - &pde_lo, &pde_hi); - vm->pdb.entries = nvgpu_vzalloc(vm->mm->g, - sizeof(struct gk20a_mm_entry) * - (pde_hi + 1)); - vm->pdb.num_entries = pde_hi + 1; - - if (!vm->pdb.entries) - return -ENOMEM; - - err = gk20a_zalloc_gmmu_page_table(vm, 0, &vm->mmu_levels[0], - &vm->pdb, NULL); - if (err) { - nvgpu_vfree(vm->mm->g, vm->pdb.entries); - return err; - } - - return 0; -} - -/** - * nvgpu_init_vm() - Initialize an address space. - * - * @mm - Parent MM. - * @vm - The VM to init. - * @big_page_size - Size of big pages associated with this VM. - * @low_hole - The size of the low hole (unaddressable memory at the bottom of - * the address space. - * @kernel_reserved - Space reserved for kernel only allocations. - * @aperture_size - Total size of the aperture. - * @big_pages - Ignored. Will be set based on other passed params. - * @name - Name of the address space. - * - * This function initializes an address space according to the following map: - * - * +--+ 0x0 - * | | - * +--+ @low_hole - * | | - * ~ ~ This is the "user" section. - * | | - * +--+ @aperture_size - @kernel_reserved - * | | - * ~ ~ This is the "kernel" section. - * | | - * +--+ @aperture_size - * - * The user section is therefor what ever is left over after the @low_hole and - * @kernel_reserved memory have been portioned out. The @kernel_reserved is - * always persent at the top of the memory space and the @low_hole is always at - * the bottom. - * - * For certain address spaces a "user" section makes no sense (bar1, etc) so in - * such cases the @kernel_reserved and @low_hole should sum to exactly - * @aperture_size. - */ -int nvgpu_init_vm(struct mm_gk20a *mm, - struct vm_gk20a *vm, - u32 big_page_size, - u64 low_hole, - u64 kernel_reserved, - u64 aperture_size, - bool big_pages, - bool userspace_managed, - char *name) -{ - int err; - char alloc_name[32]; - u64 kernel_vma_flags; - u64 user_vma_start, user_vma_limit; - u64 user_lp_vma_start, user_lp_vma_limit; - u64 kernel_vma_start, kernel_vma_limit; - struct gk20a *g = mm->g; - struct gk20a_platform *p = gk20a_get_platform(g->dev); - - if (WARN_ON(kernel_reserved + low_hole > aperture_size)) - return -ENOMEM; - - gk20a_dbg_info("Init space for %s: va_limit=0x%llx, " - "big_page_size=0x%x low_hole=0x%llx", - name, aperture_size, - (unsigned int)big_page_size, low_hole); - - vm->mm = mm; - - vm->gmmu_page_sizes[gmmu_page_size_small] = SZ_4K; - vm->gmmu_page_sizes[gmmu_page_size_big] = big_page_size; - vm->gmmu_page_sizes[gmmu_page_size_kernel] = SZ_4K; - - /* Set up vma pointers. 
*/ - vm->vma[gmmu_page_size_small] = &vm->user; - vm->vma[gmmu_page_size_big] = &vm->user; - vm->vma[gmmu_page_size_kernel] = &vm->kernel; - if (!p->unify_address_spaces) - vm->vma[gmmu_page_size_big] = &vm->user_lp; - - vm->va_start = low_hole; - vm->va_limit = aperture_size; - vm->big_pages = big_pages; - - vm->big_page_size = vm->gmmu_page_sizes[gmmu_page_size_big]; - vm->userspace_managed = userspace_managed; - vm->mmu_levels = g->ops.mm.get_mmu_levels(g, vm->big_page_size); - - /* Initialize the page table data structures. */ - err = init_vm_page_tables(vm); - if (err) - return err; - - /* Setup vma limits. */ - if (kernel_reserved + low_hole < aperture_size) { - if (p->unify_address_spaces) { - user_vma_start = low_hole; - user_vma_limit = vm->va_limit - kernel_reserved; - user_lp_vma_start = user_vma_limit; - user_lp_vma_limit = user_vma_limit; - } else { - user_vma_start = low_hole; - user_vma_limit = __nv_gmmu_va_small_page_limit(); - user_lp_vma_start = __nv_gmmu_va_small_page_limit(); - user_lp_vma_limit = vm->va_limit - kernel_reserved; - } - } else { - user_vma_start = 0; - user_vma_limit = 0; - user_lp_vma_start = 0; - user_lp_vma_limit = 0; - } - kernel_vma_start = vm->va_limit - kernel_reserved; - kernel_vma_limit = vm->va_limit; - - gk20a_dbg_info("user_vma [0x%llx,0x%llx)", - user_vma_start, user_vma_limit); - gk20a_dbg_info("user_lp_vma [0x%llx,0x%llx)", - user_lp_vma_start, user_lp_vma_limit); - gk20a_dbg_info("kernel_vma [0x%llx,0x%llx)", - kernel_vma_start, kernel_vma_limit); - - if (WARN_ON(user_vma_start > user_vma_limit) || - WARN_ON(user_lp_vma_start > user_lp_vma_limit) || - WARN_ON(kernel_vma_start >= kernel_vma_limit)) { - err = -EINVAL; - goto clean_up_page_tables; - } - - kernel_vma_flags = (kernel_reserved + low_hole) == aperture_size ? - 0 : GPU_ALLOC_GVA_SPACE; - - /* - * A "user" area only makes sense for the GVA spaces. For VMs where - * there is no "user" area user_vma_start will be equal to - * user_vma_limit (i.e a 0 sized space). In such a situation the kernel - * area must be non-zero in length. - */ - if (user_vma_start >= user_vma_limit && - kernel_vma_start >= kernel_vma_limit) { - err = -EINVAL; - goto clean_up_page_tables; - } - - /* - * Determine if big pages are possible in this VM. If a split address - * space is used then check the user_lp vma instead of the user vma. - */ - if (p->unify_address_spaces) - vm->big_pages = gk20a_big_pages_possible(vm, user_vma_start, - user_vma_limit - user_vma_start); - else - vm->big_pages = gk20a_big_pages_possible(vm, user_lp_vma_start, - user_lp_vma_limit - user_lp_vma_start); - - /* - * User VMA. - */ - if (user_vma_start < user_vma_limit) { - snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name); - err = __nvgpu_buddy_allocator_init(g, &vm->user, - vm, alloc_name, - user_vma_start, - user_vma_limit - - user_vma_start, - SZ_4K, - GPU_BALLOC_MAX_ORDER, - GPU_ALLOC_GVA_SPACE); - if (err) - goto clean_up_page_tables; - } else { - /* - * Make these allocator pointers point to the kernel allocator - * since we still use the legacy notion of page size to choose - * the allocator. - */ - vm->vma[0] = &vm->kernel; - vm->vma[1] = &vm->kernel; - } - - /* - * User VMA for large pages when a split address range is used. 
- */ - if (user_lp_vma_start < user_lp_vma_limit) { - snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s_lp", name); - err = __nvgpu_buddy_allocator_init(g, &vm->user_lp, - vm, alloc_name, - user_lp_vma_start, - user_lp_vma_limit - - user_lp_vma_start, - vm->big_page_size, - GPU_BALLOC_MAX_ORDER, - GPU_ALLOC_GVA_SPACE); - if (err) - goto clean_up_allocators; - } - - /* - * Kernel VMA. Must always exist for an address space. - */ - snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name); - err = __nvgpu_buddy_allocator_init(g, &vm->kernel, - vm, alloc_name, - kernel_vma_start, - kernel_vma_limit - kernel_vma_start, - SZ_4K, - GPU_BALLOC_MAX_ORDER, - kernel_vma_flags); - if (err) - goto clean_up_allocators; - - vm->mapped_buffers = NULL; - - nvgpu_mutex_init(&vm->update_gmmu_lock); - kref_init(&vm->ref); - nvgpu_init_list_node(&vm->vm_area_list); - - /* - * This is only necessary for channel address spaces. The best way to - * distinguish channel address spaces from other address spaces is by - * size - if the address space is 4GB or less, it's not a channel. - */ - if (vm->va_limit > SZ_4G) { - err = gk20a_init_sema_pool(vm); - if (err) - goto clean_up_allocators; - } - - return 0; - -clean_up_allocators: - if (nvgpu_alloc_initialized(&vm->kernel)) - nvgpu_alloc_destroy(&vm->kernel); - if (nvgpu_alloc_initialized(&vm->user)) - nvgpu_alloc_destroy(&vm->user); - if (nvgpu_alloc_initialized(&vm->user_lp)) - nvgpu_alloc_destroy(&vm->user_lp); -clean_up_page_tables: - /* Cleans up init_vm_page_tables() */ - nvgpu_vfree(g, vm->pdb.entries); - free_gmmu_pages(vm, &vm->pdb); - return err; -} - /* address space interfaces for the gk20a module */ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size, u32 flags) diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 27681199..90a72811 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -456,8 +456,6 @@ const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem, struct vm_gk20a *vm); -int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size); - extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; extern const struct gk20a_mmu_level gk20a_mm_levels_128k[]; diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 75dfcc86..6acea549 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c @@ -401,7 +401,7 @@ static void gp10b_remove_bar2_vm(struct gk20a *g) struct mm_gk20a *mm = &g->mm; gp10b_replayable_pagefault_buffer_deinit(g); - nvgpu_remove_vm(&mm->bar2.vm, &mm->bar2.inst_block); + nvgpu_vm_remove(&mm->bar2.vm, &mm->bar2.inst_block); } diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h index 6d8aa025..45c5def4 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h @@ -63,6 +63,12 @@ struct gk20a_mmu_level { size_t entry_size; }; +int nvgpu_zalloc_gmmu_page_table(struct vm_gk20a *vm, + enum gmmu_pgsz_gk20a pgsz_idx, + const struct gk20a_mmu_level *l, + struct gk20a_mm_entry *entry, + struct gk20a_mm_entry *prev_entry); + /** * nvgpu_gmmu_map - Map memory into the GMMU. 
* diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h index fb55483d..c89282bf 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h @@ -181,6 +181,7 @@ void nvgpu_vm_get(struct vm_gk20a *vm); void nvgpu_vm_put(struct vm_gk20a *vm); int vm_aspace_id(struct vm_gk20a *vm); +int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size); /* batching eliminates redundant cache flushes and invalidates */ void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch); @@ -194,7 +195,6 @@ void nvgpu_vm_mapping_batch_finish_locked( int nvgpu_vm_get_buffers(struct vm_gk20a *vm, struct nvgpu_mapped_buf ***mapped_buffers, int *num_buffers); - /* put references on the given buffers */ void nvgpu_vm_put_buffers(struct vm_gk20a *vm, struct nvgpu_mapped_buf **mapped_buffers, @@ -220,7 +220,6 @@ struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_less_than( int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va, struct dma_buf **dmabuf, u64 *offset); - int nvgpu_insert_mapped_buf(struct vm_gk20a *vm, struct nvgpu_mapped_buf *mapped_buffer); void nvgpu_remove_mapped_buf(struct vm_gk20a *vm, @@ -228,8 +227,7 @@ void nvgpu_remove_mapped_buf(struct vm_gk20a *vm, void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm); void nvgpu_vm_remove_support(struct vm_gk20a *vm); - -void nvgpu_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block); +void nvgpu_vm_remove(struct vm_gk20a *vm, struct nvgpu_mem *inst_block); int nvgpu_init_vm(struct mm_gk20a *mm, struct vm_gk20a *vm, diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index b8b5985c..63490aa5 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c @@ -364,7 +364,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, if (user_vma_start < user_vma_limit) { snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, gmmu_page_sizes[gmmu_page_size_small] >> 10); - if (!gk20a_big_pages_possible(vm, user_vma_start, + if (!nvgpu_big_pages_possible(vm, user_vma_start, user_vma_limit - user_vma_start)) vm->big_pages = false; @@ -391,7 +391,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, snprintf(name, sizeof(name), "gk20a_as_%dKB-sys", gmmu_page_sizes[gmmu_page_size_kernel] >> 10); - if (!gk20a_big_pages_possible(vm, kernel_vma_start, + if (!nvgpu_big_pages_possible(vm, kernel_vma_start, kernel_vma_limit - kernel_vma_start)) vm->big_pages = false; -- cgit v1.2.2