/*
 * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <nvgpu/log.h>
#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/kmem.h>
#include <nvgpu/enabled.h>

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"

static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order,
				 struct gk20a_mm_entry *entry)
{
	u32 num_pages = 1 << order;
	u32 len = num_pages * PAGE_SIZE;
	int err;
	struct page *pages;
	struct gk20a *g = vm->mm->g;

	/* note: mem_desc slightly abused (wrt. alloc_gmmu_pages) */

	pages = alloc_pages(GFP_KERNEL, order);
	if (!pages) {
		nvgpu_log(g, gpu_dbg_pte, "alloc_pages failed");
		goto err_out;
	}
	entry->mem.priv.sgt = nvgpu_kzalloc(g, sizeof(*entry->mem.priv.sgt));
	if (!entry->mem.priv.sgt) {
		nvgpu_log(g, gpu_dbg_pte, "cannot allocate sg table");
		goto err_alloced;
	}
	err = sg_alloc_table(entry->mem.priv.sgt, 1, GFP_KERNEL);
	if (err) {
		nvgpu_log(g, gpu_dbg_pte, "sg_alloc_table failed");
		goto err_sg_table;
	}
	sg_set_page(entry->mem.priv.sgt->sgl, pages, len, 0);
	entry->mem.cpu_va = page_address(pages);
	memset(entry->mem.cpu_va, 0, len);
	entry->mem.size = len;
	entry->mem.aperture = APERTURE_SYSMEM;
	FLUSH_CPU_DCACHE(entry->mem.cpu_va,
			 sg_phys(entry->mem.priv.sgt->sgl), len);

	return 0;

err_sg_table:
	nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt);
err_alloced:
	__free_pages(pages, order);
err_out:
	return -ENOMEM;
}

static int nvgpu_alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
				  struct gk20a_mm_entry *entry)
{
	struct gk20a *g = gk20a_from_vm(vm);
	u32 num_pages = 1 << order;
	u32 len = num_pages * PAGE_SIZE;
	int err;

	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
		return alloc_gmmu_phys_pages(vm, order, entry);

	/*
	 * On arm32 we're limited by vmalloc space, so we do not map pages by
	 * default.
	 */
	if (IS_ENABLED(CONFIG_ARM64))
		err = nvgpu_dma_alloc(g, len, &entry->mem);
	else
		err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING,
					    len, &entry->mem);

	if (err) {
		nvgpu_err(g, "memory allocation failed");
		return -ENOMEM;
	}

	return 0;
}

/*
 * Allocate a phys contig region big enough for a full sized gmmu page table
 * for the given gmmu_page_size. The whole range is zeroed so it's
 * "invalid"/will fault.
 *
 * If a previous entry is supplied, its memory will be used for
 * suballocation of this next entry too, if there is space.
 */
int nvgpu_zalloc_gmmu_page_table(struct vm_gk20a *vm,
				 enum gmmu_pgsz_gk20a pgsz_idx,
				 const struct gk20a_mmu_level *l,
				 struct gk20a_mm_entry *entry,
				 struct gk20a_mm_entry *prev_entry)
{
	int err = -ENOMEM;
	int order;
	struct gk20a *g = gk20a_from_vm(vm);
	u32 bytes;

	/* allocate enough pages for the table */
	order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1;
	order += ilog2(l->entry_size);
	bytes = 1 << order;
	order -= PAGE_SHIFT;
	if (order < 0 && prev_entry) {
		/* try to suballocate from previous chunk */
		u32 capacity = prev_entry->mem.size / bytes;
		u32 prev = prev_entry->woffset * sizeof(u32) / bytes;
		u32 free = capacity - prev - 1;

		nvgpu_log(g, gpu_dbg_pte, "cap %d prev %d free %d bytes %d",
			  capacity, prev, free, bytes);

		if (free) {
			memcpy(&entry->mem, &prev_entry->mem,
			       sizeof(entry->mem));
			entry->woffset = prev_entry->woffset
				+ bytes / sizeof(u32);
			err = 0;
		}
	}

	if (err) {
		/* no suballoc space */
		order = max(0, order);
		err = nvgpu_alloc_gmmu_pages(vm, order, entry);
		entry->woffset = 0;
	}

	nvgpu_log(g, gpu_dbg_pte,
		  "entry = 0x%p, addr=%08llx, size %d, woff %x",
		  entry,
		  (entry->mem.priv.sgt &&
		   entry->mem.aperture == APERTURE_SYSMEM) ?
		  g->ops.mm.get_iova_addr(g, entry->mem.priv.sgt->sgl, 0) : 0,
		  order, entry->woffset);
	if (err)
		return err;

	entry->pgsz = pgsz_idx;
	entry->mem.skip_wmb = true;

	return err;
}
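
/*
 * Worked example for the sizing math above (the level parameters here are
 * hypothetical, chosen only to make the arithmetic concrete): a level
 * spanning 5 VA bits with 8-byte entries gives
 *
 *   order = 5 + ilog2(8) = 8       ->  bytes = 1 << 8 = 256
 *   order = 8 - PAGE_SHIFT = -4        (with 4K pages)
 *
 * Since order went negative, the table is smaller than a page, so when a
 * prev_entry with free space is supplied the 256-byte table is carved out
 * of prev_entry's memory at woffset = prev->woffset + 256 / sizeof(u32).
 * A level spanning 9 bits would instead give order = 0 after subtracting
 * PAGE_SHIFT and fall through to nvgpu_alloc_gmmu_pages() for a page of
 * its own.
 */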
/*
 * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU
 * VA will be allocated for you. If addr is non-zero then the buffer will be
 * mapped at @addr.
 */
static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, struct nvgpu_mem *mem,
			    u64 addr, u64 size, u32 flags, int rw_flag,
			    bool priv, enum nvgpu_aperture aperture)
{
	struct gk20a *g = gk20a_from_vm(vm);
	u64 vaddr;
	struct sg_table *sgt = mem->priv.sgt;

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	vaddr = g->ops.mm.gmmu_map(vm, addr,
				   sgt,    /* sg table */
				   0,      /* sg offset */
				   size,
				   gmmu_page_size_kernel,
				   0,      /* kind */
				   0,      /* ctag_offset */
				   flags, rw_flag,
				   false,  /* clear_ctags */
				   false,  /* sparse */
				   priv,   /* priv */
				   NULL,   /* mapping_batch handle */
				   aperture);
	nvgpu_mutex_release(&vm->update_gmmu_lock);

	if (!vaddr) {
		nvgpu_err(g, "failed to allocate va space");
		return 0;
	}

	return vaddr;
}

u64 nvgpu_gmmu_map(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 size,
		   u32 flags, int rw_flag, bool priv,
		   enum nvgpu_aperture aperture)
{
	return __nvgpu_gmmu_map(vm, mem, 0, size, flags, rw_flag, priv,
				aperture);
}

/*
 * Like nvgpu_gmmu_map() except it can work on a fixed address instead.
 */
u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 addr,
			 u64 size, u32 flags, int rw_flag, bool priv,
			 enum nvgpu_aperture aperture)
{
	return __nvgpu_gmmu_map(vm, mem, addr, size, flags, rw_flag, priv,
				aperture);
}

void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 gpu_va)
{
	struct gk20a *g = gk20a_from_vm(vm);

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
	g->ops.mm.gmmu_unmap(vm,
			     gpu_va,
			     mem->size,
			     gmmu_page_size_kernel,
			     true,                /* va_allocated */
			     gk20a_mem_flag_none,
			     false,
			     NULL);
	nvgpu_mutex_release(&vm->update_gmmu_lock);
}
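
/*
 * Usage sketch: how a caller would typically pair nvgpu_gmmu_map() with
 * nvgpu_gmmu_unmap() for a kernel-owned buffer. This is illustrative only
 * and kept under #if 0 so it is never built; the size, flag and aperture
 * choices below are assumptions made for the example, not requirements of
 * the API.
 */
#if 0
static int example_map_kernel_buffer(struct gk20a *g, struct vm_gk20a *vm)
{
	struct nvgpu_mem mem;
	u64 gpu_va;
	int err;

	/* Back the mapping with DMA-able system memory. */
	err = nvgpu_dma_alloc(g, PAGE_SIZE, &mem);
	if (err)
		return err;

	/* addr is 0 internally, so a GPU VA is allocated for us. */
	gpu_va = nvgpu_gmmu_map(vm, &mem, mem.size,
				0,			/* flags */
				gk20a_mem_flag_none,	/* rw_flag */
				false,			/* priv */
				mem.aperture);
	if (!gpu_va) {
		nvgpu_dma_free(g, &mem);
		return -ENOMEM;
	}

	/* ... use the mapping ... */

	nvgpu_gmmu_unmap(vm, &mem, gpu_va);
	nvgpu_dma_free(g, &mem);

	return 0;
}
#endif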