/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/dma-attrs.h>
#include <linux/dma-mapping.h>
#include <linux/version.h>

#include <nvgpu/dma.h>

#include "gk20a/gk20a.h"

#if defined(CONFIG_GK20A_VIDMEM)
static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, dma_addr_t at,
			     size_t size)
{
	u64 addr = 0;

	if (at)
		addr = nvgpu_alloc_fixed(allocator, at, size, 0);
	else
		addr = nvgpu_alloc(allocator, size);

	return addr;
}
#endif

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
static void nvgpu_dma_flags_to_attrs(unsigned long *attrs,
		unsigned long flags)
#define ATTR_ARG(x) *x
#else
static void nvgpu_dma_flags_to_attrs(struct dma_attrs *attrs,
		unsigned long flags)
#define ATTR_ARG(x) x
#endif
{
	if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs));
	if (flags & NVGPU_DMA_FORCE_CONTIGUOUS)
		dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs));
	if (flags & NVGPU_DMA_READ_ONLY)
		dma_set_attr(DMA_ATTR_READ_ONLY, ATTR_ARG(attrs));
#undef ATTR_ARG
}

int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
{
	return nvgpu_dma_alloc_flags(g, 0, size, mem);
}

int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
		struct nvgpu_mem *mem)
{
	if (g->mm.vidmem_is_vidmem) {
		/*
		 * Force the no-kernel-mapping flag on because we don't support
		 * the lack of it for vidmem - the user should not care when
		 * using nvgpu_dma_alloc and it's vidmem, or if there's a
		 * difference, the user should use the flag explicitly anyway.
		 */
		int err = nvgpu_dma_alloc_flags_vid(g,
				flags | NVGPU_DMA_NO_KERNEL_MAPPING,
				size, mem);

		if (!err)
			return 0;
		/*
		 * Fall back to sysmem (which may then also fail) in case
		 * vidmem is exhausted.
		 */
	}

	return nvgpu_dma_alloc_flags_sys(g, flags, size, mem);
}
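
/*
 * Illustrative usage sketch (not part of the original file): a caller that
 * does not care which aperture backs the buffer can use the generic entry
 * points above. "g" is assumed to be a valid gk20a instance from the
 * caller's context.
 *
 *	struct nvgpu_mem mem;
 *	int err = nvgpu_dma_alloc(g, SZ_4K, &mem);
 *
 *	if (err)
 *		return err;
 *	...
 *	nvgpu_dma_free(g, &mem);
 */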

int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
{
	return nvgpu_dma_alloc_flags_sys(g, 0, size, mem);
}

int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
		size_t size, struct nvgpu_mem *mem)
{
	struct device *d = dev_from_gk20a(g);
	int err;
	dma_addr_t iova;

	gk20a_dbg_fn("");

	if (flags) {
		DEFINE_DMA_ATTRS(dma_attrs);

		nvgpu_dma_flags_to_attrs(&dma_attrs, flags);

		if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
			mem->pages = dma_alloc_attrs(d,
					size, &iova, GFP_KERNEL,
					__DMA_ATTR(dma_attrs));
			if (!mem->pages)
				return -ENOMEM;
		} else {
			mem->cpu_va = dma_alloc_attrs(d,
					size, &iova, GFP_KERNEL,
					__DMA_ATTR(dma_attrs));
			if (!mem->cpu_va)
				return -ENOMEM;
		}
	} else {
		mem->cpu_va = dma_alloc_coherent(d, size, &iova, GFP_KERNEL);
		if (!mem->cpu_va)
			return -ENOMEM;
	}

	if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
		err = gk20a_get_sgtable_from_pages(d, &mem->sgt, mem->pages,
						   iova, size);
	else {
		err = gk20a_get_sgtable(d, &mem->sgt, mem->cpu_va, iova, size);
		memset(mem->cpu_va, 0, size);
	}
	if (err)
		goto fail_free;

	mem->size = size;
	mem->aperture = APERTURE_SYSMEM;
	mem->flags = flags;

	gk20a_dbg_fn("done");

	return 0;

fail_free:
	dma_free_coherent(d, size, mem->cpu_va, iova);
	mem->cpu_va = NULL;
	mem->sgt = NULL;
	return err;
}

int nvgpu_dma_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
{
	return nvgpu_dma_alloc_flags_vid(g,
			NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
}

int nvgpu_dma_alloc_flags_vid(struct gk20a *g, unsigned long flags,
		size_t size, struct nvgpu_mem *mem)
{
	return nvgpu_dma_alloc_flags_vid_at(g, flags, size, mem, 0);
}

int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
		size_t size, struct nvgpu_mem *mem, dma_addr_t at)
{
#if defined(CONFIG_GK20A_VIDMEM)
	u64 addr;
	int err;
	struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
		&g->mm.vidmem.allocator :
		&g->mm.vidmem.bootstrap_allocator;
	int before_pending;

	gk20a_dbg_fn("");

	if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
		return -ENOSYS;

	/*
	 * Our own allocator doesn't have any flags yet, and we can't
	 * kernel-map these, so require explicit flags.
	 */
	WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING);

	nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
	before_pending = atomic64_read(&g->mm.vidmem.bytes_pending);
	addr = __nvgpu_dma_alloc(vidmem_alloc, at, size);
	nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
	if (!addr) {
		/*
		 * If memory is known to be freed soon, let the user know that
		 * it may be available after a while.
		 */
		if (before_pending)
			return -EAGAIN;
		else
			return -ENOMEM;
	}

	if (at)
		mem->fixed = true;
	else
		mem->fixed = false;

	mem->sgt = nvgpu_kzalloc(g, sizeof(struct sg_table));
	if (!mem->sgt) {
		err = -ENOMEM;
		goto fail_physfree;
	}

	err = sg_alloc_table(mem->sgt, 1, GFP_KERNEL);
	if (err)
		goto fail_kfree;

	set_vidmem_page_alloc(mem->sgt->sgl, addr);
	sg_set_page(mem->sgt->sgl, NULL, size, 0);

	mem->size = size;
	mem->aperture = APERTURE_VIDMEM;
	mem->allocator = vidmem_alloc;
	mem->flags = flags;

	nvgpu_init_list_node(&mem->clear_list_entry);

	gk20a_dbg_fn("done at 0x%llx size %zu", addr, size);

	return 0;

fail_kfree:
	nvgpu_kfree(g, mem->sgt);
fail_physfree:
	nvgpu_free(&g->mm.vidmem.allocator, addr);
	return err;
#else
	return -ENOSYS;
#endif
}
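
/*
 * Note on the vidmem path above: the single scatterlist entry carries no
 * struct page backing - set_vidmem_page_alloc() stashes the allocation
 * handle in the sgl instead, which is consistent with the WARN_ON()
 * requiring NVGPU_DMA_NO_KERNEL_MAPPING, since vidmem cannot be CPU-mapped
 * through this path.
 *
 * Illustrative fixed-address usage sketch (not from the original file; the
 * offset value is hypothetical):
 *
 *	struct nvgpu_mem mem;
 *	int err = nvgpu_dma_alloc_flags_vid_at(g,
 *			NVGPU_DMA_NO_KERNEL_MAPPING, SZ_1M, &mem, 0x100000);
 *
 *	if (err == -EAGAIN)
 *		; // retry later: pending frees may satisfy the request
 */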

int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size,
		struct nvgpu_mem *mem)
{
	return nvgpu_dma_alloc_map_flags(vm, 0, size, mem);
}

int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
		size_t size, struct nvgpu_mem *mem)
{
	if (vm->mm->vidmem_is_vidmem) {
		/*
		 * Force the no-kernel-mapping flag on because we don't support
		 * the lack of it for vidmem - the user should not care when
		 * using nvgpu_dma_alloc_map and it's vidmem, or if there's a
		 * difference, the user should use the flag explicitly anyway.
		 */
		int err = nvgpu_dma_alloc_map_flags_vid(vm,
				flags | NVGPU_DMA_NO_KERNEL_MAPPING,
				size, mem);

		if (!err)
			return 0;
		/*
		 * Fall back to sysmem (which may then also fail) in case
		 * vidmem is exhausted.
		 */
	}

	return nvgpu_dma_alloc_map_flags_sys(vm, flags, size, mem);
}

int nvgpu_dma_alloc_map_sys(struct vm_gk20a *vm, size_t size,
		struct nvgpu_mem *mem)
{
	return nvgpu_dma_alloc_map_flags_sys(vm, 0, size, mem);
}

int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
		size_t size, struct nvgpu_mem *mem)
{
	int err = nvgpu_dma_alloc_flags_sys(vm->mm->g, flags, size, mem);

	if (err)
		return err;

	mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0,
				     gk20a_mem_flag_none, false,
				     mem->aperture);
	if (!mem->gpu_va) {
		err = -ENOMEM;
		goto fail_free;
	}

	return 0;

fail_free:
	nvgpu_dma_free(vm->mm->g, mem);
	return err;
}

int nvgpu_dma_alloc_map_vid(struct vm_gk20a *vm, size_t size,
		struct nvgpu_mem *mem)
{
	return nvgpu_dma_alloc_map_flags_vid(vm,
			NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
}

int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
		size_t size, struct nvgpu_mem *mem)
{
	int err = nvgpu_dma_alloc_flags_vid(vm->mm->g, flags, size, mem);

	if (err)
		return err;

	mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0,
				     gk20a_mem_flag_none, false,
				     mem->aperture);
	if (!mem->gpu_va) {
		err = -ENOMEM;
		goto fail_free;
	}

	return 0;

fail_free:
	nvgpu_dma_free(vm->mm->g, mem);
	return err;
}

static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
{
	struct device *d = dev_from_gk20a(g);

	if (mem->cpu_va || mem->pages) {
		if (mem->flags) {
			DEFINE_DMA_ATTRS(dma_attrs);

			nvgpu_dma_flags_to_attrs(&dma_attrs, mem->flags);

			if (mem->flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
				dma_free_attrs(d, mem->size, mem->pages,
					sg_dma_address(mem->sgt->sgl),
					__DMA_ATTR(dma_attrs));
			} else {
				dma_free_attrs(d, mem->size, mem->cpu_va,
					sg_dma_address(mem->sgt->sgl),
					__DMA_ATTR(dma_attrs));
			}
		} else {
			dma_free_coherent(d, mem->size, mem->cpu_va,
					sg_dma_address(mem->sgt->sgl));
		}
		mem->cpu_va = NULL;
		mem->pages = NULL;
	}

	if (mem->sgt)
		gk20a_free_sgtable(g, &mem->sgt);

	mem->size = 0;
	mem->aperture = APERTURE_INVALID;
}
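
/*
 * Vidmem frees are deferred for userspace buffers: the buffer is queued on
 * clear_list_head below and scrubbed by clear_mem_worker before its space
 * is reused, while kernel-owned buffers are cleared and released inline.
 */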
static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
{
#if defined(CONFIG_GK20A_VIDMEM)
	bool was_empty;

	/* Sanity check - only this flag is supported when allocating. */
	WARN_ON(mem->flags != NVGPU_DMA_NO_KERNEL_MAPPING);

	if (mem->user_mem) {
		nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
		was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
		nvgpu_list_add_tail(&mem->clear_list_entry,
				    &g->mm.vidmem.clear_list_head);
		atomic64_add(mem->size, &g->mm.vidmem.bytes_pending);
		nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);

		if (was_empty) {
			cancel_work_sync(&g->mm.vidmem.clear_mem_worker);
			schedule_work(&g->mm.vidmem.clear_mem_worker);
		}
	} else {
		nvgpu_memset(g, mem, 0, 0, mem->size);
		nvgpu_free(mem->allocator,
			   (u64)get_vidmem_page_alloc(mem->sgt->sgl));
		gk20a_free_sgtable(g, &mem->sgt);

		mem->size = 0;
		mem->aperture = APERTURE_INVALID;
	}
#endif
}

void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem)
{
	switch (mem->aperture) {
	case APERTURE_SYSMEM:
		return nvgpu_dma_free_sys(g, mem);
	case APERTURE_VIDMEM:
		return nvgpu_dma_free_vid(g, mem);
	default:
		break; /* like free() on "null" memory */
	}
}

void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem)
{
	if (mem->gpu_va)
		gk20a_gmmu_unmap(vm, mem->gpu_va, mem->size,
				 gk20a_mem_flag_none);
	mem->gpu_va = 0;

	nvgpu_dma_free(vm->mm->g, mem);
}
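
/*
 * Illustrative map-and-free lifecycle sketch (not part of the original
 * file); "vm" is assumed to be a valid vm_gk20a from the caller's context:
 *
 *	struct nvgpu_mem mem;
 *	int err = nvgpu_dma_alloc_map(vm, SZ_64K, &mem);
 *
 *	if (err)
 *		return err;
 *	// ... hand mem.gpu_va to the GPU ...
 *	nvgpu_dma_unmap_free(vm, &mem);
 */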