From 3c37701377459fbea2b460e1b9c65a863dfb04b2 Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Mon, 31 Jul 2017 12:32:07 -0700
Subject: gpu: nvgpu: Split VIDMEM support from mm_gk20a.c

Split VIDMEM support into its own code files organized as such:

  common/mm/vidmem.c     - Base vidmem support
  common/linux/vidmem.c  - Linux specific user-space interaction
  include/nvgpu/vidmem.h - Vidmem API definitions

Also use the config to enable/disable VIDMEM support in the makefile and
remove as many CONFIG_GK20A_VIDMEM preprocessor checks as possible from
the source code.

And lastly update a while-loop that iterated over an SGT to use the new
for_each construct for iterating over SGTs (a minimal sketch of this idiom
is appended after the patch).

Currently this organization is not perfectly adhered to. More patches
will fix that.

JIRA NVGPU-30
JIRA NVGPU-138

Change-Id: Ic0f4d2cf38b65849c7dc350a69b175421477069c
Signed-off-by: Alex Waterman
Reviewed-on: https://git-master.nvidia.com/r/1540705
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/mm/gmmu.c      |   1 +
 drivers/gpu/nvgpu/common/mm/nvgpu_mem.c |   1 +
 drivers/gpu/nvgpu/common/mm/vidmem.c    | 259 ++++++++++++++++++++++++++++++++
 3 files changed, 261 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/common/mm/vidmem.c

(limited to 'drivers/gpu/nvgpu/common/mm')

diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index f61ec3fc..1eed3a3b 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include <nvgpu/vidmem.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"

diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index faee482d..2b6e6e6a 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include <nvgpu/vidmem.h>
 
 #include "gk20a/gk20a.h"

diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c
new file mode 100644
index 00000000..1ba07ca6
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/vidmem.c
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include
+
+#include
+#include
+
+#include "gk20a/gk20a.h"
+#include "gk20a/mm_gk20a.h"
+
+void gk20a_vidmem_destroy(struct gk20a *g)
+{
+	if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
+		nvgpu_alloc_destroy(&g->mm.vidmem.allocator);
+}
+
+int gk20a_vidmem_clear_all(struct gk20a *g)
+{
+	struct mm_gk20a *mm = &g->mm;
+	struct gk20a_fence *gk20a_fence_out = NULL;
+	u64 region2_base = 0;
+	int err = 0;
+
+	if (mm->vidmem.ce_ctx_id == (u32)~0)
+		return -EINVAL;
+
+	err = gk20a_ce_execute_ops(g,
+			mm->vidmem.ce_ctx_id,
+			0,
+			mm->vidmem.base,
+			mm->vidmem.bootstrap_base - mm->vidmem.base,
+			0x00000000,
+			NVGPU_CE_DST_LOCATION_LOCAL_FB,
+			NVGPU_CE_MEMSET,
+			NULL,
+			0,
+			NULL);
+	if (err) {
+		nvgpu_err(g,
+			"Failed to clear vidmem region 1 : %d", err);
+		return err;
+	}
+
+	region2_base = mm->vidmem.bootstrap_base + mm->vidmem.bootstrap_size;
+
+	err = gk20a_ce_execute_ops(g,
+			mm->vidmem.ce_ctx_id,
+			0,
+			region2_base,
+			mm->vidmem.size - region2_base,
+			0x00000000,
+			NVGPU_CE_DST_LOCATION_LOCAL_FB,
+			NVGPU_CE_MEMSET,
+			NULL,
+			0,
+			&gk20a_fence_out);
+	if (err) {
+		nvgpu_err(g,
+			"Failed to clear vidmem region 2 : %d", err);
+		return err;
+	}
+
+	if (gk20a_fence_out) {
+		struct nvgpu_timeout timeout;
+
+		nvgpu_timeout_init(g, &timeout,
+				   gk20a_get_gr_idle_timeout(g),
+				   NVGPU_TIMER_CPU_TIMER);
+
+		do {
+			err = gk20a_fence_wait(g, gk20a_fence_out,
+					       gk20a_get_gr_idle_timeout(g));
+		} while (err == -ERESTARTSYS &&
+			 !nvgpu_timeout_expired(&timeout));
+
+		gk20a_fence_put(gk20a_fence_out);
+		if (err) {
+			nvgpu_err(g,
+				"fence wait failed for CE execute ops");
+			return err;
+		}
+	}
+
+	mm->vidmem.cleared = true;
+
+	return 0;
+}
+
+int gk20a_init_vidmem(struct mm_gk20a *mm)
+{
+	struct gk20a *g = mm->g;
+	size_t size = g->ops.mm.get_vidmem_size ?
+		g->ops.mm.get_vidmem_size(g) : 0;
+	u64 bootstrap_base, bootstrap_size, base;
+	u64 default_page_size = SZ_64K;
+	int err;
+
+	static struct nvgpu_alloc_carveout wpr_co =
+		NVGPU_CARVEOUT("wpr-region", 0, SZ_16M);
+
+	if (!size)
+		return 0;
+
+	wpr_co.base = size - SZ_256M;
+	bootstrap_base = wpr_co.base;
+	bootstrap_size = SZ_16M;
+	base = default_page_size;
+
+	/*
+	 * Bootstrap allocator for use before the CE is initialized (CE
+	 * initialization requires vidmem but we want to use the CE to zero
+	 * out vidmem before allocating it...
+	 */
+	err = nvgpu_page_allocator_init(g, &g->mm.vidmem.bootstrap_allocator,
+					"vidmem-bootstrap",
+					bootstrap_base, bootstrap_size,
+					SZ_4K, 0);
+
+	err = nvgpu_page_allocator_init(g, &g->mm.vidmem.allocator,
+					"vidmem",
+					base, size - base,
+					default_page_size,
+					GPU_ALLOC_4K_VIDMEM_PAGES);
+	if (err) {
+		nvgpu_err(g, "Failed to register vidmem for size %zu: %d",
+				size, err);
+		return err;
+	}
+
+	/* Reserve bootstrap region in vidmem allocator */
+	nvgpu_alloc_reserve_carveout(&g->mm.vidmem.allocator, &wpr_co);
+
+	mm->vidmem.base = base;
+	mm->vidmem.size = size - base;
+	mm->vidmem.bootstrap_base = bootstrap_base;
+	mm->vidmem.bootstrap_size = bootstrap_size;
+
+	nvgpu_mutex_init(&mm->vidmem.first_clear_mutex);
+
+	INIT_WORK(&mm->vidmem.clear_mem_worker, gk20a_vidmem_clear_mem_worker);
+	nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0);
+	nvgpu_init_list_node(&mm->vidmem.clear_list_head);
+	nvgpu_mutex_init(&mm->vidmem.clear_list_mutex);
+
+	gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M);
+
+	return 0;
+}
+
+int gk20a_vidmem_get_space(struct gk20a *g, u64 *space)
+{
+	struct nvgpu_allocator *allocator = &g->mm.vidmem.allocator;
+
+	gk20a_dbg_fn("");
+
+	if (!nvgpu_alloc_initialized(allocator))
+		return -ENOSYS;
+
+	nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
+	*space = nvgpu_alloc_space(allocator) +
+		nvgpu_atomic64_read(&g->mm.vidmem.bytes_pending);
+	nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
+	return 0;
+}
+
+int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
+{
+	struct gk20a_fence *gk20a_fence_out = NULL;
+	struct gk20a_fence *gk20a_last_fence = NULL;
+	struct nvgpu_page_alloc *alloc = NULL;
+	void *sgl = NULL;
+	int err = 0;
+
+	if (g->mm.vidmem.ce_ctx_id == (u32)~0)
+		return -EINVAL;
+
+	alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
+
+	nvgpu_sgt_for_each_sgl(sgl, &alloc->sgt) {
+		if (gk20a_last_fence)
+			gk20a_fence_put(gk20a_last_fence);
+
+		err = gk20a_ce_execute_ops(g,
+			g->mm.vidmem.ce_ctx_id,
+			0,
+			nvgpu_sgt_get_phys(&alloc->sgt, sgl),
+			nvgpu_sgt_get_length(&alloc->sgt, sgl),
+			0x00000000,
+			NVGPU_CE_DST_LOCATION_LOCAL_FB,
+			NVGPU_CE_MEMSET,
+			NULL,
+			0,
+			&gk20a_fence_out);
+
+		if (err) {
+			nvgpu_err(g,
+				"Failed gk20a_ce_execute_ops[%d]", err);
+			return err;
+		}
+
+		gk20a_last_fence = gk20a_fence_out;
+	}
+
+	if (gk20a_last_fence) {
+		struct nvgpu_timeout timeout;
+
+		nvgpu_timeout_init(g, &timeout,
+				   gk20a_get_gr_idle_timeout(g),
+				   NVGPU_TIMER_CPU_TIMER);
+
+		do {
+			err = gk20a_fence_wait(g, gk20a_last_fence,
+					       gk20a_get_gr_idle_timeout(g));
+		} while (err == -ERESTARTSYS &&
+			 !nvgpu_timeout_expired(&timeout));
+
+		gk20a_fence_put(gk20a_last_fence);
+		if (err)
+			nvgpu_err(g,
+				"fence wait failed for CE execute ops");
+	}
+
+	return err;
+}
+
+struct nvgpu_mem *get_pending_mem_desc(struct mm_gk20a *mm)
+{
+	struct nvgpu_mem *mem = NULL;
+
+	nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
+	if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
+		mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
+				nvgpu_mem, clear_list_entry);
+		nvgpu_list_del(&mem->clear_list_entry);
+	}
+	nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
+
+	return mem;
+}
--
cgit v1.2.2
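
Sketch of the while-loop to for_each conversion mentioned in the commit message:
the converted loop is visible above in gk20a_gmmu_clear_vidmem_mem(), which now
uses nvgpu_sgt_for_each_sgl(). The snippet below is a minimal, self-contained
illustration of that idiom only; the toy_sgl type and toy_for_each_sgl macro are
made up for this example and are not the real nvgpu SGT types or macro (in the
driver the real macro walks the list through the nvgpu_sgt abstraction rather
than a plain next pointer).

  #include <stdio.h>

  struct toy_sgl {
  	unsigned long phys;
  	unsigned long length;
  	struct toy_sgl *next;
  };

  /* The macro hides the cursor advance that the old while-loop did by hand. */
  #define toy_for_each_sgl(sgl, head) \
  	for ((sgl) = (head); (sgl) != NULL; (sgl) = (sgl)->next)

  int main(void)
  {
  	struct toy_sgl c = { 0x3000, 0x1000, NULL };
  	struct toy_sgl b = { 0x2000, 0x2000, &c };
  	struct toy_sgl a = { 0x1000, 0x1000, &b };
  	struct toy_sgl *sgl;

  	/* Old style: explicit while-loop with a manual advance step. */
  	sgl = &a;
  	while (sgl) {
  		printf("while:    phys=0x%lx len=0x%lx\n", sgl->phys, sgl->length);
  		sgl = sgl->next;
  	}

  	/* New style: the traversal boilerplate lives in the macro, so the loop
  	 * body only deals with the current entry and cannot forget the advance. */
  	toy_for_each_sgl(sgl, &a)
  		printf("for_each: phys=0x%lx len=0x%lx\n", sgl->phys, sgl->length);

  	return 0;
  }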