From 01e6fac4d61fdd7fff5433942ec93fc2ea1e4df1 Mon Sep 17 00:00:00 2001
From: Joshua Bakita
Date: Wed, 28 Jun 2023 18:24:25 -0400
Subject: Include nvgpu headers

These are needed to build on NVIDIA's Jetson boards for the time being. Only
a couple structs are required, so it should be fairly easy to remove this
dependency at some point in the future.
---
 include/gk20a/gk20a.c | 590 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 590 insertions(+)
 create mode 100644 include/gk20a/gk20a.c

diff --git a/include/gk20a/gk20a.c b/include/gk20a/gk20a.c
new file mode 100644
index 0000000..c3068b7
--- /dev/null
+++ b/include/gk20a/gk20a.c
@@ -0,0 +1,590 @@
+/*
+ * GK20A Graphics
+ *
+ * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "gk20a.h" + +#include "dbg_gpu_gk20a.h" +#include "pstate/pstate.h" + +void __nvgpu_check_gpu_state(struct gk20a *g) +{ + u32 boot_0 = 0xffffffff; + + boot_0 = nvgpu_mc_boot_0(g, NULL, NULL, NULL); + if (boot_0 == 0xffffffff) { + nvgpu_err(g, "GPU has disappeared from bus!!"); + nvgpu_err(g, "Rebooting system!!"); + nvgpu_kernel_restart(NULL); + } +} + +void __gk20a_warn_on_no_regs(void) +{ + WARN_ONCE(1, "Attempted access to GPU regs after unmapping!"); +} + +static void gk20a_mask_interrupts(struct gk20a *g) +{ + if (g->ops.mc.intr_mask != NULL) { + g->ops.mc.intr_mask(g); + } + + if (g->ops.mc.log_pending_intrs != NULL) { + g->ops.mc.log_pending_intrs(g); + } +} + +int gk20a_prepare_poweroff(struct gk20a *g) +{ + int ret = 0; + + nvgpu_log_fn(g, " "); + + if (g->ops.fifo.channel_suspend) { + ret = g->ops.fifo.channel_suspend(g); + if (ret) { + return ret; + } + } + + /* disable elpg before gr or fifo suspend */ + if (g->ops.pmu.is_pmu_supported(g)) { + ret |= nvgpu_pmu_destroy(g); + } + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) { + ret |= nvgpu_sec2_destroy(g); + } + + ret |= gk20a_gr_suspend(g); + ret |= nvgpu_mm_suspend(g); + ret |= gk20a_fifo_suspend(g); + + gk20a_ce_suspend(g); + + /* Disable GPCPLL */ + if (g->ops.clk.suspend_clk_support) { + ret |= g->ops.clk.suspend_clk_support(g); + } + + if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) { + gk20a_deinit_pstate_support(g); + } + + gk20a_mask_interrupts(g); + + g->power_on = false; + + return ret; +} + +int gk20a_finalize_poweron(struct gk20a *g) +{ + int err = 0; +#if defined(CONFIG_TEGRA_GK20A_NVHOST) + u32 nr_pages; +#endif + + u32 fuse_status; + + nvgpu_log_fn(g, " "); + + if (g->power_on) { + return 0; + } + + g->power_on = true; + + /* + * Before probing the GPU make sure the GPU's state is cleared. This is + * relevant for rebind operations. + */ + if (g->ops.xve.reset_gpu && !g->gpu_reset_done) { + g->ops.xve.reset_gpu(g); + g->gpu_reset_done = true; + } + + if (g->ops.clock_gating.slcg_acb_load_gating_prod != NULL) { + g->ops.clock_gating.slcg_acb_load_gating_prod(g, true); + } + + /* + * Do this early so any early VMs that get made are capable of mapping + * buffers. + */ + err = nvgpu_pd_cache_init(g); + if (err) { + return err; + } + + /* init interface layer support for PMU falcon */ + err = nvgpu_flcn_sw_init(g, FALCON_ID_PMU); + if (err != 0) { + nvgpu_err(g, "failed to sw init FALCON_ID_PMU"); + goto done; + } + err = nvgpu_flcn_sw_init(g, FALCON_ID_SEC2); + if (err != 0) { + nvgpu_err(g, "failed to sw init FALCON_ID_SEC2"); + goto done; + } + err = nvgpu_flcn_sw_init(g, FALCON_ID_NVDEC); + if (err != 0) { + nvgpu_err(g, "failed to sw init FALCON_ID_NVDEC"); + goto done; + } + err = nvgpu_flcn_sw_init(g, FALCON_ID_GSPLITE); + if (err != 0) { + nvgpu_err(g, "failed to sw init FALCON_ID_GSPLITE"); + goto done; + } + + if (g->ops.acr.acr_sw_init != NULL && + nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { + g->ops.acr.acr_sw_init(g, &g->acr); + } + + if (g->ops.bios.init) { + err = g->ops.bios.init(g); + } + if (err) { + goto done; + } + + g->ops.bus.init_hw(g); + + if (g->ops.clk.disable_slowboot) { + g->ops.clk.disable_slowboot(g); + } + + g->ops.priv_ring.enable_priv_ring(g); + + /* TBD: move this after graphics init in which blcg/slcg is enabled. 
+ This function removes SlowdownOnBoot which applies 32x divider + on gpcpll bypass path. The purpose of slowdown is to save power + during boot but it also significantly slows down gk20a init on + simulation and emulation. We should remove SOB after graphics power + saving features (blcg/slcg) are enabled. For now, do it here. */ + if (g->ops.clk.init_clk_support) { + err = g->ops.clk.init_clk_support(g); + if (err) { + nvgpu_err(g, "failed to init gk20a clk"); + goto done; + } + } + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) { + err = g->ops.nvlink.init(g); + if (err) { + nvgpu_err(g, "failed to init nvlink"); + goto done; + } + } + + if (g->ops.fb.init_fbpa) { + err = g->ops.fb.init_fbpa(g); + if (err) { + nvgpu_err(g, "failed to init fbpa"); + goto done; + } + } + + if (g->ops.fb.mem_unlock) { + err = g->ops.fb.mem_unlock(g); + if (err) { + nvgpu_err(g, "failed to unlock memory"); + goto done; + } + } + + err = g->ops.fifo.reset_enable_hw(g); + + if (err) { + nvgpu_err(g, "failed to reset gk20a fifo"); + goto done; + } + + err = nvgpu_init_ltc_support(g); + if (err) { + nvgpu_err(g, "failed to init ltc"); + goto done; + } + + err = nvgpu_init_mm_support(g); + if (err) { + nvgpu_err(g, "failed to init gk20a mm"); + goto done; + } + + err = gk20a_init_fifo_support(g); + if (err) { + nvgpu_err(g, "failed to init gk20a fifo"); + goto done; + } + + if (g->ops.therm.elcg_init_idle_filters) { + g->ops.therm.elcg_init_idle_filters(g); + } + + g->ops.mc.intr_enable(g); + + /* + * Power gate the chip as per the TPC PG mask + * and the fuse_status register. + * If TPC PG mask is invalid halt the GPU poweron. + */ + g->can_tpc_powergate = false; + fuse_status = g->ops.fuse.fuse_status_opt_tpc_gpc(g, 0); + + if (g->ops.tpc.tpc_powergate) { + err = g->ops.tpc.tpc_powergate(g, fuse_status); + } + + if (err) { + nvgpu_err(g, "failed to power ON GPU"); + goto done; + } + + nvgpu_mutex_acquire(&g->tpc_pg_lock); + + if (g->can_tpc_powergate) { + if (g->ops.gr.powergate_tpc != NULL) + g->ops.gr.powergate_tpc(g); + } + + err = gk20a_enable_gr_hw(g); + if (err) { + nvgpu_err(g, "failed to enable gr"); + nvgpu_mutex_release(&g->tpc_pg_lock); + goto done; + } + + if (g->ops.pmu.is_pmu_supported(g)) { + if (g->ops.pmu.prepare_ucode) { + err = g->ops.pmu.prepare_ucode(g); + } + if (err) { + nvgpu_err(g, "failed to init pmu ucode"); + nvgpu_mutex_release(&g->tpc_pg_lock); + goto done; + } + } + + if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) { + err = gk20a_init_pstate_support(g); + if (err) { + nvgpu_err(g, "failed to init pstates"); + nvgpu_mutex_release(&g->tpc_pg_lock); + goto done; + } + } + + if (g->acr.bootstrap_hs_acr != NULL && + nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { + err = g->acr.bootstrap_hs_acr(g, &g->acr, &g->acr.acr); + if (err != 0) { + nvgpu_err(g, "ACR bootstrap failed"); + nvgpu_mutex_release(&g->tpc_pg_lock); + goto done; + } + } + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) { + err = nvgpu_init_sec2_support(g); + if (err != 0) { + nvgpu_err(g, "failed to init sec2"); + nvgpu_mutex_release(&g->tpc_pg_lock); + goto done; + } + } + + if (g->ops.pmu.is_pmu_supported(g)) { + err = nvgpu_init_pmu_support(g); + if (err) { + nvgpu_err(g, "failed to init gk20a pmu"); + nvgpu_mutex_release(&g->tpc_pg_lock); + goto done; + } + } + + err = gk20a_init_gr_support(g); + if (err) { + nvgpu_err(g, "failed to init gk20a gr"); + nvgpu_mutex_release(&g->tpc_pg_lock); + goto done; + } + + nvgpu_mutex_release(&g->tpc_pg_lock); + + if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) { + err = 
gk20a_init_pstate_pmu_support(g); + if (err) { + nvgpu_err(g, "failed to init pstates"); + goto done; + } + } + + if (g->ops.pmu_ver.clk.clk_set_boot_clk && nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) { + g->ops.pmu_ver.clk.clk_set_boot_clk(g); + } else { + err = nvgpu_clk_arb_init_arbiter(g); + if (err) { + nvgpu_err(g, "failed to init clk arb"); + goto done; + } + } + + err = nvgpu_init_therm_support(g); + if (err) { + nvgpu_err(g, "failed to init gk20a therm"); + goto done; + } + + err = g->ops.chip_init_gpu_characteristics(g); + if (err) { + nvgpu_err(g, "failed to init gk20a gpu characteristics"); + goto done; + } + +#ifdef CONFIG_GK20A_CTXSW_TRACE + err = gk20a_ctxsw_trace_init(g); + if (err) + nvgpu_warn(g, "could not initialize ctxsw tracing"); +#endif + + /* Restore the debug setting */ + g->ops.fb.set_debug_mode(g, g->mmu_debug_ctrl); + + gk20a_init_ce_support(g); + + if (g->ops.xve.available_speeds) { + u32 speed; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_ASPM) && g->ops.xve.disable_aspm) { + g->ops.xve.disable_aspm(g); + } + + g->ops.xve.available_speeds(g, &speed); + + /* Set to max speed */ + speed = 1 << (fls(speed) - 1); + err = g->ops.xve.set_speed(g, speed); + if (err) { + nvgpu_err(g, "Failed to set PCIe bus speed!"); + goto done; + } + } + +#if defined(CONFIG_TEGRA_GK20A_NVHOST) + if (nvgpu_has_syncpoints(g) && g->syncpt_unit_size) { + if (!nvgpu_mem_is_valid(&g->syncpt_mem)) { + nr_pages = DIV_ROUND_UP(g->syncpt_unit_size, PAGE_SIZE); + __nvgpu_mem_create_from_phys(g, &g->syncpt_mem, + g->syncpt_unit_base, nr_pages); + } + } +#endif + + if (g->ops.fifo.channel_resume) { + g->ops.fifo.channel_resume(g); + } + +done: + if (err) { + g->power_on = false; + } + + return err; +} + +int gk20a_wait_for_idle(struct gk20a *g) +{ + int wait_length = 150; /* 3 second overall max wait. */ + int target_usage_count = 0; + + if (!g) { + return -ENODEV; + } + + while ((nvgpu_atomic_read(&g->usage_count) != target_usage_count) + && (wait_length-- >= 0)) { + nvgpu_msleep(20); + } + + if (wait_length < 0) { + nvgpu_warn(g, "Timed out waiting for idle (%d)!\n", + nvgpu_atomic_read(&g->usage_count)); + return -ETIMEDOUT; + } + + return 0; +} + +int gk20a_init_gpu_characteristics(struct gk20a *g) +{ + __nvgpu_set_enabled(g, NVGPU_SUPPORT_PARTIAL_MAPPINGS, true); + __nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL, true); + __nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH, true); + + if (IS_ENABLED(CONFIG_SYNC)) { + __nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNC_FENCE_FDS, true); + } + + if (g->ops.mm.support_sparse && g->ops.mm.support_sparse(g)) { + __nvgpu_set_enabled(g, NVGPU_SUPPORT_SPARSE_ALLOCS, true); + } + + /* + * Fast submits are supported as long as the user doesn't request + * anything that depends on job tracking. (Here, fast means strictly no + * metadata, just the gpfifo contents are copied and gp_put updated). + */ + __nvgpu_set_enabled(g, + NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING, + true); + + /* + * Sync framework requires deferred job cleanup, wrapping syncs in FDs, + * and other heavy stuff, which prevents deterministic submits. This is + * supported otherwise, provided that the user doesn't request anything + * that depends on deferred cleanup. 
+ */
+	if (!nvgpu_channel_sync_needs_os_fence_framework(g)) {
+		__nvgpu_set_enabled(g,
+				NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
+				true);
+	}
+
+	__nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true);
+
+	__nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true);
+	__nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true);
+
+	if (g->ops.clk_arb.get_arbiter_clk_domains != NULL &&
+			g->ops.clk.support_clk_freq_controller) {
+		__nvgpu_set_enabled(g, NVGPU_SUPPORT_CLOCK_CONTROLS, true);
+	}
+
+	g->ops.gr.detect_sm_arch(g);
+
+	if (g->ops.gr.init_cyclestats) {
+		g->ops.gr.init_cyclestats(g);
+	}
+
+	g->ops.gr.get_rop_l2_en_mask(g);
+
+	return 0;
+}
+
+/*
+ * Free the gk20a struct.
+ */
+static void gk20a_free_cb(struct nvgpu_ref *refcount)
+{
+	struct gk20a *g = container_of(refcount,
+		struct gk20a, refcount);
+
+	nvgpu_log(g, gpu_dbg_shutdown, "Freeing GK20A struct!");
+
+	gk20a_ce_destroy(g);
+
+	if (g->remove_support) {
+		g->remove_support(g);
+	}
+
+	if (g->free) {
+		g->free(g);
+	}
+}
+
+/**
+ * gk20a_get() - Increment ref count on driver
+ *
+ * @g - The driver to increment
+ *
+ * This will fail if the driver is in the process of being released. In that
+ * case it will return NULL. Otherwise a pointer to the driver passed in will
+ * be returned.
+ */
+struct gk20a * __must_check gk20a_get(struct gk20a *g)
+{
+	int success;
+
+	/*
+	 * Handle the possibility we are still freeing the gk20a struct while
+	 * gk20a_get() is called. Unlikely but plausible race condition. Ideally
+	 * the code will never be in such a situation that this race is
+	 * possible.
+	 */
+	success = nvgpu_ref_get_unless_zero(&g->refcount);
+
+	nvgpu_log(g, gpu_dbg_shutdown, "GET: refs currently %d %s",
+		nvgpu_atomic_read(&g->refcount.refcount),
+		success ? "" : "(FAILED)");
+
+	return success ? g : NULL;
+}
+
+/**
+ * gk20a_put() - Decrement ref count on driver
+ *
+ * @g - The driver to decrement
+ *
+ * Decrement the driver ref-count. If necessary, also free the underlying
+ * driver memory.
+ */
+void gk20a_put(struct gk20a *g)
+{
+	/*
+	 * Note - this is racy; two instances of this could run before the
+	 * actual kref_put() runs, so you could see something like:
+	 *
+	 *  ... PUT: refs currently 2
+	 *  ... PUT: refs currently 2
+	 *  ... Freeing GK20A struct!
+	 */
+	nvgpu_log(g, gpu_dbg_shutdown, "PUT: refs currently %d",
+		nvgpu_atomic_read(&g->refcount.refcount));
+
+	nvgpu_ref_put(&g->refcount, gk20a_free_cb);
+}
--
cgit v1.2.2
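
For readers unfamiliar with the nvgpu lifetime model, the sketch below shows how
a caller would typically pair gk20a_get() and gk20a_put() around work that
touches the GPU, as described in the comments above. It is illustrative only and
not part of this patch: do_work_with_gpu() is a hypothetical helper, and the
declarations of gk20a_get(), gk20a_put(), and gk20a_wait_for_idle() are assumed
to come from gk20a.h.

/*
 * Illustrative sketch (not part of the patch above): guarding GPU work with
 * the reference-counting helpers gk20a_get()/gk20a_put().
 * do_work_with_gpu() is a hypothetical caller.
 */
#include "gk20a.h"

static int do_work_with_gpu(struct gk20a *g)
{
	int err;

	/* Take a reference; gk20a_get() returns NULL if the driver is being freed. */
	g = gk20a_get(g);
	if (!g)
		return -ENODEV;

	/* The device is safe to use here; for example, wait for it to go idle. */
	err = gk20a_wait_for_idle(g);

	/* Drop the reference; the last put invokes gk20a_free_cb(). */
	gk20a_put(g);

	return err;
}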