Diffstat (limited to 'include/gk20a/gk20a.c')
-rw-r--r--  include/gk20a/gk20a.c  590
1 file changed, 590 insertions, 0 deletions
diff --git a/include/gk20a/gk20a.c b/include/gk20a/gk20a.c
new file mode 100644
index 0000000..c3068b7
--- /dev/null
+++ b/include/gk20a/gk20a.c
@@ -0,0 +1,590 @@
/*
 * GK20A Graphics
 *
 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/nvgpu_common.h>
#include <nvgpu/kmem.h>
#include <nvgpu/allocator.h>
#include <nvgpu/timers.h>
#include <nvgpu/soc.h>
#include <nvgpu/enabled.h>
#include <nvgpu/pmu.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/ltc.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/mm.h>
#include <nvgpu/ctxsw_trace.h>
#include <nvgpu/clk_arb.h>
#include <nvgpu/therm.h>
#include <nvgpu/mc.h>
#include <nvgpu/channel_sync.h>

#include <trace/events/gk20a.h>

#include "gk20a.h"

#include "dbg_gpu_gk20a.h"
#include "pstate/pstate.h"

void __nvgpu_check_gpu_state(struct gk20a *g)
{
        u32 boot_0 = 0xffffffff;

        boot_0 = nvgpu_mc_boot_0(g, NULL, NULL, NULL);
        if (boot_0 == 0xffffffff) {
                nvgpu_err(g, "GPU has disappeared from bus!!");
                nvgpu_err(g, "Rebooting system!!");
                nvgpu_kernel_restart(NULL);
        }
}
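
/*
 * Sketch of a hypothetical caller (an assumption, not part of the
 * original file): reads from a device that has fallen off the bus
 * return all ones, which is why 0xffffffff is the sentinel above. A
 * periodic health check could poll:
 *
 *      if (g->power_on)
 *              __nvgpu_check_gpu_state(g);
 */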

void __gk20a_warn_on_no_regs(void)
{
        WARN_ONCE(1, "Attempted access to GPU regs after unmapping!");
}

static void gk20a_mask_interrupts(struct gk20a *g)
{
        if (g->ops.mc.intr_mask != NULL) {
                g->ops.mc.intr_mask(g);
        }

        if (g->ops.mc.log_pending_intrs != NULL) {
                g->ops.mc.log_pending_intrs(g);
        }
}

int gk20a_prepare_poweroff(struct gk20a *g)
{
        int ret = 0;

        nvgpu_log_fn(g, " ");

        if (g->ops.fifo.channel_suspend) {
                ret = g->ops.fifo.channel_suspend(g);
                if (ret) {
                        return ret;
                }
        }

        /* disable elpg before gr or fifo suspend */
        if (g->ops.pmu.is_pmu_supported(g)) {
                ret |= nvgpu_pmu_destroy(g);
        }

        if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
                ret |= nvgpu_sec2_destroy(g);
        }

        ret |= gk20a_gr_suspend(g);
        ret |= nvgpu_mm_suspend(g);
        ret |= gk20a_fifo_suspend(g);

        gk20a_ce_suspend(g);

        /* Disable GPCPLL */
        if (g->ops.clk.suspend_clk_support) {
                ret |= g->ops.clk.suspend_clk_support(g);
        }

        if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
                gk20a_deinit_pstate_support(g);
        }

        gk20a_mask_interrupts(g);

        g->power_on = false;

        return ret;
}
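
/*
 * Note on the "ret |=" accumulation above: every teardown step runs even
 * if an earlier one failed, and ret stays nonzero if any step failed.
 * OR-ing distinct negative errnos can blend them into a value that is no
 * longer a specific error code, so a hypothetical caller should treat
 * any nonzero return as "poweroff incomplete" rather than decode it:
 *
 *      if (gk20a_prepare_poweroff(g) != 0)
 *              nvgpu_err(g, "poweroff incomplete");
 */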

int gk20a_finalize_poweron(struct gk20a *g)
{
        int err = 0;
#if defined(CONFIG_TEGRA_GK20A_NVHOST)
        u32 nr_pages;
#endif

        u32 fuse_status;

        nvgpu_log_fn(g, " ");

        if (g->power_on) {
                return 0;
        }

        g->power_on = true;

        /*
         * Before probing the GPU make sure the GPU's state is cleared. This is
         * relevant for rebind operations.
         */
        if (g->ops.xve.reset_gpu && !g->gpu_reset_done) {
                g->ops.xve.reset_gpu(g);
                g->gpu_reset_done = true;
        }

        if (g->ops.clock_gating.slcg_acb_load_gating_prod != NULL) {
                g->ops.clock_gating.slcg_acb_load_gating_prod(g, true);
        }

        /*
         * Do this early so any early VMs that get made are capable of mapping
         * buffers.
         */
        err = nvgpu_pd_cache_init(g);
        if (err) {
                return err;
        }

        /* init interface layer support for PMU falcon */
        err = nvgpu_flcn_sw_init(g, FALCON_ID_PMU);
        if (err != 0) {
                nvgpu_err(g, "failed to sw init FALCON_ID_PMU");
                goto done;
        }
        err = nvgpu_flcn_sw_init(g, FALCON_ID_SEC2);
        if (err != 0) {
                nvgpu_err(g, "failed to sw init FALCON_ID_SEC2");
                goto done;
        }
        err = nvgpu_flcn_sw_init(g, FALCON_ID_NVDEC);
        if (err != 0) {
                nvgpu_err(g, "failed to sw init FALCON_ID_NVDEC");
                goto done;
        }
        err = nvgpu_flcn_sw_init(g, FALCON_ID_GSPLITE);
        if (err != 0) {
                nvgpu_err(g, "failed to sw init FALCON_ID_GSPLITE");
                goto done;
        }

        if (g->ops.acr.acr_sw_init != NULL &&
                        nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
                g->ops.acr.acr_sw_init(g, &g->acr);
        }

        if (g->ops.bios.init) {
                err = g->ops.bios.init(g);
        }
        if (err) {
                goto done;
        }

        g->ops.bus.init_hw(g);

        if (g->ops.clk.disable_slowboot) {
                g->ops.clk.disable_slowboot(g);
        }

        g->ops.priv_ring.enable_priv_ring(g);

        /*
         * TBD: move this after graphics init, in which blcg/slcg is enabled.
         * This function removes SlowdownOnBoot, which applies a 32x divider
         * on the gpcpll bypass path. The purpose of the slowdown is to save
         * power during boot, but it also significantly slows down gk20a init
         * on simulation and emulation. We should remove SOB after the
         * graphics power-saving features (blcg/slcg) are enabled. For now,
         * do it here.
         */
        if (g->ops.clk.init_clk_support) {
                err = g->ops.clk.init_clk_support(g);
                if (err) {
                        nvgpu_err(g, "failed to init gk20a clk");
                        goto done;
                }
        }

        if (nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) {
                err = g->ops.nvlink.init(g);
                if (err) {
                        nvgpu_err(g, "failed to init nvlink");
                        goto done;
                }
        }

        if (g->ops.fb.init_fbpa) {
                err = g->ops.fb.init_fbpa(g);
                if (err) {
                        nvgpu_err(g, "failed to init fbpa");
                        goto done;
                }
        }

        if (g->ops.fb.mem_unlock) {
                err = g->ops.fb.mem_unlock(g);
                if (err) {
                        nvgpu_err(g, "failed to unlock memory");
                        goto done;
                }
        }

        err = g->ops.fifo.reset_enable_hw(g);
        if (err) {
                nvgpu_err(g, "failed to reset gk20a fifo");
                goto done;
        }

        err = nvgpu_init_ltc_support(g);
        if (err) {
                nvgpu_err(g, "failed to init ltc");
                goto done;
        }

        err = nvgpu_init_mm_support(g);
        if (err) {
                nvgpu_err(g, "failed to init gk20a mm");
                goto done;
        }

        err = gk20a_init_fifo_support(g);
        if (err) {
                nvgpu_err(g, "failed to init gk20a fifo");
                goto done;
        }

        if (g->ops.therm.elcg_init_idle_filters) {
                g->ops.therm.elcg_init_idle_filters(g);
        }

        g->ops.mc.intr_enable(g);

        /*
         * Power gate the chip as per the TPC PG mask and the fuse_status
         * register. If the TPC PG mask is invalid, halt GPU poweron.
         */
        g->can_tpc_powergate = false;
        fuse_status = g->ops.fuse.fuse_status_opt_tpc_gpc(g, 0);

        if (g->ops.tpc.tpc_powergate) {
                err = g->ops.tpc.tpc_powergate(g, fuse_status);
        }

        if (err) {
                nvgpu_err(g, "failed to power ON GPU");
                goto done;
        }

        nvgpu_mutex_acquire(&g->tpc_pg_lock);
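
        /*
         * Note: every error path from here until the matching release
         * after gk20a_init_gr_support() must drop tpc_pg_lock before
         * jumping to done, as each branch below does explicitly.
         */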

        if (g->can_tpc_powergate) {
                if (g->ops.gr.powergate_tpc != NULL) {
                        g->ops.gr.powergate_tpc(g);
                }
        }

        err = gk20a_enable_gr_hw(g);
        if (err) {
                nvgpu_err(g, "failed to enable gr");
                nvgpu_mutex_release(&g->tpc_pg_lock);
                goto done;
        }

        if (g->ops.pmu.is_pmu_supported(g)) {
                if (g->ops.pmu.prepare_ucode) {
                        err = g->ops.pmu.prepare_ucode(g);
                }
                if (err) {
                        nvgpu_err(g, "failed to init pmu ucode");
                        nvgpu_mutex_release(&g->tpc_pg_lock);
                        goto done;
                }
        }

        if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
                err = gk20a_init_pstate_support(g);
                if (err) {
                        nvgpu_err(g, "failed to init pstates");
                        nvgpu_mutex_release(&g->tpc_pg_lock);
                        goto done;
                }
        }

        if (g->acr.bootstrap_hs_acr != NULL &&
                        nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
                err = g->acr.bootstrap_hs_acr(g, &g->acr, &g->acr.acr);
                if (err != 0) {
                        nvgpu_err(g, "ACR bootstrap failed");
                        nvgpu_mutex_release(&g->tpc_pg_lock);
                        goto done;
                }
        }

        if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
                err = nvgpu_init_sec2_support(g);
                if (err != 0) {
                        nvgpu_err(g, "failed to init sec2");
                        nvgpu_mutex_release(&g->tpc_pg_lock);
                        goto done;
                }
        }

        if (g->ops.pmu.is_pmu_supported(g)) {
                err = nvgpu_init_pmu_support(g);
                if (err) {
                        nvgpu_err(g, "failed to init gk20a pmu");
                        nvgpu_mutex_release(&g->tpc_pg_lock);
                        goto done;
                }
        }

        err = gk20a_init_gr_support(g);
        if (err) {
                nvgpu_err(g, "failed to init gk20a gr");
                nvgpu_mutex_release(&g->tpc_pg_lock);
                goto done;
        }

        nvgpu_mutex_release(&g->tpc_pg_lock);

        if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
                err = gk20a_init_pstate_pmu_support(g);
                if (err) {
                        nvgpu_err(g, "failed to init pstates");
                        goto done;
                }
        }

        if (g->ops.pmu_ver.clk.clk_set_boot_clk &&
                        nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
                g->ops.pmu_ver.clk.clk_set_boot_clk(g);
        } else {
                err = nvgpu_clk_arb_init_arbiter(g);
                if (err) {
                        nvgpu_err(g, "failed to init clk arb");
                        goto done;
                }
        }

        err = nvgpu_init_therm_support(g);
        if (err) {
                nvgpu_err(g, "failed to init gk20a therm");
                goto done;
        }

        err = g->ops.chip_init_gpu_characteristics(g);
        if (err) {
                nvgpu_err(g, "failed to init gk20a gpu characteristics");
                goto done;
        }

#ifdef CONFIG_GK20A_CTXSW_TRACE
        err = gk20a_ctxsw_trace_init(g);
        if (err) {
                nvgpu_warn(g, "could not initialize ctxsw tracing");
        }
#endif

        /* Restore the debug setting */
        g->ops.fb.set_debug_mode(g, g->mmu_debug_ctrl);

        gk20a_init_ce_support(g);

        if (g->ops.xve.available_speeds) {
                u32 speed;

                if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_ASPM) &&
                                g->ops.xve.disable_aspm) {
                        g->ops.xve.disable_aspm(g);
                }

                g->ops.xve.available_speeds(g, &speed);

                /* Set to max speed */
                speed = 1 << (fls(speed) - 1);
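                /*
                 * fls() returns the bit position of the highest set bit,
                 * so the line above keeps only the fastest advertised
                 * speed: e.g. an available mask of 0x7 collapses to 0x4.
                 */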
                err = g->ops.xve.set_speed(g, speed);
                if (err) {
                        nvgpu_err(g, "Failed to set PCIe bus speed!");
                        goto done;
                }
        }

#if defined(CONFIG_TEGRA_GK20A_NVHOST)
        if (nvgpu_has_syncpoints(g) && g->syncpt_unit_size) {
                if (!nvgpu_mem_is_valid(&g->syncpt_mem)) {
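                        /*
                         * Round up so a unit size that is not a multiple
                         * of PAGE_SIZE still maps its final partial page.
                         */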
                        nr_pages = DIV_ROUND_UP(g->syncpt_unit_size, PAGE_SIZE);
                        __nvgpu_mem_create_from_phys(g, &g->syncpt_mem,
                                        g->syncpt_unit_base, nr_pages);
                }
        }
#endif

        if (g->ops.fifo.channel_resume) {
                g->ops.fifo.channel_resume(g);
        }

done:
        if (err) {
                g->power_on = false;
        }

        return err;
}
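
/*
 * Illustrative pairing sketch (assumed caller, not part of this file):
 * an OS power-management layer is expected to bracket GPU use with the
 * entry points above, e.g.:
 *
 *      resume:  err = gk20a_finalize_poweron(g);
 *      suspend: err = gk20a_wait_for_idle(g);
 *               if (err == 0)
 *                       err = gk20a_prepare_poweroff(g);
 */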

int gk20a_wait_for_idle(struct gk20a *g)
{
        int wait_length = 150; /* 3 second overall max wait. */
        int target_usage_count = 0;

        if (!g) {
                return -ENODEV;
        }

        while ((nvgpu_atomic_read(&g->usage_count) != target_usage_count)
                        && (wait_length-- >= 0)) {
                nvgpu_msleep(20);
        }

        /*
         * Re-check the usage count rather than wait_length: the loop can
         * exit with wait_length already negative even though the GPU went
         * idle during the final sleep.
         */
        if (nvgpu_atomic_read(&g->usage_count) != target_usage_count) {
                nvgpu_warn(g, "Timed out waiting for idle (%d)!\n",
                                nvgpu_atomic_read(&g->usage_count));
                return -ETIMEDOUT;
        }

        return 0;
}

int gk20a_init_gpu_characteristics(struct gk20a *g)
{
        __nvgpu_set_enabled(g, NVGPU_SUPPORT_PARTIAL_MAPPINGS, true);
        __nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL, true);
        __nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH, true);

        if (IS_ENABLED(CONFIG_SYNC)) {
                __nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNC_FENCE_FDS, true);
        }

        if (g->ops.mm.support_sparse && g->ops.mm.support_sparse(g)) {
                __nvgpu_set_enabled(g, NVGPU_SUPPORT_SPARSE_ALLOCS, true);
        }

        /*
         * Fast submits are supported as long as the user doesn't request
         * anything that depends on job tracking. (Here, fast means strictly no
         * metadata, just the gpfifo contents are copied and gp_put updated).
         */
        __nvgpu_set_enabled(g,
                        NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING,
                        true);

        /*
         * Sync framework requires deferred job cleanup, wrapping syncs in FDs,
         * and other heavy stuff, which prevents deterministic submits. This is
         * supported otherwise, provided that the user doesn't request anything
         * that depends on deferred cleanup.
         */
        if (!nvgpu_channel_sync_needs_os_fence_framework(g)) {
                __nvgpu_set_enabled(g,
                                NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
                                true);
        }

        __nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true);

        __nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true);
        __nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true);

        if (g->ops.clk_arb.get_arbiter_clk_domains != NULL &&
                        g->ops.clk.support_clk_freq_controller) {
                __nvgpu_set_enabled(g, NVGPU_SUPPORT_CLOCK_CONTROLS, true);
        }

        g->ops.gr.detect_sm_arch(g);

        if (g->ops.gr.init_cyclestats) {
                g->ops.gr.init_cyclestats(g);
        }

        g->ops.gr.get_rop_l2_en_mask(g);

        return 0;
}
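
/*
 * The flags set above via __nvgpu_set_enabled() are what
 * nvgpu_is_enabled() tests throughout the driver. Hypothetical consumer
 * sketch (not from this file):
 *
 *      if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG))
 *              (expose TSG-based channel grouping to userspace)
 */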

/*
 * Free the gk20a struct.
 */
static void gk20a_free_cb(struct nvgpu_ref *refcount)
{
        struct gk20a *g = container_of(refcount,
                        struct gk20a, refcount);

        nvgpu_log(g, gpu_dbg_shutdown, "Freeing GK20A struct!");

        gk20a_ce_destroy(g);

        if (g->remove_support) {
                g->remove_support(g);
        }

        if (g->free) {
                g->free(g);
        }
}

/**
 * gk20a_get() - Increment ref count on driver
 *
 * @g - The driver to increment
 *
 * This will fail if the driver is in the process of being released. In that
 * case it will return NULL. Otherwise a pointer to the driver passed in will
 * be returned.
 */
struct gk20a * __must_check gk20a_get(struct gk20a *g)
{
        int success;

        /*
         * Handle the possibility we are still freeing the gk20a struct while
         * gk20a_get() is called. Unlikely but plausible race condition.
         * Ideally the code will never be in such a situation that this race
         * is possible.
         */
        success = nvgpu_ref_get_unless_zero(&g->refcount);

        nvgpu_log(g, gpu_dbg_shutdown, "GET: refs currently %d %s",
                        nvgpu_atomic_read(&g->refcount.refcount),
                        success ? "" : "(FAILED)");

        return success ? g : NULL;
}

/**
 * gk20a_put() - Decrement ref count on driver
 *
 * @g - The driver to decrement
 *
 * Decrement the driver ref-count. If necessary also free the underlying driver
 * memory.
 */
void gk20a_put(struct gk20a *g)
{
        /*
         * Note - this is racy; two instances of this could run before the
         * actual kref_put() runs, so you could see something like:
         *
         *   ... PUT: refs currently 2
         *   ... PUT: refs currently 2
         *   ... Freeing GK20A struct!
         */
        nvgpu_log(g, gpu_dbg_shutdown, "PUT: refs currently %d",
                        nvgpu_atomic_read(&g->refcount.refcount));

        nvgpu_ref_put(&g->refcount, gk20a_free_cb);
}
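
/*
 * Illustrative usage sketch (hypothetical caller, not part of this
 * file): code that must keep the driver alive across a blocking
 * operation pairs gk20a_get() with gk20a_put():
 *
 *      struct gk20a *ref = gk20a_get(g);
 *
 *      if (ref == NULL)
 *              return -ENODEV;    (driver is being torn down)
 *
 *      do_blocking_work(ref);     (placeholder for real work)
 *      gk20a_put(ref);
 */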