diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 503 |
1 files changed, 503 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c new file mode 100644 index 00000000..5dc60917 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -0,0 +1,503 @@ | |||
1 | /* | ||
2 | * GK20A Graphics | ||
3 | * | ||
4 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <linux/reboot.h> | ||
26 | |||
27 | #include <nvgpu/nvgpu_common.h> | ||
28 | #include <nvgpu/kmem.h> | ||
29 | #include <nvgpu/allocator.h> | ||
30 | #include <nvgpu/timers.h> | ||
31 | #include <nvgpu/soc.h> | ||
32 | #include <nvgpu/enabled.h> | ||
33 | #include <nvgpu/pmu.h> | ||
34 | #include <nvgpu/gmmu.h> | ||
35 | #include <nvgpu/ltc.h> | ||
36 | #include <nvgpu/vidmem.h> | ||
37 | #include <nvgpu/mm.h> | ||
38 | #include <nvgpu/ctxsw_trace.h> | ||
39 | |||
40 | #include <trace/events/gk20a.h> | ||
41 | |||
42 | #include "gk20a.h" | ||
43 | #include "channel_sync_gk20a.h" | ||
44 | |||
45 | #include "dbg_gpu_gk20a.h" | ||
46 | #include "mc_gk20a.h" | ||
47 | #include "hal.h" | ||
48 | #include "vgpu/vgpu.h" | ||
49 | #include "bus_gk20a.h" | ||
50 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | ||
51 | #include "pstate/pstate.h" | ||
52 | #endif | ||
53 | |||
54 | #ifdef CONFIG_TEGRA_19x_GPU | ||
55 | #include "nvgpu_gpuid_t19x.h" | ||
56 | #endif | ||
57 | |||
58 | void __nvgpu_check_gpu_state(struct gk20a *g) | ||
59 | { | ||
60 | u32 boot_0 = g->ops.mc.boot_0(g, NULL, NULL, NULL); | ||
61 | |||
62 | if (boot_0 == 0xffffffff) { | ||
63 | pr_err("nvgpu: GPU has disappeared from bus!!\n"); | ||
64 | pr_err("nvgpu: Rebooting system!!\n"); | ||
65 | kernel_restart(NULL); | ||
66 | } | ||
67 | } | ||
68 | |||
/*
 * Called from the register-access path when the BAR mappings have
 * already been torn down; emits a one-time warning with a backtrace so
 * the offending access can be tracked down.
 */
void __gk20a_warn_on_no_regs(void)
{
	WARN_ONCE(1, "Attempted access to GPU regs after unmapping!");
}
73 | |||
74 | static int gk20a_detect_chip(struct gk20a *g) | ||
75 | { | ||
76 | struct nvgpu_gpu_params *p = &g->params; | ||
77 | u32 val; | ||
78 | |||
79 | if (p->gpu_arch) | ||
80 | return 0; | ||
81 | |||
82 | val = gk20a_mc_boot_0(g, &p->gpu_arch, &p->gpu_impl, &p->gpu_rev); | ||
83 | |||
84 | gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n", | ||
85 | g->params.gpu_arch, | ||
86 | g->params.gpu_impl, | ||
87 | g->params.gpu_rev); | ||
88 | |||
89 | return gpu_init_hal(g); | ||
90 | } | ||
91 | |||
/*
 * Quiesce the GPU in preparation for powering it off.
 *
 * Suspends channels first (bailing out early if that fails), then tears
 * down the PMU, GR, MM, FIFO and CE units, and finally disables the
 * clocks and pstate support. The ordering is deliberate — see the
 * inline comments below.
 *
 * NOTE(review): after the channel suspend, errors are accumulated with
 * bitwise OR, so a nonzero return signals "something failed" but is not
 * necessarily a meaningful errno value. Teardown continues past
 * individual failures on purpose.
 *
 * Returns 0 on success, nonzero if any suspend step failed.
 */
int gk20a_prepare_poweroff(struct gk20a *g)
{
	int ret = 0;

	gk20a_dbg_fn("");

	/* Channels must go idle first; abort power-off if they cannot. */
	ret = gk20a_channel_suspend(g);
	if (ret)
		return ret;

	/* disable elpg before gr or fifo suspend */
	if (g->ops.pmu.is_pmu_supported(g))
		ret |= nvgpu_pmu_destroy(g);

	ret |= gk20a_gr_suspend(g);
	ret |= nvgpu_mm_suspend(g);
	ret |= gk20a_fifo_suspend(g);

	gk20a_ce_suspend(g);

	/* Disable GPCPLL */
	if (g->ops.clk.suspend_clk_support)
		ret |= g->ops.clk.suspend_clk_support(g);

#ifdef CONFIG_ARCH_TEGRA_18x_SOC
	if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE))
		gk20a_deinit_pstate_support(g);
#endif
	/* Mark powered off even if some step above reported an error. */
	g->power_on = false;

	return ret;
}
124 | |||
/*
 * Bring the GPU fully out of power-off.
 *
 * Detects the chip, optionally resets it, and then initializes every
 * unit — PD cache, falcons, BIOS, bus, clocks, FB, FIFO, LTC, MM,
 * therm-filters, interrupts, GR, PMU, pstates, clk arbiter, CE, and
 * (on PCIe parts) link speed — in dependency order. On failure the
 * power_on flag is cleared again before returning.
 *
 * NOTE(review): the call ordering below follows hardware init
 * dependencies; do not reorder.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
int gk20a_finalize_poweron(struct gk20a *g)
{
	int err;
#if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_TEGRA_19x_GPU)
	u32 nr_pages;
#endif

	gk20a_dbg_fn("");

	/* Idempotent: nothing to do if already powered on. */
	if (g->power_on)
		return 0;

	g->power_on = true;

	err = gk20a_detect_chip(g);
	if (err)
		goto done;

	/*
	 * Before probing the GPU make sure the GPU's state is cleared. This is
	 * relevant for rebind operations.
	 */
	if (g->ops.xve.reset_gpu && !g->gpu_reset_done) {
		g->ops.xve.reset_gpu(g);
		g->gpu_reset_done = true;
	}

	/*
	 * Do this early so any early VMs that get made are capable of mapping
	 * buffers.
	 */
	err = nvgpu_pd_cache_init(g);
	if (err)
		return err;
		/*
		 * NOTE(review): this early return bypasses the 'done:' label,
		 * leaving g->power_on set to true on failure — confirm whether
		 * that is intentional or should be 'goto done'.
		 */

	/* init interface layer support for PMU falcon */
	nvgpu_flcn_sw_init(g, FALCON_ID_PMU);
	nvgpu_flcn_sw_init(g, FALCON_ID_SEC2);
	nvgpu_flcn_sw_init(g, FALCON_ID_NVDEC);

	if (g->ops.bios.init)
		err = g->ops.bios.init(g);
	if (err)
		goto done;

	g->ops.bus.init_hw(g);

	if (g->ops.clk.disable_slowboot)
		g->ops.clk.disable_slowboot(g);

	gk20a_enable_priv_ring(g);

	/* TBD: move this after graphics init in which blcg/slcg is enabled.
	   This function removes SlowdownOnBoot which applies 32x divider
	   on gpcpll bypass path. The purpose of slowdown is to save power
	   during boot but it also significantly slows down gk20a init on
	   simulation and emulation. We should remove SOB after graphics power
	   saving features (blcg/slcg) are enabled. For now, do it here. */
	if (g->ops.clk.init_clk_support) {
		err = g->ops.clk.init_clk_support(g);
		if (err) {
			nvgpu_err(g, "failed to init gk20a clk");
			goto done;
		}
	}

	if (g->ops.fb.mem_unlock) {
		err = g->ops.fb.mem_unlock(g);
		if (err) {
			nvgpu_err(g, "failed to unlock memory");
			goto done;
		}
	}

	err = g->ops.fifo.reset_enable_hw(g);

	if (err) {
		nvgpu_err(g, "failed to reset gk20a fifo");
		goto done;
	}

	err = nvgpu_init_ltc_support(g);
	if (err) {
		nvgpu_err(g, "failed to init ltc");
		goto done;
	}

	err = nvgpu_init_mm_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a mm");
		goto done;
	}

	err = gk20a_init_fifo_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a fifo");
		goto done;
	}

	if (g->ops.therm.elcg_init_idle_filters)
		g->ops.therm.elcg_init_idle_filters(g);

	/* Interrupts can be enabled once the core units are up. */
	g->ops.mc.intr_enable(g);

	err = gk20a_enable_gr_hw(g);
	if (err) {
		nvgpu_err(g, "failed to enable gr");
		goto done;
	}

	/* Load PMU ucode before PMU (and pstate) support is initialized. */
	if (g->ops.pmu.is_pmu_supported(g)) {
		if (g->ops.pmu.prepare_ucode)
			err = g->ops.pmu.prepare_ucode(g);
		if (err) {
			nvgpu_err(g, "failed to init pmu ucode");
			goto done;
		}
	}

#ifdef CONFIG_ARCH_TEGRA_18x_SOC
	if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
		err = gk20a_init_pstate_support(g);
		if (err) {
			nvgpu_err(g, "failed to init pstates");
			goto done;
		}
	}
#endif

	if (g->ops.pmu.is_pmu_supported(g)) {
		err = nvgpu_init_pmu_support(g);
		if (err) {
			nvgpu_err(g, "failed to init gk20a pmu");
			goto done;
		}
	}

	err = gk20a_init_gr_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a gr");
		goto done;
	}

#ifdef CONFIG_ARCH_TEGRA_18x_SOC
	/* Pstate PMU support needs both pstates and the PMU up. */
	if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
		err = gk20a_init_pstate_pmu_support(g);
		if (err) {
			nvgpu_err(g, "failed to init pstates");
			goto done;
		}
	}

	err = nvgpu_clk_arb_init_arbiter(g);
	if (err) {
		nvgpu_err(g, "failed to init clk arb");
		goto done;
	}
#endif

	err = gk20a_init_therm_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a therm");
		goto done;
	}

	err = g->ops.chip_init_gpu_characteristics(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a gpu characteristics");
		goto done;
	}

#ifdef CONFIG_GK20A_CTXSW_TRACE
	/* Tracing is best-effort: warn but do not fail the power-on. */
	err = gk20a_ctxsw_trace_init(g);
	if (err)
		nvgpu_warn(g, "could not initialize ctxsw tracing");
#endif

	/* Restore the debug setting */
	g->ops.fb.set_debug_mode(g, g->mmu_debug_ctrl);

	gk20a_channel_resume(g);

	gk20a_init_ce_support(g);

	nvgpu_init_mm_ce_context(g);

	/* PCIe parts: disable ASPM if unsupported and train to max speed. */
	if (g->ops.xve.available_speeds) {
		u32 speed;

		if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_ASPM) && g->ops.xve.disable_aspm)
			g->ops.xve.disable_aspm(g);

		g->ops.xve.available_speeds(g, &speed);

		/* Set to max speed */
		speed = 1 << (fls(speed) - 1);
		err = g->ops.xve.set_speed(g, speed);
		if (err) {
			nvgpu_err(g, "Failed to set PCIe bus speed!");
			goto done;
		}
	}

	nvgpu_vidmem_thread_unpause(&g->mm);

#if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_TEGRA_19x_GPU)
	/* Map the syncpoint unit aperture once, on first power-on. */
	if (gk20a_platform_has_syncpoints(g) && g->syncpt_unit_size) {
		if (!nvgpu_mem_is_valid(&g->syncpt_mem)) {
			nr_pages = DIV_ROUND_UP(g->syncpt_unit_size, PAGE_SIZE);
			__nvgpu_mem_create_from_phys(g, &g->syncpt_mem,
					g->syncpt_unit_base, nr_pages);
		}
	}
#endif

done:
	/* Any failure above means we are not actually powered on. */
	if (err)
		g->power_on = false;

	return err;
}
346 | |||
347 | /* | ||
348 | * Check if the device can go busy. Basically if the driver is currently | ||
349 | * in the process of dying then do not let new places make the driver busy. | ||
350 | */ | ||
351 | int gk20a_can_busy(struct gk20a *g) | ||
352 | { | ||
353 | if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) | ||
354 | return 0; | ||
355 | return 1; | ||
356 | } | ||
357 | |||
358 | int gk20a_wait_for_idle(struct gk20a *g) | ||
359 | { | ||
360 | int wait_length = 150; /* 3 second overall max wait. */ | ||
361 | int target_usage_count = 0; | ||
362 | |||
363 | if (!g) | ||
364 | return -ENODEV; | ||
365 | |||
366 | if (g->user_railgate_disabled) | ||
367 | target_usage_count = 1; | ||
368 | |||
369 | while ((nvgpu_atomic_read(&g->usage_count) != target_usage_count) | ||
370 | && (wait_length-- >= 0)) | ||
371 | nvgpu_msleep(20); | ||
372 | |||
373 | if (wait_length < 0) { | ||
374 | pr_warn("%s: Timed out waiting for idle (%d)!\n", | ||
375 | __func__, nvgpu_atomic_read(&g->usage_count)); | ||
376 | return -ETIMEDOUT; | ||
377 | } | ||
378 | |||
379 | return 0; | ||
380 | } | ||
381 | |||
/*
 * Populate the common (chip-independent) capability flags reported to
 * user-space, then query per-chip GR characteristics via the HAL.
 *
 * Flags that depend on optional HAL hooks or platform features are only
 * set when the corresponding hook/feature is present. Always returns 0.
 */
int gk20a_init_gpu_characteristics(struct gk20a *g)
{
	/* Capabilities supported by all chips using this common code. */
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_PARTIAL_MAPPINGS, true);
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL, true);
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH, true);

	/* Sync-fence FDs need the kernel sync framework compiled in. */
	if (IS_ENABLED(CONFIG_SYNC))
		__nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNC_FENCE_FDS, true);

	if (g->ops.mm.support_sparse && g->ops.mm.support_sparse(g))
		__nvgpu_set_enabled(g, NVGPU_SUPPORT_SPARSE_ALLOCS, true);

	if (gk20a_platform_has_syncpoints(g))
		__nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, true);

	/*
	 * Fast submits are supported as long as the user doesn't request
	 * anything that depends on job tracking. (Here, fast means strictly no
	 * metadata, just the gpfifo contents are copied and gp_put updated).
	 */
	__nvgpu_set_enabled(g,
			NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING,
			true);

	/*
	 * Sync framework requires deferred job cleanup, wrapping syncs in FDs,
	 * and other heavy stuff, which prevents deterministic submits. This is
	 * supported otherwise, provided that the user doesn't request anything
	 * that depends on deferred cleanup.
	 */
	if (!gk20a_channel_sync_needs_sync_framework(g))
		__nvgpu_set_enabled(g,
				NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
				true);

	__nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true);

	__nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true);
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true);

	/* Clock controls only make sense where an arbiter exists. */
	if (g->ops.clk_arb.get_arbiter_clk_domains)
		__nvgpu_set_enabled(g, NVGPU_SUPPORT_CLOCK_CONTROLS, true);

	/* Query chip-specific GR properties through the HAL. */
	g->ops.gr.detect_sm_arch(g);

	if (g->ops.gr.init_cyclestats)
		g->ops.gr.init_cyclestats(g);

	g->ops.gr.get_rop_l2_en_mask(g);

	return 0;
}
434 | |||
435 | /* | ||
436 | * Free the gk20a struct. | ||
437 | */ | ||
438 | static void gk20a_free_cb(struct nvgpu_ref *refcount) | ||
439 | { | ||
440 | struct gk20a *g = container_of(refcount, | ||
441 | struct gk20a, refcount); | ||
442 | |||
443 | gk20a_dbg(gpu_dbg_shutdown, "Freeing GK20A struct!"); | ||
444 | |||
445 | gk20a_ce_destroy(g); | ||
446 | |||
447 | if (g->remove_support) | ||
448 | g->remove_support(g); | ||
449 | |||
450 | if (g->free) | ||
451 | g->free(g); | ||
452 | } | ||
453 | |||
454 | /** | ||
455 | * gk20a_get() - Increment ref count on driver | ||
456 | * | ||
457 | * @g The driver to increment | ||
458 | * This will fail if the driver is in the process of being released. In that | ||
459 | * case it will return NULL. Otherwise a pointer to the driver passed in will | ||
460 | * be returned. | ||
461 | */ | ||
462 | struct gk20a * __must_check gk20a_get(struct gk20a *g) | ||
463 | { | ||
464 | int success; | ||
465 | |||
466 | /* | ||
467 | * Handle the possibility we are still freeing the gk20a struct while | ||
468 | * gk20a_get() is called. Unlikely but plausible race condition. Ideally | ||
469 | * the code will never be in such a situation that this race is | ||
470 | * possible. | ||
471 | */ | ||
472 | success = nvgpu_ref_get_unless_zero(&g->refcount); | ||
473 | |||
474 | gk20a_dbg(gpu_dbg_shutdown, "GET: refs currently %d %s", | ||
475 | nvgpu_atomic_read(&g->refcount.refcount), | ||
476 | success ? "" : "(FAILED)"); | ||
477 | |||
478 | return success ? g : NULL; | ||
479 | } | ||
480 | |||
481 | /** | ||
482 | * gk20a_put() - Decrement ref count on driver | ||
483 | * | ||
484 | * @g - The driver to decrement | ||
485 | * | ||
486 | * Decrement the driver ref-count. If neccesary also free the underlying driver | ||
487 | * memory | ||
488 | */ | ||
489 | void gk20a_put(struct gk20a *g) | ||
490 | { | ||
491 | /* | ||
492 | * Note - this is racy, two instances of this could run before the | ||
493 | * actual kref_put(0 runs, you could see something like: | ||
494 | * | ||
495 | * ... PUT: refs currently 2 | ||
496 | * ... PUT: refs currently 2 | ||
497 | * ... Freeing GK20A struct! | ||
498 | */ | ||
499 | gk20a_dbg(gpu_dbg_shutdown, "PUT: refs currently %d", | ||
500 | nvgpu_atomic_read(&g->refcount.refcount)); | ||
501 | |||
502 | nvgpu_ref_put(&g->refcount, gk20a_free_cb); | ||
503 | } | ||