/*
* GK20A Graphics
*
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/of_platform.h>
#include <linux/of_address.h>
#include <linux/interrupt.h>
#include <linux/pm_runtime.h>
#include <linux/reset.h>
#include <linux/reboot.h>
#include <linux/notifier.h>
#include <linux/platform/tegra/common.h>
#include <linux/pci.h>
#include <uapi/linux/nvgpu.h>
#include <dt-bindings/soc/gm20b-fuse.h>
#include <dt-bindings/soc/gp10b-fuse.h>
#include <dt-bindings/soc/gv11b-fuse.h>
#include <soc/tegra/fuse.h>
#include <nvgpu/hal_init.h>
#include <nvgpu/dma.h>
#include <nvgpu/kmem.h>
#include <nvgpu/nvgpu_common.h>
#include <nvgpu/soc.h>
#include <nvgpu/enabled.h>
#include <nvgpu/debug.h>
#include <nvgpu/ctxsw_trace.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/sim.h>
#include <nvgpu/clk_arb.h>
#include <nvgpu/timers.h>
#include <nvgpu/channel.h>
#include <nvgpu/nvgpu_err.h>
#include "platform_gk20a.h"
#include "sysfs.h"
#include "vgpu/vgpu_linux.h"
#include "scale.h"
#include "pci.h"
#include "module.h"
#include "module_usermode.h"
#include "intr.h"
#include "ioctl.h"
#include "ioctl_ctrl.h"
#include "os_linux.h"
#include "os_ops.h"
#include "ctxsw_trace.h"
#include "driver_common.h"
#include "channel.h"
#include "debug_pmgr.h"
#ifdef CONFIG_NVGPU_SUPPORT_CDE
#include "cde.h"
#endif
#define CLASS_NAME "nvidia-gpu"
/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */
#define GK20A_WAIT_FOR_IDLE_MS 2000
#define CREATE_TRACE_POINTS
#include <trace/events/gk20a.h>
static int nvgpu_kernel_shutdown_notification(struct notifier_block *nb,
unsigned long event, void *unused)
{
struct gk20a *g = container_of(nb, struct gk20a, nvgpu_reboot_nb);
__nvgpu_set_enabled(g, NVGPU_KERNEL_IS_DYING, true);
return NOTIFY_DONE;
}
struct device_node *nvgpu_get_node(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
if (dev_is_pci(dev)) {
struct pci_bus *bus = to_pci_dev(dev)->bus;
while (!pci_is_root_bus(bus))
bus = bus->parent;
return bus->bridge->parent->of_node;
}
return dev->of_node;
}
void gk20a_busy_noresume(struct gk20a *g)
{
pm_runtime_get_noresume(dev_from_gk20a(g));
}
/*
* Check if the device can go busy.
*/
static int nvgpu_can_busy(struct gk20a *g)
{
/* Can't do anything if the system is rebooting/shutting down. */
if (nvgpu_is_enabled(g, NVGPU_KERNEL_IS_DYING))
return 0;
/* Can't do anything if the driver is restarting. */
if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
return 0;
return 1;
}
int gk20a_busy(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
int ret = 0;
struct device *dev;
if (!g)
return -ENODEV;
atomic_inc(&g->usage_count.atomic_var);
down_read(&l->busy_lock);
if (!nvgpu_can_busy(g)) {
ret = -ENODEV;
atomic_dec(&g->usage_count.atomic_var);
goto fail;
}
dev = dev_from_gk20a(g);
if (pm_runtime_enabled(dev)) {
/* Increment usage count and attempt to resume device */
ret = pm_runtime_get_sync(dev);
if (ret < 0) {
/* Mark suspended so runtime pm will retry later */
pm_runtime_set_suspended(dev);
pm_runtime_put_noidle(dev);
atomic_dec(&g->usage_count.atomic_var);
goto fail;
}
} else {
ret = gk20a_gpu_is_virtual(dev) ?
vgpu_pm_finalize_poweron(dev) :
gk20a_pm_finalize_poweron(dev);
if (ret) {
atomic_dec(&g->usage_count.atomic_var);
goto fail;
}
}
fail:
up_read(&l->busy_lock);
return ret < 0 ? ret : 0;
}
void gk20a_idle_nosuspend(struct gk20a *g)
{
pm_runtime_put_noidle(dev_from_gk20a(g));
}
void gk20a_idle(struct gk20a *g)
{
struct device *dev;
atomic_dec(&g->usage_count.atomic_var);
dev = dev_from_gk20a(g);
if (!(dev && nvgpu_can_busy(g)))
return;
if (pm_runtime_enabled(dev)) {
pm_runtime_mark_last_busy(dev);
pm_runtime_put_sync_autosuspend(dev);
}
}
/*
* Undoes gk20a_lockout_registers().
*/
static int gk20a_restore_registers(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
l->regs = l->regs_saved;
l->bar1 = l->bar1_saved;
nvgpu_restore_usermode_registers(g);
return 0;
}
int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l)
{
struct gk20a *g = &l->g;
int err;
if (l->init_done)
return 0;
err = nvgpu_init_channel_support_linux(l);
if (err) {
nvgpu_err(g, "failed to init linux channel support");
return err;
}
if (l->ops.clk.init_debugfs) {
err = l->ops.clk.init_debugfs(g);
if (err) {
nvgpu_err(g, "failed to init linux clk debugfs");
return err;
}
}
if (l->ops.therm.init_debugfs) {
err = l->ops.therm.init_debugfs(g);
if (err) {
nvgpu_err(g, "failed to init linux therm debugfs");
return err;
}
}
if (l->ops.fecs_trace.init_debugfs) {
err = l->ops.fecs_trace.init_debugfs(g);
if (err) {
nvgpu_err(g, "failed to init linux fecs trace debugfs");
return err;
}
}
err = nvgpu_pmgr_init_debugfs_linux(l);
if (err) {
nvgpu_err(g, "failed to init linux pmgr debugfs");
return err;
}
l->init_done = true;
return 0;
}
bool gk20a_check_poweron(struct gk20a *g)
{
bool ret;
nvgpu_mutex_acquire(&g->power_lock);
ret = g->power_on;
nvgpu_mutex_release(&g->power_lock);
return ret;
}
int gk20a_pm_finalize_poweron(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct gk20a_platform *platform = gk20a_get_platform(dev);
int err = 0;
nvgpu_log_fn(g, " ");
nvgpu_mutex_acquire(&g->power_lock);
if (g->power_on)
goto done;
trace_gk20a_finalize_poweron(dev_name(dev));
/* Increment platform power refcount */
if (platform->busy) {
err = platform->busy(dev);
if (err < 0) {
nvgpu_err(g, "failed to poweron platform dependency");
goto done;
}
}
err = gk20a_restore_registers(g);
if (err)
goto done;
nvgpu_restore_usermode_for_poweron(g);
/* Enable interrupt workqueue */
if (!l->nonstall_work_queue) {
l->nonstall_work_queue = alloc_workqueue("%s",
WQ_HIGHPRI, 1, "mc_nonstall");
INIT_WORK(&l->nonstall_fn_work, nvgpu_intr_nonstall_cb);
}
err = nvgpu_detect_chip(g);
if (err)
goto done;
if (g->sim) {
if (g->sim->sim_init_late)
g->sim->sim_init_late(g);
}
err = gk20a_finalize_poweron(g);
if (err)
goto done;
err = nvgpu_init_os_linux_ops(l);
if (err)
goto done;
err = nvgpu_finalize_poweron_linux(l);
if (err)
goto done;
nvgpu_init_mm_ce_context(g);
nvgpu_vidmem_thread_unpause(&g->mm);
/* Initialise scaling: this initializes the scaling driver only once */
if (IS_ENABLED(CONFIG_GK20A_DEVFREQ) &&
nvgpu_platform_is_silicon(g)) {
gk20a_scale_init(dev);
if (platform->initscale)
platform->initscale(dev);
}
trace_gk20a_finalize_poweron_done(dev_name(dev));
enable_irq(g->irq_stall);
if (g->irq_stall != g->irq_nonstall)
enable_irq(g->irq_nonstall);
g->irqs_enabled = 1;
gk20a_scale_resume(dev_from_gk20a(g));
#ifdef CONFIG_NVGPU_SUPPORT_CDE
if (platform->has_cde)
gk20a_init_cde_support(l);
#endif
#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
nvgpu_enable_ecc_reporting(g);
#endif
err = gk20a_sched_ctrl_init(g);
if (err) {
nvgpu_err(g, "failed to init sched control");
goto done;
}
g->sw_ready = true;
done:
if (err) {
g->power_on = false;
#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
nvgpu_disable_ecc_reporting(g);
#endif
}
nvgpu_mutex_release(&g->power_lock);
return err;
}
/*
* Locks out the driver from accessing GPU registers. This prevents access to
these registers after the GPU has been clock or power gated. This should help
* find annoying bugs where register reads and writes are silently dropped
* after the GPU has been turned off. On older chips these reads and writes can
* also lock the entire CPU up.
*/
static int gk20a_lockout_registers(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
l->regs = NULL;
l->bar1 = NULL;
nvgpu_lockout_usermode_registers(g);
return 0;
}
static int gk20a_pm_prepare_poweroff(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
#ifdef CONFIG_NVGPU_SUPPORT_CDE
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
#endif
struct gk20a_platform *platform = gk20a_get_platform(dev);
bool irqs_enabled;
int ret = 0;
nvgpu_log_fn(g, " ");
nvgpu_mutex_acquire(&g->power_lock);
if (!g->power_on)
goto done;
/* disable IRQs and wait for completion */
irqs_enabled = g->irqs_enabled;
if (irqs_enabled) {
disable_irq(g->irq_stall);
if (g->irq_stall != g->irq_nonstall)
disable_irq(g->irq_nonstall);
g->irqs_enabled = 0;
}
gk20a_scale_suspend(dev);
#ifdef CONFIG_NVGPU_SUPPORT_CDE
gk20a_cde_suspend(l);
#endif
ret = gk20a_prepare_poweroff(g);
if (ret)
goto error;
/* Decrement platform power refcount */
if (platform->idle)
platform->idle(dev);
/* Stop CPU from accessing the GPU registers. */
gk20a_lockout_registers(g);
#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
nvgpu_disable_ecc_reporting(g);
#endif
nvgpu_hide_usermode_for_poweroff(g);
nvgpu_mutex_release(&g->power_lock);
return 0;
error:
/* re-enable IRQs if previously enabled */
if (irqs_enabled) {
enable_irq(g->irq_stall);
if (g->irq_stall != g->irq_nonstall)
enable_irq(g->irq_nonstall);
g->irqs_enabled = 1;
}
gk20a_scale_resume(dev);
done:
nvgpu_mutex_release(&g->power_lock);
return ret;
}
static struct of_device_id tegra_gk20a_of_match[] = {
#ifdef CONFIG_TEGRA_GK20A
{ .compatible = "nvidia,tegra210-gm20b",
.data = &gm20b_tegra_platform },
{ .compatible = "nvidia,tegra186-gp10b",
.data = &gp10b_tegra_platform },
{ .compatible = "nvidia,gv11b",
.data = &gv11b_tegra_platform },
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
{ .compatible = "nvidia,gv11b-vgpu",
.data = &gv11b_vgpu_tegra_platform},
#endif
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
{ .compatible = "nvidia,tegra124-gk20a-vgpu",
.data = &vgpu_tegra_platform },
#endif
#endif
{ },
};
MODULE_DEVICE_TABLE(of, tegra_gk20a_of_match);
#ifdef CONFIG_PM
/**
 * __gk20a_do_idle() - force the GPU to idle and railgate
 *
 * On success, this call MUST be balanced by the caller with __gk20a_do_unidle()
 * (see the pairing sketch below __gk20a_do_unidle()).
 *
 * Acquires two locks: &l->busy_lock and &platform->railgate_lock.
 * On success, both locks are held when this function returns.
 * On failure, both locks are released before returning.
 */
int __gk20a_do_idle(struct gk20a *g, bool force_reset)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = dev_get_drvdata(dev);
struct nvgpu_timeout timeout;
int ref_cnt;
int target_ref_cnt = 0;
bool is_railgated;
int err = 0;
/*
* Hold back deterministic submits and changes to deterministic
* channels - this must be outside the power busy locks.
*/
gk20a_channel_deterministic_idle(g);
/* acquire busy lock to block other busy() calls */
down_write(&l->busy_lock);
/* acquire railgate lock to prevent unrailgate in midst of do_idle() */
nvgpu_mutex_acquire(&platform->railgate_lock);
/* check whether the GPU is already railgated */
if (platform->is_railgated(dev))
return 0;
/*
* release railgate_lock, prevent suspend by incrementing usage counter,
* re-acquire railgate_lock
*/
nvgpu_mutex_release(&platform->railgate_lock);
pm_runtime_get_sync(dev);
/*
 * One refcount is taken by this function.
 * If the user has disabled railgating, the driver holds one extra
 * refcount, so the idle target is 2 instead of 1.
 */
if (nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE))
target_ref_cnt = 1;
else
target_ref_cnt = 2;
nvgpu_mutex_acquire(&platform->railgate_lock);
nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
NVGPU_TIMER_CPU_TIMER);
/* check and wait until GPU is idle (with a timeout) */
do {
nvgpu_usleep_range(1000, 1100);
ref_cnt = atomic_read(&dev->power.usage_count);
} while (ref_cnt != target_ref_cnt && !nvgpu_timeout_expired(&timeout));
if (ref_cnt != target_ref_cnt) {
nvgpu_err(g, "failed to idle - refcount %d != target %d",
ref_cnt, target_ref_cnt);
goto fail_drop_usage_count;
}
/* check if global force_reset flag is set */
force_reset |= platform->force_reset_in_do_idle;
nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
NVGPU_TIMER_CPU_TIMER);
if (nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) && !force_reset) {
/*
* Case 1 : GPU railgate is supported
*
* if GPU is now idle, we will have only one ref count,
* drop this ref which will rail gate the GPU
*/
pm_runtime_put_sync(dev);
/* add sufficient delay to allow GPU to rail gate */
nvgpu_msleep(g->railgate_delay);
/* check in loop if GPU is railgated or not */
do {
nvgpu_usleep_range(1000, 1100);
is_railgated = platform->is_railgated(dev);
} while (!is_railgated && !nvgpu_timeout_expired(&timeout));
if (is_railgated) {
return 0;
} else {
nvgpu_err(g, "failed to idle in timeout");
goto fail_timeout;
}
} else {
/*
* Case 2 : GPU railgate is not supported or we explicitly
* do not want to depend on runtime PM
*
* if GPU is now idle, call prepare_poweroff() to save the
* state and then do explicit railgate
*
* __gk20a_do_unidle() needs to unrailgate, call
* finalize_poweron(), and then call pm_runtime_put_sync()
* to balance the GPU usage counter
*/
/* Save the GPU state */
err = gk20a_pm_prepare_poweroff(dev);
if (err)
goto fail_drop_usage_count;
/* railgate GPU */
platform->railgate(dev);
nvgpu_udelay(10);
g->forced_reset = true;
return 0;
}
fail_drop_usage_count:
pm_runtime_put_noidle(dev);
fail_timeout:
nvgpu_mutex_release(&platform->railgate_lock);
up_write(&l->busy_lock);
gk20a_channel_deterministic_unidle(g);
return -EBUSY;
}
/**
 * gk20a_do_idle() - wrapper around __gk20a_do_idle() for callers
 * outside of the GPU driver
 *
 * On success, this call MUST be balanced by the caller with gk20a_do_unidle()
 */
static int gk20a_do_idle(void *_g)
{
struct gk20a *g = (struct gk20a *)_g;
return __gk20a_do_idle(g, true);
}
/**
* __gk20a_do_unidle() - unblock all the tasks blocked by __gk20a_do_idle()
*/
int __gk20a_do_unidle(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = dev_get_drvdata(dev);
int err;
if (g->forced_reset) {
/*
* If we did a forced-reset/railgate
* then unrailgate the GPU here first
*/
platform->unrailgate(dev);
/* restore the GPU state */
err = gk20a_pm_finalize_poweron(dev);
if (err)
return err;
/* balance GPU usage counter */
pm_runtime_put_sync(dev);
g->forced_reset = false;
}
/* release the lock and open up all other busy() calls */
nvgpu_mutex_release(&platform->railgate_lock);
up_write(&l->busy_lock);
gk20a_channel_deterministic_unidle(g);
return 0;
}
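/*
 * Pairing sketch (illustrative; assumes an external caller such as the one
 * registered via tegra_register_idle_unidle()): the idle call must be
 * balanced by the unidle call once the forced-idle window is over:
 *
 *	err = __gk20a_do_idle(g, false);
 *	if (err != 0)
 *		return err;
 *	... GPU is idle (railgated or reset) here ...
 *	__gk20a_do_unidle(g);
 */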
/**
 * gk20a_do_unidle() - wrapper around __gk20a_do_unidle()
 */
static int gk20a_do_unidle(void *_g)
{
struct gk20a *g = (struct gk20a *)_g;
return __gk20a_do_unidle(g);
}
#endif
void __iomem *nvgpu_devm_ioremap_resource(struct platform_device *dev, int i,
struct resource **out)
{
struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i);
if (!r)
return NULL;
if (out)
*out = r;
return devm_ioremap_resource(&dev->dev, r);
}
void __iomem *nvgpu_devm_ioremap(struct device *dev, resource_size_t offset,
resource_size_t size)
{
return devm_ioremap(dev, offset, size);
}
u64 nvgpu_resource_addr(struct platform_device *dev, int i)
{
struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i);
if (!r)
return 0;
return r->start;
}
static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id)
{
struct gk20a *g = dev_id;
return nvgpu_intr_stall(g);
}
static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id)
{
struct gk20a *g = dev_id;
return nvgpu_intr_nonstall(g);
}
static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id)
{
struct gk20a *g = dev_id;
return nvgpu_intr_thread_stall(g);
}
void gk20a_remove_support(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct sim_nvgpu_linux *sim_linux;
tegra_unregister_idle_unidle(gk20a_do_idle);
nvgpu_kfree(g, g->dbg_regops_tmp_buf);
nvgpu_remove_channel_support_linux(l);
if (g->pmu.remove_support)
g->pmu.remove_support(&g->pmu);
if (g->acr.remove_support != NULL) {
g->acr.remove_support(&g->acr);
}
if (g->gr.remove_support)
g->gr.remove_support(&g->gr);
if (g->mm.remove_ce_support)
g->mm.remove_ce_support(&g->mm);
if (g->fifo.remove_support)
g->fifo.remove_support(&g->fifo);
if (g->mm.remove_support)
g->mm.remove_support(&g->mm);
if (g->sim) {
sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim);
if (g->sim->remove_support)
g->sim->remove_support(g);
if (sim_linux->remove_support_linux)
sim_linux->remove_support_linux(g);
}
nvgpu_remove_usermode_support(g);
nvgpu_free_enabled_flags(g);
gk20a_lockout_registers(g);
}
static int gk20a_init_support(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct gk20a *g = get_gk20a(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
int err = -ENOMEM;
tegra_register_idle_unidle(gk20a_do_idle, gk20a_do_unidle, g);
l->regs = nvgpu_devm_ioremap_resource(pdev,
GK20A_BAR0_IORESOURCE_MEM,
&l->reg_mem);
if (IS_ERR(l->regs)) {
nvgpu_err(g, "failed to remap gk20a registers");
err = PTR_ERR(l->regs);
goto fail;
}
l->regs_bus_addr = nvgpu_resource_addr(pdev,
GK20A_BAR0_IORESOURCE_MEM);
if (!l->regs_bus_addr) {
nvgpu_err(g, "failed to read register bus offset");
err = -ENODEV;
goto fail;
}
l->bar1 = nvgpu_devm_ioremap_resource(pdev,
GK20A_BAR1_IORESOURCE_MEM,
&l->bar1_mem);
if (IS_ERR(l->bar1)) {
nvgpu_err(g, "failed to remap gk20a bar1");
err = PTR_ERR(l->bar1);
goto fail;
}
err = nvgpu_init_sim_support_linux(g, pdev);
if (err)
goto fail;
err = nvgpu_init_sim_support(g);
if (err)
goto fail_sim;
nvgpu_init_usermode_support(g);
return 0;
fail_sim:
nvgpu_remove_sim_support_linux(g);
fail:
if (l->regs)
l->regs = NULL;
if (l->bar1)
l->bar1 = NULL;
return err;
}
static int gk20a_pm_railgate(struct device *dev)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
int ret = 0;
struct gk20a *g = get_gk20a(dev);
/* return early if platform didn't implement railgate */
if (!platform->railgate)
return 0;
/* if platform is already railgated, then just return */
if (platform->is_railgated && platform->is_railgated(dev))
return ret;
#ifdef CONFIG_DEBUG_FS
g->pstats.last_rail_gate_start = jiffies;
if (g->pstats.railgating_cycle_count >= 1)
g->pstats.total_rail_ungate_time_ms =
g->pstats.total_rail_ungate_time_ms +
jiffies_to_msecs(g->pstats.last_rail_gate_start -
g->pstats.last_rail_ungate_complete);
#endif
ret = platform->railgate(dev);
if (ret) {
nvgpu_err(g, "failed to railgate platform, err=%d", ret);
return ret;
}
#ifdef CONFIG_DEBUG_FS
g->pstats.last_rail_gate_complete = jiffies;
#endif
ret = tegra_fuse_clock_disable();
if (ret)
nvgpu_err(g, "failed to disable tegra fuse clock, err=%d", ret);
return ret;
}
static int gk20a_pm_unrailgate(struct device *dev)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
int ret = 0;
struct gk20a *g = get_gk20a(dev);
/* return early if platform didn't implement unrailgate */
if (!platform->unrailgate)
return 0;
ret = tegra_fuse_clock_enable();
if (ret) {
nvgpu_err(g, "failed to enable tegra fuse clock, err=%d", ret);
return ret;
}
#ifdef CONFIG_DEBUG_FS
g->pstats.last_rail_ungate_start = jiffies;
if (g->pstats.railgating_cycle_count >= 1)
g->pstats.total_rail_gate_time_ms =
g->pstats.total_rail_gate_time_ms +
jiffies_to_msecs(g->pstats.last_rail_ungate_start -
g->pstats.last_rail_gate_complete);
g->pstats.railgating_cycle_count++;
#endif
trace_gk20a_pm_unrailgate(dev_name(dev));
nvgpu_mutex_acquire(&platform->railgate_lock);
ret = platform->unrailgate(dev);
nvgpu_mutex_release(&platform->railgate_lock);
#ifdef CONFIG_DEBUG_FS
g->pstats.last_rail_ungate_complete = jiffies;
#endif
return ret;
}
/*
 * Remove the driver's association with the OS interrupt handlers
 */
void nvgpu_free_irq(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
devm_free_irq(dev, g->irq_stall, g);
if (g->irq_stall != g->irq_nonstall)
devm_free_irq(dev, g->irq_nonstall, g);
}
/*
 * Idle the GPU in preparation for shutdown/remove.
 * gk20a_driver_start_unload() does not idle the GPU; it only changes the SW
 * state to prevent further activity on the driver SW side.
 * On driver removal, nvgpu_quiesce() should be called after
 * gk20a_driver_start_unload() (see the ordering sketch below).
 */
int nvgpu_quiesce(struct gk20a *g)
{
int err;
struct device *dev = dev_from_gk20a(g);
if (g->power_on) {
err = gk20a_wait_for_idle(g);
if (err) {
nvgpu_err(g, "failed to idle GPU, err=%d", err);
return err;
}
err = gk20a_fifo_disable_all_engine_activity(g, true);
if (err) {
nvgpu_err(g,
"failed to disable engine activity, err=%d",
err);
return err;
}
err = gk20a_fifo_wait_engine_idle(g);
if (err) {
nvgpu_err(g, "failed to idle engines, err=%d",
err);
return err;
}
}
if (gk20a_gpu_is_virtual(dev))
err = vgpu_pm_prepare_poweroff(dev);
else
err = gk20a_pm_prepare_poweroff(dev);
if (err)
nvgpu_err(g, "failed to prepare for poweroff, err=%d",
err);
return err;
}
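/*
 * Ordering sketch (illustrative; mirrors what gk20a_pm_shutdown() does): on
 * the shutdown path, new work is blocked first and the GPU is quiesced
 * afterwards:
 *
 *	gk20a_driver_start_unload(g);
 *	err = nvgpu_quiesce(g);
 *	if (err != 0)
 *		nvgpu_err(g, "failed to quiesce, err=%d", err);
 */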
static void gk20a_pm_shutdown(struct platform_device *pdev)
{
struct gk20a_platform *platform = platform_get_drvdata(pdev);
struct gk20a *g = platform->g;
int err;
nvgpu_info(g, "shutting down");
/* vgpu has nothing to clean up currently */
if (gk20a_gpu_is_virtual(&pdev->dev))
return;
if (!g->power_on)
goto finish;
gk20a_driver_start_unload(g);
/* If GPU is already railgated,
* just prevent more requests, and return */
if (platform->is_railgated && platform->is_railgated(&pdev->dev)) {
__pm_runtime_disable(&pdev->dev, false);
nvgpu_info(g, "already railgated, shut down complete");
return;
}
/* Prevent more requests by disabling Runtime PM */
__pm_runtime_disable(&pdev->dev, false);
err = nvgpu_quiesce(g);
if (err)
goto finish;
err = gk20a_pm_railgate(&pdev->dev);
if (err)
nvgpu_err(g, "failed to railgate, err=%d", err);
finish:
nvgpu_info(g, "shut down complete");
}
#ifdef CONFIG_PM
static int gk20a_pm_runtime_resume(struct device *dev)
{
int err = 0;
err = gk20a_pm_unrailgate(dev);
if (err)
goto fail;
if (gk20a_gpu_is_virtual(dev))
err = vgpu_pm_finalize_poweron(dev);
else
err = gk20a_pm_finalize_poweron(dev);
if (err)
goto fail_poweron;
return 0;
fail_poweron:
gk20a_pm_railgate(dev);
fail:
return err;
}
static int gk20a_pm_runtime_suspend(struct device *dev)
{
int err = 0;
struct gk20a *g = get_gk20a(dev);
if (!g)
return 0;
if (gk20a_gpu_is_virtual(dev))
err = vgpu_pm_prepare_poweroff(dev);
else
err = gk20a_pm_prepare_poweroff(dev);
if (err) {
nvgpu_err(g, "failed to power off, err=%d", err);
goto fail;
}
err = gk20a_pm_railgate(dev);
if (err)
goto fail;
return 0;
fail:
gk20a_pm_finalize_poweron(dev);
pm_runtime_mark_last_busy(dev);
return err;
}
static int gk20a_pm_suspend(struct device *dev)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
struct gk20a *g = get_gk20a(dev);
int ret = 0;
int usage_count;
struct nvgpu_timeout timeout;
if (!g->power_on) {
if (platform->suspend)
ret = platform->suspend(dev);
if (ret)
return ret;
if (!pm_runtime_enabled(dev))
ret = gk20a_pm_railgate(dev);
return ret;
}
nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
NVGPU_TIMER_CPU_TIMER);
/*
* Hold back deterministic submits and changes to deterministic
* channels - this must be outside the power busy locks.
*/
gk20a_channel_deterministic_idle(g);
/* check and wait until GPU is idle (with a timeout) */
do {
nvgpu_usleep_range(1000, 1100);
usage_count = nvgpu_atomic_read(&g->usage_count);
} while (usage_count != 0 && !nvgpu_timeout_expired(&timeout));
if (usage_count != 0) {
nvgpu_err(g, "failed to idle - usage_count %d", usage_count);
ret = -EINVAL;
goto fail_idle;
}
ret = gk20a_pm_runtime_suspend(dev);
if (ret)
goto fail_idle;
if (platform->suspend)
ret = platform->suspend(dev);
if (ret)
goto fail_suspend;
g->suspended = true;
return 0;
fail_suspend:
gk20a_pm_runtime_resume(dev);
fail_idle:
gk20a_channel_deterministic_unidle(g);
return ret;
}
static int gk20a_pm_resume(struct device *dev)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
struct gk20a *g = get_gk20a(dev);
int ret = 0;
if (!g->suspended) {
if (platform->resume)
ret = platform->resume(dev);
if (ret)
return ret;
if (!pm_runtime_enabled(dev))
ret = gk20a_pm_unrailgate(dev);
return ret;
}
if (platform->resume)
ret = platform->resume(dev);
if (ret)
return ret;
ret = gk20a_pm_runtime_resume(dev);
if (ret)
return ret;
g->suspended = false;
gk20a_channel_deterministic_unidle(g);
return ret;
}
static const struct dev_pm_ops gk20a_pm_ops = {
.runtime_resume = gk20a_pm_runtime_resume,
.runtime_suspend = gk20a_pm_runtime_suspend,
.resume = gk20a_pm_resume,
.suspend = gk20a_pm_suspend,
};
#endif
static int gk20a_pm_init(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
int err = 0;
nvgpu_log_fn(g, " ");
/*
 * Initialise runtime PM. When railgating is disabled (or no railgate
 * delay is configured), set the autosuspend delay to a negative value,
 * which prevents runtime PM from autosuspending the device.
 */
if (g->railgate_delay && nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE))
pm_runtime_set_autosuspend_delay(dev,
g->railgate_delay);
else
pm_runtime_set_autosuspend_delay(dev, -1);
pm_runtime_use_autosuspend(dev);
pm_runtime_enable(dev);
return err;
}
static int gk20a_pm_deinit(struct device *dev)
{
pm_runtime_dont_use_autosuspend(dev);
pm_runtime_disable(dev);
return 0;
}
/*
 * Start the process of unloading the driver. Sets NVGPU_DRIVER_IS_DYING.
 */
void gk20a_driver_start_unload(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
nvgpu_log(g, gpu_dbg_shutdown, "Driver is now going down!\n");
down_write(&l->busy_lock);
__nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true);
/* GR SW ready needs to be invalidated at this time with the busy lock
 * held to prevent a race condition in the gr/mm code */
g->gr.sw_ready = false;
g->sw_ready = false;
up_write(&l->busy_lock);
if (g->is_virtual)
return;
gk20a_wait_for_idle(g);
nvgpu_wait_for_deferred_interrupts(g);
if (l->nonstall_work_queue) {
cancel_work_sync(&l->nonstall_fn_work);
destroy_workqueue(l->nonstall_work_queue);
l->nonstall_work_queue = NULL;
}
}
static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a)
{
gk20a_get_platform(&pdev->dev)->g = gk20a;
}
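/*
 * The "fuse-overrides" device tree property is parsed as <fuse-id value>
 * pairs of 32-bit cells; unknown fuse ids are reported and skipped. An
 * illustrative (hypothetical) node entry:
 *
 *	fuse-overrides = <GP10B_FUSE_OPT_ECC_EN 1>;
 */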
static int nvgpu_read_fuse_overrides(struct gk20a *g)
{
struct device_node *np = nvgpu_get_node(g);
struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
u32 *fuses;
int count, i;
if (!np) /* may be a PCIe device */
return 0;
count = of_property_count_elems_of_size(np, "fuse-overrides", 8);
if (count <= 0)
return count;
fuses = nvgpu_kmalloc(g, sizeof(u32) * count * 2);
if (!fuses)
return -ENOMEM;
of_property_read_u32_array(np, "fuse-overrides", fuses, count * 2);
for (i = 0; i < count; i++) {
u32 fuse, value;
fuse = fuses[2 * i];
value = fuses[2 * i + 1];
switch (fuse) {
case GM20B_FUSE_OPT_TPC_DISABLE:
g->tpc_fs_mask_user = ~value;
break;
case GP10B_FUSE_OPT_ECC_EN:
g->gr.fecs_feature_override_ecc_val = value;
break;
case GV11B_FUSE_OPT_TPC_DISABLE:
if (platform->set_tpc_pg_mask != NULL)
platform->set_tpc_pg_mask(dev_from_gk20a(g),
value);
break;
default:
nvgpu_err(g, "ignore unknown fuse override %08x", fuse);
break;
}
}
nvgpu_kfree(g, fuses);
return 0;
}
static int gk20a_probe(struct platform_device *dev)
{
struct nvgpu_os_linux *l = NULL;
struct gk20a *gk20a;
int err;
struct gk20a_platform *platform = NULL;
struct device_node *np;
if (dev->dev.of_node) {
const struct of_device_id *match;
match = of_match_device(tegra_gk20a_of_match, &dev->dev);
if (match)
platform = (struct gk20a_platform *)match->data;
} else
platform = (struct gk20a_platform *)dev->dev.platform_data;
if (!platform) {
dev_err(&dev->dev, "no platform data\n");
return -ENODATA;
}
platform_set_drvdata(dev, platform);
if (gk20a_gpu_is_virtual(&dev->dev))
return vgpu_probe(dev);
l = kzalloc(sizeof(*l), GFP_KERNEL);
if (!l) {
dev_err(&dev->dev, "couldn't allocate gk20a support");
return -ENOMEM;
}
hash_init(l->ecc_sysfs_stats_htable);
gk20a = &l->g;
nvgpu_log_fn(gk20a, " ");
nvgpu_init_gk20a(gk20a);
set_gk20a(dev, gk20a);
l->dev = &dev->dev;
gk20a->log_mask = NVGPU_DEFAULT_DBG_MASK;
nvgpu_kmem_init(gk20a);
err = nvgpu_init_enabled_flags(gk20a);
if (err)
goto return_err;
np = nvgpu_get_node(gk20a);
if (of_dma_is_coherent(np)) {
__nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true);
__nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true);
}
if (nvgpu_platform_is_simulation(gk20a))
__nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true);
gk20a->irq_stall = platform_get_irq(dev, 0);
gk20a->irq_nonstall = platform_get_irq(dev, 1);
if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0) {
err = -ENXIO;
goto return_err;
}
err = devm_request_threaded_irq(&dev->dev,
gk20a->irq_stall,
gk20a_intr_isr_stall,
gk20a_intr_thread_stall,
0, "gk20a_stall", gk20a);
if (err) {
dev_err(&dev->dev,
"failed to request stall intr irq @ %d\n",
gk20a->irq_stall);
goto return_err;
}
err = devm_request_irq(&dev->dev,
gk20a->irq_nonstall,
gk20a_intr_isr_nonstall,
0, "gk20a_nonstall", gk20a);
if (err) {
dev_err(&dev->dev,
"failed to request non-stall intr irq @ %d\n",
gk20a->irq_nonstall);
goto return_err;
}
disable_irq(gk20a->irq_stall);
if (gk20a->irq_stall != gk20a->irq_nonstall)
disable_irq(gk20a->irq_nonstall);
err = gk20a_init_support(dev);
if (err)
goto return_err;
err = nvgpu_read_fuse_overrides(gk20a);
#ifdef CONFIG_RESET_CONTROLLER
platform->reset_control = devm_reset_control_get(&dev->dev, NULL);
if (IS_ERR(platform->reset_control))
platform->reset_control = NULL;
#endif
err = nvgpu_probe(gk20a, "gpu.0", INTERFACE_NAME, &nvgpu_class);
if (err)
goto return_err;
err = gk20a_pm_init(&dev->dev);
if (err) {
dev_err(&dev->dev, "pm init failed");
goto return_err;
}
#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
nvgpu_init_ecc_reporting(gk20a);
#endif
gk20a->nvgpu_reboot_nb.notifier_call =
nvgpu_kernel_shutdown_notification;
err = register_reboot_notifier(&gk20a->nvgpu_reboot_nb);
if (err)
goto return_err;
return 0;
return_err:
nvgpu_free_enabled_flags(gk20a);
/*
* Last since the above allocs may use data structures in here.
*/
nvgpu_kmem_fini(gk20a, NVGPU_KMEM_FINI_FORCE_CLEANUP);
kfree(l);
return err;
}
int nvgpu_remove(struct device *dev, struct class *class)
{
struct gk20a *g = get_gk20a(dev);
#ifdef CONFIG_NVGPU_SUPPORT_CDE
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
#endif
struct gk20a_platform *platform = gk20a_get_platform(dev);
int err;
nvgpu_log_fn(g, " ");
err = nvgpu_quiesce(g);
WARN(err, "gpu failed to idle during driver removal");
if (nvgpu_mem_is_valid(&g->syncpt_mem))
nvgpu_dma_free(g, &g->syncpt_mem);
#ifdef CONFIG_NVGPU_SUPPORT_CDE
if (platform->has_cde)
gk20a_cde_destroy(l);
#endif
#ifdef CONFIG_GK20A_CTXSW_TRACE
gk20a_ctxsw_trace_cleanup(g);
#endif
gk20a_sched_ctrl_cleanup(g);
if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
gk20a_scale_exit(dev);
nvgpu_clk_arb_cleanup_arbiter(g);
gk20a_user_deinit(dev, class);
gk20a_debug_deinit(g);
nvgpu_remove_sysfs(dev);
if (platform->secure_buffer.destroy)
platform->secure_buffer.destroy(g,
&platform->secure_buffer);
if (platform->remove)
platform->remove(dev);
nvgpu_mutex_destroy(&g->clk_arb_enable_lock);
nvgpu_log_fn(g, "removed");
return err;
}
static int __exit gk20a_remove(struct platform_device *pdev)
{
int err;
struct device *dev = &pdev->dev;
struct gk20a *g = get_gk20a(dev);
if (gk20a_gpu_is_virtual(dev))
return vgpu_remove(pdev);
err = nvgpu_remove(dev, &nvgpu_class);
unregister_reboot_notifier(&g->nvgpu_reboot_nb);
set_gk20a(pdev, NULL);
gk20a_put(g);
gk20a_pm_deinit(dev);
return err;
}
static struct platform_driver gk20a_driver = {
.probe = gk20a_probe,
.remove = __exit_p(gk20a_remove),
.shutdown = gk20a_pm_shutdown,
.driver = {
.owner = THIS_MODULE,
.name = "gk20a",
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
#ifdef CONFIG_OF
.of_match_table = tegra_gk20a_of_match,
#endif
#ifdef CONFIG_PM
.pm = &gk20a_pm_ops,
#endif
.suppress_bind_attrs = true,
}
};
struct class nvgpu_class = {
.owner = THIS_MODULE,
.name = CLASS_NAME,
};
static int __init gk20a_init(void)
{
int ret;
ret = class_register(&nvgpu_class);
if (ret)
return ret;
ret = nvgpu_pci_init();
if (ret)
return ret;
return platform_driver_register(&gk20a_driver);
}
static void __exit gk20a_exit(void)
{
nvgpu_pci_exit();
platform_driver_unregister(&gk20a_driver);
class_unregister(&nvgpu_class);
}
MODULE_LICENSE("GPL v2");
module_init(gk20a_init);
module_exit(gk20a_exit);