From a9785995d5f22aaeb659285f8aeb64d8b56982e0 Mon Sep 17 00:00:00 2001 From: Arto Merilainen Date: Wed, 19 Mar 2014 09:38:25 +0200 Subject: gpu: nvgpu: Add NVIDIA GPU Driver This patch moves the NVIDIA GPU driver to a new location. Bug 1482562 Change-Id: I24293810b9d0f1504fd9be00135e21dad656ccb6 Signed-off-by: Arto Merilainen Reviewed-on: http://git-master/r/383722 Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/Kconfig | 60 + drivers/gpu/nvgpu/gk20a/Makefile | 36 + drivers/gpu/nvgpu/gk20a/as_gk20a.c | 293 + drivers/gpu/nvgpu/gk20a/as_gk20a.h | 50 + drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 2111 ++++++ drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 172 + drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 356 ++ drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 102 + drivers/gpu/nvgpu/gk20a/clk_gk20a.c | 865 +++ drivers/gpu/nvgpu/gk20a/clk_gk20a.h | 94 + drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 240 + drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h | 28 + drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 699 ++ drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h | 83 + drivers/gpu/nvgpu/gk20a/debug_gk20a.c | 295 + drivers/gpu/nvgpu/gk20a/debug_gk20a.h | 25 + drivers/gpu/nvgpu/gk20a/fb_gk20a.c | 37 + drivers/gpu/nvgpu/gk20a/fb_gk20a.h | 21 + drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 1836 ++++++ drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 164 + drivers/gpu/nvgpu/gk20a/gk20a.c | 1681 +++++ drivers/gpu/nvgpu/gk20a/gk20a.h | 559 ++ drivers/gpu/nvgpu/gk20a/gk20a_allocator.c | 1247 ++++ drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | 177 + drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.c | 374 ++ drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.h | 39 + drivers/gpu/nvgpu/gk20a/gk20a_scale.c | 358 ++ drivers/gpu/nvgpu/gk20a/gk20a_scale.h | 51 + drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c | 335 + drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c | 333 + drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h | 149 + drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c | 256 + drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 6747 ++++++++++++++++++++ drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 406 ++ drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h | 179 + drivers/gpu/nvgpu/gk20a/hal.c | 33 + drivers/gpu/nvgpu/gk20a/hal.h | 25 + drivers/gpu/nvgpu/gk20a/hal_gk20a.c | 50 + drivers/gpu/nvgpu/gk20a/hal_gk20a.h | 28 + drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h | 105 + drivers/gpu/nvgpu/gk20a/hw_ccsr_gk20a.h | 113 + drivers/gpu/nvgpu/gk20a/hw_chiplet_pwr_gk20a.h | 85 + drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h | 245 + drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h | 213 + drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h | 565 ++ drivers/gpu/nvgpu/gk20a/hw_flush_gk20a.h | 141 + drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h | 1141 ++++ drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h | 3173 +++++++++ drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h | 221 + drivers/gpu/nvgpu/gk20a/hw_mc_gk20a.h | 253 + drivers/gpu/nvgpu/gk20a/hw_pbdma_gk20a.h | 469 ++ drivers/gpu/nvgpu/gk20a/hw_pri_ringmaster_gk20a.h | 137 + .../gpu/nvgpu/gk20a/hw_pri_ringstation_fbp_gk20a.h | 226 + .../gpu/nvgpu/gk20a/hw_pri_ringstation_gpc_gk20a.h | 226 + .../gpu/nvgpu/gk20a/hw_pri_ringstation_sys_gk20a.h | 69 + drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h | 141 + drivers/gpu/nvgpu/gk20a/hw_pwr_gk20a.h | 737 +++ drivers/gpu/nvgpu/gk20a/hw_ram_gk20a.h | 389 ++ drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h | 2150 +++++++ drivers/gpu/nvgpu/gk20a/hw_therm_gk20a.h | 225 + drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h | 101 + drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h | 137 + drivers/gpu/nvgpu/gk20a/hw_trim_gk20a.h | 301 + drivers/gpu/nvgpu/gk20a/kind_gk20a.c | 424 ++ drivers/gpu/nvgpu/gk20a/kind_gk20a.h | 67 + 
drivers/gpu/nvgpu/gk20a/ltc_common.c | 243 + drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | 203 + drivers/gpu/nvgpu/gk20a/ltc_gk20a.h | 21 + drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 2984 +++++++++ drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 464 ++ drivers/gpu/nvgpu/gk20a/platform_gk20a.h | 160 + drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c | 35 + drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | 561 ++ drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 3796 +++++++++++ drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | 1097 ++++ drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.c | 91 + drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.h | 27 + drivers/gpu/nvgpu/gk20a/regops_gk20a.c | 704 ++ drivers/gpu/nvgpu/gk20a/regops_gk20a.h | 47 + drivers/gpu/nvgpu/gk20a/sim_gk20a.h | 62 + drivers/gpu/nvgpu/gk20a/therm_gk20a.c | 142 + drivers/gpu/nvgpu/gk20a/therm_gk20a.h | 33 + 82 files changed, 43318 insertions(+) create mode 100644 drivers/gpu/nvgpu/Kconfig create mode 100644 drivers/gpu/nvgpu/gk20a/Makefile create mode 100644 drivers/gpu/nvgpu/gk20a/as_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/as_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/channel_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/channel_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/clk_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/clk_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/debug_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/debug_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/fb_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/fb_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/fifo_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/gk20a_allocator.c create mode 100644 drivers/gpu/nvgpu/gk20a/gk20a_allocator.h create mode 100644 drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.c create mode 100644 drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.h create mode 100644 drivers/gpu/nvgpu/gk20a/gk20a_scale.c create mode 100644 drivers/gpu/nvgpu/gk20a/gk20a_scale.h create mode 100644 drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c create mode 100644 drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c create mode 100644 drivers/gpu/nvgpu/gk20a/gr_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/gr_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hal.c create mode 100644 drivers/gpu/nvgpu/gk20a/hal.h create mode 100644 drivers/gpu/nvgpu/gk20a/hal_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/hal_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_ccsr_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_chiplet_pwr_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_flush_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h create mode 
100644 drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_mc_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_pbdma_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_pri_ringmaster_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_fbp_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_gpc_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_sys_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_pwr_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_ram_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_therm_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/hw_trim_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/kind_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/kind_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/ltc_common.c create mode 100644 drivers/gpu/nvgpu/gk20a/ltc_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/ltc_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/mm_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/mm_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/platform_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c create mode 100644 drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c create mode 100644 drivers/gpu/nvgpu/gk20a/pmu_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/pmu_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/regops_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/regops_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/sim_gk20a.h create mode 100644 drivers/gpu/nvgpu/gk20a/therm_gk20a.c create mode 100644 drivers/gpu/nvgpu/gk20a/therm_gk20a.h diff --git a/drivers/gpu/nvgpu/Kconfig b/drivers/gpu/nvgpu/Kconfig new file mode 100644 index 00000000..160ec8be --- /dev/null +++ b/drivers/gpu/nvgpu/Kconfig @@ -0,0 +1,60 @@ +config GK20A + bool "Nvidia GK20A GPU support" + help + Choose this option if you have an SoC with integrated + Nvidia GPU IP. + +config GK20A_DEFAULT_TIMEOUT + depends on GK20A + int "Default timeout for submits" + default 10000 + help + Default timeout for jobs in milliseconds. Set to zero for no timeout. + +config GK20A_PMU + bool "Support GK20A PMU" + depends on GK20A + default n + help + Say Y here to enable GK20A PMU features. + +choice + depends on GK20A + prompt "Enable GK20A frequency scaling" + default GK20A_PERFMON + optional + help + Select this entry to enable gk20a frequency scaling. + +config GK20A_PERFMON + bool "Use Perfmon" + help + Select this to enable built-in perfmon scaling. + The built-in scaling option uses a simplistic + scaling mechanism (increase frequency when busy, + decrease frequency when idle). + +config GK20A_DEVFREQ + bool "Use Devfreq" + help + Select this to use devfreq-based scaling. + Devfreq is a common framework that allows using a + variety of different governors and changing + between governors on the fly. By default, no + governor is selected. + +endchoice + +config GK20A_CYCLE_STATS + bool "Support GK20A GPU CYCLE STATS" + depends on GK20A + default y + help + Say Y here to enable the cycle stats debugging features.
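The GK20A_PERFMON help text above amounts to a simple load-threshold policy. As a rough illustration only (the 70%/20% thresholds and the halve/double steps are invented for this sketch and are not taken from gk20a_scale.c):

/* Illustrative load-threshold governor in the spirit of the GK20A_PERFMON
 * help text: raise the clock when busy, lower it when idle, otherwise hold. */
unsigned long gk20a_example_pick_freq(unsigned long cur_hz, unsigned int load_pct,
				      unsigned long min_hz, unsigned long max_hz)
{
	if (load_pct > 70) {			/* busy: scale up */
		cur_hz *= 2;
		if (cur_hz > max_hz)
			cur_hz = max_hz;
	} else if (load_pct < 20) {		/* idle: scale down */
		cur_hz /= 2;
		if (cur_hz < min_hz)
			cur_hz = min_hz;
	}
	return cur_hz;
}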
+ +config GK20A_PHYS_PAGE_TABLES + bool "Use physical addressing for gk20a page tables" + default y if TEGRA_SIMULATION_PLATFORM + help + Use physical addressing for gk20a page tables. If this is off, we + use SMMU translation. diff --git a/drivers/gpu/nvgpu/gk20a/Makefile b/drivers/gpu/nvgpu/gk20a/Makefile new file mode 100644 index 00000000..f9b06b72 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/Makefile @@ -0,0 +1,36 @@ + +GCOV_PROFILE := y +ccflags-y += -Idrivers/devfreq +ccflags-y += -Wno-multichar +ccflags-y += -Werror + +obj-$(CONFIG_GK20A) += \ + gk20a.o \ + as_gk20a.o \ + ctrl_gk20a.o \ + fifo_gk20a.o \ + channel_gk20a.o \ + channel_sync_gk20a.o \ + debug_gk20a.o \ + dbg_gpu_gk20a.o \ + regops_gk20a.o \ + gr_gk20a.o \ + kind_gk20a.o \ + mm_gk20a.o \ + pmu_gk20a.o \ + priv_ring_gk20a.o \ + clk_gk20a.o \ + therm_gk20a.o \ + gr_ctx_gk20a_sim.o \ + gr_ctx_gk20a.o \ + gk20a_gating_reglist.o \ + gk20a_scale.o \ + gk20a_sysfs.o \ + ltc_gk20a.o \ + fb_gk20a.o \ + hal.o \ + hal_gk20a.o \ + gk20a_allocator.o + +obj-$(CONFIG_GK20A) += platform_gk20a_generic.o +obj-$(CONFIG_TEGRA_GK20A) += platform_gk20a_tegra.o diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c new file mode 100644 index 00000000..65c26938 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c @@ -0,0 +1,293 @@ +/* + * drivers/video/tegra/host/gk20a/as_gk20a.c + * + * GK20A Address Spaces + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include + +#include + +#include "gk20a.h" + +/* dumb allocator... */ +static int generate_as_share_id(struct gk20a_as *as) +{ + gk20a_dbg_fn(""); + return ++as->last_share_id; +} +/* still dumb */ +static void release_as_share_id(struct gk20a_as *as, int id) +{ + gk20a_dbg_fn(""); + return; +} + +static int gk20a_as_alloc_share(struct gk20a_as *as, + struct gk20a_as_share **out) +{ + struct gk20a_as_share *as_share; + int err = 0; + + gk20a_dbg_fn(""); + + *out = 0; + as_share = kzalloc(sizeof(*as_share), GFP_KERNEL); + if (!as_share) + return -ENOMEM; + + as_share->as = as; + as_share->id = generate_as_share_id(as_share->as); + as_share->ref_cnt.counter = 1; + + /* this will set as_share->vm. */ + err = gk20a_vm_alloc_share(as_share); + if (err) + goto failed; + + *out = as_share; + return 0; + + failed: + kfree(as_share); + return err; +} + +/* + * channels and the device nodes call this to release. + * once the ref_cnt hits zero the share is deleted. 
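That get/put scheme is easy to demonstrate in isolation; a stand-alone sketch in plain C11 follows (the struct and function names are illustrative, not part of the driver): the file descriptor holds the initial reference, every bound channel takes one more, and whichever release drops the count to zero deletes the share, mirroring the atomic_dec_return() check in gk20a_as_release_share() below.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct share { atomic_int ref_cnt; };

static struct share *share_open(void)
{
	struct share *s = calloc(1, sizeof(*s));
	atomic_init(&s->ref_cnt, 1);		/* reference held by the fd */
	return s;
}

static void share_get(struct share *s)		/* e.g. a channel binds to the share */
{
	atomic_fetch_add(&s->ref_cnt, 1);
}

static void share_put(struct share *s)		/* channel teardown or fd close */
{
	if (atomic_fetch_sub(&s->ref_cnt, 1) == 1) {	/* previous value 1: now zero */
		free(s);
		printf("share deleted\n");
	}
}

int main(void)
{
	struct share *s = share_open();
	share_get(s);		/* bind one channel */
	share_put(s);		/* channel released */
	share_put(s);		/* fd released: count hits zero, share freed */
	return 0;
}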
+ */ +int gk20a_as_release_share(struct gk20a_as_share *as_share) +{ + int err; + + gk20a_dbg_fn(""); + + if (atomic_dec_return(&as_share->ref_cnt) > 0) + return 0; + + err = gk20a_vm_release_share(as_share); + release_as_share_id(as_share->as, as_share->id); + kfree(as_share); + return err; +} + +static int gk20a_as_ioctl_bind_channel( + struct gk20a_as_share *as_share, + struct nvhost_as_bind_channel_args *args) +{ + int err = 0; + struct channel_gk20a *ch; + + gk20a_dbg_fn(""); + + ch = gk20a_get_channel_from_file(args->channel_fd); + if (!ch || gk20a_channel_as_bound(ch)) + return -EINVAL; + + atomic_inc(&as_share->ref_cnt); + + /* this will set channel_gk20a->vm */ + err = gk20a_vm_bind_channel(as_share, ch); + if (err) { + atomic_dec(&as_share->ref_cnt); + return err; + } + + return err; +} + +static int gk20a_as_ioctl_alloc_space( + struct gk20a_as_share *as_share, + struct nvhost_as_alloc_space_args *args) +{ + gk20a_dbg_fn(""); + return gk20a_vm_alloc_space(as_share, args); +} + +static int gk20a_as_ioctl_free_space( + struct gk20a_as_share *as_share, + struct nvhost_as_free_space_args *args) +{ + gk20a_dbg_fn(""); + return gk20a_vm_free_space(as_share, args); +} + +static int gk20a_as_ioctl_map_buffer_ex( + struct gk20a_as_share *as_share, + struct nvhost_as_map_buffer_ex_args *args) +{ + int i; + + gk20a_dbg_fn(""); + + /* ensure that padding is not set. this is required for ensuring that + * we can safely use these fields later */ + for (i = 0; i < ARRAY_SIZE(args->padding); i++) + if (args->padding[i]) + return -EINVAL; + + return gk20a_vm_map_buffer(as_share, args->dmabuf_fd, + &args->offset, args->flags, + args->kind); +} + +static int gk20a_as_ioctl_map_buffer( + struct gk20a_as_share *as_share, + struct nvhost_as_map_buffer_args *args) +{ + gk20a_dbg_fn(""); + return gk20a_vm_map_buffer(as_share, args->nvmap_handle, + &args->o_a.align, + args->flags, NV_KIND_DEFAULT); + /* args->o_a.offset will be set if !err */ +} + +static int gk20a_as_ioctl_unmap_buffer( + struct gk20a_as_share *as_share, + struct nvhost_as_unmap_buffer_args *args) +{ + gk20a_dbg_fn(""); + return gk20a_vm_unmap_buffer(as_share, args->offset); +} + +int gk20a_as_dev_open(struct inode *inode, struct file *filp) +{ + struct gk20a_as_share *as_share; + struct gk20a *g; + int err; + + gk20a_dbg_fn(""); + + g = container_of(inode->i_cdev, struct gk20a, as.cdev); + + err = gk20a_get_client(g); + if (err) { + gk20a_dbg_fn("fail to get channel!"); + return err; + } + + err = gk20a_as_alloc_share(&g->as, &as_share); + if (err) { + gk20a_dbg_fn("failed to alloc share"); + gk20a_put_client(g); + return err; + } + + filp->private_data = as_share; + return 0; +} + +int gk20a_as_dev_release(struct inode *inode, struct file *filp) +{ + struct gk20a_as_share *as_share = filp->private_data; + int ret; + struct gk20a *g = gk20a_from_as(as_share->as); + + gk20a_dbg_fn(""); + + ret = gk20a_as_release_share(as_share); + + gk20a_put_client(g); + + return ret; +} + +long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + int err = 0; + struct gk20a_as_share *as_share = filp->private_data; + struct gk20a *g = gk20a_from_as(as_share->as); + + u8 buf[NVHOST_AS_IOCTL_MAX_ARG_SIZE]; + + if ((_IOC_TYPE(cmd) != NVHOST_AS_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVHOST_AS_IOCTL_LAST)) + return -EFAULT; + + BUG_ON(_IOC_SIZE(cmd) > NVHOST_AS_IOCTL_MAX_ARG_SIZE); + + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + err 
= gk20a_channel_busy(g->dev); + if (err) + return err; + + switch (cmd) { + case NVHOST_AS_IOCTL_BIND_CHANNEL: + trace_gk20a_as_ioctl_bind_channel(dev_name(dev_from_gk20a(g))); + err = gk20a_as_ioctl_bind_channel(as_share, + (struct nvhost_as_bind_channel_args *)buf); + + break; + case NVHOST32_AS_IOCTL_ALLOC_SPACE: + { + struct nvhost32_as_alloc_space_args *args32 = + (struct nvhost32_as_alloc_space_args *)buf; + struct nvhost_as_alloc_space_args args; + + args.pages = args32->pages; + args.page_size = args32->page_size; + args.flags = args32->flags; + args.o_a.offset = args32->o_a.offset; + trace_gk20a_as_ioctl_alloc_space(dev_name(dev_from_gk20a(g))); + err = gk20a_as_ioctl_alloc_space(as_share, &args); + args32->o_a.offset = args.o_a.offset; + break; + } + case NVHOST_AS_IOCTL_ALLOC_SPACE: + trace_gk20a_as_ioctl_alloc_space(dev_name(dev_from_gk20a(g))); + err = gk20a_as_ioctl_alloc_space(as_share, + (struct nvhost_as_alloc_space_args *)buf); + break; + case NVHOST_AS_IOCTL_FREE_SPACE: + trace_gk20a_as_ioctl_free_space(dev_name(dev_from_gk20a(g))); + err = gk20a_as_ioctl_free_space(as_share, + (struct nvhost_as_free_space_args *)buf); + break; + case NVHOST_AS_IOCTL_MAP_BUFFER: + trace_gk20a_as_ioctl_map_buffer(dev_name(dev_from_gk20a(g))); + err = gk20a_as_ioctl_map_buffer(as_share, + (struct nvhost_as_map_buffer_args *)buf); + break; + case NVHOST_AS_IOCTL_MAP_BUFFER_EX: + trace_gk20a_as_ioctl_map_buffer(dev_name(dev_from_gk20a(g))); + err = gk20a_as_ioctl_map_buffer_ex(as_share, + (struct nvhost_as_map_buffer_ex_args *)buf); + break; + case NVHOST_AS_IOCTL_UNMAP_BUFFER: + trace_gk20a_as_ioctl_unmap_buffer(dev_name(dev_from_gk20a(g))); + err = gk20a_as_ioctl_unmap_buffer(as_share, + (struct nvhost_as_unmap_buffer_args *)buf); + break; + default: + dev_err(dev_from_gk20a(g), "unrecognized as ioctl: 0x%x", cmd); + err = -ENOTTY; + break; + } + + gk20a_channel_idle(g->dev); + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); + + return err; +} diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.h b/drivers/gpu/nvgpu/gk20a/as_gk20a.h new file mode 100644 index 00000000..be0e9707 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.h @@ -0,0 +1,50 @@ +/* + * drivers/video/tegra/host/gk20a/as_gk20a.h + * + * GK20A Address Space + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ +#ifndef __GK20A_AS_H +#define __GK20A_AS_H + +#include +#include +#include + +#include + +struct gk20a_as; +struct gk20a_as_share; +struct vm_gk20a; + +struct gk20a_as_share { + struct gk20a_as *as; + atomic_t ref_cnt; + int id; + struct vm_gk20a *vm; +}; + +struct gk20a_as { + int last_share_id; /* dummy allocator for now */ + struct cdev cdev; + struct device *node; +}; + +int gk20a_as_release_share(struct gk20a_as_share *as_share); + +/* struct file_operations driver interface */ +int gk20a_as_dev_open(struct inode *inode, struct file *filp); +int gk20a_as_dev_release(struct inode *inode, struct file *filp); +long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); + +#endif diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c new file mode 100644 index 00000000..6056f558 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -0,0 +1,2111 @@ +/* + * drivers/video/tegra/host/gk20a/channel_gk20a.c + * + * GK20A Graphics channel + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include /* need for nvmap.h*/ +#include +#include +#include +#include +#include + +#include "debug_gk20a.h" + +#include "gk20a.h" +#include "dbg_gpu_gk20a.h" + +#include "hw_ram_gk20a.h" +#include "hw_fifo_gk20a.h" +#include "hw_pbdma_gk20a.h" +#include "hw_ccsr_gk20a.h" +#include "hw_ltc_gk20a.h" + +#define NVMAP_HANDLE_PARAM_SIZE 1 + +static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f); +static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c); + +static void free_priv_cmdbuf(struct channel_gk20a *c, + struct priv_cmd_entry *e); +static void recycle_priv_cmdbuf(struct channel_gk20a *c); + +static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c); +static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c); + +static int channel_gk20a_commit_userd(struct channel_gk20a *c); +static int channel_gk20a_setup_userd(struct channel_gk20a *c); +static int channel_gk20a_setup_ramfc(struct channel_gk20a *c, + u64 gpfifo_base, u32 gpfifo_entries); + +static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a); +static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a); + +static int channel_gk20a_alloc_inst(struct gk20a *g, + struct channel_gk20a *ch); +static void channel_gk20a_free_inst(struct gk20a *g, + struct channel_gk20a *ch); + +static int channel_gk20a_update_runlist(struct channel_gk20a *c, + bool add); +static void gk20a_free_error_notifiers(struct channel_gk20a *ch); + +static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f) +{ + struct channel_gk20a *ch = NULL; + int chid; + + mutex_lock(&f->ch_inuse_mutex); + for (chid = 0; chid < f->num_channels; chid++) { + if (!f->channel[chid].in_use) { + f->channel[chid].in_use = 
true; + ch = &f->channel[chid]; + break; + } + } + mutex_unlock(&f->ch_inuse_mutex); + + return ch; +} + +static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c) +{ + mutex_lock(&f->ch_inuse_mutex); + f->channel[c->hw_chid].in_use = false; + mutex_unlock(&f->ch_inuse_mutex); +} + +int channel_gk20a_commit_va(struct channel_gk20a *c) +{ + u64 addr; + u32 addr_lo; + u32 addr_hi; + void *inst_ptr; + + gk20a_dbg_fn(""); + + inst_ptr = c->inst_block.cpuva; + if (!inst_ptr) + return -ENOMEM; + + addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl); + addr_lo = u64_lo32(addr >> 12); + addr_hi = u64_hi32(addr); + + gk20a_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x", + (u64)addr, addr_lo, addr_hi); + + gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), + ram_in_page_dir_base_target_vid_mem_f() | + ram_in_page_dir_base_vol_true_f() | + ram_in_page_dir_base_lo_f(addr_lo)); + + gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), + ram_in_page_dir_base_hi_f(addr_hi)); + + gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), + u64_lo32(c->vm->va_limit) | 0xFFF); + + gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), + ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit))); + + gk20a_mm_l2_invalidate(c->g); + + return 0; +} + +static int channel_gk20a_commit_userd(struct channel_gk20a *c) +{ + u32 addr_lo; + u32 addr_hi; + void *inst_ptr; + + gk20a_dbg_fn(""); + + inst_ptr = c->inst_block.cpuva; + if (!inst_ptr) + return -ENOMEM; + + addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v()); + addr_hi = u64_hi32(c->userd_iova); + + gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx", + c->hw_chid, (u64)c->userd_iova); + + gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(), + pbdma_userd_target_vid_mem_f() | + pbdma_userd_addr_f(addr_lo)); + + gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(), + pbdma_userd_target_vid_mem_f() | + pbdma_userd_hi_addr_f(addr_hi)); + + gk20a_mm_l2_invalidate(c->g); + + return 0; +} + +static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, + u32 timeslice_timeout) +{ + void *inst_ptr; + int shift = 3; + int value = timeslice_timeout; + + inst_ptr = c->inst_block.cpuva; + if (!inst_ptr) + return -ENOMEM; + + /* disable channel */ + gk20a_writel(c->g, ccsr_channel_r(c->hw_chid), + gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) | + ccsr_channel_enable_clr_true_f()); + + /* preempt the channel */ + WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid)); + + /* flush GPU cache */ + gk20a_mm_l2_flush(c->g, true); + + /* value field is 8 bits long */ + while (value >= 1 << 8) { + value >>= 1; + shift++; + } + + /* time slice register is only 18bits long */ + if ((value << shift) >= 1<<19) { + pr_err("Requested timeslice value is clamped to 18 bits\n"); + value = 255; + shift = 10; + } + + /* set new timeslice */ + gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(), + value | (shift << 12) | + fifo_eng_timeslice_enable_true_f()); + + /* enable channel */ + gk20a_writel(c->g, ccsr_channel_r(c->hw_chid), + gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) | + ccsr_channel_enable_set_true_f()); + + gk20a_mm_l2_invalidate(c->g); + + return 0; +} + +static int channel_gk20a_setup_ramfc(struct channel_gk20a *c, + u64 gpfifo_base, u32 gpfifo_entries) +{ + void *inst_ptr; + + gk20a_dbg_fn(""); + + inst_ptr = c->inst_block.cpuva; + if (!inst_ptr) + return -ENOMEM; + + memset(inst_ptr, 0, ram_fc_size_val_v()); + + gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(), + pbdma_gp_base_offset_f( + u64_lo32(gpfifo_base >> 
pbdma_gp_base_rsvd_s()))); + + gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(), + pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) | + pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries))); + + gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(), + pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f()); + + gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(), + pbdma_formats_gp_fermi0_f() | + pbdma_formats_pb_fermi1_f() | + pbdma_formats_mp_fermi0_f()); + + gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(), + pbdma_pb_header_priv_user_f() | + pbdma_pb_header_method_zero_f() | + pbdma_pb_header_subchannel_zero_f() | + pbdma_pb_header_level_main_f() | + pbdma_pb_header_first_true_f() | + pbdma_pb_header_type_inc_f()); + + gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(), + pbdma_subdevice_id_f(1) | + pbdma_subdevice_status_active_f() | + pbdma_subdevice_channel_dma_enable_f()); + + gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f()); + + gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(), + pbdma_acquire_retry_man_2_f() | + pbdma_acquire_retry_exp_2_f() | + pbdma_acquire_timeout_exp_max_f() | + pbdma_acquire_timeout_man_max_f() | + pbdma_acquire_timeout_en_disable_f()); + + gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(), + fifo_eng_timeslice_timeout_128_f() | + fifo_eng_timeslice_timescale_3_f() | + fifo_eng_timeslice_enable_true_f()); + + gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(), + fifo_pb_timeslice_timeout_16_f() | + fifo_pb_timeslice_timescale_0_f() | + fifo_pb_timeslice_enable_true_f()); + + gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); + + /* TBD: alwasy priv mode? */ + gk20a_mem_wr32(inst_ptr, ram_fc_hce_ctrl_w(), + pbdma_hce_ctrl_hce_priv_mode_yes_f()); + + gk20a_mm_l2_invalidate(c->g); + + return 0; +} + +static int channel_gk20a_setup_userd(struct channel_gk20a *c) +{ + BUG_ON(!c->userd_cpu_va); + + gk20a_dbg_fn(""); + + gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0); + gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0); + gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0); + gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0); + gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0); + gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0); + gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0); + gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0); + gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0); + gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0); + + gk20a_mm_l2_invalidate(c->g); + + return 0; +} + +static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a) +{ + struct gk20a *g = ch_gk20a->g; + struct fifo_gk20a *f = &g->fifo; + struct fifo_engine_info_gk20a *engine_info = + f->engine_info + ENGINE_GR_GK20A; + + u32 inst_ptr = ch_gk20a->inst_block.cpu_pa + >> ram_in_base_shift_v(); + + gk20a_dbg_info("bind channel %d inst ptr 0x%08x", + ch_gk20a->hw_chid, inst_ptr); + + ch_gk20a->bound = true; + + gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid), + (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) & + ~ccsr_channel_runlist_f(~0)) | + ccsr_channel_runlist_f(engine_info->runlist_id)); + + gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid), + ccsr_channel_inst_ptr_f(inst_ptr) | + ccsr_channel_inst_target_vid_mem_f() | + ccsr_channel_inst_bind_true_f()); + + gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid), + (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) & + ~ccsr_channel_enable_set_f(~0)) | + ccsr_channel_enable_set_true_f()); +} + +static void 
channel_gk20a_unbind(struct channel_gk20a *ch_gk20a) +{ + struct gk20a *g = ch_gk20a->g; + + gk20a_dbg_fn(""); + + if (ch_gk20a->bound) + gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid), + ccsr_channel_inst_ptr_f(0) | + ccsr_channel_inst_bind_false_f()); + + ch_gk20a->bound = false; +} + +static int channel_gk20a_alloc_inst(struct gk20a *g, + struct channel_gk20a *ch) +{ + struct device *d = dev_from_gk20a(g); + int err = 0; + dma_addr_t iova; + + gk20a_dbg_fn(""); + + ch->inst_block.size = ram_in_alloc_size_v(); + ch->inst_block.cpuva = dma_alloc_coherent(d, + ch->inst_block.size, + &iova, + GFP_KERNEL); + if (!ch->inst_block.cpuva) { + gk20a_err(d, "%s: memory allocation failed\n", __func__); + err = -ENOMEM; + goto clean_up; + } + + ch->inst_block.iova = iova; + ch->inst_block.cpu_pa = gk20a_get_phys_from_iova(d, + ch->inst_block.iova); + if (!ch->inst_block.cpu_pa) { + gk20a_err(d, "%s: failed to get physical address\n", __func__); + err = -ENOMEM; + goto clean_up; + } + + gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx", + ch->hw_chid, (u64)ch->inst_block.cpu_pa); + + gk20a_dbg_fn("done"); + return 0; + +clean_up: + gk20a_err(d, "fail"); + channel_gk20a_free_inst(g, ch); + return err; +} + +static void channel_gk20a_free_inst(struct gk20a *g, + struct channel_gk20a *ch) +{ + struct device *d = dev_from_gk20a(g); + + if (ch->inst_block.cpuva) + dma_free_coherent(d, ch->inst_block.size, + ch->inst_block.cpuva, ch->inst_block.iova); + ch->inst_block.cpuva = NULL; + ch->inst_block.iova = 0; + memset(&ch->inst_block, 0, sizeof(struct inst_desc)); +} + +static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add) +{ + return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true); +} + +void gk20a_disable_channel_no_update(struct channel_gk20a *ch) +{ + /* ensure no fences are pending */ + if (ch->sync) + ch->sync->set_min_eq_max(ch->sync); + + /* disable channel */ + gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid), + gk20a_readl(ch->g, + ccsr_channel_r(ch->hw_chid)) | + ccsr_channel_enable_clr_true_f()); +} + +static int gk20a_wait_channel_idle(struct channel_gk20a *ch) +{ + bool channel_idle = false; + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g)); + + do { + mutex_lock(&ch->jobs_lock); + channel_idle = list_empty(&ch->jobs); + mutex_unlock(&ch->jobs_lock); + if (channel_idle) + break; + + usleep_range(1000, 3000); + } while (time_before(jiffies, end_jiffies) + || !tegra_platform_is_silicon()); + + if (!channel_idle) + gk20a_err(dev_from_gk20a(ch->g), "channel jobs not freed"); + + return 0; +} + +void gk20a_disable_channel(struct channel_gk20a *ch, + bool finish, + unsigned long finish_timeout) +{ + if (finish) { + int err = gk20a_channel_finish(ch, finish_timeout); + WARN_ON(err); + } + + /* disable the channel from hw and increment syncpoints */ + gk20a_disable_channel_no_update(ch); + + gk20a_wait_channel_idle(ch); + + /* preempt the channel */ + gk20a_fifo_preempt_channel(ch->g, ch->hw_chid); + + /* remove channel from runlist */ + channel_gk20a_update_runlist(ch, false); +} + +#if defined(CONFIG_GK20A_CYCLE_STATS) + +static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch) +{ + /* disable existing cyclestats buffer */ + mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex); + if (ch->cyclestate.cyclestate_buffer_handler) { + dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler, + ch->cyclestate.cyclestate_buffer); + dma_buf_put(ch->cyclestate.cyclestate_buffer_handler); + 
ch->cyclestate.cyclestate_buffer_handler = NULL; + ch->cyclestate.cyclestate_buffer = NULL; + ch->cyclestate.cyclestate_buffer_size = 0; + } + mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex); +} + +static int gk20a_channel_cycle_stats(struct channel_gk20a *ch, + struct nvhost_cycle_stats_args *args) +{ + struct dma_buf *dmabuf; + void *virtual_address; + + if (args->nvmap_handle && !ch->cyclestate.cyclestate_buffer_handler) { + + /* set up new cyclestats buffer */ + dmabuf = dma_buf_get(args->nvmap_handle); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + virtual_address = dma_buf_vmap(dmabuf); + if (!virtual_address) + return -ENOMEM; + + ch->cyclestate.cyclestate_buffer_handler = dmabuf; + ch->cyclestate.cyclestate_buffer = virtual_address; + ch->cyclestate.cyclestate_buffer_size = dmabuf->size; + return 0; + + } else if (!args->nvmap_handle && + ch->cyclestate.cyclestate_buffer_handler) { + gk20a_free_cycle_stats_buffer(ch); + return 0; + + } else if (!args->nvmap_handle && + !ch->cyclestate.cyclestate_buffer_handler) { + /* no requst from GL */ + return 0; + + } else { + pr_err("channel already has cyclestats buffer\n"); + return -EINVAL; + } +} +#endif + +static int gk20a_init_error_notifier(struct channel_gk20a *ch, + struct nvhost_set_error_notifier *args) { + void *va; + + struct dma_buf *dmabuf; + + if (!args->mem) { + pr_err("gk20a_init_error_notifier: invalid memory handle\n"); + return -EINVAL; + } + + dmabuf = dma_buf_get(args->mem); + + if (ch->error_notifier_ref) + gk20a_free_error_notifiers(ch); + + if (IS_ERR(dmabuf)) { + pr_err("Invalid handle: %d\n", args->mem); + return -EINVAL; + } + /* map handle */ + va = dma_buf_vmap(dmabuf); + if (!va) { + dma_buf_put(dmabuf); + pr_err("Cannot map notifier handle\n"); + return -ENOMEM; + } + + /* set channel notifiers pointer */ + ch->error_notifier_ref = dmabuf; + ch->error_notifier = va + args->offset; + ch->error_notifier_va = va; + memset(ch->error_notifier, 0, sizeof(struct nvhost_notification)); + return 0; +} + +void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error) +{ + if (ch->error_notifier_ref) { + struct timespec time_data; + u64 nsec; + getnstimeofday(&time_data); + nsec = ((u64)time_data.tv_sec) * 1000000000u + + (u64)time_data.tv_nsec; + ch->error_notifier->time_stamp.nanoseconds[0] = + (u32)nsec; + ch->error_notifier->time_stamp.nanoseconds[1] = + (u32)(nsec >> 32); + ch->error_notifier->info32 = error; + ch->error_notifier->status = 0xffff; + gk20a_err(dev_from_gk20a(ch->g), + "error notifier set to %d\n", error); + } +} + +static void gk20a_free_error_notifiers(struct channel_gk20a *ch) +{ + if (ch->error_notifier_ref) { + dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va); + dma_buf_put(ch->error_notifier_ref); + ch->error_notifier_ref = 0; + ch->error_notifier = 0; + ch->error_notifier_va = 0; + } +} + +void gk20a_free_channel(struct channel_gk20a *ch, bool finish) +{ + struct gk20a *g = ch->g; + struct device *d = dev_from_gk20a(g); + struct fifo_gk20a *f = &g->fifo; + struct gr_gk20a *gr = &g->gr; + struct vm_gk20a *ch_vm = ch->vm; + unsigned long timeout = gk20a_get_gr_idle_timeout(g); + struct dbg_session_gk20a *dbg_s; + + gk20a_dbg_fn(""); + + /* if engine reset was deferred, perform it now */ + mutex_lock(&f->deferred_reset_mutex); + if (g->fifo.deferred_reset_pending) { + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" + " deferred, running now"); + fifo_gk20a_finish_mmu_fault_handling(g, g->fifo.mmu_fault_engines); + g->fifo.mmu_fault_engines = 0; + 
g->fifo.deferred_reset_pending = false; + } + mutex_unlock(&f->deferred_reset_mutex); + + if (!ch->bound) + return; + + if (!gk20a_channel_as_bound(ch)) + goto unbind; + + gk20a_dbg_info("freeing bound channel context, timeout=%ld", + timeout); + + gk20a_disable_channel(ch, finish && !ch->has_timedout, timeout); + + gk20a_free_error_notifiers(ch); + + /* release channel ctx */ + gk20a_free_channel_ctx(ch); + + gk20a_gr_flush_channel_tlb(gr); + + memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub)); + + /* free gpfifo */ + if (ch->gpfifo.gpu_va) + gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va, + ch->gpfifo.size, gk20a_mem_flag_none); + if (ch->gpfifo.cpu_va) + dma_free_coherent(d, ch->gpfifo.size, + ch->gpfifo.cpu_va, ch->gpfifo.iova); + ch->gpfifo.cpu_va = NULL; + ch->gpfifo.iova = 0; + + gk20a_mm_l2_invalidate(ch->g); + + memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc)); + +#if defined(CONFIG_GK20A_CYCLE_STATS) + gk20a_free_cycle_stats_buffer(ch); +#endif + + channel_gk20a_free_priv_cmdbuf(ch); + + if (ch->sync) { + ch->sync->destroy(ch->sync); + ch->sync = NULL; + } + + /* release channel binding to the as_share */ + gk20a_as_release_share(ch_vm->as_share); + +unbind: + channel_gk20a_unbind(ch); + channel_gk20a_free_inst(g, ch); + + ch->vpr = false; + ch->vm = NULL; + WARN_ON(ch->sync); + + /* unlink all debug sessions */ + mutex_lock(&ch->dbg_s_lock); + + list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) { + dbg_s->ch = NULL; + list_del_init(&dbg_s->dbg_s_list_node); + } + + mutex_unlock(&ch->dbg_s_lock); + + /* ALWAYS last */ + release_used_channel(f, ch); +} + +int gk20a_channel_release(struct inode *inode, struct file *filp) +{ + struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data; + struct gk20a *g = ch->g; + + trace_gk20a_channel_release(dev_name(&g->dev->dev)); + + gk20a_channel_busy(ch->g->dev); + gk20a_free_channel(ch, true); + gk20a_channel_idle(ch->g->dev); + + gk20a_put_client(g); + filp->private_data = NULL; + return 0; +} + +static struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + struct channel_gk20a *ch; + + ch = acquire_unused_channel(f); + if (ch == NULL) { + /* TBD: we want to make this virtualizable */ + gk20a_err(dev_from_gk20a(g), "out of hw chids"); + return 0; + } + + ch->g = g; + + if (channel_gk20a_alloc_inst(g, ch)) { + ch->in_use = false; + gk20a_err(dev_from_gk20a(g), + "failed to open gk20a channel, out of inst mem"); + + return 0; + } + g->ops.fifo.bind_channel(ch); + ch->pid = current->pid; + + /* reset timeout counter and update timestamp */ + ch->timeout_accumulated_ms = 0; + ch->timeout_gpfifo_get = 0; + /* set gr host default timeout */ + ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g); + ch->timeout_debug_dump = true; + ch->has_timedout = false; + + /* The channel is *not* runnable at this point. It still needs to have + * an address space bound and allocate a gpfifo and grctx. 
*/ + + init_waitqueue_head(&ch->notifier_wq); + init_waitqueue_head(&ch->semaphore_wq); + init_waitqueue_head(&ch->submit_wq); + + return ch; +} + +static int __gk20a_channel_open(struct gk20a *g, struct file *filp) +{ + int err; + struct channel_gk20a *ch; + + trace_gk20a_channel_open(dev_name(&g->dev->dev)); + + err = gk20a_get_client(g); + if (err) { + gk20a_err(dev_from_gk20a(g), + "failed to get client ref"); + return err; + } + + err = gk20a_channel_busy(g->dev); + if (err) { + gk20a_put_client(g); + gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err); + return err; + } + ch = gk20a_open_new_channel(g); + gk20a_channel_idle(g->dev); + if (!ch) { + gk20a_put_client(g); + gk20a_err(dev_from_gk20a(g), + "failed to get f"); + return -ENOMEM; + } + + filp->private_data = ch; + return 0; +} + +int gk20a_channel_open(struct inode *inode, struct file *filp) +{ + struct gk20a *g = container_of(inode->i_cdev, + struct gk20a, channel.cdev); + return __gk20a_channel_open(g, filp); +} + +/* allocate private cmd buffer. + used for inserting commands before/after user submitted buffers. */ +static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c) +{ + struct device *d = dev_from_gk20a(c->g); + struct vm_gk20a *ch_vm = c->vm; + struct priv_cmd_queue *q = &c->priv_cmd_q; + struct priv_cmd_entry *e; + u32 i = 0, size; + int err = 0; + struct sg_table *sgt; + dma_addr_t iova; + + /* Kernel can insert gpfifos before and after user gpfifos. + Before user gpfifos, kernel inserts fence_wait, which takes + syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords. + After user gpfifos, kernel inserts fence_get, which takes + wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords) + = 6 dwords. + Worse case if kernel adds both of them for every user gpfifo, + max size of priv_cmdbuf is : + (gpfifo entry number * (2 / 3) * (4 + 6) * 4 bytes */ + size = roundup_pow_of_two( + c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3); + + q->mem.base_cpuva = dma_alloc_coherent(d, size, + &iova, + GFP_KERNEL); + if (!q->mem.base_cpuva) { + gk20a_err(d, "%s: memory allocation failed\n", __func__); + err = -ENOMEM; + goto clean_up; + } + + q->mem.base_iova = iova; + q->mem.size = size; + + err = gk20a_get_sgtable(d, &sgt, + q->mem.base_cpuva, q->mem.base_iova, size); + if (err) { + gk20a_err(d, "%s: failed to create sg table\n", __func__); + goto clean_up; + } + + memset(q->mem.base_cpuva, 0, size); + + q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt, + size, + 0, /* flags */ + gk20a_mem_flag_none); + if (!q->base_gpuva) { + gk20a_err(d, "ch %d : failed to map gpu va" + "for priv cmd buffer", c->hw_chid); + err = -ENOMEM; + goto clean_up_sgt; + } + + q->size = q->mem.size / sizeof (u32); + + INIT_LIST_HEAD(&q->head); + INIT_LIST_HEAD(&q->free); + + /* pre-alloc 25% of priv cmdbuf entries and put them on free list */ + for (i = 0; i < q->size / 4; i++) { + e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL); + if (!e) { + gk20a_err(d, "ch %d: fail to pre-alloc cmd entry", + c->hw_chid); + err = -ENOMEM; + goto clean_up_sgt; + } + e->pre_alloc = true; + list_add(&e->list, &q->free); + } + + gk20a_free_sgtable(&sgt); + + return 0; + +clean_up_sgt: + gk20a_free_sgtable(&sgt); +clean_up: + channel_gk20a_free_priv_cmdbuf(c); + return err; +} + +static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c) +{ + struct device *d = dev_from_gk20a(c->g); + struct vm_gk20a *ch_vm = c->vm; + struct priv_cmd_queue *q = &c->priv_cmd_q; + struct priv_cmd_entry *e; + struct list_head *pos, *tmp, *head; + + 
if (q->size == 0) + return; + + if (q->base_gpuva) + gk20a_gmmu_unmap(ch_vm, q->base_gpuva, + q->mem.size, gk20a_mem_flag_none); + if (q->mem.base_cpuva) + dma_free_coherent(d, q->mem.size, + q->mem.base_cpuva, q->mem.base_iova); + q->mem.base_cpuva = NULL; + q->mem.base_iova = 0; + + /* free used list */ + head = &q->head; + list_for_each_safe(pos, tmp, head) { + e = container_of(pos, struct priv_cmd_entry, list); + free_priv_cmdbuf(c, e); + } + + /* free free list */ + head = &q->free; + list_for_each_safe(pos, tmp, head) { + e = container_of(pos, struct priv_cmd_entry, list); + e->pre_alloc = false; + free_priv_cmdbuf(c, e); + } + + memset(q, 0, sizeof(struct priv_cmd_queue)); +} + +/* allocate a cmd buffer with given size. size is number of u32 entries */ +int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, + struct priv_cmd_entry **entry) +{ + struct priv_cmd_queue *q = &c->priv_cmd_q; + struct priv_cmd_entry *e; + struct list_head *node; + u32 free_count; + u32 size = orig_size; + bool no_retry = false; + + gk20a_dbg_fn("size %d", orig_size); + + *entry = NULL; + + /* if free space in the end is less than requested, increase the size + * to make the real allocated space start from beginning. */ + if (q->put + size > q->size) + size = orig_size + (q->size - q->put); + + gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d", + c->hw_chid, q->get, q->put); + +TRY_AGAIN: + free_count = (q->size - (q->put - q->get) - 1) % q->size; + + if (size > free_count) { + if (!no_retry) { + recycle_priv_cmdbuf(c); + no_retry = true; + goto TRY_AGAIN; + } else + return -EAGAIN; + } + + if (unlikely(list_empty(&q->free))) { + + gk20a_dbg_info("ch %d: run out of pre-alloc entries", + c->hw_chid); + + e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL); + if (!e) { + gk20a_err(dev_from_gk20a(c->g), + "ch %d: fail to allocate priv cmd entry", + c->hw_chid); + return -ENOMEM; + } + } else { + node = q->free.next; + list_del(node); + e = container_of(node, struct priv_cmd_entry, list); + } + + e->size = orig_size; + e->gp_get = c->gpfifo.get; + e->gp_put = c->gpfifo.put; + e->gp_wrap = c->gpfifo.wrap; + + /* if we have increased size to skip free space in the end, set put + to beginning of cmd buffer (0) + size */ + if (size != orig_size) { + e->ptr = q->mem.base_cpuva; + e->gva = q->base_gpuva; + q->put = orig_size; + } else { + e->ptr = q->mem.base_cpuva + q->put; + e->gva = q->base_gpuva + q->put * sizeof(u32); + q->put = (q->put + orig_size) & (q->size - 1); + } + + /* we already handled q->put + size > q->size so BUG_ON this */ + BUG_ON(q->put > q->size); + + /* add new entry to head since we free from head */ + list_add(&e->list, &q->head); + + *entry = e; + + gk20a_dbg_fn("done"); + + return 0; +} + +/* Don't call this to free an explict cmd entry. + * It doesn't update priv_cmd_queue get/put */ +static void free_priv_cmdbuf(struct channel_gk20a *c, + struct priv_cmd_entry *e) +{ + struct priv_cmd_queue *q = &c->priv_cmd_q; + + if (!e) + return; + + list_del(&e->list); + + if (unlikely(!e->pre_alloc)) + kfree(e); + else { + memset(e, 0, sizeof(struct priv_cmd_entry)); + e->pre_alloc = true; + list_add(&e->list, &q->free); + } +} + +/* free entries if they're no longer being used */ +static void recycle_priv_cmdbuf(struct channel_gk20a *c) +{ + struct priv_cmd_queue *q = &c->priv_cmd_q; + struct priv_cmd_entry *e, *tmp; + struct list_head *head = &q->head; + bool wrap_around, found = false; + + gk20a_dbg_fn(""); + + /* Find the most recent free entry. 
Free it and everything before it */ + list_for_each_entry(e, head, list) { + + gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d " + "curr get:put:wrap %d:%d:%d", + c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap, + c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap); + + wrap_around = (c->gpfifo.wrap != e->gp_wrap); + if (e->gp_get < e->gp_put) { + if (c->gpfifo.get >= e->gp_put || + wrap_around) { + found = true; + break; + } else + e->gp_get = c->gpfifo.get; + } else if (e->gp_get > e->gp_put) { + if (wrap_around && + c->gpfifo.get >= e->gp_put) { + found = true; + break; + } else + e->gp_get = c->gpfifo.get; + } + } + + if (found) + q->get = (e->ptr - q->mem.base_cpuva) + e->size; + else { + gk20a_dbg_info("no free entry recycled"); + return; + } + + list_for_each_entry_safe_continue(e, tmp, head, list) { + free_priv_cmdbuf(c, e); + } + + gk20a_dbg_fn("done"); +} + + +static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, + struct nvhost_alloc_gpfifo_args *args) +{ + struct gk20a *g = c->g; + struct device *d = dev_from_gk20a(g); + struct vm_gk20a *ch_vm; + u32 gpfifo_size; + int err = 0; + struct sg_table *sgt; + dma_addr_t iova; + + /* Kernel can insert one extra gpfifo entry before user submitted gpfifos + and another one after, for internal usage. Triple the requested size. */ + gpfifo_size = roundup_pow_of_two(args->num_entries * 3); + + if (args->flags & NVHOST_ALLOC_GPFIFO_FLAGS_VPR_ENABLED) + c->vpr = true; + + /* an address space needs to have been bound at this point. */ + if (!gk20a_channel_as_bound(c)) { + gk20a_err(d, + "not bound to an address space at time of gpfifo" + " allocation. Attempting to create and bind to" + " one..."); + return -EINVAL; + } + ch_vm = c->vm; + + c->cmds_pending = false; + c->last_submit_fence.valid = false; + + c->ramfc.offset = 0; + c->ramfc.size = ram_in_ramfc_s() / 8; + + if (c->gpfifo.cpu_va) { + gk20a_err(d, "channel %d :" + "gpfifo already allocated", c->hw_chid); + return -EEXIST; + } + + c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo); + c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d, + c->gpfifo.size, + &iova, + GFP_KERNEL); + if (!c->gpfifo.cpu_va) { + gk20a_err(d, "%s: memory allocation failed\n", __func__); + err = -ENOMEM; + goto clean_up; + } + + c->gpfifo.iova = iova; + c->gpfifo.entry_num = gpfifo_size; + + c->gpfifo.get = c->gpfifo.put = 0; + + err = gk20a_get_sgtable(d, &sgt, + c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size); + if (err) { + gk20a_err(d, "%s: failed to allocate sg table\n", __func__); + goto clean_up; + } + + c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm, + &sgt, + c->gpfifo.size, + 0, /* flags */ + gk20a_mem_flag_none); + if (!c->gpfifo.gpu_va) { + gk20a_err(d, "channel %d : failed to map" + " gpu_va for gpfifo", c->hw_chid); + err = -ENOMEM; + goto clean_up_sgt; + } + + gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d", + c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num); + + channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num); + + channel_gk20a_setup_userd(c); + channel_gk20a_commit_userd(c); + + gk20a_mm_l2_invalidate(c->g); + + /* TBD: setup engine contexts */ + + err = channel_gk20a_alloc_priv_cmdbuf(c); + if (err) + goto clean_up_unmap; + + err = channel_gk20a_update_runlist(c, true); + if (err) + goto clean_up_unmap; + + gk20a_free_sgtable(&sgt); + + gk20a_dbg_fn("done"); + return 0; + +clean_up_unmap: + gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va, + c->gpfifo.size, gk20a_mem_flag_none); +clean_up_sgt: + gk20a_free_sgtable(&sgt); +clean_up: + dma_free_coherent(d, 
c->gpfifo.size, + c->gpfifo.cpu_va, c->gpfifo.iova); + c->gpfifo.cpu_va = NULL; + c->gpfifo.iova = 0; + memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); + gk20a_err(d, "fail"); + return err; +} + +static inline int wfi_cmd_size(void) +{ + return 2; +} +void add_wfi_cmd(struct priv_cmd_entry *cmd, int *i) +{ + /* wfi */ + cmd->ptr[(*i)++] = 0x2001001E; + /* handle, ignored */ + cmd->ptr[(*i)++] = 0x00000000; +} + +static inline bool check_gp_put(struct gk20a *g, + struct channel_gk20a *c) +{ + u32 put; + /* gp_put changed unexpectedly since last update? */ + put = gk20a_bar1_readl(g, + c->userd_gpu_va + 4 * ram_userd_gp_put_w()); + if (c->gpfifo.put != put) { + /*TBD: BUG_ON/teardown on this*/ + gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly " + "since last update"); + c->gpfifo.put = put; + return false; /* surprise! */ + } + return true; /* checked out ok */ +} + +/* Update with this periodically to determine how the gpfifo is draining. */ +static inline u32 update_gp_get(struct gk20a *g, + struct channel_gk20a *c) +{ + u32 new_get = gk20a_bar1_readl(g, + c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w()); + if (new_get < c->gpfifo.get) + c->gpfifo.wrap = !c->gpfifo.wrap; + c->gpfifo.get = new_get; + return new_get; +} + +static inline u32 gp_free_count(struct channel_gk20a *c) +{ + return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) % + c->gpfifo.entry_num; +} + +bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch, + u32 timeout_delta_ms) +{ + u32 gpfifo_get = update_gp_get(ch->g, ch); + /* Count consequent timeout isr */ + if (gpfifo_get == ch->timeout_gpfifo_get) { + /* we didn't advance since previous channel timeout check */ + ch->timeout_accumulated_ms += timeout_delta_ms; + } else { + /* first timeout isr encountered */ + ch->timeout_accumulated_ms = timeout_delta_ms; + } + + ch->timeout_gpfifo_get = gpfifo_get; + + return ch->g->timeouts_enabled && + ch->timeout_accumulated_ms > ch->timeout_ms_max; +} + + +/* Issue a syncpoint increment *preceded* by a wait-for-idle + * command. All commands on the channel will have been + * consumed at the time the fence syncpoint increment occurs. 
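The free-space bookkeeping used around these submission paths (gp_free_count() above and the priv_cmd_queue free_count in gk20a_channel_alloc_priv_cmdbuf()) is ordinary power-of-two ring arithmetic in which one slot is deliberately left unused so that a full ring is never mistaken for an empty one. A self-contained sketch of the same formula, with assumed example values:

/* Stand-alone illustration of the ring accounting used by gp_free_count()
 * and update_gp_get(): entry_num is a power of two (enforced by
 * roundup_pow_of_two() at allocation time), put/get are free-running
 * unsigned indices, and one slot is reserved to distinguish full from empty. */
#include <stdio.h>

static unsigned int ring_free_count(unsigned int entry_num,
				    unsigned int put, unsigned int get)
{
	return (entry_num - (put - get) - 1) % entry_num;
}

int main(void)
{
	/* 8-entry ring, 3 entries in flight (get=0, put=3): 4 slots free. */
	printf("%u\n", ring_free_count(8, 3, 0));	/* prints 4 */
	/* wrapped case, still 3 in flight (get=6, put=1): also 4 free;
	 * unsigned wrap-around in (put - get) makes the formula hold. */
	printf("%u\n", ring_free_count(8, 1, 6));	/* prints 4 */
	return 0;
}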
+ */ +static int gk20a_channel_submit_wfi(struct channel_gk20a *c) +{ + struct priv_cmd_entry *cmd = NULL; + struct gk20a *g = c->g; + u32 free_count; + int err; + + if (c->has_timedout) + return -ETIMEDOUT; + + if (!c->sync) { + c->sync = gk20a_channel_sync_create(c); + if (!c->sync) + return -ENOMEM; + } + + update_gp_get(g, c); + free_count = gp_free_count(c); + if (unlikely(!free_count)) { + gk20a_err(dev_from_gk20a(g), + "not enough gpfifo space"); + return -EAGAIN; + } + + err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit_fence); + if (unlikely(err)) + return err; + + WARN_ON(!c->last_submit_fence.wfi); + + c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva); + c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) | + pbdma_gp_entry1_length_f(cmd->size); + + c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); + + /* save gp_put */ + cmd->gp_put = c->gpfifo.put; + + gk20a_bar1_writel(g, + c->userd_gpu_va + 4 * ram_userd_gp_put_w(), + c->gpfifo.put); + + gk20a_dbg_info("post-submit put %d, get %d, size %d", + c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); + + return 0; +} + +static u32 get_gp_free_count(struct channel_gk20a *c) +{ + update_gp_get(c->g, c); + return gp_free_count(c); +} + +static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g) +{ + void *mem = NULL; + unsigned int words; + u64 offset; + struct dma_buf *dmabuf = NULL; + + if (gk20a_debug_trace_cmdbuf) { + u64 gpu_va = (u64)g->entry0 | + (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32); + int err; + + words = pbdma_gp_entry1_length_v(g->entry1); + err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset); + if (!err) + mem = dma_buf_vmap(dmabuf); + } + + if (mem) { + u32 i; + /* + * Write in batches of 128 as there seems to be a limit + * of how much you can output to ftrace at once. + */ + for (i = 0; i < words; i += 128U) { + trace_gk20a_push_cmdbuf( + c->g->dev->name, + 0, + min(words - i, 128U), + offset + i * sizeof(u32), + mem); + } + dma_buf_vunmap(dmabuf, mem); + } +} + +static int gk20a_channel_add_job(struct channel_gk20a *c, + struct gk20a_channel_fence *fence) +{ + struct vm_gk20a *vm = c->vm; + struct channel_gk20a_job *job = NULL; + struct mapped_buffer_node **mapped_buffers = NULL; + int err = 0, num_mapped_buffers; + + /* job needs reference to this vm */ + gk20a_vm_get(vm); + + err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers); + if (err) { + gk20a_vm_put(vm); + return err; + } + + job = kzalloc(sizeof(*job), GFP_KERNEL); + if (!job) { + gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); + gk20a_vm_put(vm); + return -ENOMEM; + } + + job->num_mapped_buffers = num_mapped_buffers; + job->mapped_buffers = mapped_buffers; + job->fence = *fence; + + mutex_lock(&c->jobs_lock); + list_add_tail(&job->list, &c->jobs); + mutex_unlock(&c->jobs_lock); + + return 0; +} + +void gk20a_channel_update(struct channel_gk20a *c, int nr_completed) +{ + struct gk20a *g = c->g; + struct vm_gk20a *vm = c->vm; + struct channel_gk20a_job *job, *n; + int i; + + wake_up(&c->submit_wq); + + mutex_lock(&c->jobs_lock); + list_for_each_entry_safe(job, n, &c->jobs, list) { + bool completed = WARN_ON(!c->sync) || + c->sync->is_expired(c->sync, &job->fence); + if (!completed) + break; + + gk20a_vm_put_buffers(vm, job->mapped_buffers, + job->num_mapped_buffers); + + /* job is done. 
release its reference to vm */ + gk20a_vm_put(vm); + + list_del_init(&job->list); + kfree(job); + gk20a_channel_idle(g->dev); + } + mutex_unlock(&c->jobs_lock); + + for (i = 0; i < nr_completed; i++) + gk20a_channel_idle(c->g->dev); +} + +static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, + struct nvhost_gpfifo *gpfifo, + u32 num_entries, + struct nvhost_fence *fence, + u32 flags) +{ + struct gk20a *g = c->g; + struct device *d = dev_from_gk20a(g); + u32 err = 0; + int i; + struct priv_cmd_entry *wait_cmd = NULL; + struct priv_cmd_entry *incr_cmd = NULL; + /* we might need two extra gpfifo entries - one for pre fence + * and one for post fence. */ + const int extra_entries = 2; + + if (c->has_timedout) + return -ETIMEDOUT; + + if ((flags & (NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT | + NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) && + !fence) + return -EINVAL; + + if (!c->sync) { + c->sync = gk20a_channel_sync_create(c); + if (!c->sync) + return -ENOMEM; + } + +#ifdef CONFIG_DEBUG_FS + /* update debug settings */ + if (g->ops.ltc.sync_debugfs) + g->ops.ltc.sync_debugfs(g); +#endif + + gk20a_dbg_info("channel %d", c->hw_chid); + + /* gk20a_channel_update releases this ref. */ + gk20a_channel_busy(g->dev); + + trace_gk20a_channel_submit_gpfifo(c->g->dev->name, + c->hw_chid, + num_entries, + flags, + fence->syncpt_id, fence->value); + check_gp_put(g, c); + update_gp_get(g, c); + + gk20a_dbg_info("pre-submit put %d, get %d, size %d", + c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); + + /* Invalidate tlb if it's dirty... */ + /* TBD: this should be done in the cmd stream, not with PRIs. */ + /* We don't know what context is currently running... */ + /* Note also: there can be more than one context associated with the */ + /* address space (vm). */ + gk20a_mm_tlb_invalidate(c->vm); + + /* Make sure we have enough space for gpfifo entries. If not, + * wait for signals from completed submits */ + if (gp_free_count(c) < num_entries + extra_entries) { + err = wait_event_interruptible(c->submit_wq, + get_gp_free_count(c) >= num_entries + extra_entries || + c->has_timedout); + } + + if (c->has_timedout) { + err = -ETIMEDOUT; + goto clean_up; + } + + if (err) { + gk20a_err(d, "not enough gpfifo space"); + err = -EAGAIN; + goto clean_up; + } + + /* + * optionally insert syncpt wait in the beginning of gpfifo submission + * when user requested and the wait hasn't expired. 
+ * validate that the id makes sense, elide if not + * the only reason this isn't being unceremoniously killed is to + * keep running some tests which trigger this condition + */ + if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { + if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) + err = c->sync->wait_fd(c->sync, fence->syncpt_id, + &wait_cmd); + else + err = c->sync->wait_syncpt(c->sync, fence->syncpt_id, + fence->value, &wait_cmd); + } + if (err) + goto clean_up; + + + /* always insert syncpt increment at end of gpfifo submission + to keep track of method completion for idle railgating */ + if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET && + flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) + err = c->sync->incr_user_fd(c->sync, &incr_cmd, + &c->last_submit_fence, + &fence->syncpt_id); + else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET) + err = c->sync->incr_user_syncpt(c->sync, &incr_cmd, + &c->last_submit_fence, + &fence->syncpt_id, + &fence->value); + else + err = c->sync->incr(c->sync, &incr_cmd, + &c->last_submit_fence); + if (err) + goto clean_up; + + if (wait_cmd) { + c->gpfifo.cpu_va[c->gpfifo.put].entry0 = + u64_lo32(wait_cmd->gva); + c->gpfifo.cpu_va[c->gpfifo.put].entry1 = + u64_hi32(wait_cmd->gva) | + pbdma_gp_entry1_length_f(wait_cmd->size); + trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]); + + c->gpfifo.put = (c->gpfifo.put + 1) & + (c->gpfifo.entry_num - 1); + + /* save gp_put */ + wait_cmd->gp_put = c->gpfifo.put; + } + + for (i = 0; i < num_entries; i++) { + c->gpfifo.cpu_va[c->gpfifo.put].entry0 = + gpfifo[i].entry0; /* cmd buf va low 32 */ + c->gpfifo.cpu_va[c->gpfifo.put].entry1 = + gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */ + trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]); + c->gpfifo.put = (c->gpfifo.put + 1) & + (c->gpfifo.entry_num - 1); + } + + if (incr_cmd) { + c->gpfifo.cpu_va[c->gpfifo.put].entry0 = + u64_lo32(incr_cmd->gva); + c->gpfifo.cpu_va[c->gpfifo.put].entry1 = + u64_hi32(incr_cmd->gva) | + pbdma_gp_entry1_length_f(incr_cmd->size); + trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]); + + c->gpfifo.put = (c->gpfifo.put + 1) & + (c->gpfifo.entry_num - 1); + + /* save gp_put */ + incr_cmd->gp_put = c->gpfifo.put; + } + + /* Invalidate tlb if it's dirty... */ + /* TBD: this should be done in the cmd stream, not with PRIs. */ + /* We don't know what context is currently running... */ + /* Note also: there can be more than one context associated with the */ + /* address space (vm). */ + gk20a_mm_tlb_invalidate(c->vm); + + trace_gk20a_channel_submitted_gpfifo(c->g->dev->name, + c->hw_chid, + num_entries, + flags, + fence->syncpt_id, fence->value); + + /* TODO! Check for errors... 
*/ + gk20a_channel_add_job(c, &c->last_submit_fence); + + c->cmds_pending = true; + gk20a_bar1_writel(g, + c->userd_gpu_va + 4 * ram_userd_gp_put_w(), + c->gpfifo.put); + + gk20a_dbg_info("post-submit put %d, get %d, size %d", + c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); + + gk20a_dbg_fn("done"); + return err; + +clean_up: + gk20a_err(d, "fail"); + free_priv_cmdbuf(c, wait_cmd); + free_priv_cmdbuf(c, incr_cmd); + gk20a_channel_idle(g->dev); + return err; +} + +void gk20a_remove_channel_support(struct channel_gk20a *c) +{ + +} + +int gk20a_init_channel_support(struct gk20a *g, u32 chid) +{ + struct channel_gk20a *c = g->fifo.channel+chid; + c->g = g; + c->in_use = false; + c->hw_chid = chid; + c->bound = false; + c->remove_support = gk20a_remove_channel_support; + mutex_init(&c->jobs_lock); + INIT_LIST_HEAD(&c->jobs); +#if defined(CONFIG_GK20A_CYCLE_STATS) + mutex_init(&c->cyclestate.cyclestate_buffer_mutex); +#endif + INIT_LIST_HEAD(&c->dbg_s_list); + mutex_init(&c->dbg_s_lock); + + return 0; +} + +int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout) +{ + int err = 0; + + if (!ch->cmds_pending) + return 0; + + /* Do not wait for a timedout channel */ + if (ch->has_timedout) + return -ETIMEDOUT; + + if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) { + gk20a_dbg_fn("issuing wfi, incr to finish the channel"); + err = gk20a_channel_submit_wfi(ch); + } + if (err) + return err; + + BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)); + + gk20a_dbg_fn("waiting for channel to finish thresh:%d", + ch->last_submit_fence.thresh); + + err = ch->sync->wait_cpu(ch->sync, &ch->last_submit_fence, timeout); + if (WARN_ON(err)) + dev_warn(dev_from_gk20a(ch->g), + "timed out waiting for gk20a channel to finish"); + else + ch->cmds_pending = false; + + return err; +} + +static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch, + ulong id, u32 offset, + u32 payload, long timeout) +{ + struct platform_device *pdev = ch->g->dev; + struct dma_buf *dmabuf; + void *data; + u32 *semaphore; + int ret = 0; + long remain; + + /* do not wait if channel has timed out */ + if (ch->has_timedout) + return -ETIMEDOUT; + + dmabuf = dma_buf_get(id); + if (IS_ERR(dmabuf)) { + gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx", + id); + return -EINVAL; + } + + data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT); + if (!data) { + gk20a_err(&pdev->dev, "failed to map notifier memory"); + ret = -EINVAL; + goto cleanup_put; + } + + semaphore = data + (offset & ~PAGE_MASK); + + remain = wait_event_interruptible_timeout( + ch->semaphore_wq, + *semaphore == payload || ch->has_timedout, + timeout); + + if (remain == 0 && *semaphore != payload) + ret = -ETIMEDOUT; + else if (remain < 0) + ret = remain; + + dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data); +cleanup_put: + dma_buf_put(dmabuf); + return ret; +} + +static int gk20a_channel_wait(struct channel_gk20a *ch, + struct nvhost_wait_args *args) +{ + struct device *d = dev_from_gk20a(ch->g); + struct dma_buf *dmabuf; + struct notification *notif; + struct timespec tv; + u64 jiffies; + ulong id; + u32 offset; + unsigned long timeout; + int remain, ret = 0; + + gk20a_dbg_fn(""); + + if (ch->has_timedout) + return -ETIMEDOUT; + + if (args->timeout == NVHOST_NO_TIMEOUT) + timeout = MAX_SCHEDULE_TIMEOUT; + else + timeout = (u32)msecs_to_jiffies(args->timeout); + + switch (args->type) { + case NVHOST_WAIT_TYPE_NOTIFIER: + id = args->condition.notifier.nvmap_handle; + offset = args->condition.notifier.offset; + + 
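+		/*
+		 * The notifier lives in a user-supplied buffer; the handle and
+		 * byte offset come straight from the ioctl args above.  A
+		 * rough sketch of the expected user-space usage (how the
+		 * buffer is mapped on the CPU side is up to the caller and
+		 * only illustrative here):
+		 *
+		 *	struct notification *n = notifier_cpu_map + offset;
+		 *	n->status = 0xffff;	any non-zero value means pending
+		 *	args.type = NVHOST_WAIT_TYPE_NOTIFIER;
+		 *	args.condition.notifier.nvmap_handle = handle;
+		 *	args.condition.notifier.offset = offset;
+		 *	ioctl(channel_fd, NVHOST_IOCTL_CHANNEL_WAIT, &args);
+		 *
+		 * The wait below completes once something clears n->status to
+		 * zero, the channel times out, or the timeout expires.
+		 */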
dmabuf = dma_buf_get(id); + if (IS_ERR(dmabuf)) { + gk20a_err(d, "invalid notifier nvmap handle 0x%lx", + id); + return -EINVAL; + } + + notif = dma_buf_vmap(dmabuf); + if (!notif) { + gk20a_err(d, "failed to map notifier memory"); + return -ENOMEM; + } + + notif = (struct notification *)((uintptr_t)notif + offset); + + /* user should set status pending before + * calling this ioctl */ + remain = wait_event_interruptible_timeout( + ch->notifier_wq, + notif->status == 0 || ch->has_timedout, + timeout); + + if (remain == 0 && notif->status != 0) { + ret = -ETIMEDOUT; + goto notif_clean_up; + } else if (remain < 0) { + ret = -EINTR; + goto notif_clean_up; + } + + /* TBD: fill in correct information */ + jiffies = get_jiffies_64(); + jiffies_to_timespec(jiffies, &tv); + notif->timestamp.nanoseconds[0] = tv.tv_nsec; + notif->timestamp.nanoseconds[1] = tv.tv_sec; + notif->info32 = 0xDEADBEEF; /* should be object name */ + notif->info16 = ch->hw_chid; /* should be method offset */ + +notif_clean_up: + dma_buf_vunmap(dmabuf, notif); + return ret; + + case NVHOST_WAIT_TYPE_SEMAPHORE: + ret = gk20a_channel_wait_semaphore(ch, + args->condition.semaphore.nvmap_handle, + args->condition.semaphore.offset, + args->condition.semaphore.payload, + timeout); + + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int gk20a_channel_set_priority(struct channel_gk20a *ch, + u32 priority) +{ + u32 timeslice_timeout; + /* set priority of graphics channel */ + switch (priority) { + case NVHOST_PRIORITY_LOW: + /* 64 << 3 = 512us */ + timeslice_timeout = 64; + break; + case NVHOST_PRIORITY_MEDIUM: + /* 128 << 3 = 1024us */ + timeslice_timeout = 128; + break; + case NVHOST_PRIORITY_HIGH: + /* 255 << 3 = 2048us */ + timeslice_timeout = 255; + break; + default: + pr_err("Unsupported priority"); + return -EINVAL; + } + channel_gk20a_set_schedule_params(ch, + timeslice_timeout); + return 0; +} + +static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, + struct nvhost_zcull_bind_args *args) +{ + struct gk20a *g = ch->g; + struct gr_gk20a *gr = &g->gr; + + gk20a_dbg_fn(""); + + return gr_gk20a_bind_ctxsw_zcull(g, gr, ch, + args->gpu_va, args->mode); +} + +/* in this context the "channel" is the host1x channel which + * maps to *all* gk20a channels */ +int gk20a_channel_suspend(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + u32 chid; + bool channels_in_use = false; + struct device *d = dev_from_gk20a(g); + int err; + + gk20a_dbg_fn(""); + + /* idle the engine by submitting WFI on non-KEPLER_C channel */ + for (chid = 0; chid < f->num_channels; chid++) { + struct channel_gk20a *c = &f->channel[chid]; + if (c->in_use && c->obj_class != KEPLER_C) { + err = gk20a_channel_submit_wfi(c); + if (err) { + gk20a_err(d, "cannot idle channel %d\n", + chid); + return err; + } + + c->sync->wait_cpu(c->sync, &c->last_submit_fence, + 500000); + break; + } + } + + for (chid = 0; chid < f->num_channels; chid++) { + if (f->channel[chid].in_use) { + + gk20a_dbg_info("suspend channel %d", chid); + /* disable channel */ + gk20a_writel(g, ccsr_channel_r(chid), + gk20a_readl(g, ccsr_channel_r(chid)) | + ccsr_channel_enable_clr_true_f()); + /* preempt the channel */ + gk20a_fifo_preempt_channel(g, chid); + + channels_in_use = true; + } + } + + if (channels_in_use) { + gk20a_fifo_update_runlist(g, 0, ~0, false, true); + + for (chid = 0; chid < f->num_channels; chid++) { + if (f->channel[chid].in_use) + channel_gk20a_unbind(&f->channel[chid]); + } + } + + gk20a_dbg_fn("done"); + return 0; +} + +/* in this 
context the "channel" is the host1x channel which + * maps to *all* gk20a channels */ +int gk20a_channel_resume(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + u32 chid; + bool channels_in_use = false; + + gk20a_dbg_fn(""); + + for (chid = 0; chid < f->num_channels; chid++) { + if (f->channel[chid].in_use) { + gk20a_dbg_info("resume channel %d", chid); + g->ops.fifo.bind_channel(&f->channel[chid]); + channels_in_use = true; + } + } + + if (channels_in_use) + gk20a_fifo_update_runlist(g, 0, ~0, true, true); + + gk20a_dbg_fn("done"); + return 0; +} + +void gk20a_channel_semaphore_wakeup(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + u32 chid; + + gk20a_dbg_fn(""); + + for (chid = 0; chid < f->num_channels; chid++) { + struct channel_gk20a *c = g->fifo.channel+chid; + if (c->in_use) + wake_up_interruptible_all(&c->semaphore_wq); + } +} + +static int gk20a_ioctl_channel_submit_gpfifo( + struct channel_gk20a *ch, + struct nvhost_submit_gpfifo_args *args) +{ + void *gpfifo; + u32 size; + int ret = 0; + + gk20a_dbg_fn(""); + + if (ch->has_timedout) + return -ETIMEDOUT; + + size = args->num_entries * sizeof(struct nvhost_gpfifo); + + gpfifo = kzalloc(size, GFP_KERNEL); + if (!gpfifo) + return -ENOMEM; + + if (copy_from_user(gpfifo, + (void __user *)(uintptr_t)args->gpfifo, size)) { + ret = -EINVAL; + goto clean_up; + } + + ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries, + &args->fence, args->flags); + +clean_up: + kfree(gpfifo); + return ret; +} + +void gk20a_init_fifo(struct gpu_ops *gops) +{ + gops->fifo.bind_channel = channel_gk20a_bind; +} + +long gk20a_channel_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct channel_gk20a *ch = filp->private_data; + struct platform_device *dev = ch->g->dev; + u8 buf[NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE]; + int err = 0; + + if ((_IOC_TYPE(cmd) != NVHOST_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVHOST_IOCTL_CHANNEL_LAST) || + (_IOC_SIZE(cmd) > NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE)) + return -EFAULT; + + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + switch (cmd) { + case NVHOST_IOCTL_CHANNEL_OPEN: + { + int fd; + struct file *file; + char *name; + + err = get_unused_fd_flags(O_RDWR); + if (err < 0) + break; + fd = err; + + name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d", + dev_name(&dev->dev), fd); + if (!name) { + err = -ENOMEM; + put_unused_fd(fd); + break; + } + + file = anon_inode_getfile(name, filp->f_op, NULL, O_RDWR); + kfree(name); + if (IS_ERR(file)) { + err = PTR_ERR(file); + put_unused_fd(fd); + break; + } + fd_install(fd, file); + + err = __gk20a_channel_open(ch->g, file); + if (err) { + put_unused_fd(fd); + fput(file); + break; + } + + ((struct nvhost_channel_open_args *)buf)->channel_fd = fd; + break; + } + case NVHOST_IOCTL_CHANNEL_SET_NVMAP_FD: + break; + case NVHOST_IOCTL_CHANNEL_ALLOC_OBJ_CTX: + gk20a_channel_busy(dev); + err = gk20a_alloc_obj_ctx(ch, + (struct nvhost_alloc_obj_ctx_args *)buf); + gk20a_channel_idle(dev); + break; + case NVHOST_IOCTL_CHANNEL_FREE_OBJ_CTX: + gk20a_channel_busy(dev); + err = gk20a_free_obj_ctx(ch, + (struct nvhost_free_obj_ctx_args *)buf); + gk20a_channel_idle(dev); + break; + case NVHOST_IOCTL_CHANNEL_ALLOC_GPFIFO: + gk20a_channel_busy(dev); + err = gk20a_alloc_channel_gpfifo(ch, + (struct nvhost_alloc_gpfifo_args *)buf); + gk20a_channel_idle(dev); + break; + case NVHOST_IOCTL_CHANNEL_SUBMIT_GPFIFO: + err = gk20a_ioctl_channel_submit_gpfifo(ch, + (struct 
nvhost_submit_gpfifo_args *)buf); + break; + case NVHOST_IOCTL_CHANNEL_WAIT: + gk20a_channel_busy(dev); + err = gk20a_channel_wait(ch, + (struct nvhost_wait_args *)buf); + gk20a_channel_idle(dev); + break; + case NVHOST_IOCTL_CHANNEL_ZCULL_BIND: + gk20a_channel_busy(dev); + err = gk20a_channel_zcull_bind(ch, + (struct nvhost_zcull_bind_args *)buf); + gk20a_channel_idle(dev); + break; + case NVHOST_IOCTL_CHANNEL_SET_ERROR_NOTIFIER: + gk20a_channel_busy(dev); + err = gk20a_init_error_notifier(ch, + (struct nvhost_set_error_notifier *)buf); + gk20a_channel_idle(dev); + break; +#ifdef CONFIG_GK20A_CYCLE_STATS + case NVHOST_IOCTL_CHANNEL_CYCLE_STATS: + gk20a_channel_busy(dev); + err = gk20a_channel_cycle_stats(ch, + (struct nvhost_cycle_stats_args *)buf); + gk20a_channel_idle(dev); + break; +#endif + case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT: + { + u32 timeout = + (u32)((struct nvhost_set_timeout_args *)buf)->timeout; + gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", + timeout, ch->hw_chid); + ch->timeout_ms_max = timeout; + break; + } + case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT_EX: + { + u32 timeout = + (u32)((struct nvhost_set_timeout_args *)buf)->timeout; + bool timeout_debug_dump = !((u32) + ((struct nvhost_set_timeout_ex_args *)buf)->flags & + (1 << NVHOST_TIMEOUT_FLAG_DISABLE_DUMP)); + gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", + timeout, ch->hw_chid); + ch->timeout_ms_max = timeout; + ch->timeout_debug_dump = timeout_debug_dump; + break; + } + case NVHOST_IOCTL_CHANNEL_GET_TIMEDOUT: + ((struct nvhost_get_param_args *)buf)->value = + ch->has_timedout; + break; + case NVHOST_IOCTL_CHANNEL_SET_PRIORITY: + gk20a_channel_busy(dev); + gk20a_channel_set_priority(ch, + ((struct nvhost_set_priority_args *)buf)->priority); + gk20a_channel_idle(dev); + break; + default: + dev_err(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd); + err = -ENOTTY; + break; + } + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); + + return err; +} diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h new file mode 100644 index 00000000..429db85d --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -0,0 +1,172 @@ +/* + * drivers/video/tegra/host/gk20a/channel_gk20a.h + * + * GK20A graphics channel + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ +#ifndef __CHANNEL_GK20A_H__ +#define __CHANNEL_GK20A_H__ + +#include +#include +#include +#include +#include +struct gk20a; +struct gr_gk20a; +struct dbg_session_gk20a; + +#include "channel_sync_gk20a.h" + +#include "mm_gk20a.h" +#include "gr_gk20a.h" + +struct gpfifo { + u32 entry0; + u32 entry1; +}; + +struct notification { + struct { + u32 nanoseconds[2]; + } timestamp; + u32 info32; + u16 info16; + u16 status; +}; + +struct fence { + u32 hw_chid; + u32 syncpt_val; +}; + +/* contexts associated with a channel */ +struct channel_ctx_gk20a { + struct gr_ctx_desc gr_ctx; + struct pm_ctx_desc pm_ctx; + struct patch_desc patch_ctx; + struct zcull_ctx_desc zcull_ctx; + u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA]; + u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA]; + bool global_ctx_buffer_mapped; +}; + +struct channel_gk20a_job { + struct mapped_buffer_node **mapped_buffers; + int num_mapped_buffers; + struct gk20a_channel_fence fence; + struct list_head list; +}; + +/* this is the priv element of struct nvhost_channel */ +struct channel_gk20a { + struct gk20a *g; + bool in_use; + int hw_chid; + bool bound; + bool first_init; + bool vpr; + pid_t pid; + + struct list_head jobs; + struct mutex jobs_lock; + + struct vm_gk20a *vm; + + struct gpfifo_desc gpfifo; + + struct channel_ctx_gk20a ch_ctx; + + struct inst_desc inst_block; + struct mem_desc_sub ramfc; + + void *userd_cpu_va; + u64 userd_iova; + u64 userd_gpu_va; + + s32 num_objects; + u32 obj_class; /* we support only one obj per channel */ + + struct priv_cmd_queue priv_cmd_q; + + wait_queue_head_t notifier_wq; + wait_queue_head_t semaphore_wq; + wait_queue_head_t submit_wq; + + u32 timeout_accumulated_ms; + u32 timeout_gpfifo_get; + + bool cmds_pending; + struct gk20a_channel_fence last_submit_fence; + + void (*remove_support)(struct channel_gk20a *); +#if defined(CONFIG_GK20A_CYCLE_STATS) + struct { + void *cyclestate_buffer; + u32 cyclestate_buffer_size; + struct dma_buf *cyclestate_buffer_handler; + struct mutex cyclestate_buffer_mutex; + } cyclestate; +#endif + struct mutex dbg_s_lock; + struct list_head dbg_s_list; + + bool has_timedout; + u32 timeout_ms_max; + bool timeout_debug_dump; + + struct dma_buf *error_notifier_ref; + struct nvhost_notification *error_notifier; + void *error_notifier_va; + + struct gk20a_channel_sync *sync; +}; + +static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch) +{ + return !!ch->vm; +} +int channel_gk20a_commit_va(struct channel_gk20a *c); +int gk20a_init_channel_support(struct gk20a *, u32 chid); +void gk20a_free_channel(struct channel_gk20a *ch, bool finish); +bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch, + u32 timeout_delta_ms); +void gk20a_disable_channel(struct channel_gk20a *ch, + bool wait_for_finish, + unsigned long finish_timeout); +void gk20a_disable_channel_no_update(struct channel_gk20a *ch); +int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout); +void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error); +void gk20a_channel_semaphore_wakeup(struct gk20a *g); +int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size, + struct priv_cmd_entry **entry); + +int gk20a_channel_suspend(struct gk20a *g); +int gk20a_channel_resume(struct gk20a *g); + +/* Channel file operations */ +int gk20a_channel_open(struct inode *inode, struct file *filp); +long gk20a_channel_ioctl(struct file *filp, + unsigned int cmd, + unsigned long arg); +int gk20a_channel_release(struct inode *inode, struct file *filp); +struct 
channel_gk20a *gk20a_get_channel_from_file(int fd); +void gk20a_channel_update(struct channel_gk20a *c, int nr_completed); + +void gk20a_init_fifo(struct gpu_ops *gops); + +#endif /*__CHANNEL_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c new file mode 100644 index 00000000..9f9c3ba7 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -0,0 +1,356 @@ +/* + * drivers/video/tegra/host/gk20a/channel_sync_gk20a.c + * + * GK20A Channel Synchronization Abstraction + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include + +#include "channel_sync_gk20a.h" +#include "gk20a.h" + +#ifdef CONFIG_SYNC +#include "../../../staging/android/sync.h" +#endif + +#ifdef CONFIG_TEGRA_GK20A +#include +#endif + +#ifdef CONFIG_TEGRA_GK20A + +struct gk20a_channel_syncpt { + struct gk20a_channel_sync ops; + struct channel_gk20a *c; + struct platform_device *host1x_pdev; + u32 id; +}; + +static void add_wait_cmd(u32 *ptr, u32 id, u32 thresh) +{ + /* syncpoint_a */ + ptr[0] = 0x2001001C; + /* payload */ + ptr[1] = thresh; + /* syncpoint_b */ + ptr[2] = 0x2001001D; + /* syncpt_id, switch_en, wait */ + ptr[3] = (id << 8) | 0x10; +} + +int gk20a_channel_syncpt_wait_cpu(struct gk20a_channel_sync *s, + struct gk20a_channel_fence *fence, + int timeout) +{ + struct gk20a_channel_syncpt *sp = + container_of(s, struct gk20a_channel_syncpt, ops); + if (!fence->valid) + return 0; + return nvhost_syncpt_wait_timeout_ext( + sp->host1x_pdev, sp->id, fence->thresh, + timeout, NULL, NULL); +} + +bool gk20a_channel_syncpt_is_expired(struct gk20a_channel_sync *s, + struct gk20a_channel_fence *fence) +{ + struct gk20a_channel_syncpt *sp = + container_of(s, struct gk20a_channel_syncpt, ops); + if (!fence->valid) + return true; + return nvhost_syncpt_is_expired_ext(sp->host1x_pdev, sp->id, + fence->thresh); +} + +int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id, + u32 thresh, struct priv_cmd_entry **entry) +{ + struct gk20a_channel_syncpt *sp = + container_of(s, struct gk20a_channel_syncpt, ops); + struct priv_cmd_entry *wait_cmd = NULL; + + if (id >= nvhost_syncpt_nb_pts_ext(sp->host1x_pdev)) { + dev_warn(dev_from_gk20a(sp->c->g), + "invalid wait id in gpfifo submit, elided"); + return 0; + } + + if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh)) + return 0; + + gk20a_channel_alloc_priv_cmdbuf(sp->c, 4, &wait_cmd); + if (wait_cmd == NULL) { + gk20a_err(dev_from_gk20a(sp->c->g), + "not enough priv cmd buffer space"); + return -EAGAIN; + } + + add_wait_cmd(&wait_cmd->ptr[0], id, thresh); + + *entry = wait_cmd; + return 0; +} + +int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, + struct priv_cmd_entry **entry) +{ +#ifdef CONFIG_SYNC + int i; + int num_wait_cmds; + struct sync_pt *pt; + struct sync_fence *sync_fence; + struct priv_cmd_entry *wait_cmd = NULL; + struct gk20a_channel_syncpt *sp = + container_of(s, struct gk20a_channel_syncpt, ops); + struct channel_gk20a *c = sp->c; + + sync_fence = nvhost_sync_fdget(fd); + 
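+	/*
+	 * Each sync_pt in the fence is expanded into the same 4-word host1x
+	 * wait that add_wait_cmd() emits above, so a fence with N points
+	 * needs a 4 * N word private command buffer:
+	 *
+	 *	word 0: 0x2001001C		syncpoint_a method
+	 *	word 1: thresh			payload
+	 *	word 2: 0x2001001D		syncpoint_b method
+	 *	word 3: (id << 8) | 0x10	syncpt_id, switch_en, wait
+	 *
+	 * Points that have already expired are written out as four zero
+	 * words instead, so every entry keeps the same fixed size.
+	 */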
if (!sync_fence) + return -EINVAL; + + num_wait_cmds = nvhost_sync_num_pts(sync_fence); + gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd); + if (wait_cmd == NULL) { + gk20a_err(dev_from_gk20a(c->g), + "not enough priv cmd buffer space"); + sync_fence_put(sync_fence); + return -EAGAIN; + } + + i = 0; + list_for_each_entry(pt, &sync_fence->pt_list_head, pt_list) { + u32 wait_id = nvhost_sync_pt_id(pt); + u32 wait_value = nvhost_sync_pt_thresh(pt); + + if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, + wait_id, wait_value)) { + wait_cmd->ptr[i * 4 + 0] = 0; + wait_cmd->ptr[i * 4 + 1] = 0; + wait_cmd->ptr[i * 4 + 2] = 0; + wait_cmd->ptr[i * 4 + 3] = 0; + } else + add_wait_cmd(&wait_cmd->ptr[i * 4], wait_id, + wait_value); + i++; + } + WARN_ON(i != num_wait_cmds); + sync_fence_put(sync_fence); + + *entry = wait_cmd; + return 0; +#else + return -ENODEV; +#endif +} + +static void gk20a_channel_syncpt_update(void *priv, int nr_completed) +{ + struct channel_gk20a *ch20a = priv; + gk20a_channel_update(ch20a, nr_completed); +} + +static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, + bool gfx_class, bool wfi_cmd, + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence) +{ + u32 thresh; + int incr_cmd_size; + int j = 0; + int err; + struct priv_cmd_entry *incr_cmd = NULL; + struct gk20a_channel_syncpt *sp = + container_of(s, struct gk20a_channel_syncpt, ops); + struct channel_gk20a *c = sp->c; + + /* nvhost action_gpfifo_submit_complete releases this ref. */ + err = gk20a_channel_busy(c->g->dev); + if (err) + return err; + + incr_cmd_size = 4; + if (wfi_cmd) + incr_cmd_size += 2; + + gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd); + if (incr_cmd == NULL) { + gk20a_channel_idle(c->g->dev); + gk20a_err(dev_from_gk20a(c->g), + "not enough priv cmd buffer space"); + return -EAGAIN; + } + + if (gfx_class) { + WARN_ON(wfi_cmd); /* No sense to use gfx class + wfi. */ + /* setobject KEPLER_C */ + incr_cmd->ptr[j++] = 0x20010000; + incr_cmd->ptr[j++] = KEPLER_C; + /* syncpt incr */ + incr_cmd->ptr[j++] = 0x200100B2; + incr_cmd->ptr[j++] = sp->id | + (0x1 << 20) | (0x1 << 16); + } else { + if (wfi_cmd) { + /* wfi */ + incr_cmd->ptr[j++] = 0x2001001E; + /* handle, ignored */ + incr_cmd->ptr[j++] = 0x00000000; + } + /* syncpoint_a */ + incr_cmd->ptr[j++] = 0x2001001C; + /* payload, ignored */ + incr_cmd->ptr[j++] = 0; + /* syncpoint_b */ + incr_cmd->ptr[j++] = 0x2001001D; + /* syncpt_id, incr */ + incr_cmd->ptr[j++] = (sp->id << 8) | 0x1; + } + WARN_ON(j != incr_cmd_size); + + thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 1); + + err = nvhost_intr_register_notifier(sp->host1x_pdev, sp->id, thresh, + gk20a_channel_syncpt_update, c); + + /* Adding interrupt action should never fail. A proper error handling + * here would require us to decrement the syncpt max back to its + * original value. */ + if (WARN(err, "failed to set submit complete interrupt")) { + gk20a_channel_idle(c->g->dev); + err = 0; /* Ignore this error. 
*/ + } + + fence->thresh = thresh; + fence->valid = true; + fence->wfi = wfi_cmd; + *entry = incr_cmd; + return 0; +} + +int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s, + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence) +{ + return __gk20a_channel_syncpt_incr(s, + false /* use host class */, + true /* wfi */, + entry, fence); +} + +int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence) +{ + struct gk20a_channel_syncpt *sp = + container_of(s, struct gk20a_channel_syncpt, ops); + /* Don't put wfi cmd to this one since we're not returning + * a fence to user space. */ + return __gk20a_channel_syncpt_incr(s, + sp->c->obj_class == KEPLER_C /* may use gfx class */, + false /* no wfi */, + entry, fence); +} + +int gk20a_channel_syncpt_incr_user_syncpt(struct gk20a_channel_sync *s, + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence, + u32 *id, u32 *thresh) +{ + struct gk20a_channel_syncpt *sp = + container_of(s, struct gk20a_channel_syncpt, ops); + /* Need to do 'host incr + wfi' or 'gfx incr' since we return the fence + * to user space. */ + int err = __gk20a_channel_syncpt_incr(s, + sp->c->obj_class == KEPLER_C /* use gfx class? */, + sp->c->obj_class != KEPLER_C /* wfi if host class */, + entry, fence); + if (err) + return err; + *id = sp->id; + *thresh = fence->thresh; + return 0; +} + +int gk20a_channel_syncpt_incr_user_fd(struct gk20a_channel_sync *s, + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence, + int *fd) +{ +#ifdef CONFIG_SYNC + int err; + struct nvhost_ctrl_sync_fence_info pt; + struct gk20a_channel_syncpt *sp = + container_of(s, struct gk20a_channel_syncpt, ops); + err = gk20a_channel_syncpt_incr_user_syncpt(s, entry, fence, + &pt.id, &pt.thresh); + if (err) + return err; + return nvhost_sync_create_fence_fd(sp->host1x_pdev, &pt, 1, + "fence", fd); +#else + return -ENODEV; +#endif +} + +void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s) +{ + struct gk20a_channel_syncpt *sp = + container_of(s, struct gk20a_channel_syncpt, ops); + nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id); +} + +static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s) +{ + struct gk20a_channel_syncpt *sp = + container_of(s, struct gk20a_channel_syncpt, ops); + nvhost_free_syncpt(sp->id); + kfree(sp); +} + +static struct gk20a_channel_sync * +gk20a_channel_syncpt_create(struct channel_gk20a *c) +{ + struct gk20a_channel_syncpt *sp; + + sp = kzalloc(sizeof(*sp), GFP_KERNEL); + if (!sp) + return NULL; + + sp->c = c; + sp->host1x_pdev = to_platform_device(c->g->dev->dev.parent); + sp->id = nvhost_get_syncpt_host_managed(sp->host1x_pdev, c->hw_chid); + + sp->ops.wait_cpu = gk20a_channel_syncpt_wait_cpu; + sp->ops.is_expired = gk20a_channel_syncpt_is_expired; + sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt; + sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd; + sp->ops.incr = gk20a_channel_syncpt_incr; + sp->ops.incr_wfi = gk20a_channel_syncpt_incr_wfi; + sp->ops.incr_user_syncpt = gk20a_channel_syncpt_incr_user_syncpt; + sp->ops.incr_user_fd = gk20a_channel_syncpt_incr_user_fd; + sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max; + sp->ops.destroy = gk20a_channel_syncpt_destroy; + return &sp->ops; +} +#endif /* CONFIG_TEGRA_GK20A */ + +struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c) +{ +#ifdef CONFIG_TEGRA_GK20A + if (gk20a_platform_has_syncpoints(c->g->dev)) + return 
gk20a_channel_syncpt_create(c); +#endif + WARN_ON(1); + return NULL; +} diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h new file mode 100644 index 00000000..69feb89f --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h @@ -0,0 +1,102 @@ +/* + * drivers/video/tegra/host/gk20a/channel_sync_gk20a.h + * + * GK20A Channel Synchronization Abstraction + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _GK20A_CHANNEL_SYNC_H_ +#define _GK20A_CHANNEL_SYNC_H_ + +#include + +struct gk20a_channel_sync; +struct priv_cmd_entry; +struct channel_gk20a; + +struct gk20a_channel_fence { + bool valid; + bool wfi; /* was issued with preceding wfi */ + u32 thresh; /* either semaphore or syncpoint value */ +}; + +struct gk20a_channel_sync { + /* CPU wait for a fence returned by incr_syncpt() or incr_fd(). */ + int (*wait_cpu)(struct gk20a_channel_sync *s, + struct gk20a_channel_fence *fence, + int timeout); + + /* Test whether a fence returned by incr_syncpt() or incr_fd() is + * expired. */ + bool (*is_expired)(struct gk20a_channel_sync *s, + struct gk20a_channel_fence *fence); + + /* Generate a gpu wait cmdbuf from syncpoint. */ + int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh, + struct priv_cmd_entry **entry); + + /* Generate a gpu wait cmdbuf from sync fd. */ + int (*wait_fd)(struct gk20a_channel_sync *s, int fd, + struct priv_cmd_entry **entry); + + /* Increment syncpoint/semaphore. + * Returns + * - a gpu cmdbuf that performs the increment when executed, + * - a fence that can be passed to wait_cpu() and is_expired(). + */ + int (*incr)(struct gk20a_channel_sync *s, + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence); + + /* Increment syncpoint/semaphore, preceded by a wfi. + * Returns + * - a gpu cmdbuf that performs the increment when executed, + * - a fence that can be passed to wait_cpu() and is_expired(). + */ + int (*incr_wfi)(struct gk20a_channel_sync *s, + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence); + + /* Increment syncpoint, so that the returned fence represents + * work completion (may need wfi) and can be returned to user space. + * Returns + * - a gpu cmdbuf that performs the increment when executed, + * - a fence that can be passed to wait_cpu() and is_expired(), + * - a syncpoint id/value pair that can be returned to user space. + */ + int (*incr_user_syncpt)(struct gk20a_channel_sync *s, + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence, + u32 *id, u32 *thresh); + + /* Increment syncpoint/semaphore, so that the returned fence represents + * work completion (may need wfi) and can be returned to user space. + * Returns + * - a gpu cmdbuf that performs the increment when executed, + * - a fence that can be passed to wait_cpu() and is_expired(), + * - a sync fd that can be returned to user space. 
+ */ + int (*incr_user_fd)(struct gk20a_channel_sync *s, + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence, + int *fd); + + /* Reset the channel syncpoint/semaphore. */ + void (*set_min_eq_max)(struct gk20a_channel_sync *s); + + /* Free the resources allocated by gk20a_channel_sync_create. */ + void (*destroy)(struct gk20a_channel_sync *s); +}; + +struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c); +#endif diff --git a/drivers/gpu/nvgpu/gk20a/clk_gk20a.c b/drivers/gpu/nvgpu/gk20a/clk_gk20a.c new file mode 100644 index 00000000..151a332b --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/clk_gk20a.c @@ -0,0 +1,865 @@ +/* + * drivers/video/tegra/host/gk20a/clk_gk20a.c + * + * GK20A Clocks + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include /* for mdelay */ +#include +#include +#include +#include + +#include "gk20a.h" +#include "hw_trim_gk20a.h" +#include "hw_timer_gk20a.h" + +#define gk20a_dbg_clk(fmt, arg...) \ + gk20a_dbg(gpu_dbg_clk, fmt, ##arg) + +/* from vbios PLL info table */ +struct pll_parms gpc_pll_params = { + 144, 2064, /* freq */ + 1000, 2064, /* vco */ + 12, 38, /* u */ + 1, 255, /* M */ + 8, 255, /* N */ + 1, 32, /* PL */ +}; + +static int num_gpu_cooling_freq; +static struct gpufreq_table_data *gpu_cooling_freq; + +struct gpufreq_table_data *tegra_gpufreq_table_get(void) +{ + return gpu_cooling_freq; +} + +unsigned int tegra_gpufreq_table_size_get(void) +{ + return num_gpu_cooling_freq; +} + +static u8 pl_to_div[] = { +/* PL: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 */ +/* p: */ 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 12, 16, 20, 24, 32 }; + +/* Calculate and update M/N/PL as well as pll->freq + ref_clk_f = clk_in_f / src_div = clk_in_f; (src_div = 1 on gk20a) + u_f = ref_clk_f / M; + PLL output = vco_f = u_f * N = ref_clk_f * N / M; + gpc2clk = target clock frequency = vco_f / PL; + gpcclk = gpc2clk / 2; */ +static int clk_config_pll(struct clk_gk20a *clk, struct pll *pll, + struct pll_parms *pll_params, u32 *target_freq, bool best_fit) +{ + u32 min_vco_f, max_vco_f; + u32 best_M, best_N; + u32 low_PL, high_PL, best_PL; + u32 m, n, n2; + u32 target_vco_f, vco_f; + u32 ref_clk_f, target_clk_f, u_f; + u32 delta, lwv, best_delta = ~0; + int pl; + + BUG_ON(target_freq == NULL); + + gk20a_dbg_fn("request target freq %d MHz", *target_freq); + + ref_clk_f = pll->clk_in; + target_clk_f = *target_freq; + max_vco_f = pll_params->max_vco; + min_vco_f = pll_params->min_vco; + best_M = pll_params->max_M; + best_N = pll_params->min_N; + best_PL = pll_params->min_PL; + + target_vco_f = target_clk_f + target_clk_f / 50; + if (max_vco_f < target_vco_f) + max_vco_f = target_vco_f; + + high_PL = (max_vco_f + target_vco_f - 1) / target_vco_f; + high_PL = min(high_PL, pll_params->max_PL); + high_PL = max(high_PL, pll_params->min_PL); + + low_PL = min_vco_f / target_vco_f; + low_PL = min(low_PL, pll_params->max_PL); + low_PL = max(low_PL, 
pll_params->min_PL); + + /* Find Indices of high_PL and low_PL */ + for (pl = 0; pl < 14; pl++) { + if (pl_to_div[pl] >= low_PL) { + low_PL = pl; + break; + } + } + for (pl = 0; pl < 14; pl++) { + if (pl_to_div[pl] >= high_PL) { + high_PL = pl; + break; + } + } + gk20a_dbg_info("low_PL %d(div%d), high_PL %d(div%d)", + low_PL, pl_to_div[low_PL], high_PL, pl_to_div[high_PL]); + + for (pl = low_PL; pl <= high_PL; pl++) { + target_vco_f = target_clk_f * pl_to_div[pl]; + + for (m = pll_params->min_M; m <= pll_params->max_M; m++) { + u_f = ref_clk_f / m; + + if (u_f < pll_params->min_u) + break; + if (u_f > pll_params->max_u) + continue; + + n = (target_vco_f * m) / ref_clk_f; + n2 = ((target_vco_f * m) + (ref_clk_f - 1)) / ref_clk_f; + + if (n > pll_params->max_N) + break; + + for (; n <= n2; n++) { + if (n < pll_params->min_N) + continue; + if (n > pll_params->max_N) + break; + + vco_f = ref_clk_f * n / m; + + if (vco_f >= min_vco_f && vco_f <= max_vco_f) { + lwv = (vco_f + (pl_to_div[pl] / 2)) + / pl_to_div[pl]; + delta = abs(lwv - target_clk_f); + + if (delta < best_delta) { + best_delta = delta; + best_M = m; + best_N = n; + best_PL = pl; + + if (best_delta == 0 || + /* 0.45% for non best fit */ + (!best_fit && (vco_f / best_delta > 218))) { + goto found_match; + } + + gk20a_dbg_info("delta %d @ M %d, N %d, PL %d", + delta, m, n, pl); + } + } + } + } + } + +found_match: + BUG_ON(best_delta == ~0); + + if (best_fit && best_delta != 0) + gk20a_dbg_clk("no best match for target @ %dMHz on gpc_pll", + target_clk_f); + + pll->M = best_M; + pll->N = best_N; + pll->PL = best_PL; + + /* save current frequency */ + pll->freq = ref_clk_f * pll->N / (pll->M * pl_to_div[pll->PL]); + + *target_freq = pll->freq; + + gk20a_dbg_clk("actual target freq %d MHz, M %d, N %d, PL %d(div%d)", + *target_freq, pll->M, pll->N, pll->PL, pl_to_div[pll->PL]); + + gk20a_dbg_fn("done"); + + return 0; +} + +static int clk_slide_gpc_pll(struct gk20a *g, u32 n) +{ + u32 data, coeff; + u32 nold; + int ramp_timeout = 500; + + /* get old coefficients */ + coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); + nold = trim_sys_gpcpll_coeff_ndiv_v(coeff); + + /* do nothing if NDIV is same */ + if (n == nold) + return 0; + + /* setup */ + data = gk20a_readl(g, trim_sys_gpcpll_cfg2_r()); + data = set_field(data, trim_sys_gpcpll_cfg2_pll_stepa_m(), + trim_sys_gpcpll_cfg2_pll_stepa_f(0x2b)); + gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), data); + data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r()); + data = set_field(data, trim_sys_gpcpll_cfg3_pll_stepb_m(), + trim_sys_gpcpll_cfg3_pll_stepb_f(0xb)); + gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data); + + /* pll slowdown mode */ + data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r()); + data = set_field(data, + trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(), + trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f()); + gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data); + + /* new ndiv ready for ramp */ + coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); + coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(), + trim_sys_gpcpll_coeff_ndiv_f(n)); + udelay(1); + gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff); + + /* dynamic ramp to new ndiv */ + data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r()); + data = set_field(data, + trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(), + trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f()); + udelay(1); + gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data); + + do { + udelay(1); + ramp_timeout--; + data = gk20a_readl( + g, 
trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r()); + if (trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(data)) + break; + } while (ramp_timeout > 0); + + /* exit slowdown mode */ + data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r()); + data = set_field(data, + trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(), + trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f()); + data = set_field(data, + trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(), + trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f()); + gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data); + gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r()); + + if (ramp_timeout <= 0) { + gk20a_err(dev_from_gk20a(g), "gpcpll dynamic ramp timeout"); + return -ETIMEDOUT; + } + return 0; +} + +static int clk_program_gpc_pll(struct gk20a *g, struct clk_gk20a *clk, + int allow_slide) +{ + u32 data, cfg, coeff, timeout; + u32 m, n, pl; + u32 nlo; + + gk20a_dbg_fn(""); + + if (!tegra_platform_is_silicon()) + return 0; + + /* get old coefficients */ + coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); + m = trim_sys_gpcpll_coeff_mdiv_v(coeff); + n = trim_sys_gpcpll_coeff_ndiv_v(coeff); + pl = trim_sys_gpcpll_coeff_pldiv_v(coeff); + + /* do NDIV slide if there is no change in M and PL */ + cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); + if (allow_slide && clk->gpc_pll.M == m && clk->gpc_pll.PL == pl + && trim_sys_gpcpll_cfg_enable_v(cfg)) { + return clk_slide_gpc_pll(g, clk->gpc_pll.N); + } + + /* slide down to NDIV_LO */ + nlo = DIV_ROUND_UP(m * gpc_pll_params.min_vco, clk->gpc_pll.clk_in); + if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) { + int ret = clk_slide_gpc_pll(g, nlo); + if (ret) + return ret; + } + + /* split FO-to-bypass jump in halfs by setting out divider 1:2 */ + data = gk20a_readl(g, trim_sys_gpc2clk_out_r()); + data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(), + trim_sys_gpc2clk_out_vcodiv_f(2)); + gk20a_writel(g, trim_sys_gpc2clk_out_r(), data); + + /* put PLL in bypass before programming it */ + data = gk20a_readl(g, trim_sys_sel_vco_r()); + data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(), + trim_sys_sel_vco_gpc2clk_out_bypass_f()); + udelay(2); + gk20a_writel(g, trim_sys_sel_vco_r(), data); + + /* get out from IDDQ */ + cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); + if (trim_sys_gpcpll_cfg_iddq_v(cfg)) { + cfg = set_field(cfg, trim_sys_gpcpll_cfg_iddq_m(), + trim_sys_gpcpll_cfg_iddq_power_on_v()); + gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); + gk20a_readl(g, trim_sys_gpcpll_cfg_r()); + udelay(2); + } + + /* disable PLL before changing coefficients */ + cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); + cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(), + trim_sys_gpcpll_cfg_enable_no_f()); + gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); + gk20a_readl(g, trim_sys_gpcpll_cfg_r()); + + /* change coefficients */ + nlo = DIV_ROUND_UP(clk->gpc_pll.M * gpc_pll_params.min_vco, + clk->gpc_pll.clk_in); + coeff = trim_sys_gpcpll_coeff_mdiv_f(clk->gpc_pll.M) | + trim_sys_gpcpll_coeff_ndiv_f(allow_slide ? 
+ nlo : clk->gpc_pll.N) | + trim_sys_gpcpll_coeff_pldiv_f(clk->gpc_pll.PL); + gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff); + + /* enable PLL after changing coefficients */ + cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); + cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(), + trim_sys_gpcpll_cfg_enable_yes_f()); + gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); + + /* lock pll */ + cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); + if (cfg & trim_sys_gpcpll_cfg_enb_lckdet_power_off_f()){ + cfg = set_field(cfg, trim_sys_gpcpll_cfg_enb_lckdet_m(), + trim_sys_gpcpll_cfg_enb_lckdet_power_on_f()); + gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); + } + + /* wait pll lock */ + timeout = clk->pll_delay / 2 + 1; + do { + cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); + if (cfg & trim_sys_gpcpll_cfg_pll_lock_true_f()) + goto pll_locked; + udelay(2); + } while (--timeout > 0); + + /* PLL is messed up. What can we do here? */ + BUG(); + return -EBUSY; + +pll_locked: + /* put PLL back on vco */ + data = gk20a_readl(g, trim_sys_sel_vco_r()); + data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(), + trim_sys_sel_vco_gpc2clk_out_vco_f()); + gk20a_writel(g, trim_sys_sel_vco_r(), data); + clk->gpc_pll.enabled = true; + + /* restore out divider 1:1 */ + data = gk20a_readl(g, trim_sys_gpc2clk_out_r()); + data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(), + trim_sys_gpc2clk_out_vcodiv_by1_f()); + udelay(2); + gk20a_writel(g, trim_sys_gpc2clk_out_r(), data); + + /* slide up to target NDIV */ + return clk_slide_gpc_pll(g, clk->gpc_pll.N); +} + +static int clk_disable_gpcpll(struct gk20a *g, int allow_slide) +{ + u32 cfg, coeff, m, nlo; + struct clk_gk20a *clk = &g->clk; + + /* slide to VCO min */ + cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); + if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) { + coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); + m = trim_sys_gpcpll_coeff_mdiv_v(coeff); + nlo = DIV_ROUND_UP(m * gpc_pll_params.min_vco, + clk->gpc_pll.clk_in); + clk_slide_gpc_pll(g, nlo); + } + + /* put PLL in bypass before disabling it */ + cfg = gk20a_readl(g, trim_sys_sel_vco_r()); + cfg = set_field(cfg, trim_sys_sel_vco_gpc2clk_out_m(), + trim_sys_sel_vco_gpc2clk_out_bypass_f()); + gk20a_writel(g, trim_sys_sel_vco_r(), cfg); + + /* disable PLL */ + cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); + cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(), + trim_sys_gpcpll_cfg_enable_no_f()); + gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); + gk20a_readl(g, trim_sys_gpcpll_cfg_r()); + + clk->gpc_pll.enabled = false; + return 0; +} + +static int gk20a_init_clk_reset_enable_hw(struct gk20a *g) +{ + gk20a_dbg_fn(""); + return 0; +} + +struct clk *gk20a_clk_get(struct gk20a *g) +{ + if (!g->clk.tegra_clk) { + struct clk *clk; + + clk = clk_get_sys("tegra_gk20a", "gpu"); + if (IS_ERR(clk)) { + gk20a_err(dev_from_gk20a(g), + "fail to get tegra gpu clk tegra_gk20a/gpu"); + return NULL; + } + g->clk.tegra_clk = clk; + } + + return g->clk.tegra_clk; +} + +static int gk20a_init_clk_setup_sw(struct gk20a *g) +{ + struct clk_gk20a *clk = &g->clk; + static int initialized; + unsigned long *freqs; + int err, num_freqs; + struct clk *ref; + unsigned long ref_rate; + + gk20a_dbg_fn(""); + + if (clk->sw_ready) { + gk20a_dbg_fn("skip init"); + return 0; + } + + if (!gk20a_clk_get(g)) + return -EINVAL; + + ref = clk_get_parent(clk_get_parent(clk->tegra_clk)); + if (IS_ERR(ref)) { + gk20a_err(dev_from_gk20a(g), + "failed to get GPCPLL reference clock"); + return -EINVAL; + } + ref_rate = clk_get_rate(ref); + + 
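+	/*
+	 * Worked example of the initial coefficient choice made below,
+	 * assuming (purely for illustration) a 12 MHz reference from the
+	 * parent clock just queried:
+	 *
+	 *	clk_in = 12 MHz, M = 1
+	 *	N  = DIV_ROUND_UP(min_vco, clk_in) = DIV_ROUND_UP(1000, 12) = 84
+	 *	PL = 1, pl_to_div[1] = 2
+	 *	vco     = clk_in * N / M   = 1008 MHz (>= 1000 MHz min_vco)
+	 *	gpc2clk = vco / 2          = 504 MHz
+	 *	gpcclk  = gpc2clk / 2      = 252 MHz
+	 */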
clk->pll_delay = 300; /* usec */ + + clk->gpc_pll.id = GK20A_GPC_PLL; + clk->gpc_pll.clk_in = ref_rate / 1000000; /* MHz */ + + /* Decide initial frequency */ + if (!initialized) { + initialized = 1; + clk->gpc_pll.M = 1; + clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco, + clk->gpc_pll.clk_in); + clk->gpc_pll.PL = 1; + clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N; + clk->gpc_pll.freq /= pl_to_div[clk->gpc_pll.PL]; + } + + err = tegra_dvfs_get_freqs(clk_get_parent(clk->tegra_clk), + &freqs, &num_freqs); + if (!err) { + int i, j; + + /* init j for inverse traversal of frequencies */ + j = num_freqs - 1; + + gpu_cooling_freq = kzalloc( + (1 + num_freqs) * sizeof(*gpu_cooling_freq), + GFP_KERNEL); + + /* store frequencies in inverse order */ + for (i = 0; i < num_freqs; ++i, --j) { + gpu_cooling_freq[i].index = i; + gpu_cooling_freq[i].frequency = freqs[j]; + } + + /* add 'end of table' marker */ + gpu_cooling_freq[i].index = i; + gpu_cooling_freq[i].frequency = GPUFREQ_TABLE_END; + + /* store number of frequencies */ + num_gpu_cooling_freq = num_freqs + 1; + } + + mutex_init(&clk->clk_mutex); + + clk->sw_ready = true; + + gk20a_dbg_fn("done"); + return 0; +} + +static int gk20a_init_clk_setup_hw(struct gk20a *g) +{ + u32 data; + + gk20a_dbg_fn(""); + + data = gk20a_readl(g, trim_sys_gpc2clk_out_r()); + data = set_field(data, + trim_sys_gpc2clk_out_sdiv14_m() | + trim_sys_gpc2clk_out_vcodiv_m() | + trim_sys_gpc2clk_out_bypdiv_m(), + trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f() | + trim_sys_gpc2clk_out_vcodiv_by1_f() | + trim_sys_gpc2clk_out_bypdiv_f(0)); + gk20a_writel(g, trim_sys_gpc2clk_out_r(), data); + + return 0; +} + +static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq) +{ + struct clk_gk20a *clk = &g->clk; + + if (freq > gpc_pll_params.max_freq) + freq = gpc_pll_params.max_freq; + else if (freq < gpc_pll_params.min_freq) + freq = gpc_pll_params.min_freq; + + if (freq != old_freq) { + /* gpc_pll.freq is changed to new value here */ + if (clk_config_pll(clk, &clk->gpc_pll, &gpc_pll_params, + &freq, true)) { + gk20a_err(dev_from_gk20a(g), + "failed to set pll target for %d", freq); + return -EINVAL; + } + } + return 0; +} + +static int set_pll_freq(struct gk20a *g, u32 freq, u32 old_freq) +{ + struct clk_gk20a *clk = &g->clk; + int err = 0; + + gk20a_dbg_fn("curr freq: %dMHz, target freq %dMHz", old_freq, freq); + + if ((freq == old_freq) && clk->gpc_pll.enabled) + return 0; + + /* change frequency only if power is on */ + if (g->clk.clk_hw_on) { + err = clk_program_gpc_pll(g, clk, 1); + if (err) + err = clk_program_gpc_pll(g, clk, 0); + } + + /* Just report error but not restore PLL since dvfs could already change + voltage even when it returns error. 
*/ + if (err) + gk20a_err(dev_from_gk20a(g), + "failed to set pll to %d", freq); + return err; +} + +static int gk20a_clk_export_set_rate(void *data, unsigned long *rate) +{ + u32 old_freq; + int ret = -ENODATA; + struct gk20a *g = data; + struct clk_gk20a *clk = &g->clk; + + if (rate) { + mutex_lock(&clk->clk_mutex); + old_freq = clk->gpc_pll.freq; + ret = set_pll_target(g, rate_gpu_to_gpc2clk(*rate), old_freq); + if (!ret && clk->gpc_pll.enabled) + ret = set_pll_freq(g, clk->gpc_pll.freq, old_freq); + if (!ret) + *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq); + mutex_unlock(&clk->clk_mutex); + } + return ret; +} + +static int gk20a_clk_export_enable(void *data) +{ + int ret; + struct gk20a *g = data; + struct clk_gk20a *clk = &g->clk; + + mutex_lock(&clk->clk_mutex); + ret = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq); + mutex_unlock(&clk->clk_mutex); + return ret; +} + +static void gk20a_clk_export_disable(void *data) +{ + struct gk20a *g = data; + struct clk_gk20a *clk = &g->clk; + + mutex_lock(&clk->clk_mutex); + if (g->clk.clk_hw_on) + clk_disable_gpcpll(g, 1); + mutex_unlock(&clk->clk_mutex); +} + +static void gk20a_clk_export_init(void *data, unsigned long *rate, bool *state) +{ + struct gk20a *g = data; + struct clk_gk20a *clk = &g->clk; + + mutex_lock(&clk->clk_mutex); + if (state) + *state = clk->gpc_pll.enabled; + if (rate) + *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq); + mutex_unlock(&clk->clk_mutex); +} + +static struct tegra_clk_export_ops gk20a_clk_export_ops = { + .init = gk20a_clk_export_init, + .enable = gk20a_clk_export_enable, + .disable = gk20a_clk_export_disable, + .set_rate = gk20a_clk_export_set_rate, +}; + +static int gk20a_clk_register_export_ops(struct gk20a *g) +{ + int ret; + struct clk *c; + + if (gk20a_clk_export_ops.data) + return 0; + + gk20a_clk_export_ops.data = (void *)g; + c = g->clk.tegra_clk; + if (!c || !clk_get_parent(c)) + return -ENOSYS; + + ret = tegra_clk_register_export_ops(clk_get_parent(c), + &gk20a_clk_export_ops); + + return ret; +} + +int gk20a_init_clk_support(struct gk20a *g) +{ + struct clk_gk20a *clk = &g->clk; + u32 err; + + gk20a_dbg_fn(""); + + clk->g = g; + + err = gk20a_init_clk_reset_enable_hw(g); + if (err) + return err; + + err = gk20a_init_clk_setup_sw(g); + if (err) + return err; + + mutex_lock(&clk->clk_mutex); + clk->clk_hw_on = true; + + err = gk20a_init_clk_setup_hw(g); + mutex_unlock(&clk->clk_mutex); + if (err) + return err; + + err = gk20a_clk_register_export_ops(g); + if (err) + return err; + + /* FIXME: this effectively prevents host level clock gating */ + err = clk_enable(g->clk.tegra_clk); + if (err) + return err; + + /* The prev call may not enable PLL if gbus is unbalanced - force it */ + mutex_lock(&clk->clk_mutex); + err = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq); + mutex_unlock(&clk->clk_mutex); + if (err) + return err; + + return err; +} + +unsigned long gk20a_clk_get_rate(struct gk20a *g) +{ + struct clk_gk20a *clk = &g->clk; + return rate_gpc2clk_to_gpu(clk->gpc_pll.freq); +} + +long gk20a_clk_round_rate(struct gk20a *g, unsigned long rate) +{ + /* make sure the clock is available */ + if (!gk20a_clk_get(g)) + return rate; + + return clk_round_rate(clk_get_parent(g->clk.tegra_clk), rate); +} + +int gk20a_clk_set_rate(struct gk20a *g, unsigned long rate) +{ + return clk_set_rate(g->clk.tegra_clk, rate); +} + +int gk20a_suspend_clk_support(struct gk20a *g) +{ + int ret; + + clk_disable(g->clk.tegra_clk); + + /* The prev call may not disable PLL if gbus is unbalanced - force it */ 
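+	/*
+	 * clk_disable() above only drops this driver's vote on the shared
+	 * Tegra gbus clock; if the bus enable count is unbalanced the
+	 * GPCPLL can stay running.  Forcing clk_disable_gpcpll() below
+	 * (under clk_mutex) and clearing clk_hw_on makes sure the PLL is
+	 * off and that later set_pll_freq() calls skip touching the
+	 * hardware until resume turns it back on.
+	 */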
+ mutex_lock(&g->clk.clk_mutex); + ret = clk_disable_gpcpll(g, 1); + g->clk.clk_hw_on = false; + mutex_unlock(&g->clk.clk_mutex); + return ret; +} + +#ifdef CONFIG_DEBUG_FS + +static int rate_get(void *data, u64 *val) +{ + struct gk20a *g = (struct gk20a *)data; + *val = (u64)gk20a_clk_get_rate(g); + return 0; +} +static int rate_set(void *data, u64 val) +{ + struct gk20a *g = (struct gk20a *)data; + return gk20a_clk_set_rate(g, (u32)val); +} +DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n"); + +static int pll_reg_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + u32 reg, m, n, pl, f; + + mutex_lock(&g->clk.clk_mutex); + if (!g->clk.clk_hw_on) { + seq_printf(s, "gk20a powered down - no access to registers\n"); + mutex_unlock(&g->clk.clk_mutex); + return 0; + } + + reg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); + seq_printf(s, "cfg = 0x%x : %s : %s\n", reg, + trim_sys_gpcpll_cfg_enable_v(reg) ? "enabled" : "disabled", + trim_sys_gpcpll_cfg_pll_lock_v(reg) ? "locked" : "unlocked"); + + reg = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); + m = trim_sys_gpcpll_coeff_mdiv_v(reg); + n = trim_sys_gpcpll_coeff_ndiv_v(reg); + pl = trim_sys_gpcpll_coeff_pldiv_v(reg); + f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div[pl]); + seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl); + seq_printf(s, " : pll_f(gpu_f) = %u(%u) MHz\n", f, f/2); + mutex_unlock(&g->clk.clk_mutex); + return 0; +} + +static int pll_reg_open(struct inode *inode, struct file *file) +{ + return single_open(file, pll_reg_show, inode->i_private); +} + +static const struct file_operations pll_reg_fops = { + .open = pll_reg_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int monitor_get(void *data, u64 *val) +{ + struct gk20a *g = (struct gk20a *)data; + struct clk_gk20a *clk = &g->clk; + int err; + + u32 ncycle = 100; /* count GPCCLK for ncycle of clkin */ + u32 clkin = clk->gpc_pll.clk_in; + u32 count1, count2; + + err = gk20a_busy(g->dev); + if (err) + return err; + + gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0), + trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f()); + gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0), + trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f() | + trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f() | + trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(ncycle)); + /* start */ + + /* It should take about 8us to finish 100 cycle of 12MHz. + But longer than 100us delay is required here. 
*/ + gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0)); + udelay(2000); + + count1 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0)); + udelay(100); + count2 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0)); + *val = (u64)(trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(count2) * clkin / ncycle); + gk20a_idle(g->dev); + + if (count1 != count2) + return -EBUSY; + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n"); + +int clk_gk20a_debugfs_init(struct platform_device *dev) +{ + struct dentry *d; + struct gk20a_platform *platform = platform_get_drvdata(dev); + struct gk20a *g = get_gk20a(dev); + + d = debugfs_create_file( + "rate", S_IRUGO|S_IWUSR, platform->debugfs, g, &rate_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "pll_reg", S_IRUGO, platform->debugfs, g, &pll_reg_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "monitor", S_IRUGO, platform->debugfs, g, &monitor_fops); + if (!d) + goto err_out; + + return 0; + +err_out: + pr_err("%s: Failed to make debugfs node\n", __func__); + debugfs_remove_recursive(platform->debugfs); + return -ENOMEM; +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/nvgpu/gk20a/clk_gk20a.h b/drivers/gpu/nvgpu/gk20a/clk_gk20a.h new file mode 100644 index 00000000..d2665259 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/clk_gk20a.h @@ -0,0 +1,94 @@ +/* + * drivers/video/tegra/host/gk20a/clk_gk20a.h + * + * GK20A Graphics + * + * Copyright (c) 2011 - 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ +#ifndef _NVHOST_CLK_GK20A_H_ +#define _NVHOST_CLK_GK20A_H_ + +#include + +#define GPUFREQ_TABLE_END ~(u32)1 +enum { + /* only one PLL for gk20a */ + GK20A_GPC_PLL = 0, +}; + +struct pll { + u32 id; + u32 clk_in; /* MHz */ + u32 M; + u32 N; + u32 PL; + u32 freq; /* MHz */ + bool enabled; +}; + +struct pll_parms { + u32 min_freq, max_freq; /* MHz */ + u32 min_vco, max_vco; /* MHz */ + u32 min_u, max_u; /* MHz */ + u32 min_M, max_M; + u32 min_N, max_N; + u32 min_PL, max_PL; +}; + +struct clk_gk20a { + struct gk20a *g; + struct clk *tegra_clk; + struct pll gpc_pll; + u32 pll_delay; /* default PLL settle time */ + struct mutex clk_mutex; + bool sw_ready; + bool clk_hw_on; +}; + +struct gpufreq_table_data { + unsigned int index; + unsigned int frequency; /* MHz */ +}; + +struct gpufreq_table_data *tegra_gpufreq_table_get(void); + +unsigned int tegra_gpufreq_table_size_get(void); + +int gk20a_init_clk_support(struct gk20a *g); + +unsigned long gk20a_clk_get_rate(struct gk20a *g); +int gk20a_clk_set_rate(struct gk20a *g, unsigned long rate); +int gk20a_suspend_clk_support(struct gk20a *g); +struct clk *gk20a_clk_get(struct gk20a *g); +long gk20a_clk_round_rate(struct gk20a *g, unsigned long rate); + +extern struct pll_parms gpc_pll_params; + +#define KHZ 1000 +#define MHZ 1000000 + +static inline unsigned long rate_gpc2clk_to_gpu(unsigned long rate) +{ + /* convert the MHz gpc2clk frequency to Hz gpcpll frequency */ + return (rate * MHZ) / 2; +} +static inline unsigned long rate_gpu_to_gpc2clk(unsigned long rate) +{ + /* convert the Hz gpcpll frequency to MHz gpc2clk frequency */ + return (rate * 2) / MHZ; +} + +#endif /* _NVHOST_CLK_GK20A_H_ */ diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c new file mode 100644 index 00000000..9128959f --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c @@ -0,0 +1,240 @@ +/* + * GK20A Ctrl + * + * Copyright (c) 2011-2014, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include + +#include "gk20a.h" + +int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp) +{ + int err; + struct gk20a *g; + + gk20a_dbg_fn(""); + + g = container_of(inode->i_cdev, + struct gk20a, ctrl.cdev); + + filp->private_data = g->dev; + + err = gk20a_get_client(g); + if (err) { + gk20a_dbg_fn("fail to get channel!"); + return err; + } + + return 0; +} + +int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp) +{ + struct platform_device *dev = filp->private_data; + + gk20a_dbg_fn(""); + + gk20a_put_client(get_gk20a(dev)); + return 0; +} + +static long +gk20a_ctrl_ioctl_gpu_characteristics( + struct gk20a *g, + struct nvhost_gpu_get_characteristics *request) +{ + struct nvhost_gpu_characteristics *pgpu = &g->gpu_characteristics; + long err = 0; + + if (request->gpu_characteristics_buf_size > 0) { + size_t write_size = sizeof(*pgpu); + + if (write_size > request->gpu_characteristics_buf_size) + write_size = request->gpu_characteristics_buf_size; + + err = copy_to_user((void __user *)(uintptr_t) + request->gpu_characteristics_buf_addr, + pgpu, write_size); + } + + if (err == 0) + request->gpu_characteristics_buf_size = sizeof(*pgpu); + + return err; +} + +long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct platform_device *dev = filp->private_data; + struct gk20a *g = get_gk20a(dev); + struct nvhost_gpu_zcull_get_ctx_size_args *get_ctx_size_args; + struct nvhost_gpu_zcull_get_info_args *get_info_args; + struct nvhost_gpu_zbc_set_table_args *set_table_args; + struct nvhost_gpu_zbc_query_table_args *query_table_args; + u8 buf[NVHOST_GPU_IOCTL_MAX_ARG_SIZE]; + struct gr_zcull_info *zcull_info; + struct zbc_entry *zbc_val; + struct zbc_query_params *zbc_tbl; + int i, err = 0; + + gk20a_dbg_fn(""); + + if ((_IOC_TYPE(cmd) != NVHOST_GPU_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVHOST_GPU_IOCTL_LAST)) + return -EFAULT; + + BUG_ON(_IOC_SIZE(cmd) > NVHOST_GPU_IOCTL_MAX_ARG_SIZE); + + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + if (!g->gr.sw_ready) { + err = gk20a_busy(g->dev); + if (err) + return err; + + gk20a_idle(g->dev); + } + + switch (cmd) { + case NVHOST_GPU_IOCTL_ZCULL_GET_CTX_SIZE: + get_ctx_size_args = (struct nvhost_gpu_zcull_get_ctx_size_args *)buf; + + get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr); + + break; + case NVHOST_GPU_IOCTL_ZCULL_GET_INFO: + get_info_args = (struct nvhost_gpu_zcull_get_info_args *)buf; + + memset(get_info_args, 0, sizeof(struct nvhost_gpu_zcull_get_info_args)); + + zcull_info = kzalloc(sizeof(struct gr_zcull_info), GFP_KERNEL); + if (zcull_info == NULL) + return -ENOMEM; + + err = gr_gk20a_get_zcull_info(g, &g->gr, zcull_info); + if (err) { + kfree(zcull_info); + break; + } + + get_info_args->width_align_pixels = zcull_info->width_align_pixels; + get_info_args->height_align_pixels = zcull_info->height_align_pixels; + get_info_args->pixel_squares_by_aliquots = zcull_info->pixel_squares_by_aliquots; + get_info_args->aliquot_total = zcull_info->aliquot_total; + get_info_args->region_byte_multiplier = zcull_info->region_byte_multiplier; + get_info_args->region_header_size = zcull_info->region_header_size; + get_info_args->subregion_header_size = zcull_info->subregion_header_size; + get_info_args->subregion_width_align_pixels = zcull_info->subregion_width_align_pixels; + get_info_args->subregion_height_align_pixels = 
zcull_info->subregion_height_align_pixels; + get_info_args->subregion_count = zcull_info->subregion_count; + + kfree(zcull_info); + break; + case NVHOST_GPU_IOCTL_ZBC_SET_TABLE: + set_table_args = (struct nvhost_gpu_zbc_set_table_args *)buf; + + zbc_val = kzalloc(sizeof(struct zbc_entry), GFP_KERNEL); + if (zbc_val == NULL) + return -ENOMEM; + + zbc_val->format = set_table_args->format; + zbc_val->type = set_table_args->type; + + switch (zbc_val->type) { + case GK20A_ZBC_TYPE_COLOR: + for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { + zbc_val->color_ds[i] = set_table_args->color_ds[i]; + zbc_val->color_l2[i] = set_table_args->color_l2[i]; + } + break; + case GK20A_ZBC_TYPE_DEPTH: + zbc_val->depth = set_table_args->depth; + break; + default: + err = -EINVAL; + } + + if (!err) { + gk20a_busy(dev); + err = gk20a_gr_zbc_set_table(g, &g->gr, zbc_val); + gk20a_idle(dev); + } + + if (zbc_val) + kfree(zbc_val); + break; + case NVHOST_GPU_IOCTL_ZBC_QUERY_TABLE: + query_table_args = (struct nvhost_gpu_zbc_query_table_args *)buf; + + zbc_tbl = kzalloc(sizeof(struct zbc_query_params), GFP_KERNEL); + if (zbc_tbl == NULL) + return -ENOMEM; + + zbc_tbl->type = query_table_args->type; + zbc_tbl->index_size = query_table_args->index_size; + + err = gr_gk20a_query_zbc(g, &g->gr, zbc_tbl); + + if (!err) { + switch (zbc_tbl->type) { + case GK20A_ZBC_TYPE_COLOR: + for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { + query_table_args->color_ds[i] = zbc_tbl->color_ds[i]; + query_table_args->color_l2[i] = zbc_tbl->color_l2[i]; + } + break; + case GK20A_ZBC_TYPE_DEPTH: + query_table_args->depth = zbc_tbl->depth; + break; + case GK20A_ZBC_TYPE_INVALID: + query_table_args->index_size = zbc_tbl->index_size; + break; + default: + err = -EINVAL; + } + if (!err) { + query_table_args->format = zbc_tbl->format; + query_table_args->ref_cnt = zbc_tbl->ref_cnt; + } + } + + if (zbc_tbl) + kfree(zbc_tbl); + break; + + case NVHOST_GPU_IOCTL_GET_CHARACTERISTICS: + err = gk20a_ctrl_ioctl_gpu_characteristics( + g, (struct nvhost_gpu_get_characteristics *)buf); + break; + + default: + gk20a_err(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); + err = -ENOTTY; + break; + } + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); + + return err; +} + diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h new file mode 100644 index 00000000..ac9c253e --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h @@ -0,0 +1,28 @@ +/* + * drivers/video/tegra/host/gk20a/gk20a_ctrl.h + * + * GK20A Ctrl + * + * Copyright (c) 2011-2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ +#ifndef _NVHOST_GK20A_CTRL_H_ +#define _NVHOST_GK20A_CTRL_H_ + +int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp); +int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp); +long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); + +#endif /* _NVHOST_GK20A_CTRL_H_ */ diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c new file mode 100644 index 00000000..da7d733e --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -0,0 +1,699 @@ +/* + * Tegra GK20A GPU Debugger/Profiler Driver + * + * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include "gk20a.h" +#include "gr_gk20a.h" +#include "dbg_gpu_gk20a.h" +#include "regops_gk20a.h" +#include "hw_therm_gk20a.h" + +struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = { + .exec_reg_ops = exec_regops_gk20a, +}; + +/* silly allocator - just increment session id */ +static atomic_t session_id = ATOMIC_INIT(0); +static int generate_session_id(void) +{ + return atomic_add_return(1, &session_id); +} + +static int alloc_session(struct dbg_session_gk20a **_dbg_s) +{ + struct dbg_session_gk20a *dbg_s; + *_dbg_s = NULL; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); + + dbg_s = kzalloc(sizeof(*dbg_s), GFP_KERNEL); + if (!dbg_s) + return -ENOMEM; + + dbg_s->id = generate_session_id(); + dbg_s->ops = &dbg_gpu_session_ops_gk20a; + *_dbg_s = dbg_s; + return 0; +} + +int gk20a_dbg_gpu_do_dev_open(struct inode *inode, struct file *filp, bool is_profiler) +{ + struct dbg_session_gk20a *dbg_session; + struct gk20a *g; + + struct platform_device *pdev; + struct device *dev; + + int err; + + if (!is_profiler) + g = container_of(inode->i_cdev, + struct gk20a, dbg.cdev); + else + g = container_of(inode->i_cdev, + struct gk20a, prof.cdev); + pdev = g->dev; + dev = &pdev->dev; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", dev_name(dev)); + + err = alloc_session(&dbg_session); + if (err) + return err; + + filp->private_data = dbg_session; + dbg_session->pdev = pdev; + dbg_session->dev = dev; + dbg_session->g = g; + dbg_session->is_profiler = is_profiler; + dbg_session->is_pg_disabled = false; + + INIT_LIST_HEAD(&dbg_session->dbg_s_list_node); + init_waitqueue_head(&dbg_session->dbg_events.wait_queue); + dbg_session->dbg_events.events_enabled = false; + dbg_session->dbg_events.num_pending_events = 0; + + return 0; +} + +/* used in scenarios where the debugger session can take just the inter-session + * lock for performance, but the profiler session must take the per-gpu lock + * since it might not have an associated channel. 
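+ * Here the channel's dbg_s_lock is that narrower inter-session lock: it
+ * also guards the channel's dbg_s_list of bound sessions (see
+ * gk20a_dbg_gpu_post_events below).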
*/ +static void gk20a_dbg_session_mutex_lock(struct dbg_session_gk20a *dbg_s) +{ + if (dbg_s->is_profiler) + mutex_lock(&dbg_s->g->dbg_sessions_lock); + else + mutex_lock(&dbg_s->ch->dbg_s_lock); +} + +static void gk20a_dbg_session_mutex_unlock(struct dbg_session_gk20a *dbg_s) +{ + if (dbg_s->is_profiler) + mutex_unlock(&dbg_s->g->dbg_sessions_lock); + else + mutex_unlock(&dbg_s->ch->dbg_s_lock); +} + +static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s) +{ + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); + + gk20a_dbg_session_mutex_lock(dbg_s); + + dbg_s->dbg_events.events_enabled = true; + dbg_s->dbg_events.num_pending_events = 0; + + gk20a_dbg_session_mutex_unlock(dbg_s); +} + +static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s) +{ + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); + + gk20a_dbg_session_mutex_lock(dbg_s); + + dbg_s->dbg_events.events_enabled = false; + dbg_s->dbg_events.num_pending_events = 0; + + gk20a_dbg_session_mutex_unlock(dbg_s); +} + +static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s) +{ + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); + + gk20a_dbg_session_mutex_lock(dbg_s); + + if (dbg_s->dbg_events.events_enabled && + dbg_s->dbg_events.num_pending_events > 0) + dbg_s->dbg_events.num_pending_events--; + + gk20a_dbg_session_mutex_unlock(dbg_s); +} + +static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s, + struct nvhost_dbg_gpu_events_ctrl_args *args) +{ + int ret = 0; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd); + + if (!dbg_s->ch) { + gk20a_err(dev_from_gk20a(dbg_s->g), + "no channel bound to dbg session\n"); + return -EINVAL; + } + + switch (args->cmd) { + case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_ENABLE: + gk20a_dbg_gpu_events_enable(dbg_s); + break; + + case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_DISABLE: + gk20a_dbg_gpu_events_disable(dbg_s); + break; + + case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_CLEAR: + gk20a_dbg_gpu_events_clear(dbg_s); + break; + + default: + gk20a_err(dev_from_gk20a(dbg_s->g), + "unrecognized dbg gpu events ctrl cmd: 0x%x", + args->cmd); + ret = -EINVAL; + break; + } + + return ret; +} + +unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait) +{ + unsigned int mask = 0; + struct dbg_session_gk20a *dbg_s = filep->private_data; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); + + poll_wait(filep, &dbg_s->dbg_events.wait_queue, wait); + + gk20a_dbg_session_mutex_lock(dbg_s); + + if (dbg_s->dbg_events.events_enabled && + dbg_s->dbg_events.num_pending_events > 0) { + gk20a_dbg(gpu_dbg_gpu_dbg, "found pending event on session id %d", + dbg_s->id); + gk20a_dbg(gpu_dbg_gpu_dbg, "%d events pending", + dbg_s->dbg_events.num_pending_events); + mask = (POLLPRI | POLLIN); + } + + gk20a_dbg_session_mutex_unlock(dbg_s); + + return mask; +} + +int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp) +{ + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); + return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */); +} + +int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp) +{ + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); + return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */); +} + +void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch) +{ + struct dbg_session_gk20a *dbg_s; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); + + /* guard against the session list being modified */ + mutex_lock(&ch->dbg_s_lock); + + list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) { + if 
(dbg_s->dbg_events.events_enabled) { + gk20a_dbg(gpu_dbg_gpu_dbg, "posting event on session id %d", + dbg_s->id); + gk20a_dbg(gpu_dbg_gpu_dbg, "%d events pending", + dbg_s->dbg_events.num_pending_events); + + dbg_s->dbg_events.num_pending_events++; + + wake_up_interruptible_all(&dbg_s->dbg_events.wait_queue); + } + } + + mutex_unlock(&ch->dbg_s_lock); +} + + +static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s, + __u32 powermode); + +static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s) +{ + struct channel_gk20a *ch_gk20a = dbg_s->ch; + struct gk20a *g = dbg_s->g; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); + + /* wasn't bound to start with ? */ + if (!ch_gk20a) { + gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "not bound already?"); + return -ENODEV; + } + + mutex_lock(&g->dbg_sessions_lock); + mutex_lock(&ch_gk20a->dbg_s_lock); + + --g->dbg_sessions; + + /* Powergate enable is called here as possibility of dbg_session + * which called powergate disable ioctl, to be killed without calling + * powergate enable ioctl + */ + dbg_set_powergate(dbg_s, NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE); + + dbg_s->ch = NULL; + fput(dbg_s->ch_f); + dbg_s->ch_f = NULL; + + list_del_init(&dbg_s->dbg_s_list_node); + + mutex_unlock(&ch_gk20a->dbg_s_lock); + mutex_unlock(&g->dbg_sessions_lock); + + return 0; +} + +int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp) +{ + struct dbg_session_gk20a *dbg_s = filp->private_data; + + gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", dev_name(dbg_s->dev)); + + /* unbind if it was bound */ + if (!dbg_s->ch) + return 0; + dbg_unbind_channel_gk20a(dbg_s); + + kfree(dbg_s); + return 0; +} + +static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s, + struct nvhost_dbg_gpu_bind_channel_args *args) +{ + struct file *f; + struct gk20a *g; + struct channel_gk20a *ch; + + gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d", + dev_name(dbg_s->dev), args->channel_fd); + + if (args->channel_fd == ~0) + return dbg_unbind_channel_gk20a(dbg_s); + + /* even though get_file_channel is doing this it releases it as well */ + /* by holding it here we'll keep it from disappearing while the + * debugger is in session */ + f = fget(args->channel_fd); + if (!f) + return -ENODEV; + + ch = gk20a_get_channel_from_file(args->channel_fd); + if (!ch) { + gk20a_dbg_fn("no channel found for fd"); + fput(f); + return -EINVAL; + } + + g = dbg_s->g; + gk20a_dbg_fn("%s hwchid=%d", dev_name(dbg_s->dev), ch->hw_chid); + + mutex_lock(&g->dbg_sessions_lock); + mutex_lock(&ch->dbg_s_lock); + + dbg_s->ch_f = f; + dbg_s->ch = ch; + list_add(&dbg_s->dbg_s_list_node, &dbg_s->ch->dbg_s_list); + + g->dbg_sessions++; + + mutex_unlock(&ch->dbg_s_lock); + mutex_unlock(&g->dbg_sessions_lock); + return 0; +} + +static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, + struct nvhost_dbg_gpu_exec_reg_ops_args *args); + +static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, + struct nvhost_dbg_gpu_powergate_args *args); + +static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, + struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args); + +long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct dbg_session_gk20a *dbg_s = filp->private_data; + struct gk20a *g = get_gk20a(dbg_s->pdev); + u8 buf[NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE]; + int err = 0; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); + + if ((_IOC_TYPE(cmd) != NVHOST_DBG_GPU_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) 
> NVHOST_DBG_GPU_IOCTL_LAST)) + return -EFAULT; + + BUG_ON(_IOC_SIZE(cmd) > NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE); + + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + if (!g->gr.sw_ready) { + err = gk20a_busy(g->dev); + if (err) + return err; + + gk20a_idle(g->dev); + } + + switch (cmd) { + case NVHOST_DBG_GPU_IOCTL_BIND_CHANNEL: + err = dbg_bind_channel_gk20a(dbg_s, + (struct nvhost_dbg_gpu_bind_channel_args *)buf); + gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err); + break; + + case NVHOST_DBG_GPU_IOCTL_REG_OPS: + err = nvhost_ioctl_channel_reg_ops(dbg_s, + (struct nvhost_dbg_gpu_exec_reg_ops_args *)buf); + gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err); + break; + + case NVHOST_DBG_GPU_IOCTL_POWERGATE: + err = nvhost_ioctl_powergate_gk20a(dbg_s, + (struct nvhost_dbg_gpu_powergate_args *)buf); + gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err); + break; + + case NVHOST_DBG_GPU_IOCTL_EVENTS_CTRL: + err = gk20a_dbg_gpu_events_ctrl(dbg_s, + (struct nvhost_dbg_gpu_events_ctrl_args *)buf); + break; + + case NVHOST_DBG_GPU_IOCTL_SMPC_CTXSW_MODE: + err = nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s, + (struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *)buf); + break; + + default: + gk20a_err(dev_from_gk20a(g), + "unrecognized dbg gpu ioctl cmd: 0x%x", + cmd); + err = -ENOTTY; + break; + } + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *)arg, + buf, _IOC_SIZE(cmd)); + + return err; +} + +/* In order to perform a context relative op the context has + * to be created already... which would imply that the + * context switch mechanism has already been put in place. + * So by the time we perform such an opertation it should always + * be possible to query for the appropriate context offsets, etc. + * + * But note: while the dbg_gpu bind requires the a channel fd, + * it doesn't require an allocated gr/compute obj at that point... 
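+ * Accordingly, gr_context_info_available() below only checks that the
+ * golden context image has been initialized.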
+ */ +static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s, + struct gr_gk20a *gr) +{ + int err; + + mutex_lock(&gr->ctx_mutex); + err = !gr->ctx_vars.golden_image_initialized; + mutex_unlock(&gr->ctx_mutex); + if (err) + return false; + return true; + +} + +static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, + struct nvhost_dbg_gpu_exec_reg_ops_args *args) +{ + int err; + struct device *dev = dbg_s->dev; + struct gk20a *g = get_gk20a(dbg_s->pdev); + struct nvhost_dbg_gpu_reg_op *ops; + u64 ops_size = sizeof(ops[0]) * args->num_ops; + + gk20a_dbg_fn("%d ops, total size %llu", args->num_ops, ops_size); + + if (!dbg_s->ops) { + gk20a_err(dev, "can't call reg_ops on an unbound debugger session"); + return -EINVAL; + } + + if (!dbg_s->is_profiler && !dbg_s->ch) { + gk20a_err(dev, "bind a channel before regops for a debugging session"); + return -EINVAL; + } + + /* be sure that ctx info is in place */ + if (!gr_context_info_available(dbg_s, &g->gr)) { + gk20a_err(dev, "gr context data not available\n"); + return -ENODEV; + } + + ops = kzalloc(ops_size, GFP_KERNEL); + if (!ops) { + gk20a_err(dev, "Allocating memory failed!"); + return -ENOMEM; + } + + gk20a_dbg_fn("Copying regops from userspace"); + + if (copy_from_user(ops, (void *)(uintptr_t)args->ops, ops_size)) { + dev_err(dev, "copy_from_user failed!"); + err = -EFAULT; + goto clean_up; + } + + /* since exec_reg_ops sends methods to the ucode, it must take the + * global gpu lock to protect against mixing methods from debug sessions + * on other channels */ + mutex_lock(&g->dbg_sessions_lock); + + err = dbg_s->ops->exec_reg_ops(dbg_s, ops, args->num_ops); + + mutex_unlock(&g->dbg_sessions_lock); + + if (err) { + gk20a_err(dev, "dbg regops failed"); + goto clean_up; + } + + gk20a_dbg_fn("Copying result to userspace"); + + if (copy_to_user((void *)(uintptr_t)args->ops, ops, ops_size)) { + dev_err(dev, "copy_to_user failed!"); + err = -EFAULT; + goto clean_up; + } + return 0; + clean_up: + kfree(ops); + return err; +} + +static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s, + __u32 powermode) +{ + int err = 0; + struct gk20a *g = get_gk20a(dbg_s->pdev); + + /* This function must be called with g->dbg_sessions_lock held */ + + gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %d", + dev_name(dbg_s->dev), powermode); + + switch (powermode) { + case NVHOST_DBG_GPU_POWERGATE_MODE_DISABLE: + /* save off current powergate, clk state. + * set gpu module's can_powergate = 0. + * set gpu module's clk to max. + * while *a* debug session is active there will be no power or + * clocking state changes allowed from mainline code (but they + * should be saved). 
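+ * Concretely, the code below takes busy references and turns off SLCG,
+ * BLCG and ELCG clock gating as well as ELPG power gating, so the GPU
+ * stays fully clocked and powered while the disable refcount is held.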
+ */ + /* Allow powergate disable if the current dbg_session doesn't + * call a powergate disable ioctl and the global + * powergating_disabled_refcount is zero + */ + + if ((dbg_s->is_pg_disabled == false) && + (g->dbg_powergating_disabled_refcount++ == 0)) { + + gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "module busy"); + gk20a_busy(g->dev); + gk20a_channel_busy(dbg_s->pdev); + + g->ops.clock_gating.slcg_gr_load_gating_prod(g, + false); + g->ops.clock_gating.slcg_perf_load_gating_prod(g, + false); + gr_gk20a_init_blcg_mode(g, BLCG_RUN, ENGINE_GR_GK20A); + + g->elcg_enabled = false; + gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); + gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); + + gk20a_pmu_disable_elpg(g); + } + + dbg_s->is_pg_disabled = true; + break; + + case NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE: + /* restore (can) powergate, clk state */ + /* release pending exceptions to fault/be handled as usual */ + /*TBD: ordering of these? */ + + /* Re-enabling powergate as no other sessions want + * powergate disabled and the current dbg-sessions had + * requested the powergate disable through ioctl + */ + if (dbg_s->is_pg_disabled && + --g->dbg_powergating_disabled_refcount == 0) { + + g->elcg_enabled = true; + gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A); + gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A); + gr_gk20a_init_blcg_mode(g, BLCG_AUTO, ENGINE_GR_GK20A); + + g->ops.clock_gating.slcg_gr_load_gating_prod(g, + g->slcg_enabled); + g->ops.clock_gating.slcg_perf_load_gating_prod(g, + g->slcg_enabled); + + gk20a_pmu_enable_elpg(g); + + gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "module idle"); + gk20a_channel_idle(dbg_s->pdev); + gk20a_idle(g->dev); + } + + dbg_s->is_pg_disabled = false; + break; + + default: + gk20a_err(dev_from_gk20a(g), + "unrecognized dbg gpu powergate mode: 0x%x", + powermode); + err = -ENOTTY; + break; + } + + return err; +} + +static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, + struct nvhost_dbg_gpu_powergate_args *args) +{ + int err; + struct gk20a *g = get_gk20a(dbg_s->pdev); + gk20a_dbg_fn("%s powergate mode = %d", + dev_name(dbg_s->dev), args->mode); + + mutex_lock(&g->dbg_sessions_lock); + err = dbg_set_powergate(dbg_s, args->mode); + mutex_unlock(&g->dbg_sessions_lock); + return err; +} + +static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, + struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args) +{ + int err; + struct gk20a *g = get_gk20a(dbg_s->pdev); + struct channel_gk20a *ch_gk20a; + + gk20a_dbg_fn("%s smpc ctxsw mode = %d", + dev_name(dbg_s->dev), args->mode); + + /* Take the global lock, since we'll be doing global regops */ + mutex_lock(&g->dbg_sessions_lock); + + ch_gk20a = dbg_s->ch; + + if (!ch_gk20a) { + gk20a_err(dev_from_gk20a(dbg_s->g), + "no bound channel for smpc ctxsw mode update\n"); + err = -EINVAL; + goto clean_up; + } + + err = gr_gk20a_update_smpc_ctxsw_mode(g, ch_gk20a, + args->mode == NVHOST_DBG_GPU_SMPC_CTXSW_MODE_CTXSW); + if (err) { + gk20a_err(dev_from_gk20a(dbg_s->g), + "error (%d) during smpc ctxsw mode update\n", err); + goto clean_up; + } + /* The following regops are a hack/war to make up for the fact that we + * just scribbled into the ctxsw image w/o really knowing whether + * it was already swapped out in/out once or not, etc. 
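+ * The block below issues four REG_OP_WRITE_32 context regops against
+ * hard-coded SM DSM perf-counter control offsets (the intended register
+ * names are given in the comments next to each offset).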
+ */ + { + struct nvhost_dbg_gpu_reg_op ops[4]; + int i; + for (i = 0; i < ARRAY_SIZE(ops); i++) { + ops[i].op = NVHOST_DBG_GPU_REG_OP_WRITE_32; + ops[i].type = NVHOST_DBG_GPU_REG_OP_TYPE_GR_CTX; + ops[i].status = NVHOST_DBG_GPU_REG_OP_STATUS_SUCCESS; + ops[i].value_hi = 0; + ops[i].and_n_mask_lo = 0; + ops[i].and_n_mask_hi = 0; + } + /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control_sel1_r();*/ + ops[0].offset = 0x00419e08; + ops[0].value_lo = 0x1d; + + /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control5_r(); */ + ops[1].offset = 0x00419e58; + ops[1].value_lo = 0x1; + + /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control3_r(); */ + ops[2].offset = 0x00419e68; + ops[2].value_lo = 0xaaaa; + + /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter4_control_r(); */ + ops[3].offset = 0x00419f40; + ops[3].value_lo = 0x18; + + err = dbg_s->ops->exec_reg_ops(dbg_s, ops, ARRAY_SIZE(ops)); + } + + clean_up: + mutex_unlock(&g->dbg_sessions_lock); + return err; +} diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h new file mode 100644 index 00000000..49827608 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h @@ -0,0 +1,83 @@ +/* + * Tegra GK20A GPU Debugger Driver + * + * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __DBG_GPU_GK20A_H_ +#define __DBG_GPU_GK20A_H_ +#include + +/* module debug driver interface */ +int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp); +int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp); +long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait); + +/* used by profiler driver interface */ +int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp); + +/* used by the interrupt handler to post events */ +void gk20a_dbg_gpu_post_events(struct channel_gk20a *fault_ch); + +struct dbg_gpu_session_ops { + int (*exec_reg_ops)(struct dbg_session_gk20a *dbg_s, + struct nvhost_dbg_gpu_reg_op *ops, + u64 num_ops); +}; + +struct dbg_gpu_session_events { + wait_queue_head_t wait_queue; + bool events_enabled; + int num_pending_events; +}; + +struct dbg_session_gk20a { + /* dbg session id used for trace/prints */ + int id; + + /* profiler session, if any */ + bool is_profiler; + + /* power enabled or disabled */ + bool is_pg_disabled; + + /* + * There can be different versions of the whitelists + * between both global and per-context sets; as well + * as between debugger and profiler interfaces. 
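+ * The global and per_context pointers below hold the whitelist variants
+ * used by this session.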
+ */ + struct regops_whitelist *global; + struct regops_whitelist *per_context; + + /* gpu module vagaries */ + struct device *dev; + struct platform_device *pdev; + struct gk20a *g; + + /* bound channel, if any */ + struct file *ch_f; + struct channel_gk20a *ch; + + /* session operations */ + struct dbg_gpu_session_ops *ops; + + /* event support */ + struct dbg_gpu_session_events dbg_events; + struct list_head dbg_s_list_node; +}; + +extern struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a; + +#endif /* __DBG_GPU_GK20A_H_ */ diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c new file mode 100644 index 00000000..c5b6953c --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c @@ -0,0 +1,295 @@ +/* + * drivers/video/tegra/host/t20/debug_gk20a.c + * + * Copyright (C) 2011-2014 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include + +#include + +#include "gk20a.h" +#include "debug_gk20a.h" + +#include "hw_ram_gk20a.h" +#include "hw_fifo_gk20a.h" +#include "hw_ccsr_gk20a.h" +#include "hw_pbdma_gk20a.h" + +unsigned int gk20a_debug_trace_cmdbuf; +struct platform_device *gk20a_device; + +struct gk20a_debug_output { + void (*fn)(void *ctx, const char *str, size_t len); + void *ctx; + char buf[256]; +}; + +static const char * const ccsr_chan_status_str[] = { + "idle", + "pending", + "pending_ctx_reload", + "pending_acquire", + "pending_acq_ctx_reload", + "on_pbdma", + "on_pbdma_and_eng", + "on_eng", + "on_eng_pending_acquire", + "on_eng_pending", + "on_pbdma_ctx_reload", + "on_pbdma_and_eng_ctx_reload", + "on_eng_ctx_reload", + "on_eng_pending_ctx_reload", + "on_eng_pending_acq_ctx_reload", +}; + +static const char * const chan_status_str[] = { + "invalid", + "valid", + "chsw_load", + "chsw_save", + "chsw_switch", +}; + +static const char * const ctx_status_str[] = { + "invalid", + "valid", + NULL, + NULL, + NULL, + "ctxsw_load", + "ctxsw_save", + "ctxsw_switch", +}; + +static inline void gk20a_debug_write_printk(void *ctx, const char *str, + size_t len) +{ + pr_info("%s", str); +} + +static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str, + size_t len) +{ + seq_write((struct seq_file *)ctx, str, len); +} + +void gk20a_debug_output(struct gk20a_debug_output *o, const char *fmt, ...) 
+{ + va_list args; + int len; + + va_start(args, fmt); + len = vsnprintf(o->buf, sizeof(o->buf), fmt, args); + va_end(args); + o->fn(o->ctx, o->buf, len); +} + +static void gk20a_debug_show_channel(struct gk20a *g, + struct gk20a_debug_output *o, + struct channel_gk20a *ch) +{ + u32 channel = gk20a_readl(g, ccsr_channel_r(ch->hw_chid)); + u32 status = ccsr_channel_status_v(channel); + u32 syncpointa, syncpointb; + void *inst_ptr; + + inst_ptr = ch->inst_block.cpuva; + if (!inst_ptr) + return; + + syncpointa = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointa_w()); + syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w()); + + gk20a_debug_output(o, "%d-%s, pid %d: ", ch->hw_chid, + ch->g->dev->name, + ch->pid); + gk20a_debug_output(o, "%s in use %s %s\n", + ccsr_channel_enable_v(channel) ? "" : "not", + ccsr_chan_status_str[status], + ccsr_channel_busy_v(channel) ? "busy" : "not busy"); + gk20a_debug_output(o, "TOP: %016llx PUT: %016llx GET: %016llx " + "FETCH: %016llx\nHEADER: %08x COUNT: %08x\n" + "SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n", + (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_top_level_get_w()) + + ((u64)gk20a_mem_rd32(inst_ptr, + ram_fc_pb_top_level_get_hi_w()) << 32ULL), + (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_w()) + + ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_hi_w()) << 32ULL), + (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_w()) + + ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_hi_w()) << 32ULL), + (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_w()) + + ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_hi_w()) << 32ULL), + gk20a_mem_rd32(inst_ptr, ram_fc_pb_header_w()), + gk20a_mem_rd32(inst_ptr, ram_fc_pb_count_w()), + syncpointa, + syncpointb, + gk20a_mem_rd32(inst_ptr, ram_fc_semaphorea_w()), + gk20a_mem_rd32(inst_ptr, ram_fc_semaphoreb_w()), + gk20a_mem_rd32(inst_ptr, ram_fc_semaphorec_w()), + gk20a_mem_rd32(inst_ptr, ram_fc_semaphored_w())); + + if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v()) + && (pbdma_syncpointb_wait_switch_v(syncpointb) == + pbdma_syncpointb_wait_switch_en_v())) + gk20a_debug_output(o, "Waiting on syncpt %u (%s) val %u\n", + pbdma_syncpointb_syncpt_index_v(syncpointb), + nvhost_syncpt_get_name( + to_platform_device(g->dev->dev.parent), + pbdma_syncpointb_syncpt_index_v(syncpointb)), + pbdma_syncpointa_payload_v(syncpointa)); + + gk20a_debug_output(o, "\n"); +} + +void gk20a_debug_show_dump(struct platform_device *pdev, + struct gk20a_debug_output *o) +{ + struct gk20a_platform *platform = gk20a_get_platform(pdev); + struct gk20a *g = platform->g; + struct fifo_gk20a *f = &g->fifo; + u32 chid; + int i; + + gk20a_busy(g->dev); + for (i = 0; i < fifo_pbdma_status__size_1_v(); i++) { + u32 status = gk20a_readl(g, fifo_pbdma_status_r(i)); + u32 chan_status = fifo_pbdma_status_chan_status_v(status); + + gk20a_debug_output(o, "%s pbdma %d: ", g->dev->name, i); + gk20a_debug_output(o, + "id: %d (%s), next_id: %d (%s) status: %s\n", + fifo_pbdma_status_id_v(status), + fifo_pbdma_status_id_type_v(status) ? + "tsg" : "channel", + fifo_pbdma_status_next_id_v(status), + fifo_pbdma_status_next_id_type_v(status) ? 
+ "tsg" : "channel", + chan_status_str[chan_status]); + gk20a_debug_output(o, "PUT: %016llx GET: %016llx " + "FETCH: %08x HEADER: %08x\n", + (u64)gk20a_readl(g, pbdma_put_r(i)) + + ((u64)gk20a_readl(g, pbdma_put_hi_r(i)) << 32ULL), + (u64)gk20a_readl(g, pbdma_get_r(i)) + + ((u64)gk20a_readl(g, pbdma_get_hi_r(i)) << 32ULL), + gk20a_readl(g, pbdma_gp_fetch_r(i)), + gk20a_readl(g, pbdma_pb_header_r(i))); + } + gk20a_debug_output(o, "\n"); + + for (i = 0; i < fifo_engine_status__size_1_v(); i++) { + u32 status = gk20a_readl(g, fifo_engine_status_r(i)); + u32 ctx_status = fifo_engine_status_ctx_status_v(status); + + gk20a_debug_output(o, "%s eng %d: ", g->dev->name, i); + gk20a_debug_output(o, + "id: %d (%s), next_id: %d (%s), ctx: %s ", + fifo_engine_status_id_v(status), + fifo_engine_status_id_type_v(status) ? + "tsg" : "channel", + fifo_engine_status_next_id_v(status), + fifo_engine_status_next_id_type_v(status) ? + "tsg" : "channel", + ctx_status_str[ctx_status]); + + if (fifo_engine_status_faulted_v(status)) + gk20a_debug_output(o, "faulted "); + if (fifo_engine_status_engine_v(status)) + gk20a_debug_output(o, "busy "); + gk20a_debug_output(o, "\n"); + } + gk20a_debug_output(o, "\n"); + + for (chid = 0; chid < f->num_channels; chid++) { + if (f->channel[chid].in_use) { + struct channel_gk20a *gpu_ch = &f->channel[chid]; + gk20a_debug_show_channel(g, o, gpu_ch); + } + } + gk20a_idle(g->dev); +} + +void gk20a_debug_dump(struct platform_device *pdev) +{ + struct gk20a_platform *platform = gk20a_get_platform(pdev); + struct gk20a_debug_output o = { + .fn = gk20a_debug_write_printk + }; + + if (platform->dump_platform_dependencies) + platform->dump_platform_dependencies(pdev); + + gk20a_debug_show_dump(pdev, &o); +} + +void gk20a_debug_dump_device(struct platform_device *pdev) +{ + struct gk20a_debug_output o = { + .fn = gk20a_debug_write_printk + }; + + /* Dump the first device if no info is provided */ + if (!pdev && gk20a_device) + pdev = gk20a_device; + + gk20a_debug_show_dump(pdev, &o); +} +EXPORT_SYMBOL(gk20a_debug_dump_device); + +static int gk20a_debug_show(struct seq_file *s, void *unused) +{ + struct platform_device *pdev = s->private; + struct gk20a_debug_output o = { + .fn = gk20a_debug_write_to_seqfile, + .ctx = s, + }; + gk20a_debug_show_dump(pdev, &o); + return 0; +} + +static int gk20a_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, gk20a_debug_show, inode->i_private); +} + +static const struct file_operations gk20a_debug_fops = { + .open = gk20a_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void gk20a_debug_init(struct platform_device *pdev) +{ + struct gk20a_platform *platform = platform_get_drvdata(pdev); + + /* Store the first device */ + if (!gk20a_device) + gk20a_device = pdev; + + platform->debugfs = debugfs_create_dir(pdev->name, NULL); + + debugfs_create_file("status", S_IRUGO, platform->debugfs, + pdev, &gk20a_debug_fops); + debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, platform->debugfs, + &gk20a_debug_trace_cmdbuf); + +#if defined(GK20A_DEBUG) + debugfs_create_u32("dbg_mask", S_IRUGO|S_IWUSR, platform->debugfs, + &gk20a_dbg_mask); + debugfs_create_u32("dbg_ftrace", S_IRUGO|S_IWUSR, platform->debugfs, + &gk20a_dbg_ftrace); +#endif +} diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.h b/drivers/gpu/nvgpu/gk20a/debug_gk20a.h new file mode 100644 index 00000000..cd2e09c3 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.h @@ -0,0 +1,25 @@ +/* + * GK20A Debug functionality + * + * 
Copyright (C) 2011-2014 NVIDIA CORPORATION. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _DEBUG_GK20A_H_ +#define _DEBUG_GK20A_H_ + +extern unsigned int gk20a_debug_trace_cmdbuf; + +void gk20a_debug_dump(struct platform_device *pdev); +void gk20a_debug_init(struct platform_device *pdev); + +#endif diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c new file mode 100644 index 00000000..52f2db4d --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c @@ -0,0 +1,37 @@ +/* + * GK20A memory interface + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include + +#include "gk20a.h" +#include "kind_gk20a.h" +#include "hw_mc_gk20a.h" + +static void fb_gk20a_reset(struct gk20a *g) +{ + gk20a_dbg_info("reset gk20a fb"); + + gk20a_reset(g, mc_enable_pfb_enabled_f() + | mc_enable_l2_enabled_f() + | mc_enable_xbar_enabled_f() + | mc_enable_hub_enabled_f()); +} + +void gk20a_init_fb(struct gpu_ops *gops) +{ + gops->fb.reset = fb_gk20a_reset; + gk20a_init_uncompressed_kind_map(); + gk20a_init_kind_attr(); +} diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.h b/drivers/gpu/nvgpu/gk20a/fb_gk20a.h new file mode 100644 index 00000000..34c21c9b --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.h @@ -0,0 +1,21 @@ +/* + * GK20A FB + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _NVHOST_GK20A_FB +#define _NVHOST_GK20A_FB +struct gk20a; + +void gk20a_init_fb(struct gpu_ops *gops); +#endif diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c new file mode 100644 index 00000000..5575b995 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -0,0 +1,1836 @@ +/* + * drivers/video/tegra/host/gk20a/fifo_gk20a.c + * + * GK20A Graphics FIFO (gr host) + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ +#include +#include +#include +#include +#include +#include + +#include "gk20a.h" +#include "debug_gk20a.h" +#include "hw_fifo_gk20a.h" +#include "hw_pbdma_gk20a.h" +#include "hw_ccsr_gk20a.h" +#include "hw_ram_gk20a.h" +#include "hw_proj_gk20a.h" +#include "hw_top_gk20a.h" +#include "hw_mc_gk20a.h" +#include "hw_gr_gk20a.h" + +static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, + u32 hw_chid, bool add, + bool wait_for_finish); +static void gk20a_fifo_handle_mmu_fault_thread(struct work_struct *work); + +/* + * Link engine IDs to MMU IDs and vice versa. + */ + +static inline u32 gk20a_engine_id_to_mmu_id(u32 engine_id) +{ + switch (engine_id) { + case ENGINE_GR_GK20A: + return 0x00; + case ENGINE_CE2_GK20A: + return 0x1b; + default: + return ~0; + } +} + +static inline u32 gk20a_mmu_id_to_engine_id(u32 engine_id) +{ + switch (engine_id) { + case 0x00: + return ENGINE_GR_GK20A; + case 0x1b: + return ENGINE_CE2_GK20A; + default: + return ~0; + } +} + + +static int init_engine_info(struct fifo_gk20a *f) +{ + struct gk20a *g = f->g; + struct device *d = dev_from_gk20a(g); + struct fifo_engine_info_gk20a *gr_info; + const u32 gr_sw_id = ENGINE_GR_GK20A; + u32 i; + u32 max_info_entries = top_device_info__size_1_v(); + + gk20a_dbg_fn(""); + + /* all we really care about finding is the graphics entry */ + /* especially early on in sim it probably thinks it has more */ + f->num_engines = 1; + + gr_info = f->engine_info + gr_sw_id; + + gr_info->sw_id = gr_sw_id; + gr_info->name = "gr"; + gr_info->dev_info_id = top_device_info_type_enum_graphics_v(); + gr_info->mmu_fault_id = fifo_intr_mmu_fault_eng_id_graphics_v(); + gr_info->runlist_id = ~0; + gr_info->pbdma_id = ~0; + gr_info->engine_id = ~0; + + for (i = 0; i < max_info_entries; i++) { + u32 table_entry = gk20a_readl(f->g, top_device_info_r(i)); + u32 entry = top_device_info_entry_v(table_entry); + u32 engine_enum = top_device_info_type_enum_v(table_entry); + u32 table_entry2 = 0; + + if (entry == top_device_info_entry_not_valid_v()) + continue; + + if (top_device_info_chain_v(table_entry) == + top_device_info_chain_enable_v()) { + + table_entry2 = gk20a_readl(f->g, + top_device_info_r(++i)); + + engine_enum = top_device_info_type_enum_v(table_entry2); + } + + /* we only care about GR engine here */ + if (entry == top_device_info_entry_enum_v() && + engine_enum == gr_info->dev_info_id) { + int pbdma_id; + u32 runlist_bit; + + gr_info->runlist_id = + top_device_info_runlist_enum_v(table_entry); + gk20a_dbg_info("gr info: runlist_id %d", gr_info->runlist_id); + + gr_info->engine_id = + top_device_info_engine_enum_v(table_entry); + gk20a_dbg_info("gr info: engine_id %d", gr_info->engine_id); + + runlist_bit = 1 << gr_info->runlist_id; + + for (pbdma_id = 0; pbdma_id < f->num_pbdma; pbdma_id++) { + gk20a_dbg_info("gr info: pbdma_map[%d]=%d", + pbdma_id, f->pbdma_map[pbdma_id]); + if (f->pbdma_map[pbdma_id] & runlist_bit) + break; + } + + if (pbdma_id == f->num_pbdma) { + gk20a_err(d, "busted pbmda map"); + return -EINVAL; + } + gr_info->pbdma_id = pbdma_id; + + 
break; + } + } + + if (gr_info->runlist_id == ~0) { + gk20a_err(d, "busted device info"); + return -EINVAL; + } + + return 0; +} + +void gk20a_remove_fifo_support(struct fifo_gk20a *f) +{ + struct gk20a *g = f->g; + struct device *d = dev_from_gk20a(g); + struct fifo_engine_info_gk20a *engine_info; + struct fifo_runlist_info_gk20a *runlist; + u32 runlist_id; + u32 i; + + gk20a_dbg_fn(""); + + if (f->channel) { + int c; + for (c = 0; c < f->num_channels; c++) { + if (f->channel[c].remove_support) + f->channel[c].remove_support(f->channel+c); + } + kfree(f->channel); + } + if (f->userd.gpu_va) + gk20a_gmmu_unmap(&g->mm.bar1.vm, + f->userd.gpu_va, + f->userd.size, + gk20a_mem_flag_none); + + if (f->userd.sgt) + gk20a_free_sgtable(&f->userd.sgt); + + if (f->userd.cpuva) + dma_free_coherent(d, + f->userd_total_size, + f->userd.cpuva, + f->userd.iova); + f->userd.cpuva = NULL; + f->userd.iova = 0; + + engine_info = f->engine_info + ENGINE_GR_GK20A; + runlist_id = engine_info->runlist_id; + runlist = &f->runlist_info[runlist_id]; + + for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { + if (runlist->mem[i].cpuva) + dma_free_coherent(d, + runlist->mem[i].size, + runlist->mem[i].cpuva, + runlist->mem[i].iova); + runlist->mem[i].cpuva = NULL; + runlist->mem[i].iova = 0; + } + + kfree(runlist->active_channels); + + kfree(f->runlist_info); + kfree(f->pbdma_map); + kfree(f->engine_info); +} + +/* reads info from hardware and fills in pbmda exception info record */ +static inline void get_exception_pbdma_info( + struct gk20a *g, + struct fifo_engine_info_gk20a *eng_info) +{ + struct fifo_pbdma_exception_info_gk20a *e = + &eng_info->pbdma_exception_info; + + u32 pbdma_status_r = e->status_r = gk20a_readl(g, + fifo_pbdma_status_r(eng_info->pbdma_id)); + e->id = fifo_pbdma_status_id_v(pbdma_status_r); /* vs. id_hw_v()? */ + e->id_is_chid = fifo_pbdma_status_id_type_v(pbdma_status_r) == + fifo_pbdma_status_id_type_chid_v(); + e->chan_status_v = fifo_pbdma_status_chan_status_v(pbdma_status_r); + e->next_id_is_chid = + fifo_pbdma_status_next_id_type_v(pbdma_status_r) == + fifo_pbdma_status_next_id_type_chid_v(); + e->next_id = fifo_pbdma_status_next_id_v(pbdma_status_r); + e->chsw_in_progress = + fifo_pbdma_status_chsw_v(pbdma_status_r) == + fifo_pbdma_status_chsw_in_progress_v(); +} + +static void fifo_pbdma_exception_status(struct gk20a *g, + struct fifo_engine_info_gk20a *eng_info) +{ + struct fifo_pbdma_exception_info_gk20a *e; + get_exception_pbdma_info(g, eng_info); + e = &eng_info->pbdma_exception_info; + + gk20a_dbg_fn("pbdma_id %d, " + "id_type %s, id %d, chan_status %d, " + "next_id_type %s, next_id %d, " + "chsw_in_progress %d", + eng_info->pbdma_id, + e->id_is_chid ? "chid" : "tsgid", e->id, e->chan_status_v, + e->next_id_is_chid ? "chid" : "tsgid", e->next_id, + e->chsw_in_progress); +} + +/* reads info from hardware and fills in pbmda exception info record */ +static inline void get_exception_engine_info( + struct gk20a *g, + struct fifo_engine_info_gk20a *eng_info) +{ + struct fifo_engine_exception_info_gk20a *e = + &eng_info->engine_exception_info; + u32 engine_status_r = e->status_r = + gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id)); + e->id = fifo_engine_status_id_v(engine_status_r); /* vs. id_hw_v()? 
*/ + e->id_is_chid = fifo_engine_status_id_type_v(engine_status_r) == + fifo_engine_status_id_type_chid_v(); + e->ctx_status_v = fifo_engine_status_ctx_status_v(engine_status_r); + e->faulted = + fifo_engine_status_faulted_v(engine_status_r) == + fifo_engine_status_faulted_true_v(); + e->idle = + fifo_engine_status_engine_v(engine_status_r) == + fifo_engine_status_engine_idle_v(); + e->ctxsw_in_progress = + fifo_engine_status_ctxsw_v(engine_status_r) == + fifo_engine_status_ctxsw_in_progress_v(); +} + +static void fifo_engine_exception_status(struct gk20a *g, + struct fifo_engine_info_gk20a *eng_info) +{ + struct fifo_engine_exception_info_gk20a *e; + get_exception_engine_info(g, eng_info); + e = &eng_info->engine_exception_info; + + gk20a_dbg_fn("engine_id %d, id_type %s, id %d, ctx_status %d, " + "faulted %d, idle %d, ctxsw_in_progress %d, ", + eng_info->engine_id, e->id_is_chid ? "chid" : "tsgid", + e->id, e->ctx_status_v, + e->faulted, e->idle, e->ctxsw_in_progress); +} + +static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) +{ + struct fifo_engine_info_gk20a *engine_info; + struct fifo_runlist_info_gk20a *runlist; + struct device *d = dev_from_gk20a(g); + u32 runlist_id; + u32 i; + u64 runlist_size; + + gk20a_dbg_fn(""); + + f->max_runlists = fifo_eng_runlist_base__size_1_v(); + f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) * + f->max_runlists, GFP_KERNEL); + if (!f->runlist_info) + goto clean_up; + + engine_info = f->engine_info + ENGINE_GR_GK20A; + runlist_id = engine_info->runlist_id; + runlist = &f->runlist_info[runlist_id]; + + runlist->active_channels = + kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), + GFP_KERNEL); + if (!runlist->active_channels) + goto clean_up_runlist_info; + + runlist_size = ram_rl_entry_size_v() * f->num_channels; + for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { + dma_addr_t iova; + + runlist->mem[i].cpuva = + dma_alloc_coherent(d, + runlist_size, + &iova, + GFP_KERNEL); + if (!runlist->mem[i].cpuva) { + dev_err(d, "memory allocation failed\n"); + goto clean_up_runlist; + } + runlist->mem[i].iova = iova; + runlist->mem[i].size = runlist_size; + } + mutex_init(&runlist->mutex); + init_waitqueue_head(&runlist->runlist_wq); + + /* None of buffers is pinned if this value doesn't change. + Otherwise, one of them (cur_buffer) must have been pinned. 
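+   In other words, cur_buffer == MAX_RUNLIST_BUFFERS is the sentinel for
+   "no runlist buffer currently handed to hardware".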
*/ + runlist->cur_buffer = MAX_RUNLIST_BUFFERS; + + gk20a_dbg_fn("done"); + return 0; + +clean_up_runlist: + for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { + if (runlist->mem[i].cpuva) + dma_free_coherent(d, + runlist->mem[i].size, + runlist->mem[i].cpuva, + runlist->mem[i].iova); + runlist->mem[i].cpuva = NULL; + runlist->mem[i].iova = 0; + } + + kfree(runlist->active_channels); + runlist->active_channels = NULL; + +clean_up_runlist_info: + kfree(f->runlist_info); + f->runlist_info = NULL; + +clean_up: + gk20a_dbg_fn("fail"); + return -ENOMEM; +} + +#define GRFIFO_TIMEOUT_CHECK_PERIOD_US 100000 + +int gk20a_init_fifo_reset_enable_hw(struct gk20a *g) +{ + u32 intr_stall; + u32 mask; + u32 timeout; + int i; + + gk20a_dbg_fn(""); + /* enable pmc pfifo */ + gk20a_reset(g, mc_enable_pfifo_enabled_f() + | mc_enable_ce2_enabled_f()); + + /* enable pbdma */ + mask = 0; + for (i = 0; i < proj_host_num_pbdma_v(); ++i) + mask |= mc_enable_pb_sel_f(mc_enable_pb_0_enabled_v(), i); + gk20a_writel(g, mc_enable_pb_r(), mask); + + /* enable pfifo interrupt */ + gk20a_writel(g, fifo_intr_0_r(), 0xFFFFFFFF); + gk20a_writel(g, fifo_intr_en_0_r(), 0x7FFFFFFF); + gk20a_writel(g, fifo_intr_en_1_r(), 0x80000000); + + /* enable pbdma interrupt */ + mask = 0; + for (i = 0; i < proj_host_num_pbdma_v(); i++) { + intr_stall = gk20a_readl(g, pbdma_intr_stall_r(i)); + intr_stall &= ~pbdma_intr_stall_lbreq_enabled_f(); + gk20a_writel(g, pbdma_intr_stall_r(i), intr_stall); + gk20a_writel(g, pbdma_intr_0_r(i), 0xFFFFFFFF); + gk20a_writel(g, pbdma_intr_en_0_r(i), + (~0) & ~pbdma_intr_en_0_lbreq_enabled_f()); + gk20a_writel(g, pbdma_intr_1_r(i), 0xFFFFFFFF); + gk20a_writel(g, pbdma_intr_en_1_r(i), 0xFFFFFFFF); + } + + /* TBD: apply overrides */ + + /* TBD: BLCG prod */ + + /* reset runlist interrupts */ + gk20a_writel(g, fifo_intr_runlist_r(), ~0); + + /* TBD: do we need those? */ + timeout = gk20a_readl(g, fifo_fb_timeout_r()); + timeout = set_field(timeout, fifo_fb_timeout_period_m(), + fifo_fb_timeout_period_max_f()); + gk20a_writel(g, fifo_fb_timeout_r(), timeout); + + if (tegra_platform_is_silicon()) { + timeout = gk20a_readl(g, fifo_pb_timeout_r()); + timeout &= ~fifo_pb_timeout_detection_enabled_f(); + gk20a_writel(g, fifo_pb_timeout_r(), timeout); + } + + timeout = GRFIFO_TIMEOUT_CHECK_PERIOD_US | + fifo_eng_timeout_detection_enabled_f(); + gk20a_writel(g, fifo_eng_timeout_r(), timeout); + + gk20a_dbg_fn("done"); + + return 0; +} + +static void gk20a_init_fifo_pbdma_intr_descs(struct fifo_gk20a *f) +{ + /* These are all errors which indicate something really wrong + * going on in the device. */ + f->intr.pbdma.device_fatal_0 = + pbdma_intr_0_memreq_pending_f() | + pbdma_intr_0_memack_timeout_pending_f() | + pbdma_intr_0_memack_extra_pending_f() | + pbdma_intr_0_memdat_timeout_pending_f() | + pbdma_intr_0_memdat_extra_pending_f() | + pbdma_intr_0_memflush_pending_f() | + pbdma_intr_0_memop_pending_f() | + pbdma_intr_0_lbconnect_pending_f() | + pbdma_intr_0_lbreq_pending_f() | + pbdma_intr_0_lback_timeout_pending_f() | + pbdma_intr_0_lback_extra_pending_f() | + pbdma_intr_0_lbdat_timeout_pending_f() | + pbdma_intr_0_lbdat_extra_pending_f() | + pbdma_intr_0_xbarconnect_pending_f() | + pbdma_intr_0_pri_pending_f(); + + /* These are data parsing, framing errors or others which can be + * recovered from with intervention... or just resetting the + * channel. 
*/ + f->intr.pbdma.channel_fatal_0 = + pbdma_intr_0_gpfifo_pending_f() | + pbdma_intr_0_gpptr_pending_f() | + pbdma_intr_0_gpentry_pending_f() | + pbdma_intr_0_gpcrc_pending_f() | + pbdma_intr_0_pbptr_pending_f() | + pbdma_intr_0_pbentry_pending_f() | + pbdma_intr_0_pbcrc_pending_f() | + pbdma_intr_0_method_pending_f() | + pbdma_intr_0_methodcrc_pending_f() | + pbdma_intr_0_pbseg_pending_f() | + pbdma_intr_0_signature_pending_f(); + + /* Can be used for sw-methods, or represents + * a recoverable timeout. */ + f->intr.pbdma.restartable_0 = + pbdma_intr_0_device_pending_f() | + pbdma_intr_0_acquire_pending_f(); +} + +static int gk20a_init_fifo_setup_sw(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + struct device *d = dev_from_gk20a(g); + int chid, i, err = 0; + dma_addr_t iova; + + gk20a_dbg_fn(""); + + if (f->sw_ready) { + gk20a_dbg_fn("skip init"); + return 0; + } + + f->g = g; + + INIT_WORK(&f->fault_restore_thread, + gk20a_fifo_handle_mmu_fault_thread); + mutex_init(&f->intr.isr.mutex); + gk20a_init_fifo_pbdma_intr_descs(f); /* just filling in data/tables */ + + f->num_channels = ccsr_channel__size_1_v(); + f->num_pbdma = proj_host_num_pbdma_v(); + f->max_engines = ENGINE_INVAL_GK20A; + + f->userd_entry_size = 1 << ram_userd_base_shift_v(); + f->userd_total_size = f->userd_entry_size * f->num_channels; + + f->userd.cpuva = dma_alloc_coherent(d, + f->userd_total_size, + &iova, + GFP_KERNEL); + if (!f->userd.cpuva) { + dev_err(d, "memory allocation failed\n"); + goto clean_up; + } + + f->userd.iova = iova; + err = gk20a_get_sgtable(d, &f->userd.sgt, + f->userd.cpuva, f->userd.iova, + f->userd_total_size); + if (err) { + dev_err(d, "failed to create sg table\n"); + goto clean_up; + } + + /* bar1 va */ + f->userd.gpu_va = gk20a_gmmu_map(&g->mm.bar1.vm, + &f->userd.sgt, + f->userd_total_size, + 0, /* flags */ + gk20a_mem_flag_none); + if (!f->userd.gpu_va) { + dev_err(d, "gmmu mapping failed\n"); + goto clean_up; + } + + gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va); + + f->userd.size = f->userd_total_size; + + f->channel = kzalloc(f->num_channels * sizeof(*f->channel), + GFP_KERNEL); + f->pbdma_map = kzalloc(f->num_pbdma * sizeof(*f->pbdma_map), + GFP_KERNEL); + f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info), + GFP_KERNEL); + + if (!(f->channel && f->pbdma_map && f->engine_info)) { + err = -ENOMEM; + goto clean_up; + } + + /* pbdma map needs to be in place before calling engine info init */ + for (i = 0; i < f->num_pbdma; ++i) + f->pbdma_map[i] = gk20a_readl(g, fifo_pbdma_map_r(i)); + + init_engine_info(f); + + init_runlist(g, f); + + for (chid = 0; chid < f->num_channels; chid++) { + f->channel[chid].userd_cpu_va = + f->userd.cpuva + chid * f->userd_entry_size; + f->channel[chid].userd_iova = + NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova) + + chid * f->userd_entry_size; + f->channel[chid].userd_gpu_va = + f->userd.gpu_va + chid * f->userd_entry_size; + + gk20a_init_channel_support(g, chid); + } + mutex_init(&f->ch_inuse_mutex); + + f->remove_support = gk20a_remove_fifo_support; + + f->deferred_reset_pending = false; + mutex_init(&f->deferred_reset_mutex); + + f->sw_ready = true; + + gk20a_dbg_fn("done"); + return 0; + +clean_up: + gk20a_dbg_fn("fail"); + if (f->userd.gpu_va) + gk20a_gmmu_unmap(&g->mm.bar1.vm, + f->userd.gpu_va, + f->userd.size, + gk20a_mem_flag_none); + if (f->userd.sgt) + gk20a_free_sgtable(&f->userd.sgt); + if (f->userd.cpuva) + dma_free_coherent(d, + f->userd_total_size, + f->userd.cpuva, + f->userd.iova); + f->userd.cpuva = 
NULL; + f->userd.iova = 0; + + memset(&f->userd, 0, sizeof(struct userd_desc)); + + kfree(f->channel); + f->channel = NULL; + kfree(f->pbdma_map); + f->pbdma_map = NULL; + kfree(f->engine_info); + f->engine_info = NULL; + + return err; +} + +static void gk20a_fifo_handle_runlist_event(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + struct fifo_runlist_info_gk20a *runlist; + unsigned long runlist_event; + u32 runlist_id; + + runlist_event = gk20a_readl(g, fifo_intr_runlist_r()); + gk20a_writel(g, fifo_intr_runlist_r(), runlist_event); + + for_each_set_bit(runlist_id, &runlist_event, f->max_runlists) { + runlist = &f->runlist_info[runlist_id]; + wake_up(&runlist->runlist_wq); + } + +} + +static int gk20a_init_fifo_setup_hw(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + + gk20a_dbg_fn(""); + + /* test write, read through bar1 @ userd region before + * turning on the snooping */ + { + struct fifo_gk20a *f = &g->fifo; + u32 v, v1 = 0x33, v2 = 0x55; + + u32 bar1_vaddr = f->userd.gpu_va; + volatile u32 *cpu_vaddr = f->userd.cpuva; + + gk20a_dbg_info("test bar1 @ vaddr 0x%x", + bar1_vaddr); + + v = gk20a_bar1_readl(g, bar1_vaddr); + + *cpu_vaddr = v1; + smp_mb(); + + if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) { + gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!"); + return -EINVAL; + } + + gk20a_bar1_writel(g, bar1_vaddr, v2); + + if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) { + gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!"); + return -EINVAL; + } + + /* is it visible to the cpu? */ + if (*cpu_vaddr != v2) { + gk20a_err(dev_from_gk20a(g), + "cpu didn't see bar1 write @ %p!", + cpu_vaddr); + } + + /* put it back */ + gk20a_bar1_writel(g, bar1_vaddr, v); + } + + /*XXX all manner of flushes and caching worries, etc */ + + /* set the base for the userd region now */ + gk20a_writel(g, fifo_bar1_base_r(), + fifo_bar1_base_ptr_f(f->userd.gpu_va >> 12) | + fifo_bar1_base_valid_true_f()); + + gk20a_dbg_fn("done"); + + return 0; +} + +int gk20a_init_fifo_support(struct gk20a *g) +{ + u32 err; + + err = gk20a_init_fifo_setup_sw(g); + if (err) + return err; + + err = gk20a_init_fifo_setup_hw(g); + if (err) + return err; + + return err; +} + +static struct channel_gk20a * +channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr) +{ + int ci; + if (unlikely(!f->channel)) + return NULL; + for (ci = 0; ci < f->num_channels; ci++) { + struct channel_gk20a *c = f->channel+ci; + if (c->inst_block.cpuva && + (inst_ptr == c->inst_block.cpu_pa)) + return f->channel+ci; + } + return NULL; +} + +/* fault info/descriptions. 
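+ * These tables are indexed by the raw type, engine subid and client fields read out of fifo_intr_mmu_fault_info_r().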
+ * tbd: move to setup + * */ +static const char * const fault_type_descs[] = { + "pde", /*fifo_intr_mmu_fault_info_type_pde_v() == 0 */ + "pde size", + "pte", + "va limit viol", + "unbound inst", + "priv viol", + "ro viol", + "wo viol", + "pitch mask", + "work creation", + "bad aperture", + "compression failure", + "bad kind", + "region viol", + "dual ptes", + "poisoned", +}; +/* engine descriptions */ +static const char * const engine_subid_descs[] = { + "gpc", + "hub", +}; + +static const char * const hub_client_descs[] = { + "vip", "ce0", "ce1", "dniso", "fe", "fecs", "host", "host cpu", + "host cpu nb", "iso", "mmu", "mspdec", "msppp", "msvld", + "niso", "p2p", "pd", "perf", "pmu", "raster twod", "scc", + "scc nb", "sec", "ssync", "gr copy", "ce2", "xv", "mmu nb", + "msenc", "d falcon", "sked", "a falcon", "n/a", +}; + +static const char * const gpc_client_descs[] = { + "l1 0", "t1 0", "pe 0", + "l1 1", "t1 1", "pe 1", + "l1 2", "t1 2", "pe 2", + "l1 3", "t1 3", "pe 3", + "rast", "gcc", "gpccs", + "prop 0", "prop 1", "prop 2", "prop 3", + "l1 4", "t1 4", "pe 4", + "l1 5", "t1 5", "pe 5", + "l1 6", "t1 6", "pe 6", + "l1 7", "t1 7", "pe 7", + "gpm", + "ltp utlb 0", "ltp utlb 1", "ltp utlb 2", "ltp utlb 3", + "rgg utlb", +}; + +/* reads info from hardware and fills in mmu fault info record */ +static inline void get_exception_mmu_fault_info( + struct gk20a *g, u32 engine_id, + struct fifo_mmu_fault_info_gk20a *f) +{ + u32 fault_info_v; + + gk20a_dbg_fn("engine_id %d", engine_id); + + memset(f, 0, sizeof(*f)); + + f->fault_info_v = fault_info_v = gk20a_readl(g, + fifo_intr_mmu_fault_info_r(engine_id)); + f->fault_type_v = + fifo_intr_mmu_fault_info_type_v(fault_info_v); + f->engine_subid_v = + fifo_intr_mmu_fault_info_engine_subid_v(fault_info_v); + f->client_v = fifo_intr_mmu_fault_info_client_v(fault_info_v); + + BUG_ON(f->fault_type_v >= ARRAY_SIZE(fault_type_descs)); + f->fault_type_desc = fault_type_descs[f->fault_type_v]; + + BUG_ON(f->engine_subid_v >= ARRAY_SIZE(engine_subid_descs)); + f->engine_subid_desc = engine_subid_descs[f->engine_subid_v]; + + if (f->engine_subid_v == + fifo_intr_mmu_fault_info_engine_subid_hub_v()) { + + BUG_ON(f->client_v >= ARRAY_SIZE(hub_client_descs)); + f->client_desc = hub_client_descs[f->client_v]; + } else if (f->engine_subid_v == + fifo_intr_mmu_fault_info_engine_subid_gpc_v()) { + BUG_ON(f->client_v >= ARRAY_SIZE(gpc_client_descs)); + f->client_desc = gpc_client_descs[f->client_v]; + } else { + BUG_ON(1); + } + + f->fault_hi_v = gk20a_readl(g, fifo_intr_mmu_fault_hi_r(engine_id)); + f->fault_lo_v = gk20a_readl(g, fifo_intr_mmu_fault_lo_r(engine_id)); + /* note:ignoring aperture on gk20a... */ + f->inst_ptr = fifo_intr_mmu_fault_inst_ptr_v( + gk20a_readl(g, fifo_intr_mmu_fault_inst_r(engine_id))); + /* note: inst_ptr is a 40b phys addr. 
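+ * The register reports it in aligned units, so the align shift below restores the byte address.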
*/ + f->inst_ptr <<= fifo_intr_mmu_fault_inst_ptr_align_shift_v(); +} + +static void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id) +{ + gk20a_dbg_fn(""); + + if (engine_id == top_device_info_type_enum_graphics_v()) { + /* resetting engine using mc_enable_r() is not enough, + * we do full init sequence */ + gk20a_gr_reset(g); + } + if (engine_id == top_device_info_type_enum_copy0_v()) + gk20a_reset(g, mc_enable_ce2_m()); +} + +static void gk20a_fifo_handle_mmu_fault_thread(struct work_struct *work) +{ + struct fifo_gk20a *f = container_of(work, struct fifo_gk20a, + fault_restore_thread); + struct gk20a *g = f->g; + int i; + + /* Reinitialise FECS and GR */ + gk20a_init_pmu_setup_hw2(g); + + /* It is safe to enable ELPG again. */ + gk20a_pmu_enable_elpg(g); + + /* Restore the runlist */ + for (i = 0; i < g->fifo.max_runlists; i++) + gk20a_fifo_update_runlist_locked(g, i, ~0, true, true); + + /* unlock all runlists */ + for (i = 0; i < g->fifo.max_runlists; i++) + mutex_unlock(&g->fifo.runlist_info[i].mutex); + +} + +static void gk20a_fifo_handle_chsw_fault(struct gk20a *g) +{ + u32 intr; + + intr = gk20a_readl(g, fifo_intr_chsw_error_r()); + gk20a_err(dev_from_gk20a(g), "chsw: %08x\n", intr); + gk20a_fecs_dump_falcon_stats(g); + gk20a_writel(g, fifo_intr_chsw_error_r(), intr); +} + +static void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + u32 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r()); + gk20a_err(dev, "dropped mmu fault (0x%08x)", fault_id); +} + +static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id, + struct fifo_mmu_fault_info_gk20a *f, bool fake_fault) +{ + /* channel recovery is only deferred if an sm debugger + is attached and has MMU debug mode is enabled */ + if (!gk20a_gr_sm_debugger_attached(g) || + !gk20a_mm_mmu_debug_mode_enabled(g)) + return false; + + /* if this fault is fake (due to RC recovery), don't defer recovery */ + if (fake_fault) + return false; + + if (engine_id != ENGINE_GR_GK20A || + f->engine_subid_v != fifo_intr_mmu_fault_info_engine_subid_gpc_v()) + return false; + + return true; +} + +void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g, + unsigned long fault_id) { + u32 engine_mmu_id; + int i; + + /* reset engines */ + for_each_set_bit(engine_mmu_id, &fault_id, 32) { + u32 engine_id = gk20a_mmu_id_to_engine_id(engine_mmu_id); + if (engine_id != ~0) + gk20a_fifo_reset_engine(g, engine_id); + } + + /* CLEAR the runlists. Do not wait for runlist to start as + * some engines may not be available right now */ + for (i = 0; i < g->fifo.max_runlists; i++) + gk20a_fifo_update_runlist_locked(g, i, ~0, false, false); + + /* clear interrupt */ + gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id); + + /* resume scheduler */ + gk20a_writel(g, fifo_error_sched_disable_r(), + gk20a_readl(g, fifo_error_sched_disable_r())); + + /* Spawn a work to enable PMU and restore runlists */ + schedule_work(&g->fifo.fault_restore_thread); +} + +static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g, + struct channel_gk20a *ch) { + bool verbose = true; + if (!ch) + return verbose; + + gk20a_err(dev_from_gk20a(g), + "channel %d generated a mmu fault", + ch->hw_chid); + if (ch->error_notifier) { + u32 err = ch->error_notifier->info32; + if (ch->error_notifier->status == 0xffff) { + /* If error code is already set, this mmu fault + * was triggered as part of recovery from other + * error condition. + * Don't overwrite error flag. 
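+ * (a notifier status of 0xffff indicates an error was already reported for this channel)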
*/ + /* Fifo timeout debug spew is controlled by user */ + if (err == NVHOST_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT) + verbose = ch->timeout_debug_dump; + } else { + gk20a_set_error_notifier(ch, + NVHOST_CHANNEL_FIFO_ERROR_MMU_ERR_FLT); + } + } + /* mark channel as faulted */ + ch->has_timedout = true; + wmb(); + /* unblock pending waits */ + wake_up(&ch->semaphore_wq); + wake_up(&ch->notifier_wq); + wake_up(&ch->submit_wq); + return verbose; +} + + +static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g) +{ + bool fake_fault; + unsigned long fault_id; + unsigned long engine_mmu_id; + int i; + bool verbose = true; + gk20a_dbg_fn(""); + + g->fifo.deferred_reset_pending = false; + + /* Disable ELPG */ + gk20a_pmu_disable_elpg(g); + + /* If we have recovery in progress, MMU fault id is invalid */ + if (g->fifo.mmu_fault_engines) { + fault_id = g->fifo.mmu_fault_engines; + g->fifo.mmu_fault_engines = 0; + fake_fault = true; + } else { + fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r()); + fake_fault = false; + gk20a_debug_dump(g->dev); + } + + /* lock all runlists. Note that locks are are released in + * gk20a_fifo_handle_mmu_fault_thread() */ + for (i = 0; i < g->fifo.max_runlists; i++) + mutex_lock(&g->fifo.runlist_info[i].mutex); + + /* go through all faulted engines */ + for_each_set_bit(engine_mmu_id, &fault_id, 32) { + /* bits in fifo_intr_mmu_fault_id_r do not correspond 1:1 to + * engines. Convert engine_mmu_id to engine_id */ + u32 engine_id = gk20a_mmu_id_to_engine_id(engine_mmu_id); + struct fifo_runlist_info_gk20a *runlist = g->fifo.runlist_info; + struct fifo_mmu_fault_info_gk20a f; + struct channel_gk20a *ch = NULL; + + get_exception_mmu_fault_info(g, engine_mmu_id, &f); + trace_gk20a_mmu_fault(f.fault_hi_v, + f.fault_lo_v, + f.fault_info_v, + f.inst_ptr, + engine_id, + f.engine_subid_desc, + f.client_desc, + f.fault_type_desc); + gk20a_err(dev_from_gk20a(g), "mmu fault on engine %d, " + "engine subid %d (%s), client %d (%s), " + "addr 0x%08x:0x%08x, type %d (%s), info 0x%08x," + "inst_ptr 0x%llx\n", + engine_id, + f.engine_subid_v, f.engine_subid_desc, + f.client_v, f.client_desc, + f.fault_hi_v, f.fault_lo_v, + f.fault_type_v, f.fault_type_desc, + f.fault_info_v, f.inst_ptr); + + /* get the channel */ + if (fake_fault) { + /* read and parse engine status */ + u32 status = gk20a_readl(g, + fifo_engine_status_r(engine_id)); + u32 ctx_status = + fifo_engine_status_ctx_status_v(status); + bool type_ch = fifo_pbdma_status_id_type_v(status) == + fifo_pbdma_status_id_type_chid_v(); + + /* use next_id if context load is failing */ + u32 id = (ctx_status == + fifo_engine_status_ctx_status_ctxsw_load_v()) ? 
+ fifo_engine_status_next_id_v(status) : + fifo_engine_status_id_v(status); + + if (type_ch) { + ch = g->fifo.channel + id; + } else { + gk20a_err(dev_from_gk20a(g), "non-chid type not supported"); + WARN_ON(1); + } + } else { + /* read channel based on instruction pointer */ + ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr); + } + + if (ch) { + if (ch->in_use) { + /* disable the channel from hw and increment + * syncpoints */ + gk20a_disable_channel_no_update(ch); + + /* remove the channel from runlist */ + clear_bit(ch->hw_chid, + runlist->active_channels); + } + + /* check if engine reset should be deferred */ + if (gk20a_fifo_should_defer_engine_reset(g, engine_id, &f, fake_fault)) { + g->fifo.mmu_fault_engines = fault_id; + + /* handled during channel free */ + g->fifo.deferred_reset_pending = true; + } else + verbose = gk20a_fifo_set_ctx_mmu_error(g, ch); + + } else if (f.inst_ptr == + g->mm.bar1.inst_block.cpu_pa) { + gk20a_err(dev_from_gk20a(g), "mmu fault from bar1"); + } else if (f.inst_ptr == + g->mm.pmu.inst_block.cpu_pa) { + gk20a_err(dev_from_gk20a(g), "mmu fault from pmu"); + } else + gk20a_err(dev_from_gk20a(g), "couldn't locate channel for mmu fault"); + } + + if (g->fifo.deferred_reset_pending) { + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "sm debugger attached," + " deferring channel recovery to channel free"); + /* clear interrupt */ + gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id); + return verbose; + } + + /* resetting the engines and clearing the runlists is done in + a separate function to allow deferred reset. */ + fifo_gk20a_finish_mmu_fault_handling(g, fault_id); + return verbose; +} + +static void gk20a_fifo_get_faulty_channel(struct gk20a *g, int engine_id, + u32 *chid, bool *type_ch) +{ + u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id)); + u32 ctx_status = fifo_engine_status_ctx_status_v(status); + + *type_ch = fifo_pbdma_status_id_type_v(status) == + fifo_pbdma_status_id_type_chid_v(); + /* use next_id if context load is failing */ + *chid = (ctx_status == + fifo_engine_status_ctx_status_ctxsw_load_v()) ? 
+ fifo_engine_status_next_id_v(status) : + fifo_engine_status_id_v(status); +} + +void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, + bool verbose) +{ + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + unsigned long delay = GR_IDLE_CHECK_DEFAULT; + unsigned long engine_id, i; + unsigned long _engine_ids = __engine_ids; + unsigned long engine_ids = 0; + int ret; + + if (verbose) + gk20a_debug_dump(g->dev); + + /* store faulted engines in advance */ + g->fifo.mmu_fault_engines = 0; + for_each_set_bit(engine_id, &_engine_ids, 32) { + bool ref_type_ch; + int ref_chid; + gk20a_fifo_get_faulty_channel(g, engine_id, &ref_chid, + &ref_type_ch); + + /* Reset *all* engines that use the + * same channel as faulty engine */ + for (i = 0; i < g->fifo.max_engines; i++) { + bool type_ch; + u32 chid; + gk20a_fifo_get_faulty_channel(g, i, &chid, &type_ch); + if (ref_type_ch == type_ch && ref_chid == chid) { + engine_ids |= BIT(i); + g->fifo.mmu_fault_engines |= + BIT(gk20a_engine_id_to_mmu_id(i)); + } + } + + } + + /* trigger faults for all bad engines */ + for_each_set_bit(engine_id, &engine_ids, 32) { + if (engine_id > g->fifo.max_engines) { + WARN_ON(true); + break; + } + + gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id), + fifo_trigger_mmu_fault_id_f( + gk20a_engine_id_to_mmu_id(engine_id)) | + fifo_trigger_mmu_fault_enable_f(1)); + } + + /* Wait for MMU fault to trigger */ + ret = -EBUSY; + do { + if (gk20a_readl(g, fifo_intr_0_r()) & + fifo_intr_0_mmu_fault_pending_f()) { + ret = 0; + break; + } + + usleep_range(delay, delay * 2); + delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); + } while (time_before(jiffies, end_jiffies) || + !tegra_platform_is_silicon()); + + if (ret) + gk20a_err(dev_from_gk20a(g), "mmu fault timeout"); + + /* release mmu fault trigger */ + for_each_set_bit(engine_id, &engine_ids, 32) + gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id), 0); +} + + +static bool gk20a_fifo_handle_sched_error(struct gk20a *g) +{ + u32 sched_error; + u32 engine_id; + int id = -1; + bool non_chid = false; + + /* read and reset the scheduler error register */ + sched_error = gk20a_readl(g, fifo_intr_sched_error_r()); + gk20a_writel(g, fifo_intr_0_r(), fifo_intr_0_sched_error_reset_f()); + + for (engine_id = 0; engine_id < g->fifo.max_engines; engine_id++) { + u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id)); + u32 ctx_status = fifo_engine_status_ctx_status_v(status); + bool failing_engine; + + /* we are interested in busy engines */ + failing_engine = fifo_engine_status_engine_v(status) == + fifo_engine_status_engine_busy_v(); + + /* ..that are doing context switch */ + failing_engine = failing_engine && + (ctx_status == + fifo_engine_status_ctx_status_ctxsw_switch_v() + || ctx_status == + fifo_engine_status_ctx_status_ctxsw_save_v() + || ctx_status == + fifo_engine_status_ctx_status_ctxsw_load_v()); + + if (failing_engine) { + id = (ctx_status == + fifo_engine_status_ctx_status_ctxsw_load_v()) ? 
+ fifo_engine_status_next_id_v(status) : + fifo_engine_status_id_v(status); + non_chid = fifo_pbdma_status_id_type_v(status) != + fifo_pbdma_status_id_type_chid_v(); + break; + } + } + + /* could not find the engine - should never happen */ + if (unlikely(engine_id >= g->fifo.max_engines)) + goto err; + + if (fifo_intr_sched_error_code_f(sched_error) == + fifo_intr_sched_error_code_ctxsw_timeout_v()) { + struct fifo_gk20a *f = &g->fifo; + struct channel_gk20a *ch = &f->channel[id]; + + if (non_chid) { + gk20a_fifo_recover(g, BIT(engine_id), true); + goto err; + } + + if (gk20a_channel_update_and_check_timeout(ch, + GRFIFO_TIMEOUT_CHECK_PERIOD_US / 1000)) { + gk20a_set_error_notifier(ch, + NVHOST_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); + gk20a_err(dev_from_gk20a(g), + "fifo sched ctxsw timeout error:" + "engine = %u, ch = %d", engine_id, id); + gk20a_fifo_recover(g, BIT(engine_id), + ch->timeout_debug_dump); + } else { + gk20a_warn(dev_from_gk20a(g), + "fifo is waiting for ctx switch for %d ms," + "ch = %d\n", + ch->timeout_accumulated_ms, + id); + } + return ch->timeout_debug_dump; + } +err: + gk20a_err(dev_from_gk20a(g), "fifo sched error : 0x%08x, engine=%u, %s=%d", + sched_error, engine_id, non_chid ? "non-ch" : "ch", id); + + return true; +} + +static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr) +{ + bool print_channel_reset_log = false, reset_engine = false; + struct device *dev = dev_from_gk20a(g); + u32 handled = 0; + + gk20a_dbg_fn(""); + + if (fifo_intr & fifo_intr_0_pio_error_pending_f()) { + /* pio mode is unused. this shouldn't happen, ever. */ + /* should we clear it or just leave it pending? */ + gk20a_err(dev, "fifo pio error!\n"); + BUG_ON(1); + } + + if (fifo_intr & fifo_intr_0_bind_error_pending_f()) { + u32 bind_error = gk20a_readl(g, fifo_intr_bind_error_r()); + gk20a_err(dev, "fifo bind error: 0x%08x", bind_error); + print_channel_reset_log = true; + handled |= fifo_intr_0_bind_error_pending_f(); + } + + if (fifo_intr & fifo_intr_0_sched_error_pending_f()) { + print_channel_reset_log = gk20a_fifo_handle_sched_error(g); + handled |= fifo_intr_0_sched_error_pending_f(); + } + + if (fifo_intr & fifo_intr_0_chsw_error_pending_f()) { + gk20a_fifo_handle_chsw_fault(g); + handled |= fifo_intr_0_chsw_error_pending_f(); + } + + if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) { + print_channel_reset_log = gk20a_fifo_handle_mmu_fault(g); + reset_engine = true; + handled |= fifo_intr_0_mmu_fault_pending_f(); + } + + if (fifo_intr & fifo_intr_0_dropped_mmu_fault_pending_f()) { + gk20a_fifo_handle_dropped_mmu_fault(g); + handled |= fifo_intr_0_dropped_mmu_fault_pending_f(); + } + + print_channel_reset_log = !g->fifo.deferred_reset_pending + && print_channel_reset_log; + + if (print_channel_reset_log) { + int engine_id; + gk20a_err(dev_from_gk20a(g), + "channel reset initated from %s", __func__); + for (engine_id = 0; + engine_id < g->fifo.max_engines; + engine_id++) { + gk20a_dbg_fn("enum:%d -> engine_id:%d", engine_id, + g->fifo.engine_info[engine_id].engine_id); + fifo_pbdma_exception_status(g, + &g->fifo.engine_info[engine_id]); + fifo_engine_exception_status(g, + &g->fifo.engine_info[engine_id]); + } + } + + return handled; +} + + +static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev, + struct gk20a *g, + struct fifo_gk20a *f, + u32 pbdma_id) +{ + u32 pbdma_intr_0 = gk20a_readl(g, pbdma_intr_0_r(pbdma_id)); + u32 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id)); + u32 handled = 0; + bool reset_device = false; + bool reset_channel = false; + + gk20a_dbg_fn(""); + 
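+ /* pbdma_intr_0 bits were classified into device-fatal, channel-fatal and restartable masks by gk20a_init_fifo_pbdma_intr_descs(); each class is handled separately below */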
+ gk20a_dbg(gpu_dbg_intr, "pbdma id intr pending %d %08x %08x", pbdma_id, + pbdma_intr_0, pbdma_intr_1); + if (pbdma_intr_0) { + if (f->intr.pbdma.device_fatal_0 & pbdma_intr_0) { + dev_err(dev, "unrecoverable device error: " + "pbdma_intr_0(%d):0x%08x", pbdma_id, pbdma_intr_0); + reset_device = true; + /* TODO: disable pbdma intrs */ + handled |= f->intr.pbdma.device_fatal_0 & pbdma_intr_0; + } + if (f->intr.pbdma.channel_fatal_0 & pbdma_intr_0) { + dev_warn(dev, "channel error: " + "pbdma_intr_0(%d):0x%08x", pbdma_id, pbdma_intr_0); + reset_channel = true; + /* TODO: clear pbdma channel errors */ + handled |= f->intr.pbdma.channel_fatal_0 & pbdma_intr_0; + } + if (f->intr.pbdma.restartable_0 & pbdma_intr_0) { + dev_warn(dev, "sw method: %08x %08x", + gk20a_readl(g, pbdma_method0_r(0)), + gk20a_readl(g, pbdma_method0_r(0)+4)); + gk20a_writel(g, pbdma_method0_r(0), 0); + gk20a_writel(g, pbdma_method0_r(0)+4, 0); + handled |= f->intr.pbdma.restartable_0 & pbdma_intr_0; + } + + gk20a_writel(g, pbdma_intr_0_r(pbdma_id), pbdma_intr_0); + } + + /* all intrs in _intr_1 are "host copy engine" related, + * which gk20a doesn't have. for now just make them channel fatal. */ + if (pbdma_intr_1) { + dev_err(dev, "channel hce error: pbdma_intr_1(%d): 0x%08x", + pbdma_id, pbdma_intr_1); + reset_channel = true; + gk20a_writel(g, pbdma_intr_1_r(pbdma_id), pbdma_intr_1); + } + + + + return handled; +} + +static u32 fifo_channel_isr(struct gk20a *g, u32 fifo_intr) +{ + gk20a_channel_semaphore_wakeup(g); + return fifo_intr_0_channel_intr_pending_f(); +} + + +static u32 fifo_pbdma_isr(struct gk20a *g, u32 fifo_intr) +{ + struct device *dev = dev_from_gk20a(g); + struct fifo_gk20a *f = &g->fifo; + u32 clear_intr = 0, i; + u32 pbdma_pending = gk20a_readl(g, fifo_intr_pbdma_id_r()); + + for (i = 0; i < fifo_intr_pbdma_id_status__size_1_v(); i++) { + if (fifo_intr_pbdma_id_status_f(pbdma_pending, i)) { + gk20a_dbg(gpu_dbg_intr, "pbdma id %d intr pending", i); + clear_intr |= + gk20a_fifo_handle_pbdma_intr(dev, g, f, i); + } + } + return fifo_intr_0_pbdma_intr_pending_f(); +} + +void gk20a_fifo_isr(struct gk20a *g) +{ + u32 error_intr_mask = + fifo_intr_0_bind_error_pending_f() | + fifo_intr_0_sched_error_pending_f() | + fifo_intr_0_chsw_error_pending_f() | + fifo_intr_0_fb_flush_timeout_pending_f() | + fifo_intr_0_dropped_mmu_fault_pending_f() | + fifo_intr_0_mmu_fault_pending_f() | + fifo_intr_0_lb_error_pending_f() | + fifo_intr_0_pio_error_pending_f(); + + u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r()); + u32 clear_intr = 0; + + /* note we're not actually in an "isr", but rather + * in a threaded interrupt context... 
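+ * so taking the sleeping mutex below is safe.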
*/ + mutex_lock(&g->fifo.intr.isr.mutex); + + gk20a_dbg(gpu_dbg_intr, "fifo isr %08x\n", fifo_intr); + + /* handle runlist update */ + if (fifo_intr & fifo_intr_0_runlist_event_pending_f()) { + gk20a_fifo_handle_runlist_event(g); + clear_intr |= fifo_intr_0_runlist_event_pending_f(); + } + if (fifo_intr & fifo_intr_0_pbdma_intr_pending_f()) + clear_intr |= fifo_pbdma_isr(g, fifo_intr); + + if (unlikely(fifo_intr & error_intr_mask)) + clear_intr = fifo_error_isr(g, fifo_intr); + + gk20a_writel(g, fifo_intr_0_r(), clear_intr); + + mutex_unlock(&g->fifo.intr.isr.mutex); + + return; +} + +void gk20a_fifo_nonstall_isr(struct gk20a *g) +{ + u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r()); + u32 clear_intr = 0; + + gk20a_dbg(gpu_dbg_intr, "fifo nonstall isr %08x\n", fifo_intr); + + if (fifo_intr & fifo_intr_0_channel_intr_pending_f()) + clear_intr |= fifo_channel_isr(g, fifo_intr); + + gk20a_writel(g, fifo_intr_0_r(), clear_intr); + + return; +} + +int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid) +{ + struct fifo_gk20a *f = &g->fifo; + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + u32 delay = GR_IDLE_CHECK_DEFAULT; + u32 ret = 0; + u32 token = PMU_INVALID_MUTEX_OWNER_ID; + u32 elpg_off = 0; + u32 i; + + gk20a_dbg_fn("%d", hw_chid); + + /* we have no idea which runlist we are using. lock all */ + for (i = 0; i < g->fifo.max_runlists; i++) + mutex_lock(&f->runlist_info[i].mutex); + + /* disable elpg if failed to acquire pmu mutex */ + elpg_off = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); + if (elpg_off) + gk20a_pmu_disable_elpg(g); + + /* issue preempt */ + gk20a_writel(g, fifo_preempt_r(), + fifo_preempt_chid_f(hw_chid) | + fifo_preempt_type_channel_f()); + + /* wait for preempt */ + ret = -EBUSY; + do { + if (!(gk20a_readl(g, fifo_preempt_r()) & + fifo_preempt_pending_true_f())) { + ret = 0; + break; + } + + usleep_range(delay, delay * 2); + delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); + } while (time_before(jiffies, end_jiffies) || + !tegra_platform_is_silicon()); + + if (ret) { + int i; + u32 engines = 0; + struct fifo_gk20a *f = &g->fifo; + struct channel_gk20a *ch = &f->channel[hw_chid]; + + gk20a_err(dev_from_gk20a(g), "preempt channel %d timeout\n", + hw_chid); + + /* forcefully reset all busy engines using this channel */ + for (i = 0; i < g->fifo.max_engines; i++) { + u32 status = gk20a_readl(g, fifo_engine_status_r(i)); + u32 ctx_status = + fifo_engine_status_ctx_status_v(status); + bool type_ch = fifo_pbdma_status_id_type_v(status) == + fifo_pbdma_status_id_type_chid_v(); + bool busy = fifo_engine_status_engine_v(status) == + fifo_engine_status_engine_busy_v(); + u32 id = (ctx_status == + fifo_engine_status_ctx_status_ctxsw_load_v()) ? 
+ fifo_engine_status_next_id_v(status) : + fifo_engine_status_id_v(status); + + if (type_ch && busy && id == hw_chid) + engines |= BIT(i); + } + gk20a_set_error_notifier(ch, + NVHOST_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); + gk20a_fifo_recover(g, engines, true); + } + + /* re-enable elpg or release pmu mutex */ + if (elpg_off) + gk20a_pmu_enable_elpg(g); + else + pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); + + for (i = 0; i < g->fifo.max_runlists; i++) + mutex_unlock(&f->runlist_info[i].mutex); + + return ret; +} + +int gk20a_fifo_enable_engine_activity(struct gk20a *g, + struct fifo_engine_info_gk20a *eng_info) +{ + u32 token = PMU_INVALID_MUTEX_OWNER_ID; + u32 elpg_off; + u32 enable; + + gk20a_dbg_fn(""); + + /* disable elpg if failed to acquire pmu mutex */ + elpg_off = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); + if (elpg_off) + gk20a_pmu_disable_elpg(g); + + enable = gk20a_readl(g, fifo_sched_disable_r()); + enable &= ~(fifo_sched_disable_true_v() >> eng_info->runlist_id); + gk20a_writel(g, fifo_sched_disable_r(), enable); + + /* re-enable elpg or release pmu mutex */ + if (elpg_off) + gk20a_pmu_enable_elpg(g); + else + pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); + + gk20a_dbg_fn("done"); + return 0; +} + +int gk20a_fifo_disable_engine_activity(struct gk20a *g, + struct fifo_engine_info_gk20a *eng_info, + bool wait_for_idle) +{ + u32 gr_stat, pbdma_stat, chan_stat, eng_stat, ctx_stat; + u32 pbdma_chid = ~0, engine_chid = ~0, disable; + u32 token = PMU_INVALID_MUTEX_OWNER_ID; + u32 elpg_off; + u32 err = 0; + + gk20a_dbg_fn(""); + + gr_stat = + gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id)); + if (fifo_engine_status_engine_v(gr_stat) == + fifo_engine_status_engine_busy_v() && !wait_for_idle) + return -EBUSY; + + /* disable elpg if failed to acquire pmu mutex */ + elpg_off = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); + if (elpg_off) + gk20a_pmu_disable_elpg(g); + + disable = gk20a_readl(g, fifo_sched_disable_r()); + disable = set_field(disable, + fifo_sched_disable_runlist_m(eng_info->runlist_id), + fifo_sched_disable_runlist_f(fifo_sched_disable_true_v(), + eng_info->runlist_id)); + gk20a_writel(g, fifo_sched_disable_r(), disable); + + /* chid from pbdma status */ + pbdma_stat = gk20a_readl(g, fifo_pbdma_status_r(eng_info->pbdma_id)); + chan_stat = fifo_pbdma_status_chan_status_v(pbdma_stat); + if (chan_stat == fifo_pbdma_status_chan_status_valid_v() || + chan_stat == fifo_pbdma_status_chan_status_chsw_save_v()) + pbdma_chid = fifo_pbdma_status_id_v(pbdma_stat); + else if (chan_stat == fifo_pbdma_status_chan_status_chsw_load_v() || + chan_stat == fifo_pbdma_status_chan_status_chsw_switch_v()) + pbdma_chid = fifo_pbdma_status_next_id_v(pbdma_stat); + + if (pbdma_chid != ~0) { + err = gk20a_fifo_preempt_channel(g, pbdma_chid); + if (err) + goto clean_up; + } + + /* chid from engine status */ + eng_stat = gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id)); + ctx_stat = fifo_engine_status_ctx_status_v(eng_stat); + if (ctx_stat == fifo_engine_status_ctx_status_valid_v() || + ctx_stat == fifo_engine_status_ctx_status_ctxsw_save_v()) + engine_chid = fifo_engine_status_id_v(eng_stat); + else if (ctx_stat == fifo_engine_status_ctx_status_ctxsw_load_v() || + ctx_stat == fifo_engine_status_ctx_status_ctxsw_switch_v()) + engine_chid = fifo_engine_status_next_id_v(eng_stat); + + if (engine_chid != ~0 && engine_chid != pbdma_chid) { + err = gk20a_fifo_preempt_channel(g, engine_chid); + if (err) + goto clean_up; + } + +clean_up: + /* re-enable 
elpg or release pmu mutex */ + if (elpg_off) + gk20a_pmu_enable_elpg(g); + else + pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); + + if (err) { + gk20a_dbg_fn("failed"); + if (gk20a_fifo_enable_engine_activity(g, eng_info)) + gk20a_err(dev_from_gk20a(g), + "failed to enable gr engine activity\n"); + } else { + gk20a_dbg_fn("done"); + } + return err; +} + +static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id) +{ + struct fifo_gk20a *f = &g->fifo; + u32 engines = 0; + int i; + + for (i = 0; i < f->max_engines; i++) { + u32 status = gk20a_readl(g, fifo_engine_status_r(i)); + bool engine_busy = fifo_engine_status_engine_v(status) == + fifo_engine_status_engine_busy_v(); + + if (engine_busy && + (f->engine_info[i].runlist_id == runlist_id)) + engines |= BIT(i); + } + gk20a_fifo_recover(g, engines, true); +} + +static int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id) +{ + struct fifo_runlist_info_gk20a *runlist; + u32 remain; + bool pending; + + runlist = &g->fifo.runlist_info[runlist_id]; + remain = wait_event_timeout(runlist->runlist_wq, + ((pending = gk20a_readl(g, fifo_eng_runlist_r(runlist_id)) & + fifo_eng_runlist_pending_true_f()) == 0), + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g))); + + if (remain == 0 && pending != 0) + return -ETIMEDOUT; + + return 0; +} + +static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, + u32 hw_chid, bool add, + bool wait_for_finish) +{ + u32 ret = 0; + struct device *d = dev_from_gk20a(g); + struct fifo_gk20a *f = &g->fifo; + struct fifo_runlist_info_gk20a *runlist = NULL; + u32 *runlist_entry_base = NULL; + u32 *runlist_entry = NULL; + phys_addr_t runlist_pa; + u32 old_buf, new_buf; + u32 chid; + u32 count = 0; + runlist = &f->runlist_info[runlist_id]; + + /* valid channel, add/remove it from active list. + Otherwise, keep active list untouched for suspend/resume. */ + if (hw_chid != ~0) { + if (add) { + if (test_and_set_bit(hw_chid, + runlist->active_channels) == 1) + return 0; + } else { + if (test_and_clear_bit(hw_chid, + runlist->active_channels) == 0) + return 0; + } + } + + old_buf = runlist->cur_buffer; + new_buf = !runlist->cur_buffer; + + gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx", + runlist_id, runlist->mem[new_buf].iova); + + runlist_pa = gk20a_get_phys_from_iova(d, runlist->mem[new_buf].iova); + if (!runlist_pa) { + ret = -EINVAL; + goto clean_up; + } + + runlist_entry_base = runlist->mem[new_buf].cpuva; + if (!runlist_entry_base) { + ret = -ENOMEM; + goto clean_up; + } + + if (hw_chid != ~0 || /* add/remove a valid channel */ + add /* resume to add all channels back */) { + runlist_entry = runlist_entry_base; + for_each_set_bit(chid, + runlist->active_channels, f->num_channels) { + gk20a_dbg_info("add channel %d to runlist", chid); + runlist_entry[0] = chid; + runlist_entry[1] = 0; + runlist_entry += 2; + count++; + } + } else /* suspend to remove all channels */ + count = 0; + + if (count != 0) { + gk20a_writel(g, fifo_runlist_base_r(), + fifo_runlist_base_ptr_f(u64_lo32(runlist_pa >> 12)) | + fifo_runlist_base_target_vid_mem_f()); + } + + gk20a_writel(g, fifo_runlist_r(), + fifo_runlist_engine_f(runlist_id) | + fifo_eng_runlist_length_f(count)); + + if (wait_for_finish) { + ret = gk20a_fifo_runlist_wait_pending(g, runlist_id); + + if (ret == -ETIMEDOUT) { + gk20a_err(dev_from_gk20a(g), + "runlist update timeout"); + + gk20a_fifo_runlist_reset_engines(g, runlist_id); + + /* engine reset needs the lock. 
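+ * recovery locks every runlist in the mmu fault handler, so we briefly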
drop it */ + mutex_unlock(&runlist->mutex); + /* wait until the runlist is active again */ + ret = gk20a_fifo_runlist_wait_pending(g, runlist_id); + /* get the lock back. at this point everything should + * should be fine */ + mutex_lock(&runlist->mutex); + + if (ret) + gk20a_err(dev_from_gk20a(g), + "runlist update failed: %d", ret); + } else if (ret == -EINTR) + gk20a_err(dev_from_gk20a(g), + "runlist update interrupted"); + } + + runlist->cur_buffer = new_buf; + +clean_up: + return ret; +} + +/* add/remove a channel from runlist + special cases below: runlist->active_channels will NOT be changed. + (hw_chid == ~0 && !add) means remove all active channels from runlist. + (hw_chid == ~0 && add) means restore all active channels on runlist. */ +int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 hw_chid, + bool add, bool wait_for_finish) +{ + struct fifo_runlist_info_gk20a *runlist = NULL; + struct fifo_gk20a *f = &g->fifo; + u32 token = PMU_INVALID_MUTEX_OWNER_ID; + u32 elpg_off; + u32 ret = 0; + + runlist = &f->runlist_info[runlist_id]; + + mutex_lock(&runlist->mutex); + + /* disable elpg if failed to acquire pmu mutex */ + elpg_off = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); + if (elpg_off) + gk20a_pmu_disable_elpg(g); + + ret = gk20a_fifo_update_runlist_locked(g, runlist_id, hw_chid, add, + wait_for_finish); + + /* re-enable elpg or release pmu mutex */ + if (elpg_off) + gk20a_pmu_enable_elpg(g); + else + pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); + + mutex_unlock(&runlist->mutex); + return ret; +} + +int gk20a_fifo_suspend(struct gk20a *g) +{ + gk20a_dbg_fn(""); + + /* stop bar1 snooping */ + gk20a_writel(g, fifo_bar1_base_r(), + fifo_bar1_base_valid_false_f()); + + /* disable fifo intr */ + gk20a_writel(g, fifo_intr_en_0_r(), 0); + gk20a_writel(g, fifo_intr_en_1_r(), 0); + + gk20a_dbg_fn("done"); + return 0; +} + +bool gk20a_fifo_mmu_fault_pending(struct gk20a *g) +{ + if (gk20a_readl(g, fifo_intr_0_r()) & + fifo_intr_0_mmu_fault_pending_f()) + return true; + else + return false; +} diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h new file mode 100644 index 00000000..051acda2 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -0,0 +1,164 @@ +/* + * drivers/video/tegra/host/gk20a/fifo_gk20a.h + * + * GK20A graphics fifo (gr host) + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ +#ifndef __FIFO_GK20A_H__ +#define __FIFO_GK20A_H__ + +#include "channel_gk20a.h" + +#define MAX_RUNLIST_BUFFERS 2 + +/* generally corresponds to the "pbdma" engine */ + +struct fifo_runlist_info_gk20a { + unsigned long *active_channels; + /* Each engine has its own SW and HW runlist buffer.*/ + struct runlist_mem_desc mem[MAX_RUNLIST_BUFFERS]; + u32 cur_buffer; + u32 total_entries; + bool stopped; + bool support_tsg; + struct mutex mutex; /* protect channel preempt and runlist upate */ + wait_queue_head_t runlist_wq; +}; + +/* so far gk20a has two engines: gr and ce2(gr_copy) */ +enum { + ENGINE_GR_GK20A = 0, + ENGINE_CE2_GK20A = 1, + ENGINE_INVAL_GK20A +}; + +struct fifo_pbdma_exception_info_gk20a { + u32 status_r; /* raw register value from hardware */ + u32 id, next_id; + u32 chan_status_v; /* raw value from hardware */ + bool id_is_chid, next_id_is_chid; + bool chsw_in_progress; +}; + +struct fifo_engine_exception_info_gk20a { + u32 status_r; /* raw register value from hardware */ + u32 id, next_id; + u32 ctx_status_v; /* raw value from hardware */ + bool id_is_chid, next_id_is_chid; + bool faulted, idle, ctxsw_in_progress; +}; + +struct fifo_mmu_fault_info_gk20a { + u32 fault_info_v; + u32 fault_type_v; + u32 engine_subid_v; + u32 client_v; + u32 fault_hi_v; + u32 fault_lo_v; + u64 inst_ptr; + const char *fault_type_desc; + const char *engine_subid_desc; + const char *client_desc; +}; + +struct fifo_engine_info_gk20a { + u32 sw_id; + const char *name; + u32 dev_info_id; + u32 engine_id; + u32 runlist_id; + u32 pbdma_id; + u32 mmu_fault_id; + u32 rc_mask; + struct fifo_pbdma_exception_info_gk20a pbdma_exception_info; + struct fifo_engine_exception_info_gk20a engine_exception_info; + struct fifo_mmu_fault_info_gk20a mmu_fault_info; + +}; + +struct fifo_gk20a { + struct gk20a *g; + int num_channels; + + int num_pbdma; + u32 *pbdma_map; + + struct fifo_engine_info_gk20a *engine_info; + u32 max_engines; + u32 num_engines; + + struct fifo_runlist_info_gk20a *runlist_info; + u32 max_runlists; + + struct userd_desc userd; + u32 userd_entry_size; + u32 userd_total_size; + + struct channel_gk20a *channel; + struct mutex ch_inuse_mutex; /* protect unused chid look up */ + + void (*remove_support)(struct fifo_gk20a *); + bool sw_ready; + struct { + /* share info between isrs and non-isr code */ + struct { + struct mutex mutex; + } isr; + struct { + u32 device_fatal_0; + u32 channel_fatal_0; + u32 restartable_0; + } pbdma; + struct { + + } engine; + + + } intr; + + u32 mmu_fault_engines; + bool deferred_reset_pending; + struct mutex deferred_reset_mutex; + + struct work_struct fault_restore_thread; +}; + +int gk20a_init_fifo_support(struct gk20a *g); + +void gk20a_fifo_isr(struct gk20a *g); +void gk20a_fifo_nonstall_isr(struct gk20a *g); + +int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid); + +int gk20a_fifo_enable_engine_activity(struct gk20a *g, + struct fifo_engine_info_gk20a *eng_info); +int gk20a_fifo_disable_engine_activity(struct gk20a *g, + struct fifo_engine_info_gk20a *eng_info, + bool wait_for_idle); + +int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 hw_chid, + bool add, bool wait_for_finish); + +int gk20a_fifo_suspend(struct gk20a *g); + +bool gk20a_fifo_mmu_fault_pending(struct gk20a *g); +void gk20a_fifo_recover(struct gk20a *g, u32 engine_ids, bool verbose); +int gk20a_init_fifo_reset_enable_hw(struct gk20a *g); + +void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g, + unsigned long fault_id); +#endif /*__GR_GK20A_H__*/ diff --git 
a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c new file mode 100644 index 00000000..4cc500de --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -0,0 +1,1681 @@ +/* + * drivers/video/tegra/host/gk20a/gk20a.c + * + * GK20A Graphics + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#define CREATE_TRACE_POINTS +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "gk20a.h" +#include "debug_gk20a.h" +#include "ctrl_gk20a.h" +#include "hw_mc_gk20a.h" +#include "hw_timer_gk20a.h" +#include "hw_bus_gk20a.h" +#include "hw_sim_gk20a.h" +#include "hw_top_gk20a.h" +#include "hw_ltc_gk20a.h" +#include "gk20a_scale.h" +#include "dbg_gpu_gk20a.h" +#include "hal.h" + +#ifdef CONFIG_ARM64 +#define __cpuc_flush_dcache_area __flush_dcache_area +#endif + +#define CLASS_NAME "nvidia-gpu" +/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */ +#define INTERFACE_NAME "nvhost%s-gpu" + +#define GK20A_NUM_CDEVS 5 + +#if defined(GK20A_DEBUG) +u32 gk20a_dbg_mask = GK20A_DEFAULT_DBG_MASK; +u32 gk20a_dbg_ftrace; +#endif + +static int gk20a_pm_finalize_poweron(struct device *dev); +static int gk20a_pm_prepare_poweroff(struct device *dev); + +static inline void set_gk20a(struct platform_device *dev, struct gk20a *gk20a) +{ + gk20a_get_platform(dev)->g = gk20a; +} + +static const struct file_operations gk20a_channel_ops = { + .owner = THIS_MODULE, + .release = gk20a_channel_release, + .open = gk20a_channel_open, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_channel_ioctl, +#endif + .unlocked_ioctl = gk20a_channel_ioctl, +}; + +static const struct file_operations gk20a_ctrl_ops = { + .owner = THIS_MODULE, + .release = gk20a_ctrl_dev_release, + .open = gk20a_ctrl_dev_open, + .unlocked_ioctl = gk20a_ctrl_dev_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_ctrl_dev_ioctl, +#endif +}; + +static const struct file_operations gk20a_dbg_ops = { + .owner = THIS_MODULE, + .release = gk20a_dbg_gpu_dev_release, + .open = gk20a_dbg_gpu_dev_open, + .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl, + .poll = gk20a_dbg_gpu_dev_poll, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, +#endif +}; + +static const struct file_operations gk20a_as_ops = { + .owner = THIS_MODULE, + .release = gk20a_as_dev_release, + .open = gk20a_as_dev_open, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_as_dev_ioctl, +#endif + .unlocked_ioctl = gk20a_as_dev_ioctl, +}; + +/* + * Note: We use a different 'open' to trigger handling of the profiler session. + * Most of the code is shared between them... Though, at some point if the + * code does get too tangled trying to handle each in the same path we can + * separate them cleanly. 
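+ * For now only .open differs: gk20a_prof_gpu_dev_open here versus gk20a_dbg_gpu_dev_open in gk20a_dbg_ops.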
+ */ +static const struct file_operations gk20a_prof_ops = { + .owner = THIS_MODULE, + .release = gk20a_dbg_gpu_dev_release, + .open = gk20a_prof_gpu_dev_open, + .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl, + /* .mmap = gk20a_prof_gpu_dev_mmap,*/ + /*int (*mmap) (struct file *, struct vm_area_struct *);*/ + .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, +#endif +}; + +static inline void sim_writel(struct gk20a *g, u32 r, u32 v) +{ + writel(v, g->sim.regs+r); +} + +static inline u32 sim_readl(struct gk20a *g, u32 r) +{ + return readl(g->sim.regs+r); +} + +static void kunmap_and_free_iopage(void **kvaddr, struct page **page) +{ + if (*kvaddr) { + kunmap(*kvaddr); + *kvaddr = 0; + } + if (*page) { + __free_page(*page); + *page = 0; + } +} + +static void gk20a_free_sim_support(struct gk20a *g) +{ + /* free sim mappings, bfrs */ + kunmap_and_free_iopage(&g->sim.send_bfr.kvaddr, + &g->sim.send_bfr.page); + + kunmap_and_free_iopage(&g->sim.recv_bfr.kvaddr, + &g->sim.recv_bfr.page); + + kunmap_and_free_iopage(&g->sim.msg_bfr.kvaddr, + &g->sim.msg_bfr.page); +} + +static void gk20a_remove_sim_support(struct sim_gk20a *s) +{ + struct gk20a *g = s->g; + if (g->sim.regs) + sim_writel(g, sim_config_r(), sim_config_mode_disabled_v()); + gk20a_free_sim_support(g); +} + +static int alloc_and_kmap_iopage(struct device *d, + void **kvaddr, + phys_addr_t *phys, + struct page **page) +{ + int err = 0; + *page = alloc_page(GFP_KERNEL); + + if (!*page) { + err = -ENOMEM; + dev_err(d, "couldn't allocate io page\n"); + goto fail; + } + + *kvaddr = kmap(*page); + if (!*kvaddr) { + err = -ENOMEM; + dev_err(d, "couldn't kmap io page\n"); + goto fail; + } + *phys = page_to_phys(*page); + return 0; + + fail: + kunmap_and_free_iopage(kvaddr, page); + return err; + +} + +static void __iomem *gk20a_ioremap_resource(struct platform_device *dev, int i, + struct resource **out) +{ + struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i); + if (!r) + return NULL; + if (out) + *out = r; + return devm_request_and_ioremap(&dev->dev, r); +} + +/* TBD: strip from released */ +static int gk20a_init_sim_support(struct platform_device *dev) +{ + int err = 0; + struct gk20a *g = get_gk20a(dev); + struct device *d = &dev->dev; + phys_addr_t phys; + + g->sim.g = g; + g->sim.regs = gk20a_ioremap_resource(dev, GK20A_SIM_IORESOURCE_MEM, + &g->sim.reg_mem); + if (!g->sim.regs) { + dev_err(d, "failed to remap gk20a sim regs\n"); + err = -ENXIO; + goto fail; + } + + /* allocate sim event/msg buffers */ + err = alloc_and_kmap_iopage(d, &g->sim.send_bfr.kvaddr, + &g->sim.send_bfr.phys, + &g->sim.send_bfr.page); + + err = err || alloc_and_kmap_iopage(d, &g->sim.recv_bfr.kvaddr, + &g->sim.recv_bfr.phys, + &g->sim.recv_bfr.page); + + err = err || alloc_and_kmap_iopage(d, &g->sim.msg_bfr.kvaddr, + &g->sim.msg_bfr.phys, + &g->sim.msg_bfr.page); + + if (!(g->sim.send_bfr.kvaddr && g->sim.recv_bfr.kvaddr && + g->sim.msg_bfr.kvaddr)) { + dev_err(d, "couldn't allocate all sim buffers\n"); + goto fail; + } + + /*mark send ring invalid*/ + sim_writel(g, sim_send_ring_r(), sim_send_ring_status_invalid_f()); + + /*read get pointer and make equal to put*/ + g->sim.send_ring_put = sim_readl(g, sim_send_get_r()); + sim_writel(g, sim_send_put_r(), g->sim.send_ring_put); + + /*write send ring address and make it valid*/ + /*TBD: work for >32b physmem*/ + phys = g->sim.send_bfr.phys; + sim_writel(g, sim_send_ring_hi_r(), 0); + sim_writel(g, sim_send_ring_r(), + sim_send_ring_status_valid_f() | 
+ sim_send_ring_target_phys_pci_coherent_f() | + sim_send_ring_size_4kb_f() | + sim_send_ring_addr_lo_f(phys >> PAGE_SHIFT)); + + /*repeat for recv ring (but swap put,get as roles are opposite) */ + sim_writel(g, sim_recv_ring_r(), sim_recv_ring_status_invalid_f()); + + /*read put pointer and make equal to get*/ + g->sim.recv_ring_get = sim_readl(g, sim_recv_put_r()); + sim_writel(g, sim_recv_get_r(), g->sim.recv_ring_get); + + /*write send ring address and make it valid*/ + /*TBD: work for >32b physmem*/ + phys = g->sim.recv_bfr.phys; + sim_writel(g, sim_recv_ring_hi_r(), 0); + sim_writel(g, sim_recv_ring_r(), + sim_recv_ring_status_valid_f() | + sim_recv_ring_target_phys_pci_coherent_f() | + sim_recv_ring_size_4kb_f() | + sim_recv_ring_addr_lo_f(phys >> PAGE_SHIFT)); + + g->sim.remove_support = gk20a_remove_sim_support; + return 0; + + fail: + gk20a_free_sim_support(g); + return err; +} + +static inline u32 sim_msg_header_size(void) +{ + return 24;/*TBD: fix the header to gt this from NV_VGPU_MSG_HEADER*/ +} + +static inline u32 *sim_msg_bfr(struct gk20a *g, u32 byte_offset) +{ + return (u32 *)(g->sim.msg_bfr.kvaddr + byte_offset); +} + +static inline u32 *sim_msg_hdr(struct gk20a *g, u32 byte_offset) +{ + return sim_msg_bfr(g, byte_offset); /*starts at 0*/ +} + +static inline u32 *sim_msg_param(struct gk20a *g, u32 byte_offset) +{ + /*starts after msg header/cmn*/ + return sim_msg_bfr(g, byte_offset + sim_msg_header_size()); +} + +static inline void sim_write_hdr(struct gk20a *g, u32 func, u32 size) +{ + /*memset(g->sim.msg_bfr.kvaddr,0,min(PAGE_SIZE,size));*/ + *sim_msg_hdr(g, sim_msg_signature_r()) = sim_msg_signature_valid_v(); + *sim_msg_hdr(g, sim_msg_result_r()) = sim_msg_result_rpc_pending_v(); + *sim_msg_hdr(g, sim_msg_spare_r()) = sim_msg_spare__init_v(); + *sim_msg_hdr(g, sim_msg_function_r()) = func; + *sim_msg_hdr(g, sim_msg_length_r()) = size + sim_msg_header_size(); +} + +static inline u32 sim_escape_read_hdr_size(void) +{ + return 12; /*TBD: fix NV_VGPU_SIM_ESCAPE_READ_HEADER*/ +} + +static u32 *sim_send_ring_bfr(struct gk20a *g, u32 byte_offset) +{ + return (u32 *)(g->sim.send_bfr.kvaddr + byte_offset); +} + +static int rpc_send_message(struct gk20a *g) +{ + /* calculations done in units of u32s */ + u32 send_base = sim_send_put_pointer_v(g->sim.send_ring_put) * 2; + u32 dma_offset = send_base + sim_dma_r()/sizeof(u32); + u32 dma_hi_offset = send_base + sim_dma_hi_r()/sizeof(u32); + + *sim_send_ring_bfr(g, dma_offset*sizeof(u32)) = + sim_dma_target_phys_pci_coherent_f() | + sim_dma_status_valid_f() | + sim_dma_size_4kb_f() | + sim_dma_addr_lo_f(g->sim.msg_bfr.phys >> PAGE_SHIFT); + + *sim_send_ring_bfr(g, dma_hi_offset*sizeof(u32)) = 0; /*TBD >32b phys*/ + + *sim_msg_hdr(g, sim_msg_sequence_r()) = g->sim.sequence_base++; + + g->sim.send_ring_put = (g->sim.send_ring_put + 2 * sizeof(u32)) % + PAGE_SIZE; + + __cpuc_flush_dcache_area(g->sim.msg_bfr.kvaddr, PAGE_SIZE); + __cpuc_flush_dcache_area(g->sim.send_bfr.kvaddr, PAGE_SIZE); + __cpuc_flush_dcache_area(g->sim.recv_bfr.kvaddr, PAGE_SIZE); + + /* Update the put pointer. This will trap into the host. */ + sim_writel(g, sim_send_put_r(), g->sim.send_ring_put); + + return 0; +} + +static inline u32 *sim_recv_ring_bfr(struct gk20a *g, u32 byte_offset) +{ + return (u32 *)(g->sim.recv_bfr.kvaddr + byte_offset); +} + +static int rpc_recv_poll(struct gk20a *g) +{ + phys_addr_t recv_phys_addr; + + /* XXX This read is not required (?) 
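+ * recv_ring_get is already tracked in software by this function, so re-reading it from the register should be redundant.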
*/ + /*pVGpu->recv_ring_get = VGPU_REG_RD32(pGpu, NV_VGPU_RECV_GET);*/ + + /* Poll the recv ring get pointer in an infinite loop*/ + do { + g->sim.recv_ring_put = sim_readl(g, sim_recv_put_r()); + } while (g->sim.recv_ring_put == g->sim.recv_ring_get); + + /* process all replies */ + while (g->sim.recv_ring_put != g->sim.recv_ring_get) { + /* these are in u32 offsets*/ + u32 dma_lo_offset = + sim_recv_put_pointer_v(g->sim.recv_ring_get)*2 + 0; + /*u32 dma_hi_offset = dma_lo_offset + 1;*/ + u32 recv_phys_addr_lo = sim_dma_addr_lo_v(*sim_recv_ring_bfr(g, dma_lo_offset*4)); + + /*u32 recv_phys_addr_hi = sim_dma_hi_addr_v( + (phys_addr_t)sim_recv_ring_bfr(g,dma_hi_offset*4));*/ + + /*TBD >32b phys addr */ + recv_phys_addr = recv_phys_addr_lo << PAGE_SHIFT; + + if (recv_phys_addr != g->sim.msg_bfr.phys) { + dev_err(dev_from_gk20a(g), "%s Error in RPC reply\n", + __func__); + return -1; + } + + /* Update GET pointer */ + g->sim.recv_ring_get = (g->sim.recv_ring_get + 2*sizeof(u32)) % + PAGE_SIZE; + + __cpuc_flush_dcache_area(g->sim.msg_bfr.kvaddr, PAGE_SIZE); + __cpuc_flush_dcache_area(g->sim.send_bfr.kvaddr, PAGE_SIZE); + __cpuc_flush_dcache_area(g->sim.recv_bfr.kvaddr, PAGE_SIZE); + + sim_writel(g, sim_recv_get_r(), g->sim.recv_ring_get); + + g->sim.recv_ring_put = sim_readl(g, sim_recv_put_r()); + } + + return 0; +} + +static int issue_rpc_and_wait(struct gk20a *g) +{ + int err; + + err = rpc_send_message(g); + if (err) { + dev_err(dev_from_gk20a(g), "%s failed rpc_send_message\n", + __func__); + return err; + } + + err = rpc_recv_poll(g); + if (err) { + dev_err(dev_from_gk20a(g), "%s failed rpc_recv_poll\n", + __func__); + return err; + } + + /* Now check if RPC really succeeded */ + if (*sim_msg_hdr(g, sim_msg_result_r()) != sim_msg_result_success_v()) { + dev_err(dev_from_gk20a(g), "%s received failed status!\n", + __func__); + return -(*sim_msg_hdr(g, sim_msg_result_r())); + } + return 0; +} + +int gk20a_sim_esc_read(struct gk20a *g, char *path, u32 index, u32 count, u32 *data) +{ + int err; + size_t pathlen = strlen(path); + u32 data_offset; + + sim_write_hdr(g, sim_msg_function_sim_escape_read_v(), + sim_escape_read_hdr_size()); + *sim_msg_param(g, 0) = index; + *sim_msg_param(g, 4) = count; + data_offset = roundup(0xc + pathlen + 1, sizeof(u32)); + *sim_msg_param(g, 8) = data_offset; + strcpy((char *)sim_msg_param(g, 0xc), path); + + err = issue_rpc_and_wait(g); + + if (!err) + memcpy(data, sim_msg_param(g, data_offset), count); + return err; +} + +static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id) +{ + struct gk20a *g = dev_id; + u32 mc_intr_0; + + if (!g->power_on) + return IRQ_NONE; + + /* not from gpu when sharing irq with others */ + mc_intr_0 = gk20a_readl(g, mc_intr_0_r()); + if (unlikely(!mc_intr_0)) + return IRQ_NONE; + + gk20a_writel(g, mc_intr_en_0_r(), + mc_intr_en_0_inta_disabled_f()); + + /* flush previous write */ + gk20a_readl(g, mc_intr_en_0_r()); + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id) +{ + struct gk20a *g = dev_id; + u32 mc_intr_1; + + if (!g->power_on) + return IRQ_NONE; + + /* not from gpu when sharing irq with others */ + mc_intr_1 = gk20a_readl(g, mc_intr_1_r()); + if (unlikely(!mc_intr_1)) + return IRQ_NONE; + + gk20a_writel(g, mc_intr_en_1_r(), + mc_intr_en_1_inta_disabled_f()); + + /* flush previous write */ + gk20a_readl(g, mc_intr_en_1_r()); + + return IRQ_WAKE_THREAD; +} + +static void gk20a_pbus_isr(struct gk20a *g) +{ + u32 val; + val = gk20a_readl(g, bus_intr_0_r()); + if (val & 
(bus_intr_0_pri_squash_m() | + bus_intr_0_pri_fecserr_m() | + bus_intr_0_pri_timeout_m())) { + gk20a_err(dev_from_gk20a(g), "top_fs_status_r : 0x%x", + gk20a_readl(g, top_fs_status_r())); + gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x", + gk20a_readl(g, mc_enable_r())); + gk20a_err(&g->dev->dev, + "NV_PTIMER_PRI_TIMEOUT_SAVE_0: 0x%x\n", + gk20a_readl(g, timer_pri_timeout_save_0_r())); + gk20a_err(&g->dev->dev, + "NV_PTIMER_PRI_TIMEOUT_SAVE_1: 0x%x\n", + gk20a_readl(g, timer_pri_timeout_save_1_r())); + gk20a_err(&g->dev->dev, + "NV_PTIMER_PRI_TIMEOUT_FECS_ERRCODE: 0x%x\n", + gk20a_readl(g, timer_pri_timeout_fecs_errcode_r())); + } + + if (val) + gk20a_err(&g->dev->dev, + "Unhandled pending pbus interrupt\n"); + + gk20a_writel(g, bus_intr_0_r(), val); +} + +static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id) +{ + struct gk20a *g = dev_id; + u32 mc_intr_0; + + gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); + + mc_intr_0 = gk20a_readl(g, mc_intr_0_r()); + + gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0); + + if (mc_intr_0 & mc_intr_0_pgraph_pending_f()) + gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g)); + if (mc_intr_0 & mc_intr_0_pfifo_pending_f()) + gk20a_fifo_isr(g); + if (mc_intr_0 & mc_intr_0_pmu_pending_f()) + gk20a_pmu_isr(g); + if (mc_intr_0 & mc_intr_0_priv_ring_pending_f()) + gk20a_priv_ring_isr(g); + if (mc_intr_0 & mc_intr_0_ltc_pending_f()) + gk20a_mm_ltc_isr(g); + if (mc_intr_0 & mc_intr_0_pbus_pending_f()) + gk20a_pbus_isr(g); + + gk20a_writel(g, mc_intr_en_0_r(), + mc_intr_en_0_inta_hardware_f()); + + /* flush previous write */ + gk20a_readl(g, mc_intr_en_0_r()); + + return IRQ_HANDLED; +} + +static irqreturn_t gk20a_intr_thread_nonstall(int irq, void *dev_id) +{ + struct gk20a *g = dev_id; + u32 mc_intr_1; + + gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); + + mc_intr_1 = gk20a_readl(g, mc_intr_1_r()); + + gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1); + + if (mc_intr_1 & mc_intr_0_pfifo_pending_f()) + gk20a_fifo_nonstall_isr(g); + if (mc_intr_1 & mc_intr_0_pgraph_pending_f()) + gk20a_gr_nonstall_isr(g); + + gk20a_writel(g, mc_intr_en_1_r(), + mc_intr_en_1_inta_hardware_f()); + + /* flush previous write */ + gk20a_readl(g, mc_intr_en_1_r()); + + return IRQ_HANDLED; +} + +static void gk20a_remove_support(struct platform_device *dev) +{ + struct gk20a *g = get_gk20a(dev); + + /* pmu support should already be removed when driver turns off + gpu power rail in prepapre_poweroff */ + if (g->gk20a_cdev.gk20a_cooling_dev) + thermal_cooling_device_unregister(g->gk20a_cdev.gk20a_cooling_dev); + + if (g->gr.remove_support) + g->gr.remove_support(&g->gr); + + if (g->fifo.remove_support) + g->fifo.remove_support(&g->fifo); + + if (g->mm.remove_support) + g->mm.remove_support(&g->mm); + + if (g->sim.remove_support) + g->sim.remove_support(&g->sim); + + release_firmware(g->pmu_fw); + + if (g->irq_requested) { + free_irq(g->irq_stall, g); + free_irq(g->irq_nonstall, g); + g->irq_requested = false; + } + + /* free mappings to registers, etc*/ + + if (g->regs) { + iounmap(g->regs); + g->regs = 0; + } + if (g->bar1) { + iounmap(g->bar1); + g->bar1 = 0; + } +} + +static int gk20a_init_support(struct platform_device *dev) +{ + int err = 0; + struct gk20a *g = get_gk20a(dev); + + g->regs = gk20a_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM, + &g->reg_mem); + if (!g->regs) { + dev_err(dev_from_gk20a(g), "failed to remap gk20a registers\n"); + err = -ENXIO; + goto fail; + } + + g->bar1 = gk20a_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM, + 
&g->bar1_mem); + if (!g->bar1) { + dev_err(dev_from_gk20a(g), "failed to remap gk20a bar1\n"); + err = -ENXIO; + goto fail; + } + + /* Get interrupt numbers */ + g->irq_stall = platform_get_irq(dev, 0); + g->irq_nonstall = platform_get_irq(dev, 1); + if (g->irq_stall < 0 || g->irq_nonstall < 0) { + err = -ENXIO; + goto fail; + } + + if (tegra_cpu_is_asim()) { + err = gk20a_init_sim_support(dev); + if (err) + goto fail; + } + + mutex_init(&g->dbg_sessions_lock); + mutex_init(&g->client_lock); + + g->remove_support = gk20a_remove_support; + return 0; + + fail: + gk20a_remove_support(dev); + return err; +} + +static int gk20a_init_client(struct platform_device *dev) +{ + struct gk20a *g = get_gk20a(dev); + int err; + + gk20a_dbg_fn(""); + +#ifndef CONFIG_PM_RUNTIME + gk20a_pm_finalize_poweron(&dev->dev); +#endif + + err = gk20a_init_mm_setup_sw(g); + if (err) + return err; + + if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) + gk20a_scale_hw_init(dev); + return 0; +} + +static void gk20a_deinit_client(struct platform_device *dev) +{ + gk20a_dbg_fn(""); +#ifndef CONFIG_PM_RUNTIME + gk20a_pm_prepare_poweroff(&dev->dev); +#endif +} + +int gk20a_get_client(struct gk20a *g) +{ + int err = 0; + + mutex_lock(&g->client_lock); + if (g->client_refcount == 0) + err = gk20a_init_client(g->dev); + if (!err) + g->client_refcount++; + mutex_unlock(&g->client_lock); + return err; +} + +void gk20a_put_client(struct gk20a *g) +{ + mutex_lock(&g->client_lock); + if (g->client_refcount == 1) + gk20a_deinit_client(g->dev); + g->client_refcount--; + mutex_unlock(&g->client_lock); + WARN_ON(g->client_refcount < 0); +} + +static int gk20a_pm_prepare_poweroff(struct device *_dev) +{ + struct platform_device *dev = to_platform_device(_dev); + struct gk20a *g = get_gk20a(dev); + int ret = 0; + + gk20a_dbg_fn(""); + + if (!g->power_on) + return 0; + + ret |= gk20a_channel_suspend(g); + + /* disable elpg before gr or fifo suspend */ + ret |= gk20a_pmu_destroy(g); + ret |= gk20a_gr_suspend(g); + ret |= gk20a_mm_suspend(g); + ret |= gk20a_fifo_suspend(g); + + /* + * After this point, gk20a interrupts should not get + * serviced. 
+ */ + if (g->irq_requested) { + free_irq(g->irq_stall, g); + free_irq(g->irq_nonstall, g); + g->irq_requested = false; + } + + /* Disable GPCPLL */ + ret |= gk20a_suspend_clk_support(g); + g->power_on = false; + + return ret; +} + +static void gk20a_detect_chip(struct gk20a *g) +{ + struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics; + + u32 mc_boot_0_value = gk20a_readl(g, mc_boot_0_r()); + gpu->arch = mc_boot_0_architecture_v(mc_boot_0_value) << + NVHOST_GPU_ARCHITECTURE_SHIFT; + gpu->impl = mc_boot_0_implementation_v(mc_boot_0_value); + gpu->rev = + (mc_boot_0_major_revision_v(mc_boot_0_value) << 4) | + mc_boot_0_minor_revision_v(mc_boot_0_value); + + gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n", + g->gpu_characteristics.arch, + g->gpu_characteristics.impl, + g->gpu_characteristics.rev); +} + +static int gk20a_pm_finalize_poweron(struct device *_dev) +{ + struct platform_device *dev = to_platform_device(_dev); + struct gk20a *g = get_gk20a(dev); + int err, nice_value; + + gk20a_dbg_fn(""); + + if (g->power_on) + return 0; + + nice_value = task_nice(current); + set_user_nice(current, -20); + + if (!g->irq_requested) { + err = request_threaded_irq(g->irq_stall, + gk20a_intr_isr_stall, + gk20a_intr_thread_stall, + 0, "gk20a_stall", g); + if (err) { + dev_err(dev_from_gk20a(g), + "failed to request stall intr irq @ %lld\n", + (u64)g->irq_stall); + goto done; + } + err = request_threaded_irq(g->irq_nonstall, + gk20a_intr_isr_nonstall, + gk20a_intr_thread_nonstall, + 0, "gk20a_nonstall", g); + if (err) { + dev_err(dev_from_gk20a(g), + "failed to request non-stall intr irq @ %lld\n", + (u64)g->irq_nonstall); + goto done; + } + g->irq_requested = true; + } + + g->power_on = true; + + gk20a_writel(g, mc_intr_mask_1_r(), + mc_intr_0_pfifo_pending_f() + | mc_intr_0_pgraph_pending_f()); + gk20a_writel(g, mc_intr_en_1_r(), + mc_intr_en_1_inta_hardware_f()); + + gk20a_writel(g, mc_intr_mask_0_r(), + mc_intr_0_pgraph_pending_f() + | mc_intr_0_pfifo_pending_f() + | mc_intr_0_priv_ring_pending_f() + | mc_intr_0_ltc_pending_f() + | mc_intr_0_pbus_pending_f()); + gk20a_writel(g, mc_intr_en_0_r(), + mc_intr_en_0_inta_hardware_f()); + + if (!tegra_platform_is_silicon()) + gk20a_writel(g, bus_intr_en_0_r(), 0x0); + else + gk20a_writel(g, bus_intr_en_0_r(), + bus_intr_en_0_pri_squash_m() | + bus_intr_en_0_pri_fecserr_m() | + bus_intr_en_0_pri_timeout_m()); + gk20a_reset_priv_ring(g); + + gk20a_detect_chip(g); + err = gpu_init_hal(g); + if (err) + goto done; + + /* TBD: move this after graphics init in which blcg/slcg is enabled. + This function removes SlowdownOnBoot which applies 32x divider + on gpcpll bypass path. The purpose of slowdown is to save power + during boot but it also significantly slows down gk20a init on + simulation and emulation. We should remove SOB after graphics power + saving features (blcg/slcg) are enabled. For now, do it here. 
*/ + err = gk20a_init_clk_support(g); + if (err) { + gk20a_err(&dev->dev, "failed to init gk20a clk"); + goto done; + } + + /* enable pri timeout only on silicon */ + if (tegra_platform_is_silicon()) { + gk20a_writel(g, + timer_pri_timeout_r(), + timer_pri_timeout_period_f(0x186A0) | + timer_pri_timeout_en_en_enabled_f()); + } else { + gk20a_writel(g, + timer_pri_timeout_r(), + timer_pri_timeout_period_f(0x186A0) | + timer_pri_timeout_en_en_disabled_f()); + } + + err = gk20a_init_fifo_reset_enable_hw(g); + if (err) { + gk20a_err(&dev->dev, "failed to reset gk20a fifo"); + goto done; + } + + err = gk20a_init_mm_support(g); + if (err) { + gk20a_err(&dev->dev, "failed to init gk20a mm"); + goto done; + } + + err = gk20a_init_pmu_support(g); + if (err) { + gk20a_err(&dev->dev, "failed to init gk20a pmu"); + goto done; + } + + err = gk20a_init_fifo_support(g); + if (err) { + gk20a_err(&dev->dev, "failed to init gk20a fifo"); + goto done; + } + + err = gk20a_init_gr_support(g); + if (err) { + gk20a_err(&dev->dev, "failed to init gk20a gr"); + goto done; + } + + err = gk20a_init_pmu_setup_hw2(g); + if (err) { + gk20a_err(&dev->dev, "failed to init gk20a pmu_hw2"); + goto done; + } + + err = gk20a_init_therm_support(g); + if (err) { + gk20a_err(&dev->dev, "failed to init gk20a therm"); + goto done; + } + + err = gk20a_init_gpu_characteristics(g); + if (err) { + gk20a_err(&dev->dev, "failed to init gk20a gpu characteristics"); + goto done; + } + + gk20a_channel_resume(g); + set_user_nice(current, nice_value); + +done: + return err; +} + +static struct of_device_id tegra_gk20a_of_match[] = { +#ifdef CONFIG_TEGRA_GK20A + { .compatible = "nvidia,tegra124-gk20a", + .data = &gk20a_tegra_platform }, +#endif + { .compatible = "nvidia,generic-gk20a", + .data = &gk20a_generic_platform }, + { }, +}; + +int tegra_gpu_get_max_state(struct thermal_cooling_device *cdev, + unsigned long *max_state) +{ + struct cooling_device_gk20a *gk20a_gpufreq_device = cdev->devdata; + + *max_state = gk20a_gpufreq_device->gk20a_freq_table_size - 1; + return 0; +} + +int tegra_gpu_get_cur_state(struct thermal_cooling_device *cdev, + unsigned long *cur_state) +{ + struct cooling_device_gk20a *gk20a_gpufreq_device = cdev->devdata; + + *cur_state = gk20a_gpufreq_device->gk20a_freq_state; + return 0; +} + +int tegra_gpu_set_cur_state(struct thermal_cooling_device *c_dev, + unsigned long cur_state) +{ + u32 target_freq; + struct gk20a *g; + struct gpufreq_table_data *gpu_cooling_table; + struct cooling_device_gk20a *gk20a_gpufreq_device = c_dev->devdata; + + BUG_ON(cur_state >= gk20a_gpufreq_device->gk20a_freq_table_size); + + g = container_of(gk20a_gpufreq_device, struct gk20a, gk20a_cdev); + + gpu_cooling_table = tegra_gpufreq_table_get(); + target_freq = gpu_cooling_table[cur_state].frequency; + + /* ensure a query for state will get the proper value */ + gk20a_gpufreq_device->gk20a_freq_state = cur_state; + + gk20a_clk_set_rate(g, target_freq); + + return 0; +} + +static struct thermal_cooling_device_ops tegra_gpu_cooling_ops = { + .get_max_state = tegra_gpu_get_max_state, + .get_cur_state = tegra_gpu_get_cur_state, + .set_cur_state = tegra_gpu_set_cur_state, +}; + +static int gk20a_create_device( + struct platform_device *pdev, int devno, const char *cdev_name, + struct cdev *cdev, struct device **out, + const struct file_operations *ops) +{ + struct device *dev; + int err; + struct gk20a *g = get_gk20a(pdev); + + gk20a_dbg_fn(""); + + cdev_init(cdev, ops); + cdev->owner = THIS_MODULE; + + err = cdev_add(cdev, devno, 1); + if 
(err) { + dev_err(&pdev->dev, + "failed to add %s cdev\n", cdev_name); + return err; + } + + dev = device_create(g->class, NULL, devno, NULL, + (pdev->id <= 0) ? INTERFACE_NAME : INTERFACE_NAME ".%d", + cdev_name, pdev->id); + + if (IS_ERR(dev)) { + err = PTR_ERR(dev); + cdev_del(cdev); + dev_err(&pdev->dev, + "failed to create %s device for %s\n", + cdev_name, pdev->name); + return err; + } + + *out = dev; + return 0; +} + +static void gk20a_user_deinit(struct platform_device *dev) +{ + struct gk20a *g = get_gk20a(dev); + + if (g->channel.node) { + device_destroy(g->class, g->channel.cdev.dev); + cdev_del(&g->channel.cdev); + } + + if (g->as.node) { + device_destroy(g->class, g->as.cdev.dev); + cdev_del(&g->as.cdev); + } + + if (g->ctrl.node) { + device_destroy(g->class, g->ctrl.cdev.dev); + cdev_del(&g->ctrl.cdev); + } + + if (g->dbg.node) { + device_destroy(g->class, g->dbg.cdev.dev); + cdev_del(&g->dbg.cdev); + } + + if (g->prof.node) { + device_destroy(g->class, g->prof.cdev.dev); + cdev_del(&g->prof.cdev); + } + + if (g->cdev_region) + unregister_chrdev_region(g->cdev_region, GK20A_NUM_CDEVS); + + if (g->class) + class_destroy(g->class); +} + +static int gk20a_user_init(struct platform_device *dev) +{ + int err; + dev_t devno; + struct gk20a *g = get_gk20a(dev); + + g->class = class_create(THIS_MODULE, CLASS_NAME); + if (IS_ERR(g->class)) { + err = PTR_ERR(g->class); + g->class = NULL; + dev_err(&dev->dev, + "failed to create " CLASS_NAME " class\n"); + goto fail; + } + + err = alloc_chrdev_region(&devno, 0, GK20A_NUM_CDEVS, CLASS_NAME); + if (err) { + dev_err(&dev->dev, "failed to allocate devno\n"); + goto fail; + } + g->cdev_region = devno; + + err = gk20a_create_device(dev, devno++, "", + &g->channel.cdev, &g->channel.node, + &gk20a_channel_ops); + if (err) + goto fail; + + err = gk20a_create_device(dev, devno++, "-as", + &g->as.cdev, &g->as.node, + &gk20a_as_ops); + if (err) + goto fail; + + err = gk20a_create_device(dev, devno++, "-ctrl", + &g->ctrl.cdev, &g->ctrl.node, + &gk20a_ctrl_ops); + if (err) + goto fail; + + err = gk20a_create_device(dev, devno++, "-dbg", + &g->dbg.cdev, &g->dbg.node, + &gk20a_dbg_ops); + if (err) + goto fail; + + err = gk20a_create_device(dev, devno++, "-prof", + &g->prof.cdev, &g->prof.node, + &gk20a_prof_ops); + if (err) + goto fail; + + return 0; +fail: + gk20a_user_deinit(dev); + return err; +} + +struct channel_gk20a *gk20a_get_channel_from_file(int fd) +{ + struct channel_gk20a *ch; + struct file *f = fget(fd); + if (!f) + return 0; + + if (f->f_op != &gk20a_channel_ops) { + fput(f); + return 0; + } + + ch = (struct channel_gk20a *)f->private_data; + fput(f); + return ch; +} + +static int gk20a_pm_enable_clk(struct device *dev) +{ + int index = 0; + struct gk20a_platform *platform; + + platform = dev_get_drvdata(dev); + if (!platform) + return -EINVAL; + + for (index = 0; index < platform->num_clks; index++) { + int err = clk_prepare_enable(platform->clk[index]); + if (err) + return -EINVAL; + } + + return 0; +} + +static int gk20a_pm_disable_clk(struct device *dev) +{ + int index = 0; + struct gk20a_platform *platform; + + platform = dev_get_drvdata(dev); + if (!platform) + return -EINVAL; + + for (index = 0; index < platform->num_clks; index++) + clk_disable_unprepare(platform->clk[index]); + + return 0; +} + +#ifdef CONFIG_PM +const struct dev_pm_ops gk20a_pm_ops = { +#if defined(CONFIG_PM_RUNTIME) && !defined(CONFIG_PM_GENERIC_DOMAINS) + .runtime_resume = gk20a_pm_enable_clk, + .runtime_suspend = gk20a_pm_disable_clk, +#endif +}; +#endif + 
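For context, gk20a_pm_enable_clk()/gk20a_pm_disable_clk() above follow the standard Linux runtime-PM clock-gating pattern: the PM core invokes .runtime_suspend once the device's usage count drops to zero and .runtime_resume before the next user proceeds. The following is a minimal, self-contained sketch of that pattern for a hypothetical platform device; it is not part of this patch, and all demo_* names and the 50 ms autosuspend delay are invented for illustration.

#include <linux/clk.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>

struct demo_drvdata {
	struct clk *clk;
};

/* Runtime suspend: last user dropped its reference, gate the clock. */
static int demo_runtime_suspend(struct device *dev)
{
	struct demo_drvdata *d = dev_get_drvdata(dev);

	clk_disable_unprepare(d->clk);
	return 0;
}

/* Runtime resume: re-enable the clock before the first user proceeds. */
static int demo_runtime_resume(struct device *dev)
{
	struct demo_drvdata *d = dev_get_drvdata(dev);

	return clk_prepare_enable(d->clk);
}

static const struct dev_pm_ops demo_pm_ops = {
	SET_RUNTIME_PM_OPS(demo_runtime_suspend, demo_runtime_resume, NULL)
};

static int demo_probe(struct platform_device *pdev)
{
	struct demo_drvdata *d;

	d = devm_kzalloc(&pdev->dev, sizeof(*d), GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	d->clk = devm_clk_get(&pdev->dev, NULL);
	if (IS_ERR(d->clk))
		return PTR_ERR(d->clk);

	platform_set_drvdata(pdev, d);

	/* Hypothetical 50 ms autosuspend delay; gk20a takes its delay
	 * from platform data (clockgate_delay) instead. */
	pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
	pm_runtime_use_autosuspend(&pdev->dev);
	pm_runtime_enable(&pdev->dev);
	return 0;
}

static int demo_remove(struct platform_device *pdev)
{
	pm_runtime_disable(&pdev->dev);
	return 0;
}

static struct platform_driver demo_driver = {
	.probe = demo_probe,
	.remove = demo_remove,
	.driver = {
		.owner = THIS_MODULE,
		.name = "demo-runtime-pm",
		.pm = &demo_pm_ops,
	},
};
module_platform_driver(demo_driver);
MODULE_LICENSE("GPL v2");

Under this wiring, pm_runtime_get_sync() re-enables the clock before the caller touches the hardware, and the autosuspend timer gates it again after the last pm_runtime_put*() drops the usage count; this is the same flow the gk20a_busy()/gk20a_idle() helpers later in this file rely on.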
+static int gk20a_pm_railgate(struct generic_pm_domain *domain) +{ + struct gk20a *g = container_of(domain, struct gk20a, pd); + struct gk20a_platform *platform = platform_get_drvdata(g->dev); + int ret = 0; + + if (platform->railgate) + ret = platform->railgate(platform->g->dev); + + return ret; +} + +static int gk20a_pm_unrailgate(struct generic_pm_domain *domain) +{ + struct gk20a *g = container_of(domain, struct gk20a, pd); + struct gk20a_platform *platform = platform_get_drvdata(g->dev); + int ret = 0; + + if (platform->unrailgate) + ret = platform->unrailgate(platform->g->dev); + + return ret; +} + +static int gk20a_pm_suspend(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + int ret = 0; + + if (atomic_read(&dev->power.usage_count) > 1) + return -EBUSY; + + ret = gk20a_pm_prepare_poweroff(dev); + if (ret) + return ret; + + gk20a_scale_suspend(to_platform_device(dev)); + + if (platform->suspend) + platform->suspend(dev); + + return 0; +} + +static int gk20a_pm_resume(struct device *dev) +{ + int ret = 0; + + ret = gk20a_pm_finalize_poweron(dev); + if (ret) + return ret; + + gk20a_scale_resume(to_platform_device(dev)); + + return 0; +} + +static int gk20a_pm_initialise_domain(struct platform_device *pdev) +{ + struct gk20a_platform *platform = platform_get_drvdata(pdev); + struct dev_power_governor *pm_domain_gov = NULL; + struct generic_pm_domain *domain = &platform->g->pd; + int ret = 0; + + domain->name = kstrdup(pdev->name, GFP_KERNEL); + + if (!platform->can_railgate) + pm_domain_gov = &pm_domain_always_on_gov; + + pm_genpd_init(domain, pm_domain_gov, true); + + domain->power_off = gk20a_pm_railgate; + domain->power_on = gk20a_pm_unrailgate; + domain->dev_ops.start = gk20a_pm_enable_clk; + domain->dev_ops.stop = gk20a_pm_disable_clk; + domain->dev_ops.save_state = gk20a_pm_prepare_poweroff; + domain->dev_ops.restore_state = gk20a_pm_finalize_poweron; + domain->dev_ops.suspend = gk20a_pm_suspend; + domain->dev_ops.resume = gk20a_pm_resume; + + device_set_wakeup_capable(&pdev->dev, 0); + ret = pm_genpd_add_device(domain, &pdev->dev); + + if (platform->railgate_delay) + pm_genpd_set_poweroff_delay(domain, platform->railgate_delay); + + return ret; +} + +static int gk20a_pm_init(struct platform_device *dev) +{ + struct gk20a_platform *platform = platform_get_drvdata(dev); + int err = 0; + + /* Initialise pm runtime */ + if (platform->clockgate_delay) { + pm_runtime_set_autosuspend_delay(&dev->dev, + platform->clockgate_delay); + pm_runtime_use_autosuspend(&dev->dev); + } + + pm_runtime_enable(&dev->dev); + if (!pm_runtime_enabled(&dev->dev)) + gk20a_pm_enable_clk(&dev->dev); + + /* Enable runtime railgating if possible. If not, + * turn on the rail now. 
*/ + if (platform->can_railgate && IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) + platform->railgate(dev); + else + platform->unrailgate(dev); + + /* genpd will take care of runtime power management if it is enabled */ + if (IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) + err = gk20a_pm_initialise_domain(dev); + + return err; +} + +static int gk20a_probe(struct platform_device *dev) +{ + struct gk20a *gk20a; + int err; + struct gk20a_platform *platform = NULL; + struct cooling_device_gk20a *gpu_cdev = NULL; + + if (dev->dev.of_node) { + const struct of_device_id *match; + + match = of_match_device(tegra_gk20a_of_match, &dev->dev); + if (match) + platform = (struct gk20a_platform *)match->data; + } else + platform = (struct gk20a_platform *)dev->dev.platform_data; + + if (!platform) { + dev_err(&dev->dev, "no platform data\n"); + return -ENODATA; + } + + gk20a_dbg_fn(""); + + platform_set_drvdata(dev, platform); + + gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL); + if (!gk20a) { + dev_err(&dev->dev, "couldn't allocate gk20a support"); + return -ENOMEM; + } + + set_gk20a(dev, gk20a); + gk20a->dev = dev; + + err = gk20a_user_init(dev); + if (err) + return err; + + gk20a_init_support(dev); + + spin_lock_init(&gk20a->mc_enable_lock); + + /* Initialize the platform interface. */ + err = platform->probe(dev); + if (err) { + dev_err(&dev->dev, "platform probe failed"); + return err; + } + + err = gk20a_pm_init(dev); + if (err) { + dev_err(&dev->dev, "pm init failed"); + return err; + } + + /* Initialise scaling */ + if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) + gk20a_scale_init(dev); + + if (platform->late_probe) { + err = platform->late_probe(dev); + if (err) { + dev_err(&dev->dev, "late probe failed"); + return err; + } + } + + gk20a_debug_init(dev); + + /* Set DMA parameters to allow larger sgt lists */ + dev->dev.dma_parms = &gk20a->dma_parms; + dma_set_max_seg_size(&dev->dev, UINT_MAX); + + gpu_cdev = &gk20a->gk20a_cdev; + gpu_cdev->gk20a_freq_table_size = tegra_gpufreq_table_size_get(); + gpu_cdev->gk20a_freq_state = 0; + gpu_cdev->g = gk20a; + gpu_cdev->gk20a_cooling_dev = thermal_cooling_device_register("gk20a_cdev", gpu_cdev, + &tegra_gpu_cooling_ops); + + gk20a->gr_idle_timeout_default = + CONFIG_GK20A_DEFAULT_TIMEOUT; + gk20a->timeouts_enabled = true; + + /* Set up initial clock gating settings */ + if (tegra_platform_is_silicon()) { + gk20a->slcg_enabled = true; + gk20a->blcg_enabled = true; + gk20a->elcg_enabled = true; + gk20a->elpg_enabled = true; + gk20a->aelpg_enabled = true; + } + + gk20a_create_sysfs(dev); + +#ifdef CONFIG_DEBUG_FS + clk_gk20a_debugfs_init(dev); + + spin_lock_init(&gk20a->debugfs_lock); + gk20a->mm.ltc_enabled = true; + gk20a->mm.ltc_enabled_debug = true; + gk20a->debugfs_ltc_enabled = + debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR, + platform->debugfs, + &gk20a->mm.ltc_enabled_debug); + gk20a->mm.ltc_enabled_debug = true; + gk20a->debugfs_gr_idle_timeout_default = + debugfs_create_u32("gr_idle_timeout_default_us", + S_IRUGO|S_IWUSR, platform->debugfs, + &gk20a->gr_idle_timeout_default); + gk20a->debugfs_timeouts_enabled = + debugfs_create_bool("timeouts_enabled", + S_IRUGO|S_IWUSR, + platform->debugfs, + &gk20a->timeouts_enabled); + gk20a_pmu_debugfs_init(dev); +#endif + +#ifdef CONFIG_INPUT_CFBOOST + cfb_add_device(&dev->dev); +#endif + + return 0; +} + +static int __exit gk20a_remove(struct platform_device *dev) +{ + struct gk20a *g = get_gk20a(dev); + gk20a_dbg_fn(""); + +#ifdef CONFIG_INPUT_CFBOOST + cfb_remove_device(&dev->dev); +#endif + + if (g->remove_support) + 
g->remove_support(dev); + + gk20a_user_deinit(dev); + + set_gk20a(dev, 0); +#ifdef CONFIG_DEBUG_FS + debugfs_remove(g->debugfs_ltc_enabled); + debugfs_remove(g->debugfs_gr_idle_timeout_default); + debugfs_remove(g->debugfs_timeouts_enabled); +#endif + + kfree(g); + +#ifdef CONFIG_PM_RUNTIME + pm_runtime_put(&dev->dev); + pm_runtime_disable(&dev->dev); +#else + nvhost_module_disable_clk(&dev->dev); +#endif + + return 0; +} + +static struct platform_driver gk20a_driver = { + .probe = gk20a_probe, + .remove = __exit_p(gk20a_remove), + .driver = { + .owner = THIS_MODULE, + .name = "gk20a", +#ifdef CONFIG_OF + .of_match_table = tegra_gk20a_of_match, +#endif +#ifdef CONFIG_PM + .pm = &gk20a_pm_ops, +#endif + } +}; + +static int __init gk20a_init(void) +{ + return platform_driver_register(&gk20a_driver); +} + +static void __exit gk20a_exit(void) +{ + platform_driver_unregister(&gk20a_driver); +} + +bool is_gk20a_module(struct platform_device *dev) +{ + return &gk20a_driver.driver == dev->dev.driver; +} + +void gk20a_busy_noresume(struct platform_device *pdev) +{ + pm_runtime_get_noresume(&pdev->dev); +} + +int gk20a_channel_busy(struct platform_device *pdev) +{ + int ret = 0; + + ret = gk20a_platform_channel_busy(pdev); + if (ret) + return ret; + + ret = gk20a_busy(pdev); + if (ret) + gk20a_platform_channel_idle(pdev); + + return ret; +} + +void gk20a_channel_idle(struct platform_device *pdev) +{ + gk20a_idle(pdev); + gk20a_platform_channel_idle(pdev); +} + +int gk20a_busy(struct platform_device *pdev) +{ + int ret = 0; + +#ifdef CONFIG_PM_RUNTIME + ret = pm_runtime_get_sync(&pdev->dev); +#endif + gk20a_scale_notify_busy(pdev); + + return ret < 0 ? ret : 0; +} + +void gk20a_idle(struct platform_device *pdev) +{ +#ifdef CONFIG_PM_RUNTIME + if (atomic_read(&pdev->dev.power.usage_count) == 1) + gk20a_scale_notify_idle(pdev); + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_sync_autosuspend(&pdev->dev); +#else + gk20a_scale_notify_idle(pdev); +#endif +} + +void gk20a_disable(struct gk20a *g, u32 units) +{ + u32 pmc; + + gk20a_dbg(gpu_dbg_info, "pmc disable: %08x\n", units); + + spin_lock(&g->mc_enable_lock); + pmc = gk20a_readl(g, mc_enable_r()); + pmc &= ~units; + gk20a_writel(g, mc_enable_r(), pmc); + spin_unlock(&g->mc_enable_lock); +} + +void gk20a_enable(struct gk20a *g, u32 units) +{ + u32 pmc; + + gk20a_dbg(gpu_dbg_info, "pmc enable: %08x\n", units); + + spin_lock(&g->mc_enable_lock); + pmc = gk20a_readl(g, mc_enable_r()); + pmc |= units; + gk20a_writel(g, mc_enable_r(), pmc); + spin_unlock(&g->mc_enable_lock); + gk20a_readl(g, mc_enable_r()); + + udelay(20); +} + +void gk20a_reset(struct gk20a *g, u32 units) +{ + gk20a_disable(g, units); + udelay(20); + gk20a_enable(g, units); +} + +int gk20a_init_gpu_characteristics(struct gk20a *g) +{ + struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics; + + gpu->L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g); + gpu->on_board_video_memory_size = 0; /* integrated GPU */ + + gpu->num_gpc = g->gr.gpc_count; + gpu->num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count; + + gpu->bus_type = NVHOST_GPU_BUS_TYPE_AXI; /* always AXI for now */ + + gpu->big_page_size = g->mm.big_page_size; + gpu->compression_page_size = g->mm.compression_page_size; + + return 0; +} + +int nvhost_vpr_info_fetch(void) +{ + struct gk20a *g = get_gk20a(to_platform_device( + bus_find_device_by_name(&platform_bus_type, + NULL, "gk20a.0"))); + + if (!g) { + pr_info("gk20a isn't ready yet\n"); + return 0; + } + + return gk20a_mm_mmu_vpr_info_fetch(g); +} + +static const
struct firmware * +do_request_firmware(struct device *dev, const char *prefix, const char *fw_name) +{ + const struct firmware *fw; + char *fw_path = NULL; + int path_len, err; + + if (prefix) { + path_len = strlen(prefix) + strlen(fw_name); + path_len += 2; /* for the path separator and zero terminator*/ + + fw_path = kzalloc(sizeof(*fw_path) * path_len, GFP_KERNEL); + if (!fw_path) + return NULL; + + sprintf(fw_path, "%s/%s", prefix, fw_name); + fw_name = fw_path; + } + + err = request_firmware(&fw, fw_name, dev); + kfree(fw_path); + if (err) + return NULL; + return fw; +} + +/* This is a simple wrapper around request_firmware that takes 'fw_name' and + * applies an IP specific relative path prefix to it. The caller is + * responsible for calling release_firmware later. */ +const struct firmware * +gk20a_request_firmware(struct gk20a *g, const char *fw_name) +{ + struct device *dev = &g->dev->dev; + const struct firmware *fw; + + /* current->fs is NULL when calling from SYS_EXIT. + Add a check here to prevent crash in request_firmware */ + if (!current->fs || !fw_name) + return NULL; + + BUG_ON(!g->ops.name); + fw = do_request_firmware(dev, g->ops.name, fw_name); + +#ifdef CONFIG_TEGRA_GK20A + /* TO BE REMOVED - Support loading from legacy SOC specific path. */ + if (!fw) + fw = nvhost_client_request_firmware(g->dev, fw_name); +#endif + + if (!fw) { + dev_err(dev, "failed to get firmware\n"); + return NULL; + } + + return fw; +} + +module_init(gk20a_init); +module_exit(gk20a_exit); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h new file mode 100644 index 00000000..a9081a9d --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -0,0 +1,559 @@ +/* + * drivers/video/tegra/host/gk20a/gk20a.h + * + * GK20A Graphics + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ +#ifndef _NVHOST_GK20A_H_ +#define _NVHOST_GK20A_H_ + + +struct gk20a; +struct fifo_gk20a; +struct channel_gk20a; +struct gr_gk20a; +struct sim_gk20a; + +#include +#include +#include +#include + +#include "../../../arch/arm/mach-tegra/iomap.h" + +#include "as_gk20a.h" +#include "clk_gk20a.h" +#include "fifo_gk20a.h" +#include "gr_gk20a.h" +#include "sim_gk20a.h" +#include "pmu_gk20a.h" +#include "priv_ring_gk20a.h" +#include "therm_gk20a.h" +#include "platform_gk20a.h" + +extern struct platform_device tegra_gk20a_device; + +bool is_gk20a_module(struct platform_device *dev); + +struct cooling_device_gk20a { + struct thermal_cooling_device *gk20a_cooling_dev; + unsigned int gk20a_freq_state; + unsigned int gk20a_freq_table_size; + struct gk20a *g; +}; + +struct gpu_ops { + struct { + int (*determine_L2_size_bytes)(struct gk20a *gk20a); + void (*set_max_ways_evict_last)(struct gk20a *g, u32 max_ways); + int (*init_comptags)(struct gk20a *g, struct gr_gk20a *gr); + int (*clear_comptags)(struct gk20a *g, u32 min, u32 max); + void (*set_zbc_color_entry)(struct gk20a *g, + struct zbc_entry *color_val, + u32 index); + void (*set_zbc_depth_entry)(struct gk20a *g, + struct zbc_entry *depth_val, + u32 index); + void (*clear_zbc_color_entry)(struct gk20a *g, u32 index); + void (*clear_zbc_depth_entry)(struct gk20a *g, u32 index); + int (*init_zbc)(struct gk20a *g, struct gr_gk20a *gr); + void (*init_cbc)(struct gk20a *g, struct gr_gk20a *gr); + void (*sync_debugfs)(struct gk20a *g); + void (*elpg_flush)(struct gk20a *g); + } ltc; + struct { + int (*init_fs_state)(struct gk20a *g); + void (*access_smpc_reg)(struct gk20a *g, u32 quad, u32 offset); + void (*bundle_cb_defaults)(struct gk20a *g); + void (*cb_size_default)(struct gk20a *g); + int (*calc_global_ctx_buffer_size)(struct gk20a *g); + void (*commit_global_attrib_cb)(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, bool patch); + void (*commit_global_bundle_cb)(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, u64 size, bool patch); + int (*commit_global_cb_manager)(struct gk20a *g, + struct channel_gk20a *ch, + bool patch); + void (*commit_global_pagepool)(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, u32 size, bool patch); + void (*init_gpc_mmu)(struct gk20a *g); + int (*handle_sw_method)(struct gk20a *g, u32 addr, + u32 class_num, u32 offset, u32 data); + void (*set_alpha_circular_buffer_size)(struct gk20a *g, + u32 data); + void (*set_circular_buffer_size)(struct gk20a *g, u32 data); + void (*enable_hww_exceptions)(struct gk20a *g); + bool (*is_valid_class)(struct gk20a *g, u32 class_num); + void (*get_sm_dsm_perf_regs)(struct gk20a *g, + u32 *num_sm_dsm_perf_regs, + u32 **sm_dsm_perf_regs, + u32 *perf_register_stride); + void (*get_sm_dsm_perf_ctrl_regs)(struct gk20a *g, + u32 *num_sm_dsm_perf_regs, + u32 **sm_dsm_perf_regs, + u32 *perf_register_stride); + void (*set_hww_esr_report_mask)(struct gk20a *g); + int (*setup_alpha_beta_tables)(struct gk20a *g, + struct gr_gk20a *gr); + } gr; + const char *name; + struct { + void (*init_fs_state)(struct gk20a *g); + void (*reset)(struct gk20a *g); + void (*init_uncompressed_kind_map)(struct gk20a *g); + void (*init_kind_attr)(struct gk20a *g); + } fb; + struct { + void (*slcg_gr_load_gating_prod)(struct gk20a *g, bool prod); + void (*slcg_perf_load_gating_prod)(struct gk20a *g, bool prod); + void (*blcg_gr_load_gating_prod)(struct gk20a *g, bool prod); + void (*pg_gr_load_gating_prod)(struct gk20a *g, bool prod); + void 
(*slcg_therm_load_gating_prod)(struct gk20a *g, bool prod); + } clock_gating; + struct { + void (*bind_channel)(struct channel_gk20a *ch_gk20a); + } fifo; + struct pmu_v { + /*used for change of enum zbc update cmd id from ver 0 to ver1*/ + u32 cmd_id_zbc_table_update; + u32 (*get_pmu_cmdline_args_size)(struct pmu_gk20a *pmu); + void (*set_pmu_cmdline_args_cpu_freq)(struct pmu_gk20a *pmu, + u32 freq); + void * (*get_pmu_cmdline_args_ptr)(struct pmu_gk20a *pmu); + u32 (*get_pmu_allocation_struct_size)(struct pmu_gk20a *pmu); + void (*set_pmu_allocation_ptr)(struct pmu_gk20a *pmu, + void **pmu_alloc_ptr, void *assign_ptr); + void (*pmu_allocation_set_dmem_size)(struct pmu_gk20a *pmu, + void *pmu_alloc_ptr, u16 size); + u16 (*pmu_allocation_get_dmem_size)(struct pmu_gk20a *pmu, + void *pmu_alloc_ptr); + u32 (*pmu_allocation_get_dmem_offset)(struct pmu_gk20a *pmu, + void *pmu_alloc_ptr); + u32 * (*pmu_allocation_get_dmem_offset_addr)( + struct pmu_gk20a *pmu, void *pmu_alloc_ptr); + void (*pmu_allocation_set_dmem_offset)(struct pmu_gk20a *pmu, + void *pmu_alloc_ptr, u32 offset); + void (*get_pmu_init_msg_pmu_queue_params)( + struct pmu_queue *queue, u32 id, + void *pmu_init_msg); + void *(*get_pmu_msg_pmu_init_msg_ptr)( + struct pmu_init_msg *init); + u16 (*get_pmu_init_msg_pmu_sw_mg_off)( + union pmu_init_msg_pmu *init_msg); + u16 (*get_pmu_init_msg_pmu_sw_mg_size)( + union pmu_init_msg_pmu *init_msg); + u32 (*get_pmu_perfmon_cmd_start_size)(void); + int (*get_perfmon_cmd_start_offsetofvar)( + enum pmu_perfmon_cmd_start_fields field); + void (*perfmon_start_set_cmd_type)(struct pmu_perfmon_cmd *pc, + u8 value); + void (*perfmon_start_set_group_id)(struct pmu_perfmon_cmd *pc, + u8 value); + void (*perfmon_start_set_state_id)(struct pmu_perfmon_cmd *pc, + u8 value); + void (*perfmon_start_set_flags)(struct pmu_perfmon_cmd *pc, + u8 value); + u8 (*perfmon_start_get_flags)(struct pmu_perfmon_cmd *pc); + u32 (*get_pmu_perfmon_cmd_init_size)(void); + int (*get_perfmon_cmd_init_offsetofvar)( + enum pmu_perfmon_cmd_start_fields field); + void (*perfmon_cmd_init_set_sample_buffer)( + struct pmu_perfmon_cmd *pc, u16 value); + void (*perfmon_cmd_init_set_dec_cnt)( + struct pmu_perfmon_cmd *pc, u8 value); + void (*perfmon_cmd_init_set_base_cnt_id)( + struct pmu_perfmon_cmd *pc, u8 value); + void (*perfmon_cmd_init_set_samp_period_us)( + struct pmu_perfmon_cmd *pc, u32 value); + void (*perfmon_cmd_init_set_num_cnt)(struct pmu_perfmon_cmd *pc, + u8 value); + void (*perfmon_cmd_init_set_mov_avg)(struct pmu_perfmon_cmd *pc, + u8 value); + void *(*get_pmu_seq_in_a_ptr)( + struct pmu_sequence *seq); + void *(*get_pmu_seq_out_a_ptr)( + struct pmu_sequence *seq); + } pmu_ver; +}; + +struct gk20a { + struct platform_device *dev; + + struct resource *reg_mem; + void __iomem *regs; + + struct resource *bar1_mem; + void __iomem *bar1; + + bool power_on; + bool irq_requested; + + struct clk_gk20a clk; + struct fifo_gk20a fifo; + struct gr_gk20a gr; + struct sim_gk20a sim; + struct mm_gk20a mm; + struct pmu_gk20a pmu; + struct cooling_device_gk20a gk20a_cdev; + + /* Save pmu fw here so that it lives cross suspend/resume. + pmu suspend destroys all pmu sw/hw states. Loading pmu + fw in resume crashes when the resume is from sys_exit. 
*/ + const struct firmware *pmu_fw; + + u32 gr_idle_timeout_default; + u32 timeouts_enabled; + + bool slcg_enabled; + bool blcg_enabled; + bool elcg_enabled; + bool elpg_enabled; + bool aelpg_enabled; + +#ifdef CONFIG_DEBUG_FS + spinlock_t debugfs_lock; + struct dentry *debugfs_ltc_enabled; + struct dentry *debugfs_timeouts_enabled; + struct dentry *debugfs_gr_idle_timeout_default; +#endif + struct gk20a_ctxsw_ucode_info ctxsw_ucode_info; + + /* held while manipulating # of debug/profiler sessions present */ + /* also prevents debug sessions from attaching until released */ + struct mutex dbg_sessions_lock; + int dbg_sessions; /* number attached */ + int dbg_powergating_disabled_refcount; /*refcount for pg disable */ + + void (*remove_support)(struct platform_device *); + + u64 pg_ingating_time_us; + u64 pg_ungating_time_us; + u32 pg_gating_cnt; + + spinlock_t mc_enable_lock; + + struct nvhost_gpu_characteristics gpu_characteristics; + + struct { + struct cdev cdev; + struct device *node; + } channel; + + struct gk20a_as as; + + struct { + struct cdev cdev; + struct device *node; + } ctrl; + + struct { + struct cdev cdev; + struct device *node; + } dbg; + + struct { + struct cdev cdev; + struct device *node; + } prof; + + struct mutex client_lock; + int client_refcount; /* open channels and ctrl nodes */ + + dev_t cdev_region; + struct class *class; + + struct gpu_ops ops; + + int irq_stall; + int irq_nonstall; + + struct generic_pm_domain pd; + + struct devfreq *devfreq; + + struct gk20a_scale_profile *scale_profile; + + struct device_dma_parameters dma_parms; +}; + +static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g) +{ + return g->timeouts_enabled ? + g->gr_idle_timeout_default : MAX_SCHEDULE_TIMEOUT; +} + +static inline struct gk20a *get_gk20a(struct platform_device *dev) +{ + return gk20a_get_platform(dev)->g; +} + +enum BAR0_DEBUG_OPERATION { + BARO_ZERO_NOP = 0, + OP_END = 'DONE', + BAR0_READ32 = '0R32', + BAR0_WRITE32 = '0W32', +}; + +struct share_buffer_head { + enum BAR0_DEBUG_OPERATION operation; +/* size of the operation item */ + u32 size; + u32 completed; + u32 failed; + u64 context; + u64 completion_callback; +}; + +struct gk20a_cyclestate_buffer_elem { + struct share_buffer_head head; +/* in */ + u64 p_data; + u64 p_done; + u32 offset_bar0; + u16 first_bit; + u16 last_bit; +/* out */ +/* keep 64 bits to be consistent */ + u64 data; +}; + +/* debug accessories */ + +#ifdef CONFIG_DEBUG_FS + /* debug info, default is compiled-in but effectively disabled (0 mask) */ + #define GK20A_DEBUG + /*e.g: echo 1 > /d/tegra_host/dbg_mask */ + #define GK20A_DEFAULT_DBG_MASK 0 +#else + /* manually enable and turn it on the mask */ + /*#define NVHOST_DEBUG*/ + #define GK20A_DEFAULT_DBG_MASK (dbg_info) +#endif + +enum gk20a_dbg_categories { + gpu_dbg_info = BIT(0), /* lightly verbose info */ + gpu_dbg_fn = BIT(2), /* fn name tracing */ + gpu_dbg_reg = BIT(3), /* register accesses, very verbose */ + gpu_dbg_pte = BIT(4), /* gmmu ptes */ + gpu_dbg_intr = BIT(5), /* interrupts */ + gpu_dbg_pmu = BIT(6), /* gk20a pmu */ + gpu_dbg_clk = BIT(7), /* gk20a clk */ + gpu_dbg_map = BIT(8), /* mem mappings */ + gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */ + gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ +}; + +#if defined(GK20A_DEBUG) +extern u32 gk20a_dbg_mask; +extern u32 gk20a_dbg_ftrace; +#define gk20a_dbg(dbg_mask, format, arg...) 
\ +do { \ + if (unlikely((dbg_mask) & gk20a_dbg_mask)) { \ + if (gk20a_dbg_ftrace) \ + trace_printk(format "\n", ##arg); \ + else \ + pr_info("gk20a %s: " format "\n", \ + __func__, ##arg); \ + } \ +} while (0) + +#else /* GK20A_DEBUG */ +#define gk20a_dbg(dbg_mask, format, arg...) \ +do { \ + if (0) \ + pr_info("gk20a %s: " format "\n", __func__, ##arg);\ +} while (0) + +#endif + +#define gk20a_err(d, fmt, arg...) \ + dev_err(d, "%s: " fmt "\n", __func__, ##arg) + +#define gk20a_warn(d, fmt, arg...) \ + dev_warn(d, "%s: " fmt "\n", __func__, ##arg) + +#define gk20a_dbg_fn(fmt, arg...) \ + gk20a_dbg(gpu_dbg_fn, fmt, ##arg) + +#define gk20a_dbg_info(fmt, arg...) \ + gk20a_dbg(gpu_dbg_info, fmt, ##arg) + +/* mem access with dbg_mem logging */ +static inline u8 gk20a_mem_rd08(void *ptr, int b) +{ + u8 _b = ((const u8 *)ptr)[b]; +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u8)*b, _b); +#endif + return _b; +} +static inline u16 gk20a_mem_rd16(void *ptr, int s) +{ + u16 _s = ((const u16 *)ptr)[s]; +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u16)*s, _s); +#endif + return _s; +} +static inline u32 gk20a_mem_rd32(void *ptr, int w) +{ + u32 _w = ((const u32 *)ptr)[w]; +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + sizeof(u32)*w, _w); +#endif + return _w; +} +static inline void gk20a_mem_wr08(void *ptr, int b, u8 data) +{ +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u8)*b, data); +#endif + ((u8 *)ptr)[b] = data; +} +static inline void gk20a_mem_wr16(void *ptr, int s, u16 data) +{ +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u16)*s, data); +#endif + ((u16 *)ptr)[s] = data; +} +static inline void gk20a_mem_wr32(void *ptr, int w, u32 data) +{ +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u32)*w, data); +#endif + ((u32 *)ptr)[w] = data; +} + +/* register accessors */ +static inline void gk20a_writel(struct gk20a *g, u32 r, u32 v) +{ + gk20a_dbg(gpu_dbg_reg, " r=0x%x v=0x%x", r, v); + writel(v, g->regs + r); +} +static inline u32 gk20a_readl(struct gk20a *g, u32 r) +{ + u32 v = readl(g->regs + r); + gk20a_dbg(gpu_dbg_reg, " r=0x%x v=0x%x", r, v); + return v; +} + +static inline void gk20a_bar1_writel(struct gk20a *g, u32 b, u32 v) +{ + gk20a_dbg(gpu_dbg_reg, " b=0x%x v=0x%x", b, v); + writel(v, g->bar1 + b); +} + +static inline u32 gk20a_bar1_readl(struct gk20a *g, u32 b) +{ + u32 v = readl(g->bar1 + b); + gk20a_dbg(gpu_dbg_reg, " b=0x%x v=0x%x", b, v); + return v; +} + +/* convenience */ +static inline struct device *dev_from_gk20a(struct gk20a *g) +{ + return &g->dev->dev; +} +static inline struct gk20a *gk20a_from_as(struct gk20a_as *as) +{ + return container_of(as, struct gk20a, as); +} +static inline u32 u64_hi32(u64 n) +{ + return (u32)((n >> 32) & ~(u32)0); +} + +static inline u32 u64_lo32(u64 n) +{ + return (u32)(n & ~(u32)0); +} + +static inline u32 set_field(u32 val, u32 mask, u32 field) +{ + return ((val & ~mask) | field); +} + +/* invalidate channel lookup tlb */ +static inline void gk20a_gr_flush_channel_tlb(struct gr_gk20a *gr) +{ + spin_lock(&gr->ch_tlb_lock); + memset(gr->chid_tlb, 0, + sizeof(struct gr_channel_map_tlb_entry) * + GR_CHANNEL_MAP_TLB_SIZE); + spin_unlock(&gr->ch_tlb_lock); +} + +/* classes that the device supports */ +/* TBD: get these from an open-sourced SDK? 
*/ +enum { + KEPLER_C = 0xA297, + FERMI_TWOD_A = 0x902D, + KEPLER_COMPUTE_A = 0xA0C0, + KEPLER_INLINE_TO_MEMORY_A = 0xA040, + KEPLER_DMA_COPY_A = 0xA0B5, /*not sure about this one*/ +}; + +#if defined(CONFIG_GK20A_PMU) +static inline int support_gk20a_pmu(void) +{ + return 1; +} +#else +static inline int support_gk20a_pmu(void){return 0;} +#endif + +void gk20a_create_sysfs(struct platform_device *dev); + +#ifdef CONFIG_DEBUG_FS +int clk_gk20a_debugfs_init(struct platform_device *dev); +#endif + +#define GK20A_BAR0_IORESOURCE_MEM 0 +#define GK20A_BAR1_IORESOURCE_MEM 1 +#define GK20A_SIM_IORESOURCE_MEM 2 + +void gk20a_busy_noresume(struct platform_device *pdev); +int gk20a_busy(struct platform_device *pdev); +void gk20a_idle(struct platform_device *pdev); +int gk20a_channel_busy(struct platform_device *pdev); +void gk20a_channel_idle(struct platform_device *pdev); +void gk20a_disable(struct gk20a *g, u32 units); +void gk20a_enable(struct gk20a *g, u32 units); +void gk20a_reset(struct gk20a *g, u32 units); +int gk20a_get_client(struct gk20a *g); +void gk20a_put_client(struct gk20a *g); + +const struct firmware * +gk20a_request_firmware(struct gk20a *g, const char *fw_name); + +#define NVHOST_GPU_ARCHITECTURE_SHIFT 4 + +/* constructs unique and compact GPUID from nvhost_gpu_characteristics + * arch/impl fields */ +#define GK20A_GPUID(arch, impl) ((u32) ((arch) | (impl))) + +#define GK20A_GPUID_GK20A \ + GK20A_GPUID(NVHOST_GPU_ARCH_GK100, NVHOST_GPU_IMPL_GK20A) + +int gk20a_init_gpu_characteristics(struct gk20a *g); + +#endif /* _NVHOST_GK20A_H_ */ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c new file mode 100644 index 00000000..32c003b6 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c @@ -0,0 +1,1247 @@ +/* + * gk20a allocator + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#include "gk20a_allocator.h" + +static inline void link_block_list(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, + struct gk20a_alloc_block *prev, + struct rb_node *rb_parent); +static inline void link_block_rb(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, + struct rb_node **rb_link, + struct rb_node *rb_parent); +static void link_block(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, + struct gk20a_alloc_block *prev, struct rb_node **rb_link, + struct rb_node *rb_parent); +static void insert_block(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block); + +static void unlink_block(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, + struct gk20a_alloc_block *prev); +static struct gk20a_alloc_block *unlink_blocks( + struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, + struct gk20a_alloc_block *prev, u32 end); + +static struct gk20a_alloc_block *find_block( + struct gk20a_allocator *allocator, u32 addr); +static struct gk20a_alloc_block *find_block_prev( + struct gk20a_allocator *allocator, u32 addr, + struct gk20a_alloc_block **pprev); +static struct gk20a_alloc_block *find_block_prepare( + struct gk20a_allocator *allocator, u32 addr, + struct gk20a_alloc_block **pprev, struct rb_node ***rb_link, + struct rb_node **rb_parent); + +static u32 check_free_space(u32 addr, u32 limit, u32 len, u32 align); +static void update_free_addr_cache(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, + u32 addr, u32 len, bool free); +static int find_free_area(struct gk20a_allocator *allocator, + u32 *addr, u32 len); +static int find_free_area_nc(struct gk20a_allocator *allocator, + u32 *addr, u32 *len); + +static void adjust_block(struct gk20a_alloc_block *block, + u32 start, u32 end, + struct gk20a_alloc_block *insert); +static struct gk20a_alloc_block *merge_block( + struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, u32 addr, u32 end); +static int split_block(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, + u32 addr, int new_below); + +static int block_alloc_single_locked(struct gk20a_allocator *allocator, + u32 *addr, u32 len); +static int block_alloc_list_locked(struct gk20a_allocator *allocator, + u32 *addr, u32 len, + struct gk20a_alloc_block **pblock); +static int block_free_locked(struct gk20a_allocator *allocator, + u32 addr, u32 len); +static void block_free_list_locked(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *list); + +/* link a block into allocator block list */ +static inline void link_block_list(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, + struct gk20a_alloc_block *prev, + struct rb_node *rb_parent) +{ + struct gk20a_alloc_block *next; + + block->prev = prev; + if (prev) { + next = prev->next; + prev->next = block; + } else { + allocator->block_first = block; + if (rb_parent) + next = rb_entry(rb_parent, + struct gk20a_alloc_block, rb); + else + next = NULL; + } + block->next = next; + if (next) + next->prev = block; +} + +/* link a block into allocator rb tree */ +static inline void link_block_rb(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, struct rb_node **rb_link, + struct rb_node *rb_parent) +{ + rb_link_node(&block->rb, rb_parent, rb_link); + rb_insert_color(&block->rb, &allocator->rb_root); +} + +/* add a block to allocator with known location */ +static void link_block(struct gk20a_allocator *allocator, + struct 
gk20a_alloc_block *block, + struct gk20a_alloc_block *prev, struct rb_node **rb_link, + struct rb_node *rb_parent) +{ + struct gk20a_alloc_block *next; + + link_block_list(allocator, block, prev, rb_parent); + link_block_rb(allocator, block, rb_link, rb_parent); + allocator->block_count++; + + next = block->next; + allocator_dbg(allocator, "link new block %d:%d between block %d:%d and block %d:%d", + block->start, block->end, + prev ? prev->start : -1, prev ? prev->end : -1, + next ? next->start : -1, next ? next->end : -1); +} + +/* add a block to allocator */ +static void insert_block(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block) +{ + struct gk20a_alloc_block *prev; + struct rb_node **rb_link, *rb_parent; + + find_block_prepare(allocator, block->start, + &prev, &rb_link, &rb_parent); + link_block(allocator, block, prev, rb_link, rb_parent); +} + +/* remove a block from allocator */ +static void unlink_block(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, + struct gk20a_alloc_block *prev) +{ + struct gk20a_alloc_block *next = block->next; + + allocator_dbg(allocator, "unlink block %d:%d between block %d:%d and block %d:%d", + block->start, block->end, + prev ? prev->start : -1, prev ? prev->end : -1, + next ? next->start : -1, next ? next->end : -1); + + BUG_ON(block->start < allocator->base); + BUG_ON(block->end > allocator->limit); + + if (prev) + prev->next = next; + else + allocator->block_first = next; + + if (next) + next->prev = prev; + rb_erase(&block->rb, &allocator->rb_root); + if (allocator->block_recent == block) + allocator->block_recent = prev; + + allocator->block_count--; +} + +/* remove a list of blocks from allocator. the list can contain both + regular blocks and non-contiguous blocks. skip all non-contiguous + blocks, remove regular blocks into a separate list, return list head */ +static struct gk20a_alloc_block * +unlink_blocks(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, + struct gk20a_alloc_block *prev, + u32 end) +{ + struct gk20a_alloc_block **insertion_point; + struct gk20a_alloc_block *last_unfreed_block = prev; + struct gk20a_alloc_block *last_freed_block = NULL; + struct gk20a_alloc_block *first_freed_block = NULL; + + insertion_point = (prev ? 
&prev->next : &allocator->block_first); + *insertion_point = NULL; + + do { + if (!block->nc_block) { + allocator_dbg(allocator, "unlink block %d:%d", + block->start, block->end); + if (last_freed_block) + last_freed_block->next = block; + block->prev = last_freed_block; + rb_erase(&block->rb, &allocator->rb_root); + last_freed_block = block; + allocator->block_count--; + if (!first_freed_block) + first_freed_block = block; + } else { + allocator_dbg(allocator, "skip nc block %d:%d", + block->start, block->end); + if (!*insertion_point) + *insertion_point = block; + if (last_unfreed_block) + last_unfreed_block->next = block; + block->prev = last_unfreed_block; + last_unfreed_block = block; + } + block = block->next; + } while (block && block->start < end); + + if (!*insertion_point) + *insertion_point = block; + + if (block) + block->prev = last_unfreed_block; + if (last_unfreed_block) + last_unfreed_block->next = block; + if (last_freed_block) + last_freed_block->next = NULL; + + allocator->block_recent = NULL; + + return first_freed_block; +} + +/* Look up the first block which satisfies addr < block->end, + NULL if none */ +static struct gk20a_alloc_block * +find_block(struct gk20a_allocator *allocator, u32 addr) +{ + struct gk20a_alloc_block *block = allocator->block_recent; + + if (!(block && block->end > addr && block->start <= addr)) { + struct rb_node *rb_node; + + rb_node = allocator->rb_root.rb_node; + block = NULL; + + while (rb_node) { + struct gk20a_alloc_block *block_tmp; + + block_tmp = rb_entry(rb_node, + struct gk20a_alloc_block, rb); + + if (block_tmp->end > addr) { + block = block_tmp; + if (block_tmp->start <= addr) + break; + rb_node = rb_node->rb_left; + } else + rb_node = rb_node->rb_right; + if (block) + allocator->block_recent = block; + } + } + return block; +} + +/* Same as find_block, but also return a pointer to the previous block */ +static struct gk20a_alloc_block * +find_block_prev(struct gk20a_allocator *allocator, u32 addr, + struct gk20a_alloc_block **pprev) +{ + struct gk20a_alloc_block *block = NULL, *prev = NULL; + struct rb_node *rb_node; + if (!allocator) + goto out; + + block = allocator->block_first; + + rb_node = allocator->rb_root.rb_node; + + while (rb_node) { + struct gk20a_alloc_block *block_tmp; + block_tmp = rb_entry(rb_node, struct gk20a_alloc_block, rb); + + if (addr < block_tmp->end) + rb_node = rb_node->rb_left; + else { + prev = block_tmp; + if (!prev->next || addr < prev->next->end) + break; + rb_node = rb_node->rb_right; + } + } + +out: + *pprev = prev; + return prev ? 
prev->next : block; +} + +/* Same as find_block, but also return a pointer to the previous block + and return rb_node to prepare for rbtree insertion */ +static struct gk20a_alloc_block * +find_block_prepare(struct gk20a_allocator *allocator, u32 addr, + struct gk20a_alloc_block **pprev, struct rb_node ***rb_link, + struct rb_node **rb_parent) +{ + struct gk20a_alloc_block *block; + struct rb_node **__rb_link, *__rb_parent, *rb_prev; + + __rb_link = &allocator->rb_root.rb_node; + rb_prev = __rb_parent = NULL; + block = NULL; + + while (*__rb_link) { + struct gk20a_alloc_block *block_tmp; + + __rb_parent = *__rb_link; + block_tmp = rb_entry(__rb_parent, + struct gk20a_alloc_block, rb); + + if (block_tmp->end > addr) { + block = block_tmp; + if (block_tmp->start <= addr) + break; + __rb_link = &__rb_parent->rb_left; + } else { + rb_prev = __rb_parent; + __rb_link = &__rb_parent->rb_right; + } + } + + *pprev = NULL; + if (rb_prev) + *pprev = rb_entry(rb_prev, struct gk20a_alloc_block, rb); + *rb_link = __rb_link; + *rb_parent = __rb_parent; + return block; +} + +/* return available space */ +static u32 check_free_space(u32 addr, u32 limit, u32 len, u32 align) +{ + if (addr >= limit) + return 0; + if (addr + len <= limit) + return len; + return (limit - addr) & ~(align - 1); +} + +/* update first_free_addr/last_free_addr based on new free addr + called when free block(s) and allocate block(s) */ +static void update_free_addr_cache(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *next, + u32 addr, u32 len, bool free) +{ + /* update from block free */ + if (free) { + if (allocator->first_free_addr > addr) + allocator->first_free_addr = addr; + } else { /* update from block alloc */ + if (allocator->last_free_addr < addr + len) + allocator->last_free_addr = addr + len; + if (allocator->first_free_addr == addr) { + if (!next || next->start > addr + len) + allocator->first_free_addr = addr + len; + else + allocator->first_free_addr = next->end; + } + } + + if (allocator->first_free_addr > allocator->last_free_addr) + allocator->first_free_addr = allocator->last_free_addr; +} + +/* find a free address range for a fixed len */ +static int find_free_area(struct gk20a_allocator *allocator, + u32 *addr, u32 len) +{ + struct gk20a_alloc_block *block; + u32 start_addr, search_base, search_limit; + + /* fixed addr allocation */ + /* note: constraints for fixed are handled by caller */ + if (*addr) { + block = find_block(allocator, *addr); + if (allocator->limit - len >= *addr && + (!block || *addr + len <= block->start)) { + update_free_addr_cache(allocator, block, + *addr, len, false); + return 0; + } else + return -ENOMEM; + } + + if (!allocator->constraint.enable) { + search_base = allocator->base; + search_limit = allocator->limit; + } else { + start_addr = *addr = allocator->constraint.base; + search_base = allocator->constraint.base; + search_limit = allocator->constraint.limit; + } + + /* cached_hole_size has max free space up to last_free_addr */ + if (len > allocator->cached_hole_size) + start_addr = *addr = allocator->last_free_addr; + else { + start_addr = *addr = allocator->base; + allocator->cached_hole_size = 0; + } + + allocator_dbg(allocator, "start search addr : %d", start_addr); + +full_search: + for (block = find_block(allocator, *addr);; block = block->next) { + if (search_limit - len < *addr) { + /* start a new search in case we missed any hole */ + if (start_addr != search_base) { + start_addr = *addr = search_base; + allocator->cached_hole_size = 0; + 
allocator_dbg(allocator, "start a new search from base"); + goto full_search; + } + return -ENOMEM; + } + if (!block || *addr + len <= block->start) { + update_free_addr_cache(allocator, block, + *addr, len, false); + allocator_dbg(allocator, "free space from %d, len %d", + *addr, len); + allocator_dbg(allocator, "next free addr: %d", + allocator->last_free_addr); + return 0; + } + if (*addr + allocator->cached_hole_size < block->start) + allocator->cached_hole_size = block->start - *addr; + *addr = block->end; + } +} + +/* find a free address range for as long as it meets alignment or meet len */ +static int find_free_area_nc(struct gk20a_allocator *allocator, + u32 *addr, u32 *len) +{ + struct gk20a_alloc_block *block; + u32 start_addr; + u32 avail_len; + + /* fixed addr allocation */ + if (*addr) { + block = find_block(allocator, *addr); + if (allocator->limit - *len >= *addr) { + if (!block) + return 0; + + avail_len = check_free_space(*addr, block->start, + *len, allocator->align); + if (avail_len != 0) { + update_free_addr_cache(allocator, block, + *addr, avail_len, false); + allocator_dbg(allocator, + "free space between %d, %d, len %d", + *addr, block->start, avail_len); + allocator_dbg(allocator, "next free addr: %d", + allocator->last_free_addr); + *len = avail_len; + return 0; + } else + return -ENOMEM; + } else + return -ENOMEM; + } + + start_addr = *addr = allocator->first_free_addr; + + allocator_dbg(allocator, "start search addr : %d", start_addr); + + for (block = find_block(allocator, *addr);; block = block->next) { + if (allocator->limit - *len < *addr) + return -ENOMEM; + if (!block) { + update_free_addr_cache(allocator, block, + *addr, *len, false); + allocator_dbg(allocator, "free space from %d, len %d", + *addr, *len); + allocator_dbg(allocator, "next free addr: %d", + allocator->first_free_addr); + return 0; + } + + avail_len = check_free_space(*addr, block->start, + *len, allocator->align); + if (avail_len != 0) { + update_free_addr_cache(allocator, block, + *addr, avail_len, false); + allocator_dbg(allocator, "free space between %d, %d, len %d", + *addr, block->start, avail_len); + allocator_dbg(allocator, "next free addr: %d", + allocator->first_free_addr); + *len = avail_len; + return 0; + } + if (*addr + allocator->cached_hole_size < block->start) + allocator->cached_hole_size = block->start - *addr; + *addr = block->end; + } +} + +/* expand/shrink a block with new start and new end + split_block function provides insert block for shrink */ +static void adjust_block(struct gk20a_alloc_block *block, + u32 start, u32 end, struct gk20a_alloc_block *insert) +{ + struct gk20a_allocator *allocator = block->allocator; + + allocator_dbg(allocator, "curr block %d:%d, new start %d, new end %d", + block->start, block->end, start, end); + + /* expand */ + if (!insert) { + if (start == block->end) { + struct gk20a_alloc_block *next = block->next; + + if (next && end == next->start) { + /* ....AAAA.... */ + /* PPPP....NNNN */ + /* PPPPPPPPPPPP */ + unlink_block(allocator, next, block); + block->end = next->end; + kmem_cache_free(allocator->block_cache, next); + } else { + /* ....AAAA.... */ + /* PPPP........ */ + /* PPPPPPPP.... */ + block->end = end; + } + } + + if (end == block->start) { + /* ....AAAA.... 
*/ + /* ........NNNN */ + /* PP..NNNNNNNN ....NNNNNNNN */ + block->start = start; + } + } else { /* shrink */ + /* BBBBBBBB -> BBBBIIII OR BBBBBBBB -> IIIIBBBB */ + block->start = start; + block->end = end; + insert_block(allocator, insert); + } +} + +/* given a range [addr, end], merge it with blocks before or after or both + if they can be combined into a contiguous block */ +static struct gk20a_alloc_block * +merge_block(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *prev, u32 addr, u32 end) +{ + struct gk20a_alloc_block *next; + + if (prev) + next = prev->next; + else + next = allocator->block_first; + + allocator_dbg(allocator, "curr block %d:%d", addr, end); + if (prev) + allocator_dbg(allocator, "prev block %d:%d", + prev->start, prev->end); + if (next) + allocator_dbg(allocator, "next block %d:%d", + next->start, next->end); + + /* don't merge with non-contiguous allocation block */ + if (prev && prev->end == addr && !prev->nc_block) { + adjust_block(prev, addr, end, NULL); + return prev; + } + + /* don't merge with non-contiguous allocation block */ + if (next && end == next->start && !next->nc_block) { + adjust_block(next, addr, end, NULL); + return next; + } + + return NULL; +} + +/* split a block based on addr. addr must be within (start, end). + if new_below == 1, link new block before adjusted current block */ +static int split_block(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, u32 addr, int new_below) +{ + struct gk20a_alloc_block *new_block; + + allocator_dbg(allocator, "start %d, split %d, end %d, new_below %d", + block->start, addr, block->end, new_below); + + BUG_ON(!(addr > block->start && addr < block->end)); + + new_block = kmem_cache_alloc(allocator->block_cache, GFP_KERNEL); + if (!new_block) + return -ENOMEM; + + *new_block = *block; + + if (new_below) + new_block->end = addr; + else + new_block->start = addr; + + if (new_below) + adjust_block(block, addr, block->end, new_block); + else + adjust_block(block, block->start, addr, new_block); + + return 0; +} + +/* free a list of blocks */ +static void free_blocks(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block) +{ + struct gk20a_alloc_block *curr_block; + while (block) { + curr_block = block; + block = block->next; + kmem_cache_free(allocator->block_cache, curr_block); + } +} + +/* called with rw_sema acquired */ +static int block_alloc_single_locked(struct gk20a_allocator *allocator, + u32 *addr_req, u32 len) +{ + struct gk20a_alloc_block *block, *prev; + struct rb_node **rb_link, *rb_parent; + u32 addr = *addr_req; + int err; + + *addr_req = ~0; + + err = find_free_area(allocator, &addr, len); + if (err) + return err; + + find_block_prepare(allocator, addr, &prev, &rb_link, &rb_parent); + + /* merge requested free space with existing block(s) + if they can be combined into one contiguous block */ + block = merge_block(allocator, prev, addr, addr + len); + if (block) { + *addr_req = addr; + return 0; + } + + /* create a new block if cannot merge */ + block = kmem_cache_zalloc(allocator->block_cache, GFP_KERNEL); + if (!block) + return -ENOMEM; + + block->allocator = allocator; + block->start = addr; + block->end = addr + len; + + link_block(allocator, block, prev, rb_link, rb_parent); + + *addr_req = addr; + + return 0; +} + +static int block_alloc_list_locked(struct gk20a_allocator *allocator, + u32 *addr_req, u32 nc_len, struct gk20a_alloc_block **pblock) +{ + struct gk20a_alloc_block *block; + struct gk20a_alloc_block *nc_head = NULL, *nc_prev = NULL; 
+ u32 addr = *addr_req, len = nc_len; + int err = 0; + + *addr_req = ~0; + + while (nc_len > 0) { + err = find_free_area_nc(allocator, &addr, &len); + if (err) { + allocator_dbg(allocator, "not enough free space"); + goto clean_up; + } + + /* never merge non-contiguous allocation block, + just create a new block */ + block = kmem_cache_zalloc(allocator->block_cache, + GFP_KERNEL); + if (!block) { + err = -ENOMEM; + goto clean_up; + } + + block->allocator = allocator; + block->start = addr; + block->end = addr + len; + + insert_block(allocator, block); + + block->nc_prev = nc_prev; + if (nc_prev) + nc_prev->nc_next = block; + nc_prev = block; + block->nc_block = true; + + if (!nc_head) + nc_head = block; + + if (*addr_req == ~0) + *addr_req = addr; + + addr = 0; + nc_len -= len; + len = nc_len; + allocator_dbg(allocator, "remaining length %d", nc_len); + } + +clean_up: + if (err) { + while (nc_head) { + unlink_block(allocator, nc_head, nc_head->prev); + nc_prev = nc_head; + nc_head = nc_head->nc_next; + kmem_cache_free(allocator->block_cache, nc_prev); + } + *pblock = NULL; + *addr_req = ~0; + } else { + *pblock = nc_head; + } + + return err; +} + +/* called with rw_sema acquired */ +static int block_free_locked(struct gk20a_allocator *allocator, + u32 addr, u32 len) +{ + struct gk20a_alloc_block *block, *prev, *last; + u32 end; + int err; + + /* no block has block->end > addr, already free */ + block = find_block_prev(allocator, addr, &prev); + if (!block) + return 0; + + allocator_dbg(allocator, "first block in free range %d:%d", + block->start, block->end); + + end = addr + len; + /* not in any block, already free */ + if (block->start >= end) + return 0; + + /* don't touch nc_block in range free */ + if (addr > block->start && !block->nc_block) { + int err = split_block(allocator, block, addr, 0); + if (err) + return err; + prev = block; + } + + last = find_block(allocator, end); + if (last && end > last->start && !last->nc_block) { + + allocator_dbg(allocator, "last block in free range %d:%d", + last->start, last->end); + + err = split_block(allocator, last, end, 1); + if (err) + return err; + } + + block = prev ? 
prev->next : allocator->block_first; + + allocator_dbg(allocator, "first block for free %d:%d", + block->start, block->end); + + /* remove blocks between [addr, addr + len) from rb tree + and put them in a list */ + block = unlink_blocks(allocator, block, prev, end); + free_blocks(allocator, block); + + update_free_addr_cache(allocator, NULL, addr, len, true); + + return 0; +} + +/* called with rw_sema acquired */ +static void block_free_list_locked(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *list) +{ + struct gk20a_alloc_block *block; + u32 len; + + update_free_addr_cache(allocator, NULL, + list->start, list->end - list->start, true); + + while (list) { + block = list; + unlink_block(allocator, block, block->prev); + + len = block->end - block->start; + if (allocator->cached_hole_size < len) + allocator->cached_hole_size = len; + + list = block->nc_next; + kmem_cache_free(allocator->block_cache, block); + } +} + +static int +gk20a_allocator_constrain(struct gk20a_allocator *a, + bool enable, u32 base, u32 limit) +{ + if (enable) { + a->constraint.enable = (base >= a->base && + limit <= a->limit); + if (!a->constraint.enable) + return -EINVAL; + a->constraint.base = base; + a->constraint.limit = limit; + a->first_free_addr = a->last_free_addr = base; + + } else { + a->constraint.enable = false; + a->first_free_addr = a->last_free_addr = a->base; + } + + a->cached_hole_size = 0; + + return 0; +} + +/* init allocator struct */ +int gk20a_allocator_init(struct gk20a_allocator *allocator, + const char *name, u32 start, u32 len, u32 align) +{ + memset(allocator, 0, sizeof(struct gk20a_allocator)); + + strncpy(allocator->name, name, 32); + + allocator->block_cache = + kmem_cache_create(allocator->name, + sizeof(struct gk20a_alloc_block), 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); + if (!allocator->block_cache) + return -ENOMEM; + + allocator->rb_root = RB_ROOT; + + allocator->base = start; + allocator->limit = start + len - 1; + allocator->align = align; + + allocator_dbg(allocator, "%s : base %d, limit %d, align %d", + allocator->name, allocator->base, + allocator->limit, allocator->align); + + allocator->first_free_addr = allocator->last_free_addr = start; + allocator->cached_hole_size = len; + + init_rwsem(&allocator->rw_sema); + + allocator->alloc = gk20a_allocator_block_alloc; + allocator->alloc_nc = gk20a_allocator_block_alloc_nc; + allocator->free = gk20a_allocator_block_free; + allocator->free_nc = gk20a_allocator_block_free_nc; + allocator->constrain = gk20a_allocator_constrain; + + return 0; +} + +/* destroy allocator, free all remaining blocks if any */ +void gk20a_allocator_destroy(struct gk20a_allocator *allocator) +{ + struct gk20a_alloc_block *block, *next; + u32 free_count = 0; + + down_write(&allocator->rw_sema); + + for (block = allocator->block_first; block; ) { + allocator_dbg(allocator, "free remaining block %d:%d", + block->start, block->end); + next = block->next; + kmem_cache_free(allocator->block_cache, block); + free_count++; + block = next; + } + + up_write(&allocator->rw_sema); + + /* block_count doesn't match real number of blocks */ + BUG_ON(free_count != allocator->block_count); + + kmem_cache_destroy(allocator->block_cache); + + memset(allocator, 0, sizeof(struct gk20a_allocator)); +} + +/* + * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is + * returned to caller in *addr. + * + * contiguous allocation, which allocates one block of + * contiguous address. 
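 *
 * An illustrative (hypothetical) call sequence against an allocator set
 * up as gk20a_allocator_init(&a, "example", 0, 1024 * 4096, 4096):
 *
 *	u32 addr = 0;	-- 0 lets the allocator pick the offset
 *	err = gk20a_allocator_block_alloc(&a, &addr, 4096);
 *	-- on success addr holds the chosen offset; *addr is reset to ~0
 *	-- internally and only set again when the allocation succeeds
 *	gk20a_allocator_block_free(&a, addr, 4096);
 *
 * Passing a non-zero, aligned addr instead requests a fixed allocation
 * at exactly that offset.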
+*/ +int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator, + u32 *addr, u32 len) +{ + int ret; +#if defined(ALLOCATOR_DEBUG) + struct gk20a_alloc_block *block; + bool should_fail = false; +#endif + + allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len); + + if (*addr + len > allocator->limit || /* check addr range */ + *addr & (allocator->align - 1) || /* check addr alignment */ + len == 0) /* check len */ + return -EINVAL; + + if (allocator->constraint.enable && + (*addr + len > allocator->constraint.limit || + *addr > allocator->constraint.base)) + return -EINVAL; + + len = ALIGN(len, allocator->align); + if (!len) + return -ENOMEM; + + down_write(&allocator->rw_sema); + +#if defined(ALLOCATOR_DEBUG) + if (*addr) { + for (block = allocator->block_first; + block; block = block->next) { + if (block->end > *addr && block->start < *addr + len) { + should_fail = true; + break; + } + } + } +#endif + + ret = block_alloc_single_locked(allocator, addr, len); + +#if defined(ALLOCATOR_DEBUG) + if (!ret) { + bool allocated = false; + BUG_ON(should_fail); + BUG_ON(*addr < allocator->base); + BUG_ON(*addr + len > allocator->limit); + for (block = allocator->block_first; + block; block = block->next) { + if (!block->nc_block && + block->start <= *addr && + block->end >= *addr + len) { + allocated = true; + break; + } + } + BUG_ON(!allocated); + } +#endif + + up_write(&allocator->rw_sema); + + allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len); + + return ret; +} + +/* + * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is + * returned to caller in *addr. + * + * non-contiguous allocation, which returns a list of blocks with aggregated + * size == len. Individual block size must meet alignment requirement. + */ +int gk20a_allocator_block_alloc_nc(struct gk20a_allocator *allocator, + u32 *addr, u32 len, struct gk20a_alloc_block **pblock) +{ + int ret; + + allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len); + + BUG_ON(pblock == NULL); + *pblock = NULL; + + if (*addr + len > allocator->limit || /* check addr range */ + *addr & (allocator->align - 1) || /* check addr alignment */ + len == 0) /* check len */ + return -EINVAL; + + len = ALIGN(len, allocator->align); + if (!len) + return -ENOMEM; + + down_write(&allocator->rw_sema); + + ret = block_alloc_list_locked(allocator, addr, len, pblock); + +#if defined(ALLOCATOR_DEBUG) + if (!ret) { + struct gk20a_alloc_block *block = *pblock; + BUG_ON(!block); + BUG_ON(block->start < allocator->base); + while (block->nc_next) { + BUG_ON(block->end > block->nc_next->start); + block = block->nc_next; + } + BUG_ON(block->end > allocator->limit); + } +#endif + + up_write(&allocator->rw_sema); + + allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len); + + return ret; +} + +/* free all blocks between start and end */ +int gk20a_allocator_block_free(struct gk20a_allocator *allocator, + u32 addr, u32 len) +{ + int ret; + + allocator_dbg(allocator, "[in] addr %d, len %d", addr, len); + + if (addr + len > allocator->limit || /* check addr range */ + addr < allocator->base || + addr & (allocator->align - 1)) /* check addr alignment */ + return -EINVAL; + + len = ALIGN(len, allocator->align); + if (!len) + return -EINVAL; + + down_write(&allocator->rw_sema); + + ret = block_free_locked(allocator, addr, len); + +#if defined(ALLOCATOR_DEBUG) + if (!ret) { + struct gk20a_alloc_block *block; + for (block = allocator->block_first; + block; block = block->next) { + if (!block->nc_block) + BUG_ON(block->start >= addr 
&& + block->end <= addr + len); + } + } +#endif + up_write(&allocator->rw_sema); + + allocator_dbg(allocator, "[out] addr %d, len %d", addr, len); + + return ret; +} + +/* free non-contiguous allocation block list */ +void gk20a_allocator_block_free_nc(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block) +{ + /* nothing to free */ + if (!block) + return; + + down_write(&allocator->rw_sema); + block_free_list_locked(allocator, block); + up_write(&allocator->rw_sema); +} + +#if defined(ALLOCATOR_DEBUG) + +#include + +/* test suite */ +void gk20a_allocator_test(void) +{ + struct gk20a_allocator allocator; + struct gk20a_alloc_block *list[5]; + u32 addr, len; + u32 count; + int n; + + gk20a_allocator_init(&allocator, "test", 0, 10, 1); + + /* alloc/free a single block in the beginning */ + addr = 0; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + gk20a_allocator_block_free(&allocator, addr, 2); + gk20a_allocator_dump(&allocator); + /* alloc/free a single block in the middle */ + addr = 4; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + gk20a_allocator_block_free(&allocator, addr, 2); + gk20a_allocator_dump(&allocator); + /* alloc/free a single block in the end */ + addr = 8; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + gk20a_allocator_block_free(&allocator, addr, 2); + gk20a_allocator_dump(&allocator); + + /* allocate contiguous blocks */ + addr = 0; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + addr = 0; + gk20a_allocator_block_alloc(&allocator, &addr, 4); + gk20a_allocator_dump(&allocator); + addr = 0; + gk20a_allocator_block_alloc(&allocator, &addr, 4); + gk20a_allocator_dump(&allocator); + + /* no free space */ + addr = 0; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + + /* free in the end */ + gk20a_allocator_block_free(&allocator, 8, 2); + gk20a_allocator_dump(&allocator); + /* free in the beginning */ + gk20a_allocator_block_free(&allocator, 0, 2); + gk20a_allocator_dump(&allocator); + /* free in the middle */ + gk20a_allocator_block_free(&allocator, 4, 2); + gk20a_allocator_dump(&allocator); + + /* merge case PPPPAAAANNNN */ + addr = 4; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + /* merge case ....AAAANNNN */ + addr = 0; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + /* merge case PPPPAAAA.... 
*/ + addr = 8; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + + /* test free across multiple blocks and split */ + gk20a_allocator_block_free(&allocator, 2, 2); + gk20a_allocator_dump(&allocator); + gk20a_allocator_block_free(&allocator, 6, 2); + gk20a_allocator_dump(&allocator); + gk20a_allocator_block_free(&allocator, 1, 8); + gk20a_allocator_dump(&allocator); + + /* test non-contiguous allocation */ + addr = 4; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + addr = 0; + gk20a_allocator_block_alloc_nc(&allocator, &addr, 5, &list[0]); + gk20a_allocator_dump(&allocator); + gk20a_allocator_dump_nc_list(&allocator, list[0]); + + /* test free a range overlaping non-contiguous blocks */ + gk20a_allocator_block_free(&allocator, 2, 6); + gk20a_allocator_dump(&allocator); + + /* test non-contiguous free */ + gk20a_allocator_block_free_nc(&allocator, list[0]); + gk20a_allocator_dump(&allocator); + + gk20a_allocator_destroy(&allocator); + + /* random stress test */ + gk20a_allocator_init(&allocator, "test", 4096, 4096 * 1024, 4096); + for (;;) { + pr_debug("alloc tests...\n"); + for (count = 0; count < 50; count++) { + addr = 0; + len = random32() % (4096 * 1024 / 16); + gk20a_allocator_block_alloc(&allocator, &addr, len); + gk20a_allocator_dump(&allocator); + } + + pr_debug("free tests...\n"); + for (count = 0; count < 30; count++) { + addr = (random32() % (4096 * 1024)) & ~(4096 - 1); + len = random32() % (4096 * 1024 / 16); + gk20a_allocator_block_free(&allocator, addr, len); + gk20a_allocator_dump(&allocator); + } + + pr_debug("non-contiguous alloc tests...\n"); + for (n = 0; n < 5; n++) { + addr = 0; + len = random32() % (4096 * 1024 / 8); + gk20a_allocator_block_alloc_nc(&allocator, &addr, + len, &list[n]); + gk20a_allocator_dump(&allocator); + gk20a_allocator_dump_nc_list(&allocator, list[n]); + } + + pr_debug("free tests...\n"); + for (count = 0; count < 10; count++) { + addr = (random32() % (4096 * 1024)) & ~(4096 - 1); + len = random32() % (4096 * 1024 / 16); + gk20a_allocator_block_free(&allocator, addr, len); + gk20a_allocator_dump(&allocator); + } + + pr_debug("non-contiguous free tests...\n"); + for (n = 4; n >= 0; n--) { + gk20a_allocator_dump_nc_list(&allocator, list[n]); + gk20a_allocator_block_free_nc(&allocator, list[n]); + gk20a_allocator_dump(&allocator); + } + + pr_debug("fixed addr alloc tests...\n"); + for (count = 0; count < 10; count++) { + addr = (random32() % (4096 * 1024)) & ~(4096 - 1); + len = random32() % (4096 * 1024 / 32); + gk20a_allocator_block_alloc(&allocator, &addr, len); + gk20a_allocator_dump(&allocator); + } + + pr_debug("free tests...\n"); + for (count = 0; count < 10; count++) { + addr = (random32() % (4096 * 1024)) & ~(4096 - 1); + len = random32() % (4096 * 1024 / 16); + gk20a_allocator_block_free(&allocator, addr, len); + gk20a_allocator_dump(&allocator); + } + } + gk20a_allocator_destroy(&allocator); +} + +#endif /* ALLOCATOR_DEBUG */ + diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h new file mode 100644 index 00000000..dba397e2 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h @@ -0,0 +1,177 @@ +/* + * gk20a allocator + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __NVHOST_ALLOCATOR_H__ +#define __NVHOST_ALLOCATOR_H__ + +#include +#include +#include + +/* #define ALLOCATOR_DEBUG */ + +struct allocator_block; + +/* main struct */ +struct gk20a_allocator { + + char name[32]; /* name for allocator */ + struct rb_root rb_root; /* rb tree root for blocks */ + + u32 base; /* min value of this linear space */ + u32 limit; /* max value = limit - 1 */ + u32 align; /* alignment size, power of 2 */ + + struct gk20a_alloc_block *block_first; /* first block in list */ + struct gk20a_alloc_block *block_recent; /* last visited block */ + + u32 first_free_addr; /* first free addr, non-contigous + allocation preferred start, + in order to pick up small holes */ + u32 last_free_addr; /* last free addr, contiguous + allocation preferred start */ + u32 cached_hole_size; /* max free hole size up to + last_free_addr */ + u32 block_count; /* number of blocks */ + + struct rw_semaphore rw_sema; /* lock */ + struct kmem_cache *block_cache; /* slab cache */ + + /* if enabled, constrain to [base, limit) */ + struct { + bool enable; + u32 base; + u32 limit; + } constraint; + + int (*alloc)(struct gk20a_allocator *allocator, + u32 *addr, u32 len); + int (*alloc_nc)(struct gk20a_allocator *allocator, + u32 *addr, u32 len, + struct gk20a_alloc_block **pblock); + int (*free)(struct gk20a_allocator *allocator, + u32 addr, u32 len); + void (*free_nc)(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block); + + int (*constrain)(struct gk20a_allocator *a, + bool enable, + u32 base, u32 limit); +}; + +/* a block of linear space range [start, end) */ +struct gk20a_alloc_block { + struct gk20a_allocator *allocator; /* parent allocator */ + struct rb_node rb; /* rb tree node */ + + u32 start; /* linear space range + [start, end) */ + u32 end; + + void *priv; /* backing structure for this + linear space block + page table, comp tag, etc */ + + struct gk20a_alloc_block *prev; /* prev block with lower address */ + struct gk20a_alloc_block *next; /* next block with higher address */ + + bool nc_block; + struct gk20a_alloc_block *nc_prev; /* prev block for + non-contiguous allocation */ + struct gk20a_alloc_block *nc_next; /* next block for + non-contiguous allocation */ +}; + +int gk20a_allocator_init(struct gk20a_allocator *allocator, + const char *name, u32 base, u32 size, u32 align); +void gk20a_allocator_destroy(struct gk20a_allocator *allocator); + +int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator, + u32 *addr, u32 len); +int gk20a_allocator_block_alloc_nc(struct gk20a_allocator *allocator, + u32 *addr, u32 len, + struct gk20a_alloc_block **pblock); + +int gk20a_allocator_block_free(struct gk20a_allocator *allocator, + u32 addr, u32 len); +void gk20a_allocator_block_free_nc(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block); + +#if defined(ALLOCATOR_DEBUG) + +#define allocator_dbg(alloctor, format, arg...) 
\ +do { \ + if (1) \ + pr_debug("gk20a_allocator (%s) %s: " format "\n",\ + alloctor->name, __func__, ##arg);\ +} while (0) + +static inline void +gk20a_allocator_dump(struct gk20a_allocator *allocator) { + struct gk20a_alloc_block *block; + u32 count = 0; + + down_read(&allocator->rw_sema); + for (block = allocator->block_first; block; block = block->next) { + allocator_dbg(allocator, "block %d - %d:%d, nc %d", + count++, block->start, block->end, block->nc_block); + + if (block->prev) + BUG_ON(block->prev->end > block->start); + if (block->next) + BUG_ON(block->next->start < block->end); + } + allocator_dbg(allocator, "tracked count %d, actual count %d", + allocator->block_count, count); + allocator_dbg(allocator, "first block %d:%d", + allocator->block_first ? allocator->block_first->start : -1, + allocator->block_first ? allocator->block_first->end : -1); + allocator_dbg(allocator, "first free addr %d", + allocator->first_free_addr); + allocator_dbg(allocator, "last free addr %d", + allocator->last_free_addr); + allocator_dbg(allocator, "cached hole size %d", + allocator->cached_hole_size); + up_read(&allocator->rw_sema); + + BUG_ON(count != allocator->block_count); +} + +static inline void +gk20a_allocator_dump_nc_list( + struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block) +{ + down_read(&allocator->rw_sema); + while (block) { + pr_debug("non-contiguous block %d:%d\n", + block->start, block->end); + block = block->nc_next; + } + up_read(&allocator->rw_sema); +} + +void gk20a_allocator_test(void); + +#else /* ALLOCATOR_DEBUG */ + +#define allocator_dbg(format, arg...) + +#endif /* ALLOCATOR_DEBUG */ + +#endif /*__NVHOST_ALLOCATOR_H__ */ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.c b/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.c new file mode 100644 index 00000000..c6478a5e --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.c @@ -0,0 +1,374 @@ +/* + * Copyright (c) 2012-2014, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * This file is autogenerated. Do not edit. 
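 *
 * Each gating_desc entry below pairs a register address with a "prod"
 * value (the production clock-gating setting) and a "disable" value
 * (written when the corresponding gating feature is turned off). The
 * gr_gk20a_*_load_gating_prod() helpers at the end of the file simply
 * walk one of these tables and write either the prod or the disable
 * value into every listed register.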
+ */ + +#ifndef __gk20a_gating_reglist_h__ +#define __gk20a_gating_reglist_h__ + +#include +#include "gk20a_gating_reglist.h" + +struct gating_desc { + u32 addr; + u32 prod; + u32 disable; +}; +/* slcg gr */ +const struct gating_desc gk20a_slcg_gr[] = { + {.addr = 0x004041f4, .prod = 0x00000000, .disable = 0x03fffffe}, + {.addr = 0x00409894, .prod = 0x00000040, .disable = 0x0003fffe}, + {.addr = 0x004078c4, .prod = 0x00000000, .disable = 0x000001fe}, + {.addr = 0x00406004, .prod = 0x00000000, .disable = 0x0001fffe}, + {.addr = 0x00405864, .prod = 0x00000000, .disable = 0x000001fe}, + {.addr = 0x00405910, .prod = 0x00000000, .disable = 0xfffffffe}, + {.addr = 0x00408044, .prod = 0x00000000, .disable = 0x000007fe}, + {.addr = 0x00407004, .prod = 0x00000000, .disable = 0x0000001e}, + {.addr = 0x0041a894, .prod = 0x00000000, .disable = 0x0003fffe}, + {.addr = 0x00418504, .prod = 0x00000000, .disable = 0x0001fffe}, + {.addr = 0x0041860c, .prod = 0x00000000, .disable = 0x000001fe}, + {.addr = 0x0041868c, .prod = 0x00000000, .disable = 0x0000001e}, + {.addr = 0x0041871c, .prod = 0x00000000, .disable = 0x0000003e}, + {.addr = 0x00418388, .prod = 0x00000000, .disable = 0x00000001}, + {.addr = 0x0041882c, .prod = 0x00000000, .disable = 0x0001fffe}, + {.addr = 0x00418bc0, .prod = 0x00000000, .disable = 0x000001fe}, + {.addr = 0x00418974, .prod = 0x00000000, .disable = 0x0001fffe}, + {.addr = 0x00418c74, .prod = 0x00000000, .disable = 0xfffffffe}, + {.addr = 0x00418cf4, .prod = 0x00000000, .disable = 0xfffffffe}, + {.addr = 0x00418d74, .prod = 0x00000000, .disable = 0xfffffffe}, + {.addr = 0x00418f10, .prod = 0x00000000, .disable = 0xfffffffe}, + {.addr = 0x00418e10, .prod = 0x00000000, .disable = 0xfffffffe}, + {.addr = 0x00419024, .prod = 0x00000000, .disable = 0x000001fe}, + {.addr = 0x00419a44, .prod = 0x00000000, .disable = 0x0000000e}, + {.addr = 0x00419a4c, .prod = 0x00000000, .disable = 0x000001fe}, + {.addr = 0x00419a54, .prod = 0x00000000, .disable = 0x0000003e}, + {.addr = 0x00419a5c, .prod = 0x00000000, .disable = 0x0000000e}, + {.addr = 0x00419a64, .prod = 0x00000000, .disable = 0x000001fe}, + {.addr = 0x00419a6c, .prod = 0x00000000, .disable = 0x0000000e}, + {.addr = 0x00419a74, .prod = 0x00000000, .disable = 0x0000000e}, + {.addr = 0x00419a7c, .prod = 0x00000000, .disable = 0x0000003e}, + {.addr = 0x00419a84, .prod = 0x00000000, .disable = 0x0000000e}, + {.addr = 0x00419ad0, .prod = 0x00000000, .disable = 0x0000000e}, + {.addr = 0x0041986c, .prod = 0x0000dfc0, .disable = 0x00fffffe}, + {.addr = 0x00419cd8, .prod = 0x00000000, .disable = 0x001ffffe}, + {.addr = 0x00419ce0, .prod = 0x00000000, .disable = 0x001ffffe}, + {.addr = 0x00419c74, .prod = 0x00000000, .disable = 0x0000001e}, + {.addr = 0x00419fd4, .prod = 0x00000000, .disable = 0x0003fffe}, + {.addr = 0x00419fdc, .prod = 0x00000000, .disable = 0xfffffffe}, + {.addr = 0x00419fe4, .prod = 0x00000000, .disable = 0x0000000e}, + {.addr = 0x00419ff4, .prod = 0x00000000, .disable = 0x00003ffe}, + {.addr = 0x00419ffc, .prod = 0x00000000, .disable = 0x0001fffe}, + {.addr = 0x0041be2c, .prod = 0x020bbfc0, .disable = 0xfffffffe}, + {.addr = 0x0041bfec, .prod = 0x00000000, .disable = 0xfffffffe}, + {.addr = 0x0041bed4, .prod = 0x00000000, .disable = 0xfffffffe}, + {.addr = 0x00408814, .prod = 0x00000000, .disable = 0x0001fffe}, + {.addr = 0x0040881c, .prod = 0x00000000, .disable = 0x0001fffe}, + {.addr = 0x00408a84, .prod = 0x00000000, .disable = 0x0001fffe}, + {.addr = 0x00408a8c, .prod = 0x00000000, .disable = 0x0001fffe}, + {.addr = 
0x00408a94, .prod = 0x00000000, .disable = 0x0001fffe}, + {.addr = 0x00408a9c, .prod = 0x00000000, .disable = 0x0001fffe}, + {.addr = 0x00408aa4, .prod = 0x00000000, .disable = 0x0001fffe}, + {.addr = 0x00408aac, .prod = 0x00000000, .disable = 0x0001fffe}, + {.addr = 0x004089ac, .prod = 0x00000000, .disable = 0x0001fffe}, + {.addr = 0x00408a24, .prod = 0x00000000, .disable = 0x000001ff}, + {.addr = 0x0017e050, .prod = 0x00000000, .disable = 0x00fffffe}, + {.addr = 0x001200a8, .prod = 0x00000000, .disable = 0x00000001}, + {.addr = 0x0010e48c, .prod = 0x00000000, .disable = 0x0000003e}, + {.addr = 0x00001c04, .prod = 0x00000000, .disable = 0x000000fe}, + {.addr = 0x00106f28, .prod = 0x00000040, .disable = 0x000007fe}, + {.addr = 0x000206b8, .prod = 0x00000000, .disable = 0x0000000f}, + {.addr = 0x0017ea98, .prod = 0x00000000, .disable = 0xfffffffe}, + {.addr = 0x00106f28, .prod = 0x00000040, .disable = 0x000007fe}, + {.addr = 0x00120048, .prod = 0x00000000, .disable = 0x00000049}, +}; + +/* slcg perf */ +const struct gating_desc gk20a_slcg_perf[] = { + {.addr = 0x001be018, .prod = 0x000001ff, .disable = 0x00000000}, + {.addr = 0x001bc018, .prod = 0x000001ff, .disable = 0x00000000}, + {.addr = 0x001b8018, .prod = 0x000001ff, .disable = 0x00000000}, + {.addr = 0x001b4124, .prod = 0x00000001, .disable = 0x00000000}, +}; + +/* blcg gr */ +const struct gating_desc gk20a_blcg_gr[] = { + {.addr = 0x004041f0, .prod = 0x00004046, .disable = 0x00000000}, + {.addr = 0x00409890, .prod = 0x0000007f, .disable = 0x00000000}, + {.addr = 0x004098b0, .prod = 0x0000007f, .disable = 0x00000000}, + {.addr = 0x004078c0, .prod = 0x00000042, .disable = 0x00000000}, + {.addr = 0x00406000, .prod = 0x00004044, .disable = 0x00000000}, + {.addr = 0x00405860, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x0040590c, .prod = 0x00004044, .disable = 0x00000000}, + {.addr = 0x00408040, .prod = 0x00004044, .disable = 0x00000000}, + {.addr = 0x00407000, .prod = 0x00004041, .disable = 0x00000000}, + {.addr = 0x00405bf0, .prod = 0x00004044, .disable = 0x00000000}, + {.addr = 0x0041a890, .prod = 0x0000007f, .disable = 0x00000000}, + {.addr = 0x0041a8b0, .prod = 0x0000007f, .disable = 0x00000000}, + {.addr = 0x00418500, .prod = 0x00004044, .disable = 0x00000000}, + {.addr = 0x00418608, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00418688, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00418718, .prod = 0x00000042, .disable = 0x00000000}, + {.addr = 0x00418828, .prod = 0x00000044, .disable = 0x00000000}, + {.addr = 0x00418bbc, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00418970, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00418c70, .prod = 0x00004044, .disable = 0x00000000}, + {.addr = 0x00418cf0, .prod = 0x00004044, .disable = 0x00000000}, + {.addr = 0x00418d70, .prod = 0x00004044, .disable = 0x00000000}, + {.addr = 0x00418f0c, .prod = 0x00004044, .disable = 0x00000000}, + {.addr = 0x00418e0c, .prod = 0x00004044, .disable = 0x00000000}, + {.addr = 0x00419020, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00419038, .prod = 0x00000042, .disable = 0x00000000}, + {.addr = 0x00419a40, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00419a48, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00419a50, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00419a58, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00419a60, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00419a68, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 
0x00419a70, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00419a78, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00419a80, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00419acc, .prod = 0x00004047, .disable = 0x00000000}, + {.addr = 0x00419868, .prod = 0x00000043, .disable = 0x00000000}, + {.addr = 0x00419cd4, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00419cdc, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00419c70, .prod = 0x00004045, .disable = 0x00000000}, + {.addr = 0x00419fd0, .prod = 0x00004043, .disable = 0x00000000}, + {.addr = 0x00419fd8, .prod = 0x00004045, .disable = 0x00000000}, + {.addr = 0x00419fe0, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00419fe8, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00419ff0, .prod = 0x00004044, .disable = 0x00000000}, + {.addr = 0x00419ff8, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00419f90, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x0041be28, .prod = 0x00000042, .disable = 0x00000000}, + {.addr = 0x0041bfe8, .prod = 0x00004044, .disable = 0x00000000}, + {.addr = 0x0041bed0, .prod = 0x00004044, .disable = 0x00000000}, + {.addr = 0x00408810, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00408818, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00408a80, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00408a88, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00408a90, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00408a98, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00408aa0, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x00408aa8, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x004089a8, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x004089b0, .prod = 0x00000042, .disable = 0x00000000}, + {.addr = 0x004089b8, .prod = 0x00004042, .disable = 0x00000000}, + {.addr = 0x0017ea60, .prod = 0x00000044, .disable = 0x00000000}, + {.addr = 0x0017ea68, .prod = 0x00000044, .disable = 0x00000000}, + {.addr = 0x00100d30, .prod = 0x0000c242, .disable = 0x00000000}, + {.addr = 0x00100d48, .prod = 0x0000c242, .disable = 0x00000000}, + {.addr = 0x00100d3c, .prod = 0x00000242, .disable = 0x00000000}, + {.addr = 0x0017ea78, .prod = 0x00000044, .disable = 0x00000000}, + {.addr = 0x0017e040, .prod = 0x00000044, .disable = 0x00000000}, + {.addr = 0x00100d1c, .prod = 0x00000042, .disable = 0x00000000}, + {.addr = 0x00106f24, .prod = 0x0000c242, .disable = 0x00000000}, + {.addr = 0x0041be00, .prod = 0x00000004, .disable = 0x00000007}, + {.addr = 0x00100d10, .prod = 0x0000c242, .disable = 0x00000000}, + {.addr = 0x0017ea70, .prod = 0x00000044, .disable = 0x00000000}, + {.addr = 0x00001c00, .prod = 0x00000042, .disable = 0x00000000}, + {.addr = 0x00100c98, .prod = 0x00000242, .disable = 0x00000000}, + {.addr = 0x0017e030, .prod = 0x00000044, .disable = 0x00000000}, +}; + +/* pg gr */ +const struct gating_desc gk20a_pg_gr[] = { + {.addr = 0x004041f8, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x004041fc, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00409898, .prod = 0x10140000, .disable = 0x00000000}, + {.addr = 0x0040989c, .prod = 0xff00000a, .disable = 0x00000000}, + {.addr = 0x004078c8, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x004078cc, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00406008, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x0040600c, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 
0x00405868, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x0040586c, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00405914, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00405924, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00408048, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x0040804c, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00407008, .prod = 0x10140000, .disable = 0x00000000}, + {.addr = 0x0040700c, .prod = 0xff00000a, .disable = 0x00000000}, + {.addr = 0x00405bf8, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00405bfc, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x0041a898, .prod = 0x10140000, .disable = 0x00000000}, + {.addr = 0x0041a89c, .prod = 0xff00000a, .disable = 0x00000000}, + {.addr = 0x00418510, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00418514, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00418610, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00418614, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00418690, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00418694, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00418720, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00418724, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00418840, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00418844, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00418bc4, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00418bc8, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00418978, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x0041897c, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00418c78, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00418c7c, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00418cf8, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00418cfc, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00418d78, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00418d7c, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00418f14, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00418f18, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00418e14, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00418e18, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00419030, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00419050, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00419a88, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00419a8c, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00419a90, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00419a94, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00419a98, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00419a9c, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00419aa0, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00419aa4, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00419ad4, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00419ad8, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00419870, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00419874, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00419ce4, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00419cf0, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00419c78, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 
0x00419c7c, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00419fa0, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00419fa4, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00419fa8, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00419fac, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00419fb0, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00419fb4, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00419fb8, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00419fbc, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00419fc0, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00419fc4, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00419fc8, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00419fcc, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x0041be30, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x0041be34, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x0041bff0, .prod = 0x10747c00, .disable = 0x00000000}, + {.addr = 0x0041bff4, .prod = 0xff00000a, .disable = 0x00000000}, + {.addr = 0x0041bed8, .prod = 0x10240a00, .disable = 0x00000000}, + {.addr = 0x0041bee0, .prod = 0xff00000a, .disable = 0x00000000}, + {.addr = 0x00408820, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00408824, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00408828, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x0040882c, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00408ac0, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00408ac4, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00408ac8, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00408acc, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00408ad0, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00408ad4, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00408ad8, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00408adc, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00408ae0, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00408ae4, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x00408ae8, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x00408aec, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x004089c0, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x004089c4, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x004089c8, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x004089cc, .prod = 0xff00a725, .disable = 0x00000000}, + {.addr = 0x004089d0, .prod = 0x10940000, .disable = 0x00000000}, + {.addr = 0x004089d4, .prod = 0xff00a725, .disable = 0x00000000}, +}; + +/* therm gr */ +const struct gating_desc gk20a_slcg_therm[] = { + {.addr = 0x000206b8, .prod = 0x00000000, .disable = 0x0000000f}, +}; + +/* static inline functions */ +void gr_gk20a_slcg_gr_load_gating_prod(struct gk20a *g, + bool prod) +{ + u32 i; + u32 size = sizeof(gk20a_slcg_gr) / sizeof(struct gating_desc); + for (i = 0; i < size; i++) { + if (prod) + gk20a_writel(g, gk20a_slcg_gr[i].addr, + gk20a_slcg_gr[i].prod); + else + gk20a_writel(g, gk20a_slcg_gr[i].addr, + gk20a_slcg_gr[i].disable); + } +} + +void gr_gk20a_slcg_perf_load_gating_prod(struct gk20a *g, + bool prod) +{ + u32 i; + u32 size = sizeof(gk20a_slcg_perf) / sizeof(struct gating_desc); + for (i = 0; i < size; i++) { + if (prod) + gk20a_writel(g, gk20a_slcg_perf[i].addr, + gk20a_slcg_perf[i].prod); + else + gk20a_writel(g, 
gk20a_slcg_perf[i].addr, + gk20a_slcg_perf[i].disable); + } +} + +void gr_gk20a_blcg_gr_load_gating_prod(struct gk20a *g, + bool prod) +{ + u32 i; + u32 size = sizeof(gk20a_blcg_gr) / sizeof(struct gating_desc); + for (i = 0; i < size; i++) { + if (prod) + gk20a_writel(g, gk20a_blcg_gr[i].addr, + gk20a_blcg_gr[i].prod); + else + gk20a_writel(g, gk20a_blcg_gr[i].addr, + gk20a_blcg_gr[i].disable); + } +} + +void gr_gk20a_pg_gr_load_gating_prod(struct gk20a *g, + bool prod) +{ + u32 i; + u32 size = sizeof(gk20a_pg_gr) / sizeof(struct gating_desc); + for (i = 0; i < size; i++) { + if (prod) + gk20a_writel(g, gk20a_pg_gr[i].addr, + gk20a_pg_gr[i].prod); + else + gk20a_writel(g, gk20a_pg_gr[i].addr, + gk20a_pg_gr[i].disable); + } +} + +void gr_gk20a_slcg_therm_load_gating_prod(struct gk20a *g, + bool prod) +{ + u32 i; + u32 size = sizeof(gk20a_slcg_therm) / sizeof(struct gating_desc); + for (i = 0; i < size; i++) { + if (prod) + gk20a_writel(g, gk20a_slcg_therm[i].addr, + gk20a_slcg_therm[i].prod); + else + gk20a_writel(g, gk20a_slcg_therm[i].addr, + gk20a_slcg_therm[i].disable); + } +} + +#endif /* __gk20a_gating_reglist_h__ */ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.h b/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.h new file mode 100644 index 00000000..40a6c545 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.h @@ -0,0 +1,39 @@ +/* + * drivers/video/tegra/host/gk20a/gk20a_gating_reglist.h + * + * Copyright (c) 2012, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * This file is autogenerated. Do not edit. + */ + +#include "gk20a.h" + +void gr_gk20a_slcg_gr_load_gating_prod(struct gk20a *g, + bool prod); + +void gr_gk20a_slcg_perf_load_gating_prod(struct gk20a *g, + bool prod); + +void gr_gk20a_blcg_gr_load_gating_prod(struct gk20a *g, + bool prod); + +void gr_gk20a_pg_gr_load_gating_prod(struct gk20a *g, + bool prod); + +void gr_gk20a_slcg_therm_load_gating_prod(struct gk20a *g, + bool prod); + + diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_scale.c b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c new file mode 100644 index 00000000..d1fd71fe --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c @@ -0,0 +1,358 @@ +/* + * gk20a clock scaling profile + * + * Copyright (c) 2013-2014, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "gk20a.h" +#include "pmu_gk20a.h" +#include "clk_gk20a.h" +#include "gk20a_scale.h" + +static ssize_t gk20a_scale_load_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + struct gk20a *g = get_gk20a(pdev); + u32 busy_time; + ssize_t res; + + if (!g->power_on) { + busy_time = 0; + } else { + gk20a_busy(g->dev); + gk20a_pmu_load_norm(g, &busy_time); + gk20a_idle(g->dev); + } + + res = snprintf(buf, PAGE_SIZE, "%u\n", busy_time); + + return res; +} + +static DEVICE_ATTR(load, S_IRUGO, gk20a_scale_load_show, NULL); + +/* + * gk20a_scale_qos_notify() + * + * This function is called when the minimum QoS requirement for the device + * has changed. The function calls postscaling callback if it is defined. + */ + +static int gk20a_scale_qos_notify(struct notifier_block *nb, + unsigned long n, void *p) +{ + struct gk20a_scale_profile *profile = + container_of(nb, struct gk20a_scale_profile, + qos_notify_block); + struct gk20a_platform *platform = platform_get_drvdata(profile->pdev); + struct gk20a *g = get_gk20a(profile->pdev); + unsigned long freq; + + if (!platform->postscale) + return NOTIFY_OK; + + /* get the frequency requirement. if devfreq is enabled, check if it + * has higher demand than qos */ + freq = gk20a_clk_round_rate(g, pm_qos_request(platform->qos_id)); + if (g->devfreq) + freq = max(g->devfreq->previous_freq, freq); + + platform->postscale(profile->pdev, freq); + + return NOTIFY_OK; +} + +/* + * gk20a_scale_make_freq_table(profile) + * + * This function initialises the frequency table for the given device profile + */ + +static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile) +{ + struct gk20a *g = get_gk20a(profile->pdev); + unsigned long *freqs; + int num_freqs, err; + + /* make sure the clock is available */ + if (!gk20a_clk_get(g)) + return -ENOSYS; + + /* get gpu dvfs table */ + err = tegra_dvfs_get_freqs(clk_get_parent(g->clk.tegra_clk), + &freqs, &num_freqs); + if (err) + return -ENOSYS; + + profile->devfreq_profile.freq_table = (unsigned long *)freqs; + profile->devfreq_profile.max_state = num_freqs; + + return 0; +} + +/* + * gk20a_scale_target(dev, *freq, flags) + * + * This function scales the clock + */ + +static int gk20a_scale_target(struct device *dev, unsigned long *freq, + u32 flags) +{ + struct gk20a *g = get_gk20a(to_platform_device(dev)); + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct gk20a_scale_profile *profile = g->scale_profile; + unsigned long rounded_rate = gk20a_clk_round_rate(g, *freq); + + if (gk20a_clk_get_rate(g) == rounded_rate) { + *freq = rounded_rate; + return 0; + } + + gk20a_clk_set_rate(g, rounded_rate); + if (platform->postscale) + platform->postscale(profile->pdev, rounded_rate); + *freq = gk20a_clk_get_rate(g); + + return 0; +} + +/* + * update_load_estimate_gpmu(profile) + * + * Update load estimate using gpmu. The gpmu value is normalised + * based on the time it was asked last time. 
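 *
 * The value returned by gk20a_pmu_load_norm() is treated here as parts
 * per thousand of the elapsed window: e.g. a reading of 250 across a
 * 10000 us window yields busy_time = 250 * 10000 / 1000 = 2500 us, i.e.
 * roughly 25% utilisation over the total_time recorded for the interval.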
+ */ + +static void update_load_estimate_gpmu(struct platform_device *pdev) +{ + struct gk20a *g = get_gk20a(pdev); + struct gk20a_scale_profile *profile = g->scale_profile; + unsigned long dt; + u32 busy_time; + ktime_t t; + + t = ktime_get(); + dt = ktime_us_delta(t, profile->last_event_time); + + profile->dev_stat.total_time = dt; + profile->last_event_time = t; + gk20a_pmu_load_norm(g, &busy_time); + profile->dev_stat.busy_time = (busy_time * dt) / 1000; +} + +/* + * gk20a_scale_suspend(pdev) + * + * This function informs devfreq of suspend + */ + +void gk20a_scale_suspend(struct platform_device *pdev) +{ + struct gk20a *g = get_gk20a(pdev); + struct devfreq *devfreq = g->devfreq; + + if (!devfreq) + return; + + devfreq_suspend_device(devfreq); +} + +/* + * gk20a_scale_resume(pdev) + * + * This function informs devfreq of resume + */ + +void gk20a_scale_resume(struct platform_device *pdev) +{ + struct gk20a *g = get_gk20a(pdev); + struct devfreq *devfreq = g->devfreq; + + if (!devfreq) + return; + + devfreq_resume_device(devfreq); +} + +/* + * gk20a_scale_notify(pdev, busy) + * + * Calling this function informs the load estimator that the device is + * idling (or busy). This data is used to estimate the current load. + */ + +static void gk20a_scale_notify(struct platform_device *pdev, bool busy) +{ + struct gk20a_platform *platform = platform_get_drvdata(pdev); + struct gk20a *g = get_gk20a(pdev); + struct gk20a_scale_profile *profile = g->scale_profile; + struct devfreq *devfreq = g->devfreq; + + /* inform edp about new constraint */ + if (platform->prescale) + platform->prescale(pdev); + + /* Is the device profile initialised? */ + if (!(profile && devfreq)) + return; + + mutex_lock(&devfreq->lock); + profile->dev_stat.busy = busy; + update_devfreq(devfreq); + mutex_unlock(&devfreq->lock); +} + +void gk20a_scale_notify_idle(struct platform_device *pdev) +{ + gk20a_scale_notify(pdev, false); +} + +void gk20a_scale_notify_busy(struct platform_device *pdev) +{ + gk20a_scale_notify(pdev, true); +} + +/* + * gk20a_scale_get_dev_status(dev, *stat) + * + * This function queries the current device status.
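 *
 * A devfreq governor (typically the one named in
 * platform->devfreq_governor) derives the load for its next frequency
 * decision from the busy_time / total_time ratio reported here; both
 * counters are cleared after each query, so every reading covers only
 * the interval since the previous one.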
+ */ + +static int gk20a_scale_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + struct gk20a *g = get_gk20a(to_platform_device(dev)); + struct gk20a_scale_profile *profile = g->scale_profile; + + /* Make sure there are correct values for the current frequency */ + profile->dev_stat.current_frequency = gk20a_clk_get_rate(g); + + /* Update load estimate */ + update_load_estimate_gpmu(to_platform_device(dev)); + + /* Copy the contents of the current device status */ + *stat = profile->dev_stat; + + /* Finally, clear out the local values */ + profile->dev_stat.total_time = 0; + profile->dev_stat.busy_time = 0; + + return 0; +} + +/* + * gk20a_scale_init(pdev) + */ + +void gk20a_scale_init(struct platform_device *pdev) +{ + struct gk20a_platform *platform = platform_get_drvdata(pdev); + struct gk20a *g = platform->g; + struct gk20a_scale_profile *profile; + int err; + + if (g->scale_profile) + return; + + profile = kzalloc(sizeof(*profile), GFP_KERNEL); + + profile->pdev = pdev; + profile->dev_stat.busy = false; + + /* Create frequency table */ + err = gk20a_scale_make_freq_table(profile); + if (err || !profile->devfreq_profile.max_state) + goto err_get_freqs; + + if (device_create_file(&pdev->dev, &dev_attr_load)) + goto err_create_sysfs_entry; + + /* Store device profile so we can access it if devfreq governor + * init needs that */ + g->scale_profile = profile; + + if (platform->devfreq_governor) { + struct devfreq *devfreq; + + profile->devfreq_profile.initial_freq = + profile->devfreq_profile.freq_table[0]; + profile->devfreq_profile.target = gk20a_scale_target; + profile->devfreq_profile.get_dev_status = + gk20a_scale_get_dev_status; + + devfreq = devfreq_add_device(&pdev->dev, + &profile->devfreq_profile, + platform->devfreq_governor, NULL); + + if (IS_ERR(devfreq)) + devfreq = NULL; + + g->devfreq = devfreq; + } + + /* Should we register QoS callback for this device? */ + if (platform->qos_id < PM_QOS_NUM_CLASSES && + platform->qos_id != PM_QOS_RESERVED && + platform->postscale) { + profile->qos_notify_block.notifier_call = + &gk20a_scale_qos_notify; + pm_qos_add_notifier(platform->qos_id, + &profile->qos_notify_block); + } + + return; + +err_get_freqs: + device_remove_file(&pdev->dev, &dev_attr_load); +err_create_sysfs_entry: + kfree(g->scale_profile); + g->scale_profile = NULL; +} + +/* + * gk20a_scale_hw_init(dev) + * + * Initialize hardware portion of the device + */ + +void gk20a_scale_hw_init(struct platform_device *pdev) +{ + struct gk20a_platform *platform = platform_get_drvdata(pdev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + + /* make sure that scaling has bee initialised */ + if (!profile) + return; + + profile->dev_stat.total_time = 0; + profile->last_event_time = ktime_get(); +} diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_scale.h b/drivers/gpu/nvgpu/gk20a/gk20a_scale.h new file mode 100644 index 00000000..e76b1662 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_scale.h @@ -0,0 +1,51 @@ +/* + * gk20a clock scaling profile + * + * Copyright (c) 2013-2014, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef GK20A_SCALE_H +#define GK20A_SCALE_H + +#include +#include + +struct platform_device; +struct clk; + +struct gk20a_scale_profile { + struct platform_device *pdev; + ktime_t last_event_time; + struct devfreq_dev_profile devfreq_profile; + struct devfreq_dev_status dev_stat; + struct notifier_block qos_notify_block; + void *private_data; +}; + +/* Initialization and de-initialization for module */ +void gk20a_scale_init(struct platform_device *); +void gk20a_scale_hw_init(struct platform_device *pdev); + +/* + * call when performing submit to notify scaling mechanism that the module is + * in use + */ +void gk20a_scale_notify_busy(struct platform_device *); +void gk20a_scale_notify_idle(struct platform_device *); + +void gk20a_scale_suspend(struct platform_device *); +void gk20a_scale_resume(struct platform_device *); + +#endif diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c new file mode 100644 index 00000000..f6b43f50 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c @@ -0,0 +1,335 @@ +/* + * drivers/video/tegra/host/gk20a/gk20a_sysfs.c + * + * GK20A Graphics + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#include + +#include "gk20a.h" +#include "gr_gk20a.h" +#include "fifo_gk20a.h" + + +#define PTIMER_FP_FACTOR 1000000 +/* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. 32 ns is + the resolution of ptimer. */ +#define PTIMER_REF_FREQ_HZ 31250000 + + +static ssize_t elcg_enable_store(struct device *device, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct platform_device *ndev = to_platform_device(device); + struct gk20a *g = get_gk20a(ndev); + unsigned long val = 0; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + gk20a_busy(g->dev); + if (val) { + g->elcg_enabled = true; + gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A); + gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A); + } else { + g->elcg_enabled = false; + gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); + gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); + } + gk20a_idle(g->dev); + + dev_info(device, "ELCG is %s.\n", g->elcg_enabled ? "enabled" : + "disabled"); + + return count; +} + +static ssize_t elcg_enable_read(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct platform_device *ndev = to_platform_device(device); + struct gk20a *g = get_gk20a(ndev); + + return sprintf(buf, "%d\n", g->elcg_enabled ? 
1 : 0); +} + +static DEVICE_ATTR(elcg_enable, S_IRWXUGO, elcg_enable_read, elcg_enable_store); + +static ssize_t blcg_enable_store(struct device *device, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct platform_device *ndev = to_platform_device(device); + struct gk20a *g = get_gk20a(ndev); + unsigned long val = 0; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val) + g->blcg_enabled = true; + else + g->blcg_enabled = false; + + gk20a_busy(g->dev); + g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled); + gk20a_idle(g->dev); + + dev_info(device, "BLCG is %s.\n", g->blcg_enabled ? "enabled" : + "disabled"); + + return count; +} + +static ssize_t blcg_enable_read(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct platform_device *ndev = to_platform_device(device); + struct gk20a *g = get_gk20a(ndev); + + return sprintf(buf, "%d\n", g->blcg_enabled ? 1 : 0); +} + +static DEVICE_ATTR(blcg_enable, S_IRWXUGO, blcg_enable_read, blcg_enable_store); + +static ssize_t slcg_enable_store(struct device *device, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct platform_device *ndev = to_platform_device(device); + struct gk20a *g = get_gk20a(ndev); + unsigned long val = 0; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val) + g->slcg_enabled = true; + else + g->slcg_enabled = false; + + /* + * TODO: slcg_therm_load_gating is not enabled anywhere during + * init. Therefore, it would be incongruous to add it here. Once + * it is added to init, we should add it here too. + */ + gk20a_busy(g->dev); + g->ops.clock_gating.slcg_gr_load_gating_prod(g, g->slcg_enabled); + g->ops.clock_gating.slcg_perf_load_gating_prod(g, g->slcg_enabled); + gk20a_idle(g->dev); + + dev_info(device, "SLCG is %s.\n", g->slcg_enabled ? "enabled" : + "disabled"); + + return count; +} + +static ssize_t slcg_enable_read(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct platform_device *ndev = to_platform_device(device); + struct gk20a *g = get_gk20a(ndev); + + return sprintf(buf, "%d\n", g->slcg_enabled ? 
1 : 0); +} + +static DEVICE_ATTR(slcg_enable, S_IRWXUGO, slcg_enable_read, slcg_enable_store); + +static ssize_t ptimer_scale_factor_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u32 tsc_freq_hz = clk_get_rate(clk_get_sys(NULL, "clk_m")); + u32 scaling_factor_fp = (u32)(PTIMER_REF_FREQ_HZ) / + ((u32)(tsc_freq_hz) / + (u32)(PTIMER_FP_FACTOR)); + ssize_t res = snprintf(buf, + PAGE_SIZE, + "%u.%u\n", + scaling_factor_fp / PTIMER_FP_FACTOR, + scaling_factor_fp % PTIMER_FP_FACTOR); + + return res; +} + +static DEVICE_ATTR(ptimer_scale_factor, + S_IRUGO, + ptimer_scale_factor_show, + NULL); + +static ssize_t railgate_delay_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + int railgate_delay = 0, ret = 0; + + if (!platform->can_railgate) { + dev_info(dev, "does not support power-gating\n"); + return count; + } + + ret = sscanf(buf, "%d", &railgate_delay); + if (ret == 1 && railgate_delay >= 0) { + struct generic_pm_domain *genpd = pd_to_genpd(dev->pm_domain); + platform->railgate_delay = railgate_delay; + pm_genpd_set_poweroff_delay(genpd, platform->railgate_delay); + } else + dev_err(dev, "Invalid powergate delay\n"); + + return count; +} +static ssize_t railgate_delay_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + return snprintf(buf, PAGE_SIZE, "%d\n", platform->railgate_delay); +} +static DEVICE_ATTR(railgate_delay, S_IRWXUGO, railgate_delay_show, + railgate_delay_store); + +static ssize_t clockgate_delay_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + int clockgate_delay = 0, ret = 0; + + ret = sscanf(buf, "%d", &clockgate_delay); + if (ret == 1 && clockgate_delay >= 0) { + platform->clockgate_delay = clockgate_delay; + pm_runtime_set_autosuspend_delay(dev, + platform->clockgate_delay); + } else + dev_err(dev, "Invalid clockgate delay\n"); + + return count; +} +static ssize_t clockgate_delay_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + return snprintf(buf, PAGE_SIZE, "%d\n", platform->clockgate_delay); +} +static DEVICE_ATTR(clockgate_delay, S_IRWXUGO, clockgate_delay_show, + clockgate_delay_store); + +static ssize_t counters_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + struct gk20a *g = get_gk20a(pdev); + u32 busy_cycles, total_cycles; + ssize_t res; + + gk20a_pmu_get_load_counters(g, &busy_cycles, &total_cycles); + + res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles); + + return res; +} + +static DEVICE_ATTR(counters, S_IRUGO, counters_show, NULL); +static ssize_t counters_show_reset(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t res = counters_show(dev, attr, buf); + struct platform_device *pdev = to_platform_device(dev); + struct gk20a *g = get_gk20a(pdev); + + gk20a_pmu_reset_load_counters(g); + + return res; +} + +static DEVICE_ATTR(counters_reset, S_IRUGO, counters_show_reset, NULL); + +static ssize_t elpg_enable_store(struct device *device, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct platform_device *ndev = to_platform_device(device); + struct gk20a *g = get_gk20a(ndev); + unsigned long val = 0; + + if 
(kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + /* + * Since elpg is refcounted, we should not unnecessarily call + * enable/disable if it is already so. + */ + gk20a_channel_busy(g->dev); + if (val && !g->elpg_enabled) { + g->elpg_enabled = true; + gk20a_pmu_enable_elpg(g); + } else if (!val && g->elpg_enabled) { + g->elpg_enabled = false; + gk20a_pmu_disable_elpg(g); + } + gk20a_channel_idle(g->dev); + + dev_info(device, "ELPG is %s.\n", g->elpg_enabled ? "enabled" : + "disabled"); + + return count; +} + +static ssize_t elpg_enable_read(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct platform_device *ndev = to_platform_device(device); + struct gk20a *g = get_gk20a(ndev); + + return sprintf(buf, "%d\n", g->elpg_enabled ? 1 : 0); +} + +static DEVICE_ATTR(elpg_enable, S_IRWXUGO, elpg_enable_read, elpg_enable_store); + +void gk20a_remove_sysfs(struct device *dev) +{ + device_remove_file(dev, &dev_attr_elcg_enable); + device_remove_file(dev, &dev_attr_blcg_enable); + device_remove_file(dev, &dev_attr_slcg_enable); + device_remove_file(dev, &dev_attr_ptimer_scale_factor); + device_remove_file(dev, &dev_attr_elpg_enable); + device_remove_file(dev, &dev_attr_counters); + device_remove_file(dev, &dev_attr_counters_reset); + device_remove_file(dev, &dev_attr_railgate_delay); + device_remove_file(dev, &dev_attr_clockgate_delay); +} + +void gk20a_create_sysfs(struct platform_device *dev) +{ + int error = 0; + + error |= device_create_file(&dev->dev, &dev_attr_elcg_enable); + error |= device_create_file(&dev->dev, &dev_attr_blcg_enable); + error |= device_create_file(&dev->dev, &dev_attr_slcg_enable); + error |= device_create_file(&dev->dev, &dev_attr_ptimer_scale_factor); + error |= device_create_file(&dev->dev, &dev_attr_elpg_enable); + error |= device_create_file(&dev->dev, &dev_attr_counters); + error |= device_create_file(&dev->dev, &dev_attr_counters_reset); + error |= device_create_file(&dev->dev, &dev_attr_railgate_delay); + error |= device_create_file(&dev->dev, &dev_attr_clockgate_delay); + + if (error) + dev_err(&dev->dev, "Failed to create sysfs attributes!\n"); +} diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c new file mode 100644 index 00000000..59404f1d --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c @@ -0,0 +1,333 @@ +/* + * drivers/video/tegra/host/gk20a/gr_ctx_gk20a.c + * + * GK20A Graphics Context + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include + +#include "gk20a.h" +#include "gr_ctx_gk20a.h" +#include "hw_gr_gk20a.h" + +static int gr_gk20a_alloc_load_netlist_u32(u32 *src, u32 len, + struct u32_list_gk20a *u32_list) +{ + u32_list->count = (len + sizeof(u32) - 1) / sizeof(u32); + if (!alloc_u32_list_gk20a(u32_list)) + return -ENOMEM; + + memcpy(u32_list->l, src, len); + + return 0; +} + +static int gr_gk20a_alloc_load_netlist_av(u32 *src, u32 len, + struct av_list_gk20a *av_list) +{ + av_list->count = len / sizeof(struct av_gk20a); + if (!alloc_av_list_gk20a(av_list)) + return -ENOMEM; + + memcpy(av_list->l, src, len); + + return 0; +} + +static int gr_gk20a_alloc_load_netlist_aiv(u32 *src, u32 len, + struct aiv_list_gk20a *aiv_list) +{ + aiv_list->count = len / sizeof(struct aiv_gk20a); + if (!alloc_aiv_list_gk20a(aiv_list)) + return -ENOMEM; + + memcpy(aiv_list->l, src, len); + + return 0; +} + +static int gr_gk20a_get_netlist_name(int index, char *name) +{ + switch (index) { +#ifdef GK20A_NETLIST_IMAGE_FW_NAME + case NETLIST_FINAL: + sprintf(name, GK20A_NETLIST_IMAGE_FW_NAME); + return 0; +#endif +#ifdef GK20A_NETLIST_IMAGE_A + case NETLIST_SLOT_A: + sprintf(name, GK20A_NETLIST_IMAGE_A); + return 0; +#endif +#ifdef GK20A_NETLIST_IMAGE_B + case NETLIST_SLOT_B: + sprintf(name, GK20A_NETLIST_IMAGE_B); + return 0; +#endif +#ifdef GK20A_NETLIST_IMAGE_C + case NETLIST_SLOT_C: + sprintf(name, GK20A_NETLIST_IMAGE_C); + return 0; +#endif +#ifdef GK20A_NETLIST_IMAGE_D + case NETLIST_SLOT_D: + sprintf(name, GK20A_NETLIST_IMAGE_D); + return 0; +#endif + default: + return -1; + } + + return -1; +} + +static int gr_gk20a_init_ctx_vars_fw(struct gk20a *g, struct gr_gk20a *gr) +{ + struct device *d = dev_from_gk20a(g); + const struct firmware *netlist_fw; + struct netlist_image *netlist = NULL; + char name[MAX_NETLIST_NAME]; + u32 i, major_v = ~0, major_v_hw, netlist_num; + int net, max, err = -ENOENT; + + gk20a_dbg_fn(""); + +#ifdef GK20A_NETLIST_IMAGE_FW_NAME + net = NETLIST_FINAL; + max = 0; + major_v_hw = ~0; + g->gr.ctx_vars.dynamic = false; +#else + net = NETLIST_SLOT_A; + max = MAX_NETLIST; + major_v_hw = gk20a_readl(g, gr_fecs_ctx_state_store_major_rev_id_r()); + g->gr.ctx_vars.dynamic = true; +#endif + + for (; net < max; net++) { + + if (gr_gk20a_get_netlist_name(net, name) != 0) { + gk20a_warn(d, "invalid netlist index %d", net); + continue; + } + + netlist_fw = gk20a_request_firmware(g, name); + if (!netlist_fw) { + gk20a_warn(d, "failed to load netlist %s", name); + continue; + } + + netlist = (struct netlist_image *)netlist_fw->data; + + for (i = 0; i < netlist->header.regions; i++) { + u32 *src = (u32 *)((u8 *)netlist + netlist->regions[i].data_offset); + u32 size = netlist->regions[i].data_size; + + switch (netlist->regions[i].region_id) { + case NETLIST_REGIONID_FECS_UCODE_DATA: + gk20a_dbg_info("NETLIST_REGIONID_FECS_UCODE_DATA"); + err = gr_gk20a_alloc_load_netlist_u32( + src, size, &g->gr.ctx_vars.ucode.fecs.data); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_FECS_UCODE_INST: + gk20a_dbg_info("NETLIST_REGIONID_FECS_UCODE_INST"); + err = gr_gk20a_alloc_load_netlist_u32( + src, size, &g->gr.ctx_vars.ucode.fecs.inst); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_GPCCS_UCODE_DATA: + gk20a_dbg_info("NETLIST_REGIONID_GPCCS_UCODE_DATA"); + err = gr_gk20a_alloc_load_netlist_u32( + src, size, &g->gr.ctx_vars.ucode.gpccs.data); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_GPCCS_UCODE_INST: + gk20a_dbg_info("NETLIST_REGIONID_GPCCS_UCODE_INST"); + err = 
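gr_gk20a_alloc_load_netlist_u32() above sizes its destination list by rounding the region's byte length up to whole 32-bit words before copying. A standalone sketch of that rounding:

#include <assert.h>
#include <stdint.h>

static uint32_t bytes_to_words(uint32_t len)
{
	/* same ceiling division as the netlist u32 loader above */
	return (len + sizeof(uint32_t) - 1) / sizeof(uint32_t);
}

int main(void)
{
	assert(bytes_to_words(0) == 0);
	assert(bytes_to_words(1) == 1);	/* a partial word still needs a slot */
	assert(bytes_to_words(4) == 1);
	assert(bytes_to_words(5) == 2);
	return 0;
}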
gr_gk20a_alloc_load_netlist_u32( + src, size, &g->gr.ctx_vars.ucode.gpccs.inst); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_SW_BUNDLE_INIT: + gk20a_dbg_info("NETLIST_REGIONID_SW_BUNDLE_INIT"); + err = gr_gk20a_alloc_load_netlist_av( + src, size, &g->gr.ctx_vars.sw_bundle_init); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_SW_METHOD_INIT: + gk20a_dbg_info("NETLIST_REGIONID_SW_METHOD_INIT"); + err = gr_gk20a_alloc_load_netlist_av( + src, size, &g->gr.ctx_vars.sw_method_init); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_SW_CTX_LOAD: + gk20a_dbg_info("NETLIST_REGIONID_SW_CTX_LOAD"); + err = gr_gk20a_alloc_load_netlist_aiv( + src, size, &g->gr.ctx_vars.sw_ctx_load); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_SW_NON_CTX_LOAD: + gk20a_dbg_info("NETLIST_REGIONID_SW_NON_CTX_LOAD"); + err = gr_gk20a_alloc_load_netlist_av( + src, size, &g->gr.ctx_vars.sw_non_ctx_load); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_CTXREG_SYS: + gk20a_dbg_info("NETLIST_REGIONID_CTXREG_SYS"); + err = gr_gk20a_alloc_load_netlist_aiv( + src, size, &g->gr.ctx_vars.ctxsw_regs.sys); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_CTXREG_GPC: + gk20a_dbg_info("NETLIST_REGIONID_CTXREG_GPC"); + err = gr_gk20a_alloc_load_netlist_aiv( + src, size, &g->gr.ctx_vars.ctxsw_regs.gpc); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_CTXREG_TPC: + gk20a_dbg_info("NETLIST_REGIONID_CTXREG_TPC"); + err = gr_gk20a_alloc_load_netlist_aiv( + src, size, &g->gr.ctx_vars.ctxsw_regs.tpc); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_CTXREG_ZCULL_GPC: + gk20a_dbg_info("NETLIST_REGIONID_CTXREG_ZCULL_GPC"); + err = gr_gk20a_alloc_load_netlist_aiv( + src, size, &g->gr.ctx_vars.ctxsw_regs.zcull_gpc); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_CTXREG_PPC: + gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PPC"); + err = gr_gk20a_alloc_load_netlist_aiv( + src, size, &g->gr.ctx_vars.ctxsw_regs.ppc); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_CTXREG_PM_SYS: + gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PM_SYS"); + err = gr_gk20a_alloc_load_netlist_aiv( + src, size, &g->gr.ctx_vars.ctxsw_regs.pm_sys); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_CTXREG_PM_GPC: + gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PM_GPC"); + err = gr_gk20a_alloc_load_netlist_aiv( + src, size, &g->gr.ctx_vars.ctxsw_regs.pm_gpc); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_CTXREG_PM_TPC: + gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PM_TPC"); + err = gr_gk20a_alloc_load_netlist_aiv( + src, size, &g->gr.ctx_vars.ctxsw_regs.pm_tpc); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_BUFFER_SIZE: + g->gr.ctx_vars.buffer_size = *src; + gk20a_dbg_info("NETLIST_REGIONID_BUFFER_SIZE : %d", + g->gr.ctx_vars.buffer_size); + break; + case NETLIST_REGIONID_CTXSW_REG_BASE_INDEX: + g->gr.ctx_vars.regs_base_index = *src; + gk20a_dbg_info("NETLIST_REGIONID_CTXSW_REG_BASE_INDEX : %d", + g->gr.ctx_vars.regs_base_index); + break; + case NETLIST_REGIONID_MAJORV: + major_v = *src; + gk20a_dbg_info("NETLIST_REGIONID_MAJORV : %d", + major_v); + break; + case NETLIST_REGIONID_NETLIST_NUM: + netlist_num = *src; + gk20a_dbg_info("NETLIST_REGIONID_NETLIST_NUM : %d", + netlist_num); + break; + case NETLIST_REGIONID_CTXREG_PMPPC: + gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMPPC skipped"); + break; + default: + gk20a_warn(d, "unrecognized region %d skipped", i); + break; + } + } + + if (net != NETLIST_FINAL && major_v != 
major_v_hw) { + gk20a_dbg_info("skip %s: major_v 0x%08x doesn't match hw 0x%08x", + name, major_v, major_v_hw); + goto clean_up; + } + + g->gr.ctx_vars.valid = true; + g->gr.netlist = net; + + release_firmware(netlist_fw); + gk20a_dbg_fn("done"); + goto done; + +clean_up: + kfree(g->gr.ctx_vars.ucode.fecs.inst.l); + kfree(g->gr.ctx_vars.ucode.fecs.data.l); + kfree(g->gr.ctx_vars.ucode.gpccs.inst.l); + kfree(g->gr.ctx_vars.ucode.gpccs.data.l); + kfree(g->gr.ctx_vars.sw_bundle_init.l); + kfree(g->gr.ctx_vars.sw_method_init.l); + kfree(g->gr.ctx_vars.sw_ctx_load.l); + kfree(g->gr.ctx_vars.sw_non_ctx_load.l); + kfree(g->gr.ctx_vars.ctxsw_regs.sys.l); + kfree(g->gr.ctx_vars.ctxsw_regs.gpc.l); + kfree(g->gr.ctx_vars.ctxsw_regs.tpc.l); + kfree(g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l); + kfree(g->gr.ctx_vars.ctxsw_regs.ppc.l); + kfree(g->gr.ctx_vars.ctxsw_regs.pm_sys.l); + kfree(g->gr.ctx_vars.ctxsw_regs.pm_gpc.l); + kfree(g->gr.ctx_vars.ctxsw_regs.pm_tpc.l); + release_firmware(netlist_fw); + err = -ENOENT; + } + +done: + if (g->gr.ctx_vars.valid) { + gk20a_dbg_info("netlist image %s loaded", name); + return 0; + } else { + gk20a_err(d, "failed to load netlist image!!"); + return err; + } +} + +int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr) +{ + if (tegra_platform_is_linsim()) + return gr_gk20a_init_ctx_vars_sim(g, gr); + else + return gr_gk20a_init_ctx_vars_fw(g, gr); +} diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h new file mode 100644 index 00000000..909a166a --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h @@ -0,0 +1,149 @@ +/* + * GK20A Graphics Context + * + * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#ifndef __GR_CTX_GK20A_H__ +#define __GR_CTX_GK20A_H__ + + +/* production netlist, one and only one from below */ +/*#undef GK20A_NETLIST_IMAGE_FW_NAME*/ +#define GK20A_NETLIST_IMAGE_FW_NAME GK20A_NETLIST_IMAGE_B +/* emulation netlists, match majorV with HW */ +#define GK20A_NETLIST_IMAGE_A "NETA_img.bin" +#define GK20A_NETLIST_IMAGE_B "NETB_img.bin" +#define GK20A_NETLIST_IMAGE_C "NETC_img.bin" +#define GK20A_NETLIST_IMAGE_D "NETD_img.bin" + +union __max_name { +#ifdef GK20A_NETLIST_IMAGE_A + char __name_a[sizeof(GK20A_NETLIST_IMAGE_A)]; +#endif +#ifdef GK20A_NETLIST_IMAGE_B + char __name_b[sizeof(GK20A_NETLIST_IMAGE_B)]; +#endif +#ifdef GK20A_NETLIST_IMAGE_C + char __name_c[sizeof(GK20A_NETLIST_IMAGE_C)]; +#endif +#ifdef GK20A_NETLIST_IMAGE_D + char __name_d[sizeof(GK20A_NETLIST_IMAGE_D)]; +#endif +}; + +#define MAX_NETLIST_NAME sizeof(union __max_name) + +/* index for emulation netlists */ +#define NETLIST_FINAL -1 +#define NETLIST_SLOT_A 0 +#define NETLIST_SLOT_B 1 +#define NETLIST_SLOT_C 2 +#define NETLIST_SLOT_D 3 +#define MAX_NETLIST 4 + +/* netlist regions */ +#define NETLIST_REGIONID_FECS_UCODE_DATA 0 +#define NETLIST_REGIONID_FECS_UCODE_INST 1 +#define NETLIST_REGIONID_GPCCS_UCODE_DATA 2 +#define NETLIST_REGIONID_GPCCS_UCODE_INST 3 +#define NETLIST_REGIONID_SW_BUNDLE_INIT 4 +#define NETLIST_REGIONID_SW_CTX_LOAD 5 +#define NETLIST_REGIONID_SW_NON_CTX_LOAD 6 +#define NETLIST_REGIONID_SW_METHOD_INIT 7 +#define NETLIST_REGIONID_CTXREG_SYS 8 +#define NETLIST_REGIONID_CTXREG_GPC 9 +#define NETLIST_REGIONID_CTXREG_TPC 10 +#define NETLIST_REGIONID_CTXREG_ZCULL_GPC 11 +#define NETLIST_REGIONID_CTXREG_PM_SYS 12 +#define NETLIST_REGIONID_CTXREG_PM_GPC 13 +#define NETLIST_REGIONID_CTXREG_PM_TPC 14 +#define NETLIST_REGIONID_MAJORV 15 +#define NETLIST_REGIONID_BUFFER_SIZE 16 +#define NETLIST_REGIONID_CTXSW_REG_BASE_INDEX 17 +#define NETLIST_REGIONID_NETLIST_NUM 18 +#define NETLIST_REGIONID_CTXREG_PPC 19 +#define NETLIST_REGIONID_CTXREG_PMPPC 20 + +struct netlist_region { + u32 region_id; + u32 data_size; + u32 data_offset; +}; + +struct netlist_image_header { + u32 version; + u32 regions; +}; + +struct netlist_image { + struct netlist_image_header header; + struct netlist_region regions[1]; +}; + +struct av_gk20a { + u32 addr; + u32 value; +}; +struct aiv_gk20a { + u32 addr; + u32 index; + u32 value; +}; +struct aiv_list_gk20a { + struct aiv_gk20a *l; + u32 count; +}; +struct av_list_gk20a { + struct av_gk20a *l; + u32 count; +}; +struct u32_list_gk20a { + u32 *l; + u32 count; +}; + +static inline +struct av_gk20a *alloc_av_list_gk20a(struct av_list_gk20a *avl) +{ + avl->l = kzalloc(avl->count * sizeof(*avl->l), GFP_KERNEL); + return avl->l; +} + +static inline +struct aiv_gk20a *alloc_aiv_list_gk20a(struct aiv_list_gk20a *aivl) +{ + aivl->l = kzalloc(aivl->count * sizeof(*aivl->l), GFP_KERNEL); + return aivl->l; +} + +static inline +u32 *alloc_u32_list_gk20a(struct u32_list_gk20a *u32l) +{ + u32l->l = kzalloc(u32l->count * sizeof(*u32l->l), GFP_KERNEL); + return u32l->l; +} + +struct gr_ucode_gk20a { + struct { + struct u32_list_gk20a inst; + struct u32_list_gk20a data; + } gpccs, fecs; +}; + +/* main entry for grctx loading */ +int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr); +int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr); + +#endif /*__GR_CTX_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c new file mode 100644 index 00000000..12bba1fd --- /dev/null +++ 
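MAX_NETLIST_NAME in the header above relies on a union being as large as its largest member, so it evaluates at compile time to the length of the longest netlist file name that is configured in. A standalone sketch of the same trick with hypothetical names:

#include <assert.h>

#define NAME_A "NETA_img.bin"			/* 13 bytes with NUL */
#define NAME_LONG "a_longer_netlist_name.bin"	/* hypothetical, 26 bytes */

union max_name {
	char a[sizeof(NAME_A)];
	char b[sizeof(NAME_LONG)];
};

int main(void)
{
	char name[sizeof(union max_name)];	/* big enough for either name */

	/* the union is as large as its largest member */
	assert(sizeof(name) == sizeof(NAME_LONG));
	(void)name;
	return 0;
}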
b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c @@ -0,0 +1,256 @@ +/* + * drivers/video/tegra/host/gk20a/gr_ctx_sim_gk20a.c + * + * GK20A Graphics Context for Simulation + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "gk20a.h" +#include "gr_ctx_gk20a.h" + +int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr) +{ + int err = 0; + u32 i, temp; + char *size_path = NULL; + char *reg_path = NULL; + char *value_path = NULL; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, + "querying grctx info from chiplib"); + + g->gr.ctx_vars.dynamic = true; + g->gr.netlist = GR_NETLIST_DYNAMIC; + + /* query sizes and counts */ + gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_FECS_COUNT", 0, + &g->gr.ctx_vars.ucode.fecs.inst.count); + gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_FECS_COUNT", 0, + &g->gr.ctx_vars.ucode.fecs.data.count); + gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_GPCCS_COUNT", 0, + &g->gr.ctx_vars.ucode.gpccs.inst.count); + gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_GPCCS_COUNT", 0, + &g->gr.ctx_vars.ucode.gpccs.data.count); + gk20a_sim_esc_readl(g, "GRCTX_ALL_CTX_TOTAL_WORDS", 0, &temp); + g->gr.ctx_vars.buffer_size = temp << 2; + gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT_SIZE", 0, + &g->gr.ctx_vars.sw_bundle_init.count); + gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT_SIZE", 0, + &g->gr.ctx_vars.sw_method_init.count); + gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD_SIZE", 0, + &g->gr.ctx_vars.sw_ctx_load.count); + + switch (0) { /*g->gr.ctx_vars.reg_init_override)*/ +#if 0 + case NV_REG_STR_RM_GR_REG_INIT_OVERRIDE_PROD_DIFF: + sizePath = "GRCTX_NONCTXSW_PROD_DIFF_REG_SIZE"; + regPath = "GRCTX_NONCTXSW_PROD_DIFF_REG:REG"; + valuePath = "GRCTX_NONCTXSW_PROD_DIFF_REG:VALUE"; + break; +#endif + default: + size_path = "GRCTX_NONCTXSW_REG_SIZE"; + reg_path = "GRCTX_NONCTXSW_REG:REG"; + value_path = "GRCTX_NONCTXSW_REG:VALUE"; + break; + } + + gk20a_sim_esc_readl(g, size_path, 0, + &g->gr.ctx_vars.sw_non_ctx_load.count); + + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS_COUNT", 0, + &g->gr.ctx_vars.ctxsw_regs.sys.count); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC_COUNT", 0, + &g->gr.ctx_vars.ctxsw_regs.gpc.count); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC_COUNT", 0, + &g->gr.ctx_vars.ctxsw_regs.tpc.count); +#if 0 + /* looks to be unused, actually chokes the sim */ + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC_COUNT", 0, + &g->gr.ctx_vars.ctxsw_regs.ppc.count); +#endif + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC_COUNT", 0, + &g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS_COUNT", 0, + &g->gr.ctx_vars.ctxsw_regs.pm_sys.count); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC_COUNT", 0, + &g->gr.ctx_vars.ctxsw_regs.pm_gpc.count); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC_COUNT", 0, + &g->gr.ctx_vars.ctxsw_regs.pm_tpc.count); + + err |= 
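The allocation block that follows ORs the logical negation of each list allocator's return value into err, so a single check after the last allocation is enough to detect any failed kzalloc. A standalone sketch of the idiom with plain malloc:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int err = 0;
	void *a, *b, *c;

	/* accumulate failures: !ptr is 1 exactly when the allocation failed */
	err |= !(a = malloc(16));
	err |= !(b = malloc(32));
	err |= !(c = malloc(64));

	if (err) {
		/* at least one allocation returned NULL */
		free(a);	/* free(NULL) is a no-op, so this is safe */
		free(b);
		free(c);
		return 1;
	}

	printf("all allocations succeeded\n");
	free(a);
	free(b);
	free(c);
	return 0;
}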
!alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.fecs.inst); + err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.fecs.data); + err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.gpccs.inst); + err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.gpccs.data); + err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_bundle_init); + err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_method_init); + err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.sw_ctx_load); + err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_non_ctx_load); + err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.sys); + err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.gpc); + err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.tpc); + err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.zcull_gpc); + err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.ppc); + err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_sys); + err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_gpc); + err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_tpc); + + if (err) + goto fail; + + for (i = 0; i < g->gr.ctx_vars.ucode.fecs.inst.count; i++) + gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_FECS", + i, &g->gr.ctx_vars.ucode.fecs.inst.l[i]); + + for (i = 0; i < g->gr.ctx_vars.ucode.fecs.data.count; i++) + gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_FECS", + i, &g->gr.ctx_vars.ucode.fecs.data.l[i]); + + for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.inst.count; i++) + gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_GPCCS", + i, &g->gr.ctx_vars.ucode.gpccs.inst.l[i]); + + for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.data.count; i++) + gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_GPCCS", + i, &g->gr.ctx_vars.ucode.gpccs.data.l[i]); + + for (i = 0; i < g->gr.ctx_vars.sw_bundle_init.count; i++) { + struct av_gk20a *l = g->gr.ctx_vars.sw_bundle_init.l; + gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT:ADDR", + i, &l[i].addr); + gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT:VALUE", + i, &l[i].value); + } + + for (i = 0; i < g->gr.ctx_vars.sw_method_init.count; i++) { + struct av_gk20a *l = g->gr.ctx_vars.sw_method_init.l; + gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT:ADDR", + i, &l[i].addr); + gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT:VALUE", + i, &l[i].value); + } + + for (i = 0; i < g->gr.ctx_vars.sw_ctx_load.count; i++) { + struct aiv_gk20a *l = g->gr.ctx_vars.sw_ctx_load.l; + gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:ADDR", + i, &l[i].addr); + gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:INDEX", + i, &l[i].index); + gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:VALUE", + i, &l[i].value); + } + + for (i = 0; i < g->gr.ctx_vars.sw_non_ctx_load.count; i++) { + struct av_gk20a *l = g->gr.ctx_vars.sw_non_ctx_load.l; + gk20a_sim_esc_readl(g, reg_path, i, &l[i].addr); + gk20a_sim_esc_readl(g, value_path, i, &l[i].value); + } + + for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) { + struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.sys.l; + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:ADDR", + i, &l[i].addr); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:INDEX", + i, &l[i].index); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:VALUE", + i, &l[i].value); + } + + for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) { + struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.gpc.l; + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:ADDR", + i, &l[i].addr); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:INDEX", + i, &l[i].index); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:VALUE", + i, &l[i].value); + } + + for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) { + struct aiv_gk20a 
*l = g->gr.ctx_vars.ctxsw_regs.tpc.l; + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:ADDR", + i, &l[i].addr); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:INDEX", + i, &l[i].index); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:VALUE", + i, &l[i].value); + } + + for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) { + struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.ppc.l; + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:ADDR", + i, &l[i].addr); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:INDEX", + i, &l[i].index); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:VALUE", + i, &l[i].value); + } + + for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count; i++) { + struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l; + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:ADDR", + i, &l[i].addr); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:INDEX", + i, &l[i].index); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:VALUE", + i, &l[i].value); + } + + for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_sys.count; i++) { + struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_sys.l; + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:ADDR", + i, &l[i].addr); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:INDEX", + i, &l[i].index); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:VALUE", + i, &l[i].value); + } + + for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_gpc.count; i++) { + struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_gpc.l; + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:ADDR", + i, &l[i].addr); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:INDEX", + i, &l[i].index); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:VALUE", + i, &l[i].value); + } + + for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_tpc.count; i++) { + struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_tpc.l; + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:ADDR", + i, &l[i].addr); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:INDEX", + i, &l[i].index); + gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:VALUE", + i, &l[i].value); + } + + g->gr.ctx_vars.valid = true; + + gk20a_sim_esc_readl(g, "GRCTX_GEN_CTX_REGS_BASE_INDEX", 0, + &g->gr.ctx_vars.regs_base_index); + + gk20a_dbg(gpu_dbg_info | gpu_dbg_fn, "finished querying grctx info from chiplib"); + return 0; +fail: + gk20a_err(dev_from_gk20a(g), + "failed querying grctx info from chiplib"); + return err; + +} + diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c new file mode 100644 index 00000000..0f93940b --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -0,0 +1,6747 @@ +/* + * GK20A Graphics + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include /* for udelay */ +#include /* for totalram_pages */ +#include +#include +#include +#include +#include +#include +#include + +#include "gk20a.h" +#include "kind_gk20a.h" +#include "gr_ctx_gk20a.h" + +#include "hw_ccsr_gk20a.h" +#include "hw_ctxsw_prog_gk20a.h" +#include "hw_fifo_gk20a.h" +#include "hw_gr_gk20a.h" +#include "hw_gmmu_gk20a.h" +#include "hw_mc_gk20a.h" +#include "hw_ram_gk20a.h" +#include "hw_pri_ringmaster_gk20a.h" +#include "hw_pri_ringstation_sys_gk20a.h" +#include "hw_pri_ringstation_gpc_gk20a.h" +#include "hw_pri_ringstation_fbp_gk20a.h" +#include "hw_proj_gk20a.h" +#include "hw_top_gk20a.h" +#include "hw_ltc_gk20a.h" +#include "hw_fb_gk20a.h" +#include "hw_therm_gk20a.h" +#include "hw_pbdma_gk20a.h" +#include "gr_pri_gk20a.h" +#include "regops_gk20a.h" +#include "dbg_gpu_gk20a.h" + +#define BLK_SIZE (256) + +static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va); + +/* global ctx buffer */ +static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g); +static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g); +static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, + struct channel_gk20a *c); +static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c); + +/* channel gr ctx buffer */ +static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, + struct channel_gk20a *c); +static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c); + +/* channel patch ctx buffer */ +static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, + struct channel_gk20a *c); +static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c); + +/* golden ctx image */ +static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, + struct channel_gk20a *c); +static int gr_gk20a_load_golden_ctx_image(struct gk20a *g, + struct channel_gk20a *c); + +void gk20a_fecs_dump_falcon_stats(struct gk20a *g) +{ + int i; + + gk20a_err(dev_from_gk20a(g), "gr_fecs_os_r : %d", + gk20a_readl(g, gr_fecs_os_r())); + gk20a_err(dev_from_gk20a(g), "gr_fecs_cpuctl_r : 0x%x", + gk20a_readl(g, gr_fecs_cpuctl_r())); + gk20a_err(dev_from_gk20a(g), "gr_fecs_idlestate_r : 0x%x", + gk20a_readl(g, gr_fecs_idlestate_r())); + gk20a_err(dev_from_gk20a(g), "gr_fecs_mailbox0_r : 0x%x", + gk20a_readl(g, gr_fecs_mailbox0_r())); + gk20a_err(dev_from_gk20a(g), "gr_fecs_mailbox1_r : 0x%x", + gk20a_readl(g, gr_fecs_mailbox1_r())); + gk20a_err(dev_from_gk20a(g), "gr_fecs_irqstat_r : 0x%x", + gk20a_readl(g, gr_fecs_irqstat_r())); + gk20a_err(dev_from_gk20a(g), "gr_fecs_irqmode_r : 0x%x", + gk20a_readl(g, gr_fecs_irqmode_r())); + gk20a_err(dev_from_gk20a(g), "gr_fecs_irqmask_r : 0x%x", + gk20a_readl(g, gr_fecs_irqmask_r())); + gk20a_err(dev_from_gk20a(g), "gr_fecs_irqdest_r : 0x%x", + gk20a_readl(g, gr_fecs_irqdest_r())); + gk20a_err(dev_from_gk20a(g), "gr_fecs_debug1_r : 0x%x", + gk20a_readl(g, gr_fecs_debug1_r())); + gk20a_err(dev_from_gk20a(g), "gr_fecs_debuginfo_r : 0x%x", + gk20a_readl(g, gr_fecs_debuginfo_r())); + + for (i = 0; i < gr_fecs_ctxsw_mailbox__size_1_v(); i++) + gk20a_err(dev_from_gk20a(g), "gr_fecs_ctxsw_mailbox_r(%d) : 0x%x", + i, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(i))); + + gk20a_err(dev_from_gk20a(g), "gr_fecs_engctl_r : 0x%x", + gk20a_readl(g, gr_fecs_engctl_r())); + gk20a_err(dev_from_gk20a(g), "gr_fecs_curctx_r : 0x%x", + gk20a_readl(g, gr_fecs_curctx_r())); + gk20a_err(dev_from_gk20a(g), "gr_fecs_nxtctx_r : 0x%x", + gk20a_readl(g, gr_fecs_nxtctx_r())); + + gk20a_writel(g, gr_fecs_icd_cmd_r(), + gr_fecs_icd_cmd_opc_rreg_f() | + 
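The register dumps here go through the FECS ICD interface: an rreg command selecting a Falcon-internal register index is written to gr_fecs_icd_cmd_r(), and the value is then read back from gr_fecs_icd_rdata_r(). A sketch of that two-step sequence folded into one helper; the wrapper is illustrative only and is not defined in this patch, which inlines both steps at each call site.

/* Illustrative ICD read helper (not part of the patch). */
static u32 gr_fecs_icd_read(struct gk20a *g, u32 reg_idx)
{
	/* select the Falcon register to read ... */
	gk20a_writel(g, gr_fecs_icd_cmd_r(),
		     gr_fecs_icd_cmd_opc_rreg_f() |
		     gr_fecs_icd_cmd_idx_f(reg_idx));
	/* ... then fetch its value from the data register */
	return gk20a_readl(g, gr_fecs_icd_rdata_r());
}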
gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_IMB)); + gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_IMB : 0x%x", + gk20a_readl(g, gr_fecs_icd_rdata_r())); + + gk20a_writel(g, gr_fecs_icd_cmd_r(), + gr_fecs_icd_cmd_opc_rreg_f() | + gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_DMB)); + gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_DMB : 0x%x", + gk20a_readl(g, gr_fecs_icd_rdata_r())); + + gk20a_writel(g, gr_fecs_icd_cmd_r(), + gr_fecs_icd_cmd_opc_rreg_f() | + gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_CSW)); + gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_CSW : 0x%x", + gk20a_readl(g, gr_fecs_icd_rdata_r())); + + gk20a_writel(g, gr_fecs_icd_cmd_r(), + gr_fecs_icd_cmd_opc_rreg_f() | + gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_CTX)); + gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_CTX : 0x%x", + gk20a_readl(g, gr_fecs_icd_rdata_r())); + + gk20a_writel(g, gr_fecs_icd_cmd_r(), + gr_fecs_icd_cmd_opc_rreg_f() | + gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_EXCI)); + gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_EXCI : 0x%x", + gk20a_readl(g, gr_fecs_icd_rdata_r())); + + for (i = 0; i < 4; i++) { + gk20a_writel(g, gr_fecs_icd_cmd_r(), + gr_fecs_icd_cmd_opc_rreg_f() | + gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_PC)); + gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_PC : 0x%x", + gk20a_readl(g, gr_fecs_icd_rdata_r())); + + gk20a_writel(g, gr_fecs_icd_cmd_r(), + gr_fecs_icd_cmd_opc_rreg_f() | + gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_SP)); + gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_SP : 0x%x", + gk20a_readl(g, gr_fecs_icd_rdata_r())); + } +} + +static void gr_gk20a_load_falcon_dmem(struct gk20a *g) +{ + u32 i, ucode_u32_size; + const u32 *ucode_u32_data; + u32 checksum; + + gk20a_dbg_fn(""); + + gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) | + gr_gpccs_dmemc_blk_f(0) | + gr_gpccs_dmemc_aincw_f(1))); + + ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.data.count; + ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.data.l; + + for (i = 0, checksum = 0; i < ucode_u32_size; i++) { + gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]); + checksum += ucode_u32_data[i]; + } + + gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) | + gr_fecs_dmemc_blk_f(0) | + gr_fecs_dmemc_aincw_f(1))); + + ucode_u32_size = g->gr.ctx_vars.ucode.fecs.data.count; + ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.data.l; + + for (i = 0, checksum = 0; i < ucode_u32_size; i++) { + gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]); + checksum += ucode_u32_data[i]; + } + gk20a_dbg_fn("done"); +} + +static void gr_gk20a_load_falcon_imem(struct gk20a *g) +{ + u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size; + const u32 *ucode_u32_data; + u32 tag, i, pad_start, pad_end; + u32 checksum; + + gk20a_dbg_fn(""); + + cfg = gk20a_readl(g, gr_fecs_cfg_r()); + fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg); + + cfg = gk20a_readl(g, gr_gpc0_cfg_r()); + gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg); + + /* Use the broadcast address to access all of the GPCCS units. */ + gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) | + gr_gpccs_imemc_blk_f(0) | + gr_gpccs_imemc_aincw_f(1))); + + /* Setup the tags for the instruction memory. 
*/ + tag = 0; + gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag)); + + ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.inst.count; + ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.inst.l; + + for (i = 0, checksum = 0; i < ucode_u32_size; i++) { + if (i && ((i % (256/sizeof(u32))) == 0)) { + tag++; + gk20a_writel(g, gr_gpccs_imemt_r(0), + gr_gpccs_imemt_tag_f(tag)); + } + gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]); + checksum += ucode_u32_data[i]; + } + + pad_start = i*4; + pad_end = pad_start+(256-pad_start%256)+256; + for (i = pad_start; + (i < gpccs_imem_size * 256) && (i < pad_end); + i += 4) { + if (i && ((i % 256) == 0)) { + tag++; + gk20a_writel(g, gr_gpccs_imemt_r(0), + gr_gpccs_imemt_tag_f(tag)); + } + gk20a_writel(g, gr_gpccs_imemd_r(0), 0); + } + + gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) | + gr_fecs_imemc_blk_f(0) | + gr_fecs_imemc_aincw_f(1))); + + /* Setup the tags for the instruction memory. */ + tag = 0; + gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag)); + + ucode_u32_size = g->gr.ctx_vars.ucode.fecs.inst.count; + ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.inst.l; + + for (i = 0, checksum = 0; i < ucode_u32_size; i++) { + if (i && ((i % (256/sizeof(u32))) == 0)) { + tag++; + gk20a_writel(g, gr_fecs_imemt_r(0), + gr_fecs_imemt_tag_f(tag)); + } + gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]); + checksum += ucode_u32_data[i]; + } + + pad_start = i*4; + pad_end = pad_start+(256-pad_start%256)+256; + for (i = pad_start; (i < fecs_imem_size * 256) && i < pad_end; i += 4) { + if (i && ((i % 256) == 0)) { + tag++; + gk20a_writel(g, gr_fecs_imemt_r(0), + gr_fecs_imemt_tag_f(tag)); + } + gk20a_writel(g, gr_fecs_imemd_r(0), 0); + } +} + +static int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, + u32 expect_delay) +{ + u32 delay = expect_delay; + bool gr_enabled; + bool ctxsw_active; + bool gr_busy; + + gk20a_dbg_fn(""); + + do { + /* fmodel: host gets fifo_engine_status(gr) from gr + only when gr_status is read */ + gk20a_readl(g, gr_status_r()); + + gr_enabled = gk20a_readl(g, mc_enable_r()) & + mc_enable_pgraph_enabled_f(); + + ctxsw_active = gk20a_readl(g, + fifo_engine_status_r(ENGINE_GR_GK20A)) & + fifo_engine_status_ctxsw_in_progress_f(); + + gr_busy = gk20a_readl(g, gr_engine_status_r()) & + gr_engine_status_value_busy_f(); + + if (!gr_enabled || (!gr_busy && !ctxsw_active)) { + gk20a_dbg_fn("done"); + return 0; + } + + usleep_range(delay, delay * 2); + delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); + + } while (time_before(jiffies, end_jiffies) + || !tegra_platform_is_silicon()); + + gk20a_err(dev_from_gk20a(g), + "timeout, ctxsw busy : %d, gr busy : %d", + ctxsw_active, gr_busy); + + return -EAGAIN; +} + +static int gr_gk20a_ctx_reset(struct gk20a *g, u32 rst_mask) +{ + u32 delay = GR_IDLE_CHECK_DEFAULT; + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + u32 reg; + + gk20a_dbg_fn(""); + + if (!tegra_platform_is_linsim()) { + /* Force clocks on */ + gk20a_writel(g, gr_fe_pwr_mode_r(), + gr_fe_pwr_mode_req_send_f() | + gr_fe_pwr_mode_mode_force_on_f()); + + /* Wait for the clocks to indicate that they are on */ + do { + reg = gk20a_readl(g, gr_fe_pwr_mode_r()); + + if (gr_fe_pwr_mode_req_v(reg) == + gr_fe_pwr_mode_req_done_v()) + break; + + usleep_range(delay, delay * 2); + delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); + + } while (time_before(jiffies, end_jiffies)); + + if (!time_before(jiffies, end_jiffies)) { + 
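gr_gk20a_wait_idle() and the power-mode waits in gr_gk20a_ctx_reset() all poll with a sleep interval that doubles on every iteration and is capped by GR_IDLE_CHECK_MAX. A standalone sketch of how that delay sequence progresses; the microsecond values are assumptions, not the driver's actual GR_IDLE_CHECK_* constants.

#include <stdio.h>

#define CHECK_DEFAULT_US 10	/* assumed starting delay */
#define CHECK_MAX_US 200	/* assumed cap */

int main(void)
{
	unsigned int delay = CHECK_DEFAULT_US;
	int i;

	/* prints 10, 20, 40, 80, 160, 200, 200, 200 */
	for (i = 0; i < 8; i++) {
		printf("poll %d: sleep %u..%u us\n", i, delay, delay * 2);
		/* min_t(u32, delay << 1, GR_IDLE_CHECK_MAX) in the driver */
		delay <<= 1;
		if (delay > CHECK_MAX_US)
			delay = CHECK_MAX_US;
	}
	return 0;
}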
gk20a_err(dev_from_gk20a(g), + "failed to force the clocks on\n"); + WARN_ON(1); + } + } + if (rst_mask) { + gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(), rst_mask); + } else { + gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(), + gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() | + gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() | + gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() | + gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() | + gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() | + gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() | + gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f() | + gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f() | + gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f()); + } + + /* we need to read the reset register *and* wait for a moment to ensure + * reset propagation */ + + gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r()); + udelay(20); + + gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(), + gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() | + gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() | + gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() | + gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() | + gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() | + gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() | + gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f() | + gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f() | + gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f()); + + /* we need to readl the reset and then wait a small moment after that */ + gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r()); + udelay(20); + + if (!tegra_platform_is_linsim()) { + /* Set power mode back to auto */ + gk20a_writel(g, gr_fe_pwr_mode_r(), + gr_fe_pwr_mode_req_send_f() | + gr_fe_pwr_mode_mode_auto_f()); + + /* Wait for the request to complete */ + end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + do { + reg = gk20a_readl(g, gr_fe_pwr_mode_r()); + + if (gr_fe_pwr_mode_req_v(reg) == + gr_fe_pwr_mode_req_done_v()) + break; + + usleep_range(delay, delay * 2); + delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); + + } while (time_before(jiffies, end_jiffies)); + + if (!time_before(jiffies, end_jiffies)) + gk20a_warn(dev_from_gk20a(g), + "failed to set power mode to auto\n"); + } + + return 0; +} + +static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id, + u32 *mailbox_ret, u32 opc_success, + u32 mailbox_ok, u32 opc_fail, + u32 mailbox_fail) +{ + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + u32 delay = GR_IDLE_CHECK_DEFAULT; + u32 check = WAIT_UCODE_LOOP; + u32 reg; + + gk20a_dbg_fn(""); + + while (check == WAIT_UCODE_LOOP) { + if (!time_before(jiffies, end_jiffies) && + tegra_platform_is_silicon()) + check = WAIT_UCODE_TIMEOUT; + + reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(mailbox_id)); + + if (mailbox_ret) + *mailbox_ret = reg; + + switch (opc_success) { + case GR_IS_UCODE_OP_EQUAL: + if (reg == mailbox_ok) + check = WAIT_UCODE_OK; + break; + case GR_IS_UCODE_OP_NOT_EQUAL: + if (reg != mailbox_ok) + check = WAIT_UCODE_OK; + break; + case GR_IS_UCODE_OP_AND: + if (reg & mailbox_ok) + check = WAIT_UCODE_OK; + break; + case GR_IS_UCODE_OP_LESSER: + if (reg < mailbox_ok) + check = WAIT_UCODE_OK; + break; + case GR_IS_UCODE_OP_LESSER_EQUAL: + if (reg <= mailbox_ok) + check = WAIT_UCODE_OK; + break; + case GR_IS_UCODE_OP_SKIP: + /* do no success check */ + break; + default: + gk20a_err(dev_from_gk20a(g), + "invalid success opcode 0x%x", opc_success); + + check = WAIT_UCODE_ERROR; + break; + } + + 
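The success check above and the failure check that follows apply the same comparison opcodes (EQUAL, NOT_EQUAL, AND, LESSER, LESSER_EQUAL, SKIP) to the mailbox value. An illustrative predicate capturing those semantics in one place; it is a sketch, not a helper defined in this patch.

/* Illustrative mailbox comparison predicate (not part of the patch). */
static bool gr_ucode_op_matches(u32 opc, u32 reg, u32 val)
{
	switch (opc) {
	case GR_IS_UCODE_OP_EQUAL:
		return reg == val;
	case GR_IS_UCODE_OP_NOT_EQUAL:
		return reg != val;
	case GR_IS_UCODE_OP_AND:
		return (reg & val) != 0;
	case GR_IS_UCODE_OP_LESSER:
		return reg < val;
	case GR_IS_UCODE_OP_LESSER_EQUAL:
		return reg <= val;
	case GR_IS_UCODE_OP_SKIP:
	default:
		/* SKIP performs no comparison; the real code treats any
		 * other opcode as an error. */
		return false;
	}
}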
switch (opc_fail) { + case GR_IS_UCODE_OP_EQUAL: + if (reg == mailbox_fail) + check = WAIT_UCODE_ERROR; + break; + case GR_IS_UCODE_OP_NOT_EQUAL: + if (reg != mailbox_fail) + check = WAIT_UCODE_ERROR; + break; + case GR_IS_UCODE_OP_AND: + if (reg & mailbox_fail) + check = WAIT_UCODE_ERROR; + break; + case GR_IS_UCODE_OP_LESSER: + if (reg < mailbox_fail) + check = WAIT_UCODE_ERROR; + break; + case GR_IS_UCODE_OP_LESSER_EQUAL: + if (reg <= mailbox_fail) + check = WAIT_UCODE_ERROR; + break; + case GR_IS_UCODE_OP_SKIP: + /* do no check on fail*/ + break; + default: + gk20a_err(dev_from_gk20a(g), + "invalid fail opcode 0x%x", opc_fail); + check = WAIT_UCODE_ERROR; + break; + } + + usleep_range(delay, delay * 2); + delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); + } + + if (check == WAIT_UCODE_TIMEOUT) { + gk20a_err(dev_from_gk20a(g), + "timeout waiting on ucode response"); + gk20a_fecs_dump_falcon_stats(g); + return -1; + } else if (check == WAIT_UCODE_ERROR) { + gk20a_err(dev_from_gk20a(g), + "ucode method failed on mailbox=%d value=0x%08x", + mailbox_id, reg); + gk20a_fecs_dump_falcon_stats(g); + return -1; + } + + gk20a_dbg_fn("done"); + return 0; +} + +/* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...) + * We should replace most, if not all, fecs method calls to this instead. */ +struct fecs_method_op_gk20a { + struct { + u32 addr; + u32 data; + } method; + + struct { + u32 id; + u32 data; + u32 clr; + u32 *ret; + u32 ok; + u32 fail; + } mailbox; + + struct { + u32 ok; + u32 fail; + } cond; + +}; + +int gr_gk20a_submit_fecs_method_op(struct gk20a *g, + struct fecs_method_op_gk20a op) +{ + struct gr_gk20a *gr = &g->gr; + int ret; + + mutex_lock(&gr->fecs_mutex); + + if (op.mailbox.id != 0) + gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id), + op.mailbox.data); + + gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), + gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr)); + + gk20a_writel(g, gr_fecs_method_data_r(), op.method.data); + gk20a_writel(g, gr_fecs_method_push_r(), + gr_fecs_method_push_adr_f(op.method.addr)); + + /* op.mb.id == 4 cases require waiting for completion on + * for op.mb.id == 0 */ + if (op.mailbox.id == 4) + op.mailbox.id = 0; + + ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret, + op.cond.ok, op.mailbox.ok, + op.cond.fail, op.mailbox.fail); + + mutex_unlock(&gr->fecs_mutex); + + return ret; +} + +int gr_gk20a_ctrl_ctxsw(struct gk20a *g, u32 fecs_method, u32 *ret) +{ + return gr_gk20a_submit_fecs_method_op(g, + (struct fecs_method_op_gk20a) { + .method.addr = fecs_method, + .method.data = ~0, + .mailbox = { .id = 1, /*sideband?*/ + .data = ~0, .clr = ~0, .ret = ret, + .ok = gr_fecs_ctxsw_mailbox_value_pass_v(), + .fail = gr_fecs_ctxsw_mailbox_value_fail_v(), }, + .cond.ok = GR_IS_UCODE_OP_EQUAL, + .cond.fail = GR_IS_UCODE_OP_EQUAL }); +} + +/* Stop processing (stall) context switches at FECS. + * The caller must hold the dbg_sessions_lock, else if mutliple stop methods + * are sent to the ucode in sequence, it can get into an undefined state. 
*/ +int gr_gk20a_disable_ctxsw(struct gk20a *g) +{ + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); + return gr_gk20a_ctrl_ctxsw(g, gr_fecs_method_push_adr_stop_ctxsw_v(), 0); +} + +/* Start processing (continue) context switches at FECS */ +int gr_gk20a_enable_ctxsw(struct gk20a *g) +{ + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); + return gr_gk20a_ctrl_ctxsw(g, gr_fecs_method_push_adr_start_ctxsw_v(), 0); +} + + +static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va) +{ + u32 addr_lo; + u32 addr_hi; + void *inst_ptr = NULL; + + gk20a_dbg_fn(""); + + /* flush gpu_va before commit */ + gk20a_mm_fb_flush(c->g); + gk20a_mm_l2_flush(c->g, true); + + inst_ptr = c->inst_block.cpuva; + if (!inst_ptr) + return -ENOMEM; + + addr_lo = u64_lo32(gpu_va) >> 12; + addr_hi = u64_hi32(gpu_va); + + gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_target_w(), + ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() | + ram_in_gr_wfi_ptr_lo_f(addr_lo)); + + gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(), + ram_in_gr_wfi_ptr_hi_f(addr_hi)); + + gk20a_mm_l2_invalidate(c->g); + + return 0; +} + +/* + * Context state can be written directly or "patched" at times. + * So that code can be used in either situation it is written + * using a series _ctx_patch_write(..., patch) statements. + * However any necessary cpu map/unmap and gpu l2 invalidates + * should be minimized (to avoid doing it once per patch write). + * Before a sequence of these set up with "_ctx_patch_write_begin" + * and close with "_ctx_patch_write_end." + */ +int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx) +{ + /* being defensive still... */ + if (ch_ctx->patch_ctx.cpu_va) { + gk20a_err(dev_from_gk20a(g), "nested ctx patch begin?"); + return -EBUSY; + } + + ch_ctx->patch_ctx.cpu_va = vmap(ch_ctx->patch_ctx.pages, + PAGE_ALIGN(ch_ctx->patch_ctx.size) >> PAGE_SHIFT, + 0, pgprot_dmacoherent(PAGE_KERNEL)); + + if (!ch_ctx->patch_ctx.cpu_va) + return -ENOMEM; + + return 0; +} + +int gr_gk20a_ctx_patch_write_end(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx) +{ + /* being defensive still... */ + if (!ch_ctx->patch_ctx.cpu_va) { + gk20a_err(dev_from_gk20a(g), "dangling ctx patch end?"); + return -EINVAL; + } + + vunmap(ch_ctx->patch_ctx.cpu_va); + ch_ctx->patch_ctx.cpu_va = NULL; + + gk20a_mm_l2_invalidate(g); + return 0; +} + +int gr_gk20a_ctx_patch_write(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u32 addr, u32 data, bool patch) +{ + u32 patch_slot = 0; + void *patch_ptr = NULL; + bool mapped_here = false; + + BUG_ON(patch != 0 && ch_ctx == NULL); + + if (patch) { + if (!ch_ctx) + return -EINVAL; + /* we added an optimization prolog, epilog + * to get rid of unnecessary maps and l2 invals. + * but be defensive still... 
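The begin/write/end sequence described above is how the commit_* functions later in this file batch their patch writes: one map, any number of writes, one unmap and L2 invalidate. A usage sketch follows; the function name and the addr/data parameters are placeholders for illustration.

/* Illustrative usage of the patch-write API (not part of the patch). */
static int example_patch_two_regs(struct gk20a *g,
				  struct channel_ctx_gk20a *ch_ctx,
				  u32 addr0, u32 data0,
				  u32 addr1, u32 data1)
{
	int err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);

	if (err)
		return err;
	/* both writes reuse the single mapping set up above */
	gr_gk20a_ctx_patch_write(g, ch_ctx, addr0, data0, true);
	gr_gk20a_ctx_patch_write(g, ch_ctx, addr1, data1, true);
	return gr_gk20a_ctx_patch_write_end(g, ch_ctx);
}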
*/ + if (!ch_ctx->patch_ctx.cpu_va) { + int err; + gk20a_err(dev_from_gk20a(g), + "per-write ctx patch begin?"); + /* yes, gr_gk20a_ctx_patch_smpc causes this one */ + err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); + if (err) + return err; + mapped_here = true; + } else + mapped_here = false; + + patch_ptr = ch_ctx->patch_ctx.cpu_va; + patch_slot = ch_ctx->patch_ctx.data_count * 2; + + gk20a_mem_wr32(patch_ptr, patch_slot++, addr); + gk20a_mem_wr32(patch_ptr, patch_slot++, data); + + ch_ctx->patch_ctx.data_count++; + + if (mapped_here) + gr_gk20a_ctx_patch_write_end(g, ch_ctx); + + } else + gk20a_writel(g, addr, data); + + return 0; +} + +static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, + struct channel_gk20a *c) +{ + u32 inst_base_ptr = u64_lo32(c->inst_block.cpu_pa + >> ram_in_base_shift_v()); + u32 ret; + + gk20a_dbg_info("bind channel %d inst ptr 0x%08x", + c->hw_chid, inst_base_ptr); + + ret = gr_gk20a_submit_fecs_method_op(g, + (struct fecs_method_op_gk20a) { + .method.addr = gr_fecs_method_push_adr_bind_pointer_v(), + .method.data = (gr_fecs_current_ctx_ptr_f(inst_base_ptr) | + gr_fecs_current_ctx_target_vid_mem_f() | + gr_fecs_current_ctx_valid_f(1)), + .mailbox = { .id = 0, .data = 0, + .clr = 0x30, + .ret = NULL, + .ok = 0x10, + .fail = 0x20, }, + .cond.ok = GR_IS_UCODE_OP_AND, + .cond.fail = GR_IS_UCODE_OP_AND}); + if (ret) + gk20a_err(dev_from_gk20a(g), + "bind channel instance failed"); + + return ret; +} + +static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c, + bool disable_fifo) +{ + struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + struct fifo_gk20a *f = &g->fifo; + struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A; + u32 va_lo, va_hi, va; + int ret = 0; + void *ctx_ptr = NULL; + + gk20a_dbg_fn(""); + + ctx_ptr = vmap(ch_ctx->gr_ctx.pages, + PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, + 0, pgprot_dmacoherent(PAGE_KERNEL)); + if (!ctx_ptr) + return -ENOMEM; + + if (ch_ctx->zcull_ctx.gpu_va == 0 && + ch_ctx->zcull_ctx.ctx_sw_mode == + ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) { + ret = -EINVAL; + goto clean_up; + } + + va_lo = u64_lo32(ch_ctx->zcull_ctx.gpu_va); + va_hi = u64_hi32(ch_ctx->zcull_ctx.gpu_va); + va = ((va_lo >> 8) & 0x00FFFFFF) | ((va_hi << 24) & 0xFF000000); + + if (disable_fifo) { + ret = gk20a_fifo_disable_engine_activity(g, gr_info, true); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "failed to disable gr engine activity\n"); + goto clean_up; + } + } + + /* Channel gr_ctx buffer is gpu cacheable. + Flush and invalidate before cpu update. 
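The value written to the zcull pointer register here packs bits 39:8 of the 256-byte-aligned 40-bit GPU virtual address into a single 32-bit word. A standalone sketch of that packing with an example address:

#include <assert.h>
#include <stdint.h>

static uint32_t pack_va_39_8(uint64_t gpu_va)
{
	uint32_t va_lo = (uint32_t)gpu_va;
	uint32_t va_hi = (uint32_t)(gpu_va >> 32);

	/* same expression as the zcull pointer computation above */
	return ((va_lo >> 8) & 0x00FFFFFF) | ((va_hi << 24) & 0xFF000000);
}

int main(void)
{
	uint64_t va = 0x12345678900ULL;	/* 40-bit, 256 B aligned example */

	/* packing bits 39:8 is the same as shifting the address right by 8 */
	assert(pack_va_39_8(va) == (uint32_t)(va >> 8));
	return 0;
}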
*/ + gk20a_mm_fb_flush(g); + gk20a_mm_l2_flush(g, true); + + gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0, + ch_ctx->zcull_ctx.ctx_sw_mode); + + gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, va); + + if (disable_fifo) { + ret = gk20a_fifo_enable_engine_activity(g, gr_info); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "failed to enable gr engine activity\n"); + goto clean_up; + } + } + gk20a_mm_l2_invalidate(g); + +clean_up: + vunmap(ctx_ptr); + + return ret; +} + +static int gr_gk20a_commit_global_cb_manager(struct gk20a *g, + struct channel_gk20a *c, bool patch) +{ + struct gr_gk20a *gr = &g->gr; + struct channel_ctx_gk20a *ch_ctx = NULL; + u32 attrib_offset_in_chunk = 0; + u32 alpha_offset_in_chunk = 0; + u32 pd_ab_max_output; + u32 gpc_index, ppc_index; + u32 temp; + u32 cbm_cfg_size1, cbm_cfg_size2; + + gk20a_dbg_fn(""); + + if (patch) { + int err; + ch_ctx = &c->ch_ctx; + err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); + if (err) + return err; + } + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(), + gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) | + gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size), + patch); + + pd_ab_max_output = (gr->alpha_cb_default_size * + gr_gpc0_ppc0_cbm_cfg_size_granularity_v()) / + gr_pd_ab_dist_cfg1_max_output_granularity_v(); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(), + gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | + gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); + + alpha_offset_in_chunk = attrib_offset_in_chunk + + gr->tpc_count * gr->attrib_cb_size; + + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + temp = proj_gpc_stride_v() * gpc_index; + for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; + ppc_index++) { + cbm_cfg_size1 = gr->attrib_cb_default_size * + gr->pes_tpc_count[ppc_index][gpc_index]; + cbm_cfg_size2 = gr->alpha_cb_default_size * + gr->pes_tpc_count[ppc_index][gpc_index]; + + gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gpc0_ppc0_cbm_cfg_r() + temp + + proj_ppc_in_gpc_stride_v() * ppc_index, + gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(gr->timeslice_mode) | + gr_gpc0_ppc0_cbm_cfg_start_offset_f(attrib_offset_in_chunk) | + gr_gpc0_ppc0_cbm_cfg_size_f(cbm_cfg_size1), patch); + + attrib_offset_in_chunk += gr->attrib_cb_size * + gr->pes_tpc_count[ppc_index][gpc_index]; + + gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gpc0_ppc0_cbm_cfg2_r() + temp + + proj_ppc_in_gpc_stride_v() * ppc_index, + gr_gpc0_ppc0_cbm_cfg2_start_offset_f(alpha_offset_in_chunk) | + gr_gpc0_ppc0_cbm_cfg2_size_f(cbm_cfg_size2), patch); + + alpha_offset_in_chunk += gr->alpha_cb_size * + gr->pes_tpc_count[ppc_index][gpc_index]; + } + } + + if (patch) + gr_gk20a_ctx_patch_write_end(g, ch_ctx); + + return 0; +} + +static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, + struct channel_gk20a *c, bool patch) +{ + struct gr_gk20a *gr = &g->gr; + struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + u64 addr; + u32 size; + + gk20a_dbg_fn(""); + if (patch) { + int err; + err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); + if (err) + return err; + } + + /* global pagepool buffer */ + addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >> + gr_scc_pagepool_base_addr_39_8_align_bits_v()) | + (u64_hi32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) << + (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v())); + + size = gr->global_ctx_buffer[PAGEPOOL].size / + gr_scc_pagepool_total_pages_byte_granularity_v(); + + if (size == 
gr_scc_pagepool_total_pages_hwmax_value_v()) + size = gr_scc_pagepool_total_pages_hwmax_v(); + + gk20a_dbg_info("pagepool buffer addr : 0x%016llx, size : %d", + addr, size); + + g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, patch); + + /* global bundle cb */ + addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >> + gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) | + (u64_hi32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) << + (32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v())); + + size = gr->bundle_cb_default_size; + + gk20a_dbg_info("bundle cb addr : 0x%016llx, size : %d", + addr, size); + + g->ops.gr.commit_global_bundle_cb(g, ch_ctx, addr, size, patch); + + /* global attrib cb */ + addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >> + gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) | + (u64_hi32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) << + (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); + + gk20a_dbg_info("attrib cb addr : 0x%016llx", addr); + g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, patch); + + if (patch) + gr_gk20a_ctx_patch_write_end(g, ch_ctx); + + return 0; +} + +static void gr_gk20a_commit_global_attrib_cb(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, bool patch) +{ + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(), + gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) | + gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(), + gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) | + gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch); +} + +static void gr_gk20a_commit_global_bundle_cb(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, u64 size, bool patch) +{ + u32 data; + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), + gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), + gr_scc_bundle_cb_size_div_256b_f(size) | + gr_scc_bundle_cb_size_valid_true_f(), patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_bundle_cb_base_r(), + gr_gpcs_setup_bundle_cb_base_addr_39_8_f(addr), patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_bundle_cb_size_r(), + gr_gpcs_setup_bundle_cb_size_div_256b_f(size) | + gr_gpcs_setup_bundle_cb_size_valid_true_f(), patch); + + /* data for state_limit */ + data = (g->gr.bundle_cb_default_size * + gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) / + gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(); + + data = min_t(u32, data, g->gr.min_gpm_fifo_depth); + + gk20a_dbg_info("bundle cb token limit : %d, state limit : %d", + g->gr.bundle_cb_token_limit, data); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), + gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | + gr_pd_ab_dist_cfg2_state_limit_f(data), patch); + +} + +static int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c, bool patch) +{ + struct gr_gk20a *gr = &g->gr; + struct channel_ctx_gk20a *ch_ctx = NULL; + u32 gpm_pd_cfg; + u32 pd_ab_dist_cfg0; + u32 ds_debug; + u32 mpc_vtg_debug; + u32 pe_vaf; + u32 pe_vsc_vpc; + + gk20a_dbg_fn(""); + + gpm_pd_cfg = gk20a_readl(g, gr_gpcs_gpm_pd_cfg_r()); + pd_ab_dist_cfg0 = gk20a_readl(g, gr_pd_ab_dist_cfg0_r()); + ds_debug = gk20a_readl(g, gr_ds_debug_r()); + mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r()); + + if (patch) { + int err; + ch_ctx = &c->ch_ctx; + err = 
gr_gk20a_ctx_patch_write_begin(g, ch_ctx); + if (err) + return err; + } + + if (gr->timeslice_mode == gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v()) { + pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r()); + pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r()); + + gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f() | gpm_pd_cfg; + pe_vaf = gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f() | pe_vaf; + pe_vsc_vpc = gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f() | pe_vsc_vpc; + pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_en_f() | pd_ab_dist_cfg0; + ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug; + mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug; + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch); + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, patch); + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, patch); + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch); + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch); + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch); + } else { + gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg; + pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0; + ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug; + mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug; + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch); + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch); + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch); + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch); + } + + if (patch) + gr_gk20a_ctx_patch_write_end(g, ch_ctx); + + return 0; +} + +int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) +{ + u32 norm_entries, norm_shift; + u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod; + u32 map0, map1, map2, map3, map4, map5; + + if (!gr->map_tiles) + return -1; + + gk20a_dbg_fn(""); + + gk20a_writel(g, gr_crstr_map_table_cfg_r(), + gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) | + gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count)); + + map0 = gr_crstr_gpc_map0_tile0_f(gr->map_tiles[0]) | + gr_crstr_gpc_map0_tile1_f(gr->map_tiles[1]) | + gr_crstr_gpc_map0_tile2_f(gr->map_tiles[2]) | + gr_crstr_gpc_map0_tile3_f(gr->map_tiles[3]) | + gr_crstr_gpc_map0_tile4_f(gr->map_tiles[4]) | + gr_crstr_gpc_map0_tile5_f(gr->map_tiles[5]); + + map1 = gr_crstr_gpc_map1_tile6_f(gr->map_tiles[6]) | + gr_crstr_gpc_map1_tile7_f(gr->map_tiles[7]) | + gr_crstr_gpc_map1_tile8_f(gr->map_tiles[8]) | + gr_crstr_gpc_map1_tile9_f(gr->map_tiles[9]) | + gr_crstr_gpc_map1_tile10_f(gr->map_tiles[10]) | + gr_crstr_gpc_map1_tile11_f(gr->map_tiles[11]); + + map2 = gr_crstr_gpc_map2_tile12_f(gr->map_tiles[12]) | + gr_crstr_gpc_map2_tile13_f(gr->map_tiles[13]) | + gr_crstr_gpc_map2_tile14_f(gr->map_tiles[14]) | + gr_crstr_gpc_map2_tile15_f(gr->map_tiles[15]) | + gr_crstr_gpc_map2_tile16_f(gr->map_tiles[16]) | + gr_crstr_gpc_map2_tile17_f(gr->map_tiles[17]); + + map3 = gr_crstr_gpc_map3_tile18_f(gr->map_tiles[18]) | + gr_crstr_gpc_map3_tile19_f(gr->map_tiles[19]) | + gr_crstr_gpc_map3_tile20_f(gr->map_tiles[20]) | + gr_crstr_gpc_map3_tile21_f(gr->map_tiles[21]) | + 
gr_crstr_gpc_map3_tile22_f(gr->map_tiles[22]) | + gr_crstr_gpc_map3_tile23_f(gr->map_tiles[23]); + + map4 = gr_crstr_gpc_map4_tile24_f(gr->map_tiles[24]) | + gr_crstr_gpc_map4_tile25_f(gr->map_tiles[25]) | + gr_crstr_gpc_map4_tile26_f(gr->map_tiles[26]) | + gr_crstr_gpc_map4_tile27_f(gr->map_tiles[27]) | + gr_crstr_gpc_map4_tile28_f(gr->map_tiles[28]) | + gr_crstr_gpc_map4_tile29_f(gr->map_tiles[29]); + + map5 = gr_crstr_gpc_map5_tile30_f(gr->map_tiles[30]) | + gr_crstr_gpc_map5_tile31_f(gr->map_tiles[31]) | + gr_crstr_gpc_map5_tile32_f(0) | + gr_crstr_gpc_map5_tile33_f(0) | + gr_crstr_gpc_map5_tile34_f(0) | + gr_crstr_gpc_map5_tile35_f(0); + + gk20a_writel(g, gr_crstr_gpc_map0_r(), map0); + gk20a_writel(g, gr_crstr_gpc_map1_r(), map1); + gk20a_writel(g, gr_crstr_gpc_map2_r(), map2); + gk20a_writel(g, gr_crstr_gpc_map3_r(), map3); + gk20a_writel(g, gr_crstr_gpc_map4_r(), map4); + gk20a_writel(g, gr_crstr_gpc_map5_r(), map5); + + switch (gr->tpc_count) { + case 1: + norm_shift = 4; + break; + case 2: + case 3: + norm_shift = 3; + break; + case 4: + case 5: + case 6: + case 7: + norm_shift = 2; + break; + case 8: + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + norm_shift = 1; + break; + default: + norm_shift = 0; + break; + } + + norm_entries = gr->tpc_count << norm_shift; + coeff5_mod = (1 << 5) % norm_entries; + coeff6_mod = (1 << 6) % norm_entries; + coeff7_mod = (1 << 7) % norm_entries; + coeff8_mod = (1 << 8) % norm_entries; + coeff9_mod = (1 << 9) % norm_entries; + coeff10_mod = (1 << 10) % norm_entries; + coeff11_mod = (1 << 11) % norm_entries; + + gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(), + gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) | + gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(norm_entries) | + gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(norm_shift) | + gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(coeff5_mod) | + gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count)); + + gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg2_r(), + gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(coeff6_mod) | + gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(coeff7_mod) | + gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(coeff8_mod) | + gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(coeff9_mod) | + gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(coeff10_mod) | + gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(coeff11_mod)); + + gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map0_r(), map0); + gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map1_r(), map1); + gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map2_r(), map2); + gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map3_r(), map3); + gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map4_r(), map4); + gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map5_r(), map5); + + gk20a_writel(g, gr_rstr2d_map_table_cfg_r(), + gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) | + gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count)); + + gk20a_writel(g, gr_rstr2d_gpc_map0_r(), map0); + gk20a_writel(g, gr_rstr2d_gpc_map1_r(), map1); + gk20a_writel(g, gr_rstr2d_gpc_map2_r(), map2); + gk20a_writel(g, gr_rstr2d_gpc_map3_r(), map3); + gk20a_writel(g, gr_rstr2d_gpc_map4_r(), map4); + gk20a_writel(g, gr_rstr2d_gpc_map5_r(), map5); + + return 0; +} + +static inline u32 count_bits(u32 mask) +{ + u32 temp = mask; + u32 count; + for (count = 0; temp != 0; count++) + temp &= temp - 1; + + return count; +} + +static inline u32 clear_count_bits(u32 num, u32 clear_count) +{ + u32 count = clear_count; + for (; (num != 0) && (count != 0); count--) + num &= num - 1; + + 
return num; +} + +static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g, + struct gr_gk20a *gr) +{ + u32 table_index_bits = 5; + u32 rows = (1 << table_index_bits); + u32 row_stride = gr_pd_alpha_ratio_table__size_1_v() / rows; + + u32 row; + u32 index; + u32 gpc_index; + u32 gpcs_per_reg = 4; + u32 pes_index; + u32 tpc_count_pes; + u32 num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v(); + + u32 alpha_target, beta_target; + u32 alpha_bits, beta_bits; + u32 alpha_mask, beta_mask, partial_mask; + u32 reg_offset; + bool assign_alpha; + + u32 map_alpha[gr_pd_alpha_ratio_table__size_1_v()]; + u32 map_beta[gr_pd_alpha_ratio_table__size_1_v()]; + u32 map_reg_used[gr_pd_alpha_ratio_table__size_1_v()]; + + gk20a_dbg_fn(""); + + memset(map_alpha, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32)); + memset(map_beta, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32)); + memset(map_reg_used, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32)); + + for (row = 0; row < rows; ++row) { + alpha_target = max_t(u32, gr->tpc_count * row / rows, 1); + beta_target = gr->tpc_count - alpha_target; + + assign_alpha = (alpha_target < beta_target); + + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + reg_offset = (row * row_stride) + (gpc_index / gpcs_per_reg); + alpha_mask = beta_mask = 0; + + for (pes_index = 0; pes_index < num_pes_per_gpc; pes_index++) { + tpc_count_pes = gr->pes_tpc_count[pes_index][gpc_index]; + + if (assign_alpha) { + alpha_bits = (alpha_target == 0) ? 0 : tpc_count_pes; + beta_bits = tpc_count_pes - alpha_bits; + } else { + beta_bits = (beta_target == 0) ? 0 : tpc_count_pes; + alpha_bits = tpc_count_pes - beta_bits; + } + + partial_mask = gr->pes_tpc_mask[pes_index][gpc_index]; + partial_mask = clear_count_bits(partial_mask, tpc_count_pes - alpha_bits); + alpha_mask |= partial_mask; + + partial_mask = gr->pes_tpc_mask[pes_index][gpc_index] ^ partial_mask; + beta_mask |= partial_mask; + + alpha_target -= min(alpha_bits, alpha_target); + beta_target -= min(beta_bits, beta_target); + + if ((alpha_bits > 0) || (beta_bits > 0)) + assign_alpha = !assign_alpha; + } + + switch (gpc_index % gpcs_per_reg) { + case 0: + map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n0_mask_f(alpha_mask); + map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n0_mask_f(beta_mask); + break; + case 1: + map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n1_mask_f(alpha_mask); + map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n1_mask_f(beta_mask); + break; + case 2: + map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n2_mask_f(alpha_mask); + map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n2_mask_f(beta_mask); + break; + case 3: + map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n3_mask_f(alpha_mask); + map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n3_mask_f(beta_mask); + break; + } + map_reg_used[reg_offset] = true; + } + } + + for (index = 0; index < gr_pd_alpha_ratio_table__size_1_v(); index++) { + if (map_reg_used[index]) { + gk20a_writel(g, gr_pd_alpha_ratio_table_r(index), map_alpha[index]); + gk20a_writel(g, gr_pd_beta_ratio_table_r(index), map_beta[index]); + } + } + + return 0; +} + +static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + u32 tpc_index, gpc_index; + u32 tpc_offset, gpc_offset; + u32 sm_id = 0, gpc_id = 0; + u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()]; + u32 tpc_per_gpc; + u32 max_ways_evict = INVALID_MAX_WAYS; + u32 l1c_dbg_reg_val; + + gk20a_dbg_fn(""); + + for 
(tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) { + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + gpc_offset = proj_gpc_stride_v() * gpc_index; + if (tpc_index < gr->gpc_tpc_count[gpc_index]) { + tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index; + + gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, + gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); + gk20a_writel(g, gr_gpc0_tpc0_l1c_cfg_smid_r() + gpc_offset + tpc_offset, + gr_gpc0_tpc0_l1c_cfg_smid_value_f(sm_id)); + gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + gpc_offset, + gr_gpc0_gpm_pd_sm_id_id_f(sm_id)); + gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, + gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id)); + + sm_id_to_gpc_id[sm_id] = gpc_index; + sm_id++; + } + + gk20a_writel(g, gr_gpc0_gpm_pd_active_tpcs_r() + gpc_offset, + gr_gpc0_gpm_pd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index])); + gk20a_writel(g, gr_gpc0_gpm_sd_active_tpcs_r() + gpc_offset, + gr_gpc0_gpm_sd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index])); + } + } + + for (tpc_index = 0, gpc_id = 0; + tpc_index < gr_pd_num_tpc_per_gpc__size_1_v(); + tpc_index++, gpc_id += 8) { + + if (gpc_id >= gr->gpc_count) + gpc_id = 0; + + tpc_per_gpc = + gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) | + gr_pd_num_tpc_per_gpc_count1_f(gr->gpc_tpc_count[gpc_id + 1]) | + gr_pd_num_tpc_per_gpc_count2_f(gr->gpc_tpc_count[gpc_id + 2]) | + gr_pd_num_tpc_per_gpc_count3_f(gr->gpc_tpc_count[gpc_id + 3]) | + gr_pd_num_tpc_per_gpc_count4_f(gr->gpc_tpc_count[gpc_id + 4]) | + gr_pd_num_tpc_per_gpc_count5_f(gr->gpc_tpc_count[gpc_id + 5]) | + gr_pd_num_tpc_per_gpc_count6_f(gr->gpc_tpc_count[gpc_id + 6]) | + gr_pd_num_tpc_per_gpc_count7_f(gr->gpc_tpc_count[gpc_id + 7]); + + gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc); + gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc); + } + + /* gr__setup_pd_mapping stubbed for gk20a */ + gr_gk20a_setup_rop_mapping(g, gr); + if (g->ops.gr.setup_alpha_beta_tables) + g->ops.gr.setup_alpha_beta_tables(g, gr); + + if (gr->num_fbps == 1) + max_ways_evict = 9; + + if (max_ways_evict != INVALID_MAX_WAYS) + g->ops.ltc.set_max_ways_evict_last(g, max_ways_evict); + + for (gpc_index = 0; + gpc_index < gr_pd_dist_skip_table__size_1_v() * 4; + gpc_index += 4) { + + gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4), + gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) | + gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) | + gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) | + gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3])); + } + + gk20a_writel(g, gr_cwd_fs_r(), + gr_cwd_fs_num_gpcs_f(gr->gpc_count) | + gr_cwd_fs_num_tpcs_f(gr->tpc_count)); + + gk20a_writel(g, gr_bes_zrop_settings_r(), + gr_bes_zrop_settings_num_active_fbps_f(gr->num_fbps)); + gk20a_writel(g, gr_bes_crop_settings_r(), + gr_bes_crop_settings_num_active_fbps_f(gr->num_fbps)); + + /* turn on cya15 bit for a default val that missed the cut */ + l1c_dbg_reg_val = gk20a_readl(g, gr_gpc0_tpc0_l1c_dbg_r()); + l1c_dbg_reg_val |= gr_gpc0_tpc0_l1c_dbg_cya15_en_f(); + gk20a_writel(g, gr_gpc0_tpc0_l1c_dbg_r(), l1c_dbg_reg_val); + + return 0; +} + +static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type) +{ + struct gk20a *g = c->g; + int ret; + + u32 inst_base_ptr = + u64_lo32(c->inst_block.cpu_pa + >> ram_in_base_shift_v()); + + + gk20a_dbg_fn(""); + + ret = gr_gk20a_submit_fecs_method_op(g, +
(struct fecs_method_op_gk20a) { + .method.addr = save_type, + .method.data = (gr_fecs_current_ctx_ptr_f(inst_base_ptr) | + gr_fecs_current_ctx_target_vid_mem_f() | + gr_fecs_current_ctx_valid_f(1)), + .mailbox = {.id = 0, .data = 0, .clr = 3, .ret = NULL, + .ok = 1, .fail = 2, + }, + .cond.ok = GR_IS_UCODE_OP_AND, + .cond.fail = GR_IS_UCODE_OP_AND, + }); + + if (ret) + gk20a_err(dev_from_gk20a(g), "save context image failed"); + + return ret; +} + +static u32 gk20a_init_sw_bundle(struct gk20a *g) +{ + struct av_list_gk20a *sw_bundle_init = &g->gr.ctx_vars.sw_bundle_init; + u32 last_bundle_data = 0; + u32 err = 0; + int i; + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + + /* enable pipe mode override */ + gk20a_writel(g, gr_pipe_bundle_config_r(), + gr_pipe_bundle_config_override_pipe_mode_enabled_f()); + + /* load bundle init */ + for (i = 0; i < sw_bundle_init->count; i++) { + + if (i == 0 || last_bundle_data != sw_bundle_init->l[i].value) { + gk20a_writel(g, gr_pipe_bundle_data_r(), + sw_bundle_init->l[i].value); + last_bundle_data = sw_bundle_init->l[i].value; + } + + gk20a_writel(g, gr_pipe_bundle_address_r(), + sw_bundle_init->l[i].addr); + + if (gr_pipe_bundle_address_value_v(sw_bundle_init->l[i].addr) == + GR_GO_IDLE_BUNDLE) + err |= gr_gk20a_wait_idle(g, end_jiffies, + GR_IDLE_CHECK_DEFAULT); + } + + /* disable pipe mode override */ + gk20a_writel(g, gr_pipe_bundle_config_r(), + gr_pipe_bundle_config_override_pipe_mode_disabled_f()); + + return err; +} + +/* init global golden image from a fresh gr_ctx in channel ctx. + save a copy in local_golden_image in ctx_vars */ +static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, + struct channel_gk20a *c) +{ + struct gr_gk20a *gr = &g->gr; + struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + u32 ctx_header_bytes = ctxsw_prog_fecs_header_v(); + u32 ctx_header_words; + u32 i; + u32 data; + void *ctx_ptr = NULL; + void *gold_ptr = NULL; + u32 err = 0; + + gk20a_dbg_fn(""); + + /* golden ctx is global to all channels. Although only the first + channel initializes golden image, driver needs to prevent multiple + channels from initializing golden ctx at the same time */ + mutex_lock(&gr->ctx_mutex); + + if (gr->ctx_vars.golden_image_initialized) + goto clean_up; + + err = gr_gk20a_fecs_ctx_bind_channel(g, c); + if (err) + goto clean_up; + + err = gk20a_init_sw_bundle(g); + if (err) + goto clean_up; + + err = gr_gk20a_elpg_protected_call(g, + gr_gk20a_commit_global_ctx_buffers(g, c, false)); + if (err) + goto clean_up; + + gold_ptr = vmap(gr->global_ctx_buffer[GOLDEN_CTX].pages, + PAGE_ALIGN(gr->global_ctx_buffer[GOLDEN_CTX].size) >> + PAGE_SHIFT, 0, pgprot_dmacoherent(PAGE_KERNEL)); + if (!gold_ptr) + goto clean_up; + + ctx_ptr = vmap(ch_ctx->gr_ctx.pages, + PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, + 0, pgprot_dmacoherent(PAGE_KERNEL)); + if (!ctx_ptr) + goto clean_up; + + ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); + ctx_header_words >>= 2; + + /* Channel gr_ctx buffer is gpu cacheable. + Flush before cpu read. 
*/ + gk20a_mm_fb_flush(g); + gk20a_mm_l2_flush(g, false); + + for (i = 0; i < ctx_header_words; i++) { + data = gk20a_mem_rd32(ctx_ptr, i); + gk20a_mem_wr32(gold_ptr, i, data); + } + + gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_o(), 0, + ctxsw_prog_main_image_zcull_mode_no_ctxsw_v()); + + gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, 0); + + gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); + + gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v()); + + if (gr->ctx_vars.local_golden_image == NULL) { + + gr->ctx_vars.local_golden_image = + kzalloc(gr->ctx_vars.golden_image_size, GFP_KERNEL); + + if (gr->ctx_vars.local_golden_image == NULL) { + err = -ENOMEM; + goto clean_up; + } + + for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) + gr->ctx_vars.local_golden_image[i] = + gk20a_mem_rd32(gold_ptr, i); + } + + gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va); + + gr->ctx_vars.golden_image_initialized = true; + + gk20a_mm_l2_invalidate(g); + + gk20a_writel(g, gr_fecs_current_ctx_r(), + gr_fecs_current_ctx_valid_false_f()); + +clean_up: + if (err) + gk20a_err(dev_from_gk20a(g), "fail"); + else + gk20a_dbg_fn("done"); + + if (gold_ptr) + vunmap(gold_ptr); + if (ctx_ptr) + vunmap(ctx_ptr); + + mutex_unlock(&gr->ctx_mutex); + return err; +} + +int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, + struct channel_gk20a *c, + bool enable_smpc_ctxsw) +{ + struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + void *ctx_ptr = NULL; + u32 data; + + /*XXX caller responsible for making sure the channel is quiesced? */ + + /* Channel gr_ctx buffer is gpu cacheable. + Flush and invalidate before cpu update. */ + gk20a_mm_fb_flush(g); + gk20a_mm_l2_flush(g, true); + + ctx_ptr = vmap(ch_ctx->gr_ctx.pages, + PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, + 0, pgprot_dmacoherent(PAGE_KERNEL)); + if (!ctx_ptr) + return -ENOMEM; + + data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); + data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); + data |= enable_smpc_ctxsw ? + ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() : + ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(); + gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, + data); + + vunmap(ctx_ptr); + + gk20a_mm_l2_invalidate(g); + + return 0; +} + +/* load saved fresh copy of golden image into channel gr_ctx */ +static int gr_gk20a_load_golden_ctx_image(struct gk20a *g, + struct channel_gk20a *c) +{ + struct gr_gk20a *gr = &g->gr; + struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + u32 virt_addr_lo; + u32 virt_addr_hi; + u32 i, v, data; + int ret = 0; + void *ctx_ptr = NULL; + + gk20a_dbg_fn(""); + + if (gr->ctx_vars.local_golden_image == NULL) + return -1; + + /* Channel gr_ctx buffer is gpu cacheable. + Flush and invalidate before cpu update.
*/ + gk20a_mm_fb_flush(g); + gk20a_mm_l2_flush(g, true); + + ctx_ptr = vmap(ch_ctx->gr_ctx.pages, + PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, + 0, pgprot_dmacoherent(PAGE_KERNEL)); + if (!ctx_ptr) + return -ENOMEM; + + for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) + gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]); + + gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0); + gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0); + + virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va); + virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va); + + gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_o(), 0, + ch_ctx->patch_ctx.data_count); + gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_lo_o(), 0, + virt_addr_lo); + gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0, + virt_addr_hi); + + /* no user for client managed performance counter ctx */ + ch_ctx->pm_ctx.ctx_sw_mode = + ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); + data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); + data = data & ~ctxsw_prog_main_image_pm_mode_m(); + data |= ch_ctx->pm_ctx.ctx_sw_mode; + gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, + data); + + gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, 0); + + /* set priv access map */ + virt_addr_lo = + u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); + virt_addr_hi = + u64_hi32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); + + gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_config_o(), 0, + ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f()); + gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_lo_o(), 0, + virt_addr_lo); + gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_hi_o(), 0, + virt_addr_hi); + /* disable verif features */ + v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0); + v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m()); + v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); + gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0, v); + + + vunmap(ctx_ptr); + + gk20a_mm_l2_invalidate(g); + + if (tegra_platform_is_linsim()) { + u32 inst_base_ptr = + u64_lo32(c->inst_block.cpu_pa + >> ram_in_base_shift_v()); + + ret = gr_gk20a_submit_fecs_method_op(g, + (struct fecs_method_op_gk20a) { + .method.data = + (gr_fecs_current_ctx_ptr_f(inst_base_ptr) | + gr_fecs_current_ctx_target_vid_mem_f() | + gr_fecs_current_ctx_valid_f(1)), + .method.addr = + gr_fecs_method_push_adr_restore_golden_v(), + .mailbox = { + .id = 0, .data = 0, + .clr = ~0, .ret = NULL, + .ok = gr_fecs_ctxsw_mailbox_value_pass_v(), + .fail = 0}, + .cond.ok = GR_IS_UCODE_OP_EQUAL, + .cond.fail = GR_IS_UCODE_OP_SKIP}); + + if (ret) + gk20a_err(dev_from_gk20a(g), + "restore context image failed"); + } + + return ret; +} + +static void gr_gk20a_start_falcon_ucode(struct gk20a *g) +{ + gk20a_dbg_fn(""); + + gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), + gr_fecs_ctxsw_mailbox_clear_value_f(~0)); + + gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0)); + gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0)); + + gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1)); + gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1)); + + gk20a_dbg_fn("done"); +} + +static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = 
&mm->pmu.vm; + struct device *d = dev_from_gk20a(g); + struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; + void *inst_ptr; + u32 pde_addr_lo; + u32 pde_addr_hi; + u64 pde_addr; + dma_addr_t iova; + + /* Alloc mem of inst block */ + ucode_info->inst_blk_desc.size = ram_in_alloc_size_v(); + ucode_info->inst_blk_desc.cpuva = dma_alloc_coherent(d, + ucode_info->inst_blk_desc.size, + &iova, + GFP_KERNEL); + if (!ucode_info->inst_blk_desc.cpuva) { + gk20a_err(d, "failed to allocate memory\n"); + return -ENOMEM; + } + + ucode_info->inst_blk_desc.iova = iova; + ucode_info->inst_blk_desc.cpu_pa = gk20a_get_phys_from_iova(d, + ucode_info->inst_blk_desc.iova); + + inst_ptr = ucode_info->inst_blk_desc.cpuva; + + /* Set inst block */ + gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), + u64_lo32(vm->va_limit) | 0xFFF); + gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), + ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); + + pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl); + pde_addr_lo = u64_lo32(pde_addr >> 12); + pde_addr_hi = u64_hi32(pde_addr); + gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), + ram_in_page_dir_base_target_vid_mem_f() | + ram_in_page_dir_base_vol_true_f() | + ram_in_page_dir_base_lo_f(pde_addr_lo)); + gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), + ram_in_page_dir_base_hi_f(pde_addr_hi)); + + /* Map ucode surface to GMMU */ + ucode_info->ucode_gpuva = gk20a_gmmu_map(vm, + &ucode_info->surface_desc.sgt, + ucode_info->surface_desc.size, + 0, /* flags */ + gk20a_mem_flag_read_only); + if (!ucode_info->ucode_gpuva) { + gk20a_err(d, "failed to update gmmu ptes\n"); + return -ENOMEM; + } + + return 0; +} + +static void gr_gk20a_init_ctxsw_ucode_segment( + struct gk20a_ctxsw_ucode_segment *p_seg, u32 *offset, u32 size) +{ + p_seg->offset = *offset; + p_seg->size = size; + *offset = ALIGN(*offset + size, BLK_SIZE); +} + +static void gr_gk20a_init_ctxsw_ucode_segments( + struct gk20a_ctxsw_ucode_segments *segments, u32 *offset, + struct gk20a_ctxsw_bootloader_desc *bootdesc, + u32 code_size, u32 data_size) +{ + u32 boot_size = ALIGN(bootdesc->size, sizeof(u32)); + segments->boot_entry = bootdesc->entry_point; + segments->boot_imem_offset = bootdesc->imem_offset; + gr_gk20a_init_ctxsw_ucode_segment(&segments->boot, offset, boot_size); + gr_gk20a_init_ctxsw_ucode_segment(&segments->code, offset, code_size); + gr_gk20a_init_ctxsw_ucode_segment(&segments->data, offset, data_size); +} + +static int gr_gk20a_copy_ctxsw_ucode_segments( + u8 *buf, + struct gk20a_ctxsw_ucode_segments *segments, + u32 *bootimage, + u32 *code, u32 *data) +{ + memcpy(buf + segments->boot.offset, bootimage, segments->boot.size); + memcpy(buf + segments->code.offset, code, segments->code.size); + memcpy(buf + segments->data.offset, data, segments->data.size); + return 0; +} + +static int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) +{ + struct device *d = dev_from_gk20a(g); + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = &mm->pmu.vm; + struct gk20a_ctxsw_bootloader_desc *fecs_boot_desc; + struct gk20a_ctxsw_bootloader_desc *gpccs_boot_desc; + const struct firmware *fecs_fw; + const struct firmware *gpccs_fw; + u32 *fecs_boot_image; + u32 *gpccs_boot_image; + struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; + u8 *buf; + u32 ucode_size; + int err = 0; + dma_addr_t iova; + DEFINE_DMA_ATTRS(attrs); + + fecs_fw = gk20a_request_firmware(g, GK20A_FECS_UCODE_IMAGE); + if (!fecs_fw) { + gk20a_err(d, "failed to load fecs ucode!!"); + return -ENOENT; + } + + fecs_boot_desc = 
(void *)fecs_fw->data; + fecs_boot_image = (void *)(fecs_fw->data + + sizeof(struct gk20a_ctxsw_bootloader_desc)); + + gpccs_fw = gk20a_request_firmware(g, GK20A_GPCCS_UCODE_IMAGE); + if (!gpccs_fw) { + release_firmware(fecs_fw); + gk20a_err(d, "failed to load gpccs ucode!!"); + return -ENOENT; + } + + gpccs_boot_desc = (void *)gpccs_fw->data; + gpccs_boot_image = (void *)(gpccs_fw->data + + sizeof(struct gk20a_ctxsw_bootloader_desc)); + + ucode_size = 0; + gr_gk20a_init_ctxsw_ucode_segments(&ucode_info->fecs, &ucode_size, + fecs_boot_desc, + g->gr.ctx_vars.ucode.fecs.inst.count * sizeof(u32), + g->gr.ctx_vars.ucode.fecs.data.count * sizeof(u32)); + gr_gk20a_init_ctxsw_ucode_segments(&ucode_info->gpccs, &ucode_size, + gpccs_boot_desc, + g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32), + g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32)); + + ucode_info->surface_desc.size = ucode_size; + dma_set_attr(DMA_ATTR_READ_ONLY, &attrs); + ucode_info->surface_desc.cpuva = dma_alloc_attrs(d, + ucode_info->surface_desc.size, + &iova, + GFP_KERNEL, + &attrs); + if (!ucode_info->surface_desc.cpuva) { + gk20a_err(d, "memory allocation failed\n"); + err = -ENOMEM; + goto clean_up; + } + + ucode_info->surface_desc.iova = iova; + err = gk20a_get_sgtable(d, &ucode_info->surface_desc.sgt, + ucode_info->surface_desc.cpuva, + ucode_info->surface_desc.iova, + ucode_info->surface_desc.size); + if (err) { + gk20a_err(d, "failed to create sg table\n"); + goto clean_up; + } + + buf = (u8 *)ucode_info->surface_desc.cpuva; + if (!buf) { + gk20a_err(d, "failed to map surface desc buffer"); + err = -ENOMEM; + goto clean_up; + } + + gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->fecs, + fecs_boot_image, + g->gr.ctx_vars.ucode.fecs.inst.l, + g->gr.ctx_vars.ucode.fecs.data.l); + + release_firmware(fecs_fw); + fecs_fw = NULL; + + gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->gpccs, + gpccs_boot_image, + g->gr.ctx_vars.ucode.gpccs.inst.l, + g->gr.ctx_vars.ucode.gpccs.data.l); + + release_firmware(gpccs_fw); + gpccs_fw = NULL; + + err = gr_gk20a_init_ctxsw_ucode_vaspace(g); + if (err) + goto clean_up; + + gk20a_free_sgtable(&ucode_info->surface_desc.sgt); + + return 0; + + clean_up: + if (ucode_info->ucode_gpuva) + gk20a_gmmu_unmap(vm, ucode_info->ucode_gpuva, + ucode_info->surface_desc.size, gk20a_mem_flag_none); + if (ucode_info->surface_desc.sgt) + gk20a_free_sgtable(&ucode_info->surface_desc.sgt); + if (ucode_info->surface_desc.cpuva) + dma_free_attrs(d, ucode_info->surface_desc.size, + ucode_info->surface_desc.cpuva, + ucode_info->surface_desc.iova, + &attrs); + ucode_info->surface_desc.cpuva = NULL; + ucode_info->surface_desc.iova = 0; + + release_firmware(gpccs_fw); + gpccs_fw = NULL; + release_firmware(fecs_fw); + fecs_fw = NULL; + + return err; +} + +static void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g) +{ + struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; + int retries = 20; + phys_addr_t inst_ptr; + u32 val; + + while ((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) & + gr_fecs_ctxsw_status_1_arb_busy_m()) && retries) { + udelay(2); + retries--; + } + if (!retries) + gk20a_err(dev_from_gk20a(g), "arbiter idle timeout"); + + gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0); + + inst_ptr = ucode_info->inst_blk_desc.cpu_pa; + gk20a_writel(g, gr_fecs_new_ctx_r(), + gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | + gr_fecs_new_ctx_target_m() | + gr_fecs_new_ctx_valid_m()); + + gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(), + gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) | + 
gr_fecs_arb_ctx_ptr_target_m()); + + gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7); + + /* Wait for arbiter command to complete */ + retries = 20; + val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()); + while (gr_fecs_arb_ctx_cmd_cmd_v(val) && retries) { + udelay(2); + retries--; + val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()); + } + if (!retries) + gk20a_err(dev_from_gk20a(g), "arbiter complete timeout"); + + gk20a_writel(g, gr_fecs_current_ctx_r(), + gr_fecs_current_ctx_ptr_f(inst_ptr >> 12) | + gr_fecs_current_ctx_target_m() | + gr_fecs_current_ctx_valid_m()); + /* Send command to arbiter to flush */ + gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), gr_fecs_arb_ctx_cmd_cmd_s()); + + retries = 20; + val = (gk20a_readl(g, gr_fecs_arb_ctx_cmd_r())); + while (gr_fecs_arb_ctx_cmd_cmd_v(val) && retries) { + udelay(2); + retries--; + val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()); + } + if (!retries) + gk20a_err(dev_from_gk20a(g), "arbiter complete timeout"); +} + +static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, + struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) +{ + u32 addr_code32; + u32 addr_data32; + u32 addr_load32; + u32 dst = 0; + u32 blocks; + u32 b; + + addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8); + addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8); + addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8); + + gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), + gr_fecs_dmactl_require_ctx_f(0)); + + /* + * Copy falcon bootloader header into dmem at offset 0. + * Configure dmem port 0 for auto-incrementing writes starting at dmem + * offset 0. + */ + gk20a_writel(g, reg_offset + gr_fecs_dmemc_r(0), + gr_fecs_dmemc_offs_f(0) | + gr_fecs_dmemc_blk_f(0) | + gr_fecs_dmemc_aincw_f(1)); + + /* Write out the actual data */ + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->code.size); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_data32); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->data.size); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + + blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8; + + /* + * Set the base FB address for the DMA transfer. Subtract off the 256 + * byte IMEM block offset such that the relative FB and IMEM offsets + * match, allowing the IMEM tags to be properly created. 
+ */ + + dst = segments->boot_imem_offset; + gk20a_writel(g, reg_offset + gr_fecs_dmatrfbase_r(), + (addr_load32 - (dst >> 8))); + + for (b = 0; b < blocks; b++) { + /* Setup destination IMEM offset */ + gk20a_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(), + dst + (b << 8)); + + /* Setup source offset (relative to BASE) */ + gk20a_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(), + dst + (b << 8)); + + gk20a_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(), + gr_fecs_dmatrfcmd_imem_f(0x01) | + gr_fecs_dmatrfcmd_write_f(0x00) | + gr_fecs_dmatrfcmd_size_f(0x06) | + gr_fecs_dmatrfcmd_ctxdma_f(0)); + } + + /* Specify the falcon boot vector */ + gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(), + gr_fecs_bootvec_vec_f(segments->boot_entry)); + + /* Write to CPUCTL to start the falcon */ + gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(), + gr_fecs_cpuctl_startcpu_f(0x01)); + + return 0; +} + +static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g) +{ + struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; + u64 addr_base = ucode_info->ucode_gpuva; + + gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0); + + gr_gk20a_load_falcon_bind_instblk(g); + + gr_gk20a_load_ctxsw_ucode_segments(g, addr_base, + &g->ctxsw_ucode_info.fecs, 0); + + gr_gk20a_load_ctxsw_ucode_segments(g, addr_base, + &g->ctxsw_ucode_info.gpccs, + gr_gpcs_gpccs_falcon_hwcfg_r() - + gr_fecs_falcon_hwcfg_r()); +} + +static int gr_gk20a_load_ctxsw_ucode(struct gk20a *g, struct gr_gk20a *gr) +{ + u32 ret; + + gk20a_dbg_fn(""); + + if (tegra_platform_is_linsim()) { + gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7), + gr_fecs_ctxsw_mailbox_value_f(0xc0de7777)); + gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7), + gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777)); + } + + /* + * In case the gPMU falcon is not being used, revert to the old way of + * loading gr ucode, without the faster bootstrap routine. 
+ */ + if (!support_gk20a_pmu()) { + gr_gk20a_load_falcon_dmem(g); + gr_gk20a_load_falcon_imem(g); + gr_gk20a_start_falcon_ucode(g); + } else { + if (!gr->skip_ucode_init) + gr_gk20a_init_ctxsw_ucode(g); + gr_gk20a_load_falcon_with_bootloader(g); + gr->skip_ucode_init = true; + } + + ret = gr_gk20a_ctx_wait_ucode(g, 0, 0, + GR_IS_UCODE_OP_EQUAL, + eUcodeHandshakeInitComplete, + GR_IS_UCODE_OP_SKIP, 0); + if (ret) { + gk20a_err(dev_from_gk20a(g), "falcon ucode init timeout"); + return ret; + } + + if (support_gk20a_pmu()) + gk20a_writel(g, gr_fecs_current_ctx_r(), + gr_fecs_current_ctx_valid_false_f()); + + gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0xffffffff); + gk20a_writel(g, gr_fecs_method_data_r(), 0x7fffffff); + gk20a_writel(g, gr_fecs_method_push_r(), + gr_fecs_method_push_adr_set_watchdog_timeout_f()); + + gk20a_dbg_fn("done"); + return 0; +} + +static int gr_gk20a_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr) +{ + u32 golden_ctx_image_size = 0; + u32 zcull_ctx_image_size = 0; + u32 pm_ctx_image_size = 0; + u32 ret; + struct fecs_method_op_gk20a op = { + .mailbox = { .id = 0, .data = 0, + .clr = ~0, .ok = 0, .fail = 0}, + .method.data = 0, + .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL, + .cond.fail = GR_IS_UCODE_OP_SKIP, + }; + + gk20a_dbg_fn(""); + op.method.addr = gr_fecs_method_push_adr_discover_image_size_v(); + op.mailbox.ret = &golden_ctx_image_size; + ret = gr_gk20a_submit_fecs_method_op(g, op); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "query golden image size failed"); + return ret; + } + op.method.addr = gr_fecs_method_push_adr_discover_zcull_image_size_v(); + op.mailbox.ret = &zcull_ctx_image_size; + ret = gr_gk20a_submit_fecs_method_op(g, op); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "query zcull ctx image size failed"); + return ret; + } + op.method.addr = gr_fecs_method_push_adr_discover_pm_image_size_v(); + op.mailbox.ret = &pm_ctx_image_size; + ret = gr_gk20a_submit_fecs_method_op(g, op); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "query pm ctx image size failed"); + return ret; + } + + if (!g->gr.ctx_vars.golden_image_size && + !g->gr.ctx_vars.zcull_ctxsw_image_size) { + g->gr.ctx_vars.golden_image_size = golden_ctx_image_size; + g->gr.ctx_vars.zcull_ctxsw_image_size = zcull_ctx_image_size; + } else { + /* hw is different after railgating? 
*/ + BUG_ON(g->gr.ctx_vars.golden_image_size != golden_ctx_image_size); + BUG_ON(g->gr.ctx_vars.zcull_ctxsw_image_size != zcull_ctx_image_size); + } + + g->gr.ctx_vars.priv_access_map_size = 512 * 1024; + + gk20a_dbg_fn("done"); + return 0; +} + +static void gk20a_gr_destroy_ctx_buffer(struct platform_device *pdev, + struct gr_ctx_buffer_desc *desc) +{ + struct device *dev = &pdev->dev; + gk20a_free_sgtable(&desc->sgt); + dma_free_attrs(dev, desc->size, desc->pages, + desc->iova, &desc->attrs); +} + +static int gk20a_gr_alloc_ctx_buffer(struct platform_device *pdev, + struct gr_ctx_buffer_desc *desc, + size_t size) +{ + struct device *dev = &pdev->dev; + DEFINE_DMA_ATTRS(attrs); + dma_addr_t iova; + int err = 0; + + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); + + desc->pages = dma_alloc_attrs(&pdev->dev, size, &iova, + GFP_KERNEL, &attrs); + if (!desc->pages) + return -ENOMEM; + + desc->iova = iova; + desc->size = size; + desc->attrs = attrs; + desc->destroy = gk20a_gr_destroy_ctx_buffer; + err = gk20a_get_sgtable_from_pages(&pdev->dev, &desc->sgt, desc->pages, + desc->iova, desc->size); + if (err) { + dma_free_attrs(dev, desc->size, desc->pages, + desc->iova, &desc->attrs); + memset(desc, 0, sizeof(*desc)); + } + + return err; +} + +static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g) +{ + struct gk20a_platform *platform = platform_get_drvdata(g->dev); + struct gr_gk20a *gr = &g->gr; + int i, attr_buffer_size, err; + struct platform_device *pdev = g->dev; + + u32 cb_buffer_size = gr->bundle_cb_default_size * + gr_scc_bundle_cb_size_div_256b_byte_granularity_v(); + + u32 pagepool_buffer_size = gr_scc_pagepool_total_pages_hwmax_value_v() * + gr_scc_pagepool_total_pages_byte_granularity_v(); + + gk20a_dbg_fn(""); + + attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g); + + gk20a_dbg_info("cb_buffer_size : %d", cb_buffer_size); + + err = gk20a_gr_alloc_ctx_buffer(pdev, &gr->global_ctx_buffer[CIRCULAR], + cb_buffer_size); + if (err) + goto clean_up; + + if (platform->secure_alloc) + platform->secure_alloc(pdev, + &gr->global_ctx_buffer[CIRCULAR_VPR], + cb_buffer_size); + + gk20a_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size); + + err = gk20a_gr_alloc_ctx_buffer(pdev, &gr->global_ctx_buffer[PAGEPOOL], + pagepool_buffer_size); + if (err) + goto clean_up; + + if (platform->secure_alloc) + platform->secure_alloc(pdev, + &gr->global_ctx_buffer[PAGEPOOL_VPR], + pagepool_buffer_size); + + gk20a_dbg_info("attr_buffer_size : %d", attr_buffer_size); + + err = gk20a_gr_alloc_ctx_buffer(pdev, &gr->global_ctx_buffer[ATTRIBUTE], + attr_buffer_size); + if (err) + goto clean_up; + + if (platform->secure_alloc) + platform->secure_alloc(pdev, + &gr->global_ctx_buffer[ATTRIBUTE_VPR], + attr_buffer_size); + + gk20a_dbg_info("golden_image_size : %d", + gr->ctx_vars.golden_image_size); + + err = gk20a_gr_alloc_ctx_buffer(pdev, + &gr->global_ctx_buffer[GOLDEN_CTX], + gr->ctx_vars.golden_image_size); + if (err) + goto clean_up; + + gk20a_dbg_info("priv_access_map_size : %d", + gr->ctx_vars.priv_access_map_size); + + err = gk20a_gr_alloc_ctx_buffer(pdev, + &gr->global_ctx_buffer[PRIV_ACCESS_MAP], + gr->ctx_vars.priv_access_map_size); + + if (err) + goto clean_up; + + gk20a_dbg_fn("done"); + return 0; + + clean_up: + gk20a_err(dev_from_gk20a(g), "fail"); + for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) { + if (gr->global_ctx_buffer[i].destroy) { + gr->global_ctx_buffer[i].destroy(pdev, + &gr->global_ctx_buffer[i]); + } + } + return -ENOMEM; +} + +static void 
gr_gk20a_free_global_ctx_buffers(struct gk20a *g) +{ + struct platform_device *pdev = g->dev; + struct gr_gk20a *gr = &g->gr; + DEFINE_DMA_ATTRS(attrs); + u32 i; + + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); + + for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) { + gr->global_ctx_buffer[i].destroy(pdev, + &gr->global_ctx_buffer[i]); + } + + gk20a_dbg_fn("done"); +} + +static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, + struct channel_gk20a *c) +{ + struct vm_gk20a *ch_vm = c->vm; + u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; + u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; + struct gr_gk20a *gr = &g->gr; + struct sg_table *sgt; + u64 size; + u64 gpu_va; + u32 i; + gk20a_dbg_fn(""); + + /* Circular Buffer */ + if (!c->vpr || (gr->global_ctx_buffer[CIRCULAR_VPR].sgt == NULL)) { + sgt = gr->global_ctx_buffer[CIRCULAR].sgt; + size = gr->global_ctx_buffer[CIRCULAR].size; + } else { + sgt = gr->global_ctx_buffer[CIRCULAR_VPR].sgt; + size = gr->global_ctx_buffer[CIRCULAR_VPR].size; + } + + gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, + NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, + gk20a_mem_flag_none); + if (!gpu_va) + goto clean_up; + g_bfr_va[CIRCULAR_VA] = gpu_va; + g_bfr_size[CIRCULAR_VA] = size; + + /* Attribute Buffer */ + if (!c->vpr || (gr->global_ctx_buffer[ATTRIBUTE_VPR].sgt == NULL)) { + sgt = gr->global_ctx_buffer[ATTRIBUTE].sgt; + size = gr->global_ctx_buffer[ATTRIBUTE].size; + } else { + sgt = gr->global_ctx_buffer[ATTRIBUTE_VPR].sgt; + size = gr->global_ctx_buffer[ATTRIBUTE_VPR].size; + } + + gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, + NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, + gk20a_mem_flag_none); + if (!gpu_va) + goto clean_up; + g_bfr_va[ATTRIBUTE_VA] = gpu_va; + g_bfr_size[ATTRIBUTE_VA] = size; + + /* Page Pool */ + if (!c->vpr || (gr->global_ctx_buffer[PAGEPOOL_VPR].sgt == NULL)) { + sgt = gr->global_ctx_buffer[PAGEPOOL].sgt; + size = gr->global_ctx_buffer[PAGEPOOL].size; + } else { + sgt = gr->global_ctx_buffer[PAGEPOOL_VPR].sgt; + size = gr->global_ctx_buffer[PAGEPOOL_VPR].size; + } + + gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, + NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, + gk20a_mem_flag_none); + if (!gpu_va) + goto clean_up; + g_bfr_va[PAGEPOOL_VA] = gpu_va; + g_bfr_size[PAGEPOOL_VA] = size; + + /* Golden Image */ + sgt = gr->global_ctx_buffer[GOLDEN_CTX].sgt; + size = gr->global_ctx_buffer[GOLDEN_CTX].size; + gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, 0, + gk20a_mem_flag_none); + if (!gpu_va) + goto clean_up; + g_bfr_va[GOLDEN_CTX_VA] = gpu_va; + g_bfr_size[GOLDEN_CTX_VA] = size; + + /* Priv register Access Map */ + sgt = gr->global_ctx_buffer[PRIV_ACCESS_MAP].sgt; + size = gr->global_ctx_buffer[PRIV_ACCESS_MAP].size; + gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, 0, + gk20a_mem_flag_none); + if (!gpu_va) + goto clean_up; + g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va; + g_bfr_size[PRIV_ACCESS_MAP_VA] = size; + + c->ch_ctx.global_ctx_buffer_mapped = true; + return 0; + + clean_up: + for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { + if (g_bfr_va[i]) { + gk20a_gmmu_unmap(ch_vm, g_bfr_va[i], + gr->global_ctx_buffer[i].size, + gk20a_mem_flag_none); + g_bfr_va[i] = 0; + } + } + return -ENOMEM; +} + +static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c) +{ + struct vm_gk20a *ch_vm = c->vm; + u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; + u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; + u32 i; + + gk20a_dbg_fn(""); + + for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { + if (g_bfr_va[i]) { + gk20a_gmmu_unmap(ch_vm, g_bfr_va[i], + g_bfr_size[i], + 
gk20a_mem_flag_none); + g_bfr_va[i] = 0; + g_bfr_size[i] = 0; + } + } + c->ch_ctx.global_ctx_buffer_mapped = false; +} + +static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, + struct channel_gk20a *c) +{ + struct gr_gk20a *gr = &g->gr; + struct gr_ctx_desc *gr_ctx = &c->ch_ctx.gr_ctx; + struct vm_gk20a *ch_vm = c->vm; + struct device *d = dev_from_gk20a(g); + struct sg_table *sgt; + DEFINE_DMA_ATTRS(attrs); + int err = 0; + dma_addr_t iova; + + gk20a_dbg_fn(""); + + if (gr->ctx_vars.buffer_size == 0) + return 0; + + /* alloc channel gr ctx buffer */ + gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; + gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; + + gr_ctx->size = gr->ctx_vars.buffer_total_size; + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); + gr_ctx->pages = dma_alloc_attrs(d, gr_ctx->size, + &iova, GFP_KERNEL, &attrs); + if (!gr_ctx->pages) + return -ENOMEM; + + gr_ctx->iova = iova; + err = gk20a_get_sgtable_from_pages(d, &sgt, gr_ctx->pages, + gr_ctx->iova, gr_ctx->size); + if (err) + goto err_free; + + gr_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, gr_ctx->size, + NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, + gk20a_mem_flag_none); + if (!gr_ctx->gpu_va) + goto err_free_sgt; + + gk20a_free_sgtable(&sgt); + + return 0; + + err_free_sgt: + gk20a_free_sgtable(&sgt); + err_free: + dma_free_attrs(d, gr_ctx->size, + gr_ctx->pages, gr_ctx->iova, &attrs); + gr_ctx->pages = NULL; + gr_ctx->iova = 0; + + return err; +} + +static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c) +{ + struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + struct vm_gk20a *ch_vm = c->vm; + struct gk20a *g = c->g; + struct device *d = dev_from_gk20a(g); + DEFINE_DMA_ATTRS(attrs); + + gk20a_dbg_fn(""); + + if (!ch_ctx->gr_ctx.gpu_va) + return; + + gk20a_gmmu_unmap(ch_vm, ch_ctx->gr_ctx.gpu_va, + ch_ctx->gr_ctx.size, gk20a_mem_flag_none); + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); + dma_free_attrs(d, ch_ctx->gr_ctx.size, + ch_ctx->gr_ctx.pages, ch_ctx->gr_ctx.iova, &attrs); + ch_ctx->gr_ctx.pages = NULL; + ch_ctx->gr_ctx.iova = 0; +} + +static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, + struct channel_gk20a *c) +{ + struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; + struct device *d = dev_from_gk20a(g); + struct vm_gk20a *ch_vm = c->vm; + DEFINE_DMA_ATTRS(attrs); + struct sg_table *sgt; + int err = 0; + dma_addr_t iova; + + gk20a_dbg_fn(""); + + patch_ctx->size = 128 * sizeof(u32); + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); + patch_ctx->pages = dma_alloc_attrs(d, patch_ctx->size, + &iova, GFP_KERNEL, + &attrs); + if (!patch_ctx->pages) + return -ENOMEM; + + patch_ctx->iova = iova; + err = gk20a_get_sgtable_from_pages(d, &sgt, patch_ctx->pages, + patch_ctx->iova, patch_ctx->size); + if (err) + goto err_free; + + patch_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, patch_ctx->size, + 0, gk20a_mem_flag_none); + if (!patch_ctx->gpu_va) + goto err_free_sgtable; + + gk20a_free_sgtable(&sgt); + + gk20a_dbg_fn("done"); + return 0; + + err_free_sgtable: + gk20a_free_sgtable(&sgt); + err_free: + dma_free_attrs(d, patch_ctx->size, + patch_ctx->pages, patch_ctx->iova, &attrs); + patch_ctx->pages = NULL; + patch_ctx->iova = 0; + gk20a_err(dev_from_gk20a(g), "fail"); + return err; +} + +static void gr_gk20a_unmap_channel_patch_ctx(struct channel_gk20a *c) +{ + struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; + struct vm_gk20a *ch_vm = c->vm; + + gk20a_dbg_fn(""); + + if (patch_ctx->gpu_va) + gk20a_gmmu_unmap(ch_vm, patch_ctx->gpu_va, + patch_ctx->size, 
gk20a_mem_flag_none); + patch_ctx->gpu_va = 0; + patch_ctx->data_count = 0; +} + +static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c) +{ + struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; + struct gk20a *g = c->g; + struct device *d = dev_from_gk20a(g); + DEFINE_DMA_ATTRS(attrs); + + gk20a_dbg_fn(""); + + gr_gk20a_unmap_channel_patch_ctx(c); + + if (patch_ctx->pages) { + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); + dma_free_attrs(d, patch_ctx->size, + patch_ctx->pages, patch_ctx->iova, &attrs); + patch_ctx->pages = NULL; + patch_ctx->iova = 0; + } +} + +void gk20a_free_channel_ctx(struct channel_gk20a *c) +{ + gr_gk20a_unmap_global_ctx_buffers(c); + gr_gk20a_free_channel_patch_ctx(c); + gr_gk20a_free_channel_gr_ctx(c); + + /* zcull_ctx, pm_ctx */ + + memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a)); + + c->num_objects = 0; + c->first_init = false; +} + +static bool gr_gk20a_is_valid_class(struct gk20a *g, u32 class_num) +{ + bool valid = false; + + switch (class_num) { + case KEPLER_COMPUTE_A: + case KEPLER_C: + case FERMI_TWOD_A: + case KEPLER_DMA_COPY_A: + valid = true; + break; + + default: + break; + } + + return valid; +} + +int gk20a_alloc_obj_ctx(struct channel_gk20a *c, + struct nvhost_alloc_obj_ctx_args *args) +{ + struct gk20a *g = c->g; + struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + int err = 0; + + gk20a_dbg_fn(""); + + /* an address space needs to have been bound at this point.*/ + if (!gk20a_channel_as_bound(c)) { + gk20a_err(dev_from_gk20a(g), + "not bound to address space at time" + " of grctx allocation"); + return -EINVAL; + } + + if (!g->ops.gr.is_valid_class(g, args->class_num)) { + gk20a_err(dev_from_gk20a(g), + "invalid obj class 0x%x", args->class_num); + err = -EINVAL; + goto out; + } + + /* allocate gr ctx buffer */ + if (ch_ctx->gr_ctx.pages == NULL) { + err = gr_gk20a_alloc_channel_gr_ctx(g, c); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to allocate gr ctx buffer"); + goto out; + } + c->obj_class = args->class_num; + } else { + /*TBD: needs to be more subtle about which is being allocated + * as some are allowed to be allocated along same channel */ + gk20a_err(dev_from_gk20a(g), + "too many classes alloc'd on same channel"); + err = -EINVAL; + goto out; + } + + /* commit gr ctx buffer */ + err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to commit gr ctx buffer"); + goto out; + } + + /* allocate patch buffer */ + if (ch_ctx->patch_ctx.pages == NULL) { + err = gr_gk20a_alloc_channel_patch_ctx(g, c); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to allocate patch buffer"); + goto out; + } + } + + /* map global buffer to channel gpu_va and commit */ + if (!ch_ctx->global_ctx_buffer_mapped) { + err = gr_gk20a_map_global_ctx_buffers(g, c); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to map global ctx buffer"); + goto out; + } + gr_gk20a_elpg_protected_call(g, + gr_gk20a_commit_global_ctx_buffers(g, c, true)); + } + + /* init golden image, ELPG enabled after this is done */ + err = gr_gk20a_init_golden_ctx_image(g, c); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to init golden ctx image"); + goto out; + } + + /* load golden image */ + if (!c->first_init) { + err = gr_gk20a_elpg_protected_call(g, + gr_gk20a_load_golden_ctx_image(g, c)); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to load golden ctx image"); + goto out; + } + c->first_init = true; + } + gk20a_mm_l2_invalidate(g); + + c->num_objects++; + + gk20a_dbg_fn("done"); + return 
0; +out: + /* 1. gr_ctx, patch_ctx and global ctx buffer mapping + can be reused so no need to release them. + 2. golden image init and load is a one time thing so if + they pass, no need to undo. */ + gk20a_err(dev_from_gk20a(g), "fail"); + return err; +} + +int gk20a_free_obj_ctx(struct channel_gk20a *c, + struct nvhost_free_obj_ctx_args *args) +{ + unsigned long timeout = gk20a_get_gr_idle_timeout(c->g); + + gk20a_dbg_fn(""); + + if (c->num_objects == 0) + return 0; + + c->num_objects--; + + if (c->num_objects == 0) { + c->first_init = false; + gk20a_disable_channel(c, + !c->has_timedout, + timeout); + gr_gk20a_unmap_channel_patch_ctx(c); + } + + return 0; +} + +static void gk20a_remove_gr_support(struct gr_gk20a *gr) +{ + struct gk20a *g = gr->g; + struct device *d = dev_from_gk20a(g); + DEFINE_DMA_ATTRS(attrs); + + gk20a_dbg_fn(""); + + gr_gk20a_free_global_ctx_buffers(g); + + dma_free_coherent(d, gr->mmu_wr_mem.size, + gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova); + gr->mmu_wr_mem.cpuva = NULL; + gr->mmu_wr_mem.iova = 0; + dma_free_coherent(d, gr->mmu_rd_mem.size, + gr->mmu_rd_mem.cpuva, gr->mmu_rd_mem.iova); + gr->mmu_rd_mem.cpuva = NULL; + gr->mmu_rd_mem.iova = 0; + + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); + dma_free_attrs(d, gr->compbit_store.size, gr->compbit_store.pages, + gr->compbit_store.base_iova, &attrs); + + memset(&gr->mmu_wr_mem, 0, sizeof(struct mmu_desc)); + memset(&gr->mmu_rd_mem, 0, sizeof(struct mmu_desc)); + memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc)); + + kfree(gr->gpc_tpc_count); + kfree(gr->gpc_zcb_count); + kfree(gr->gpc_ppc_count); + kfree(gr->pes_tpc_count[0]); + kfree(gr->pes_tpc_count[1]); + kfree(gr->pes_tpc_mask[0]); + kfree(gr->pes_tpc_mask[1]); + kfree(gr->gpc_skip_mask); + kfree(gr->map_tiles); + gr->gpc_tpc_count = NULL; + gr->gpc_zcb_count = NULL; + gr->gpc_ppc_count = NULL; + gr->pes_tpc_count[0] = NULL; + gr->pes_tpc_count[1] = NULL; + gr->pes_tpc_mask[0] = NULL; + gr->pes_tpc_mask[1] = NULL; + gr->gpc_skip_mask = NULL; + gr->map_tiles = NULL; + + kfree(gr->ctx_vars.ucode.fecs.inst.l); + kfree(gr->ctx_vars.ucode.fecs.data.l); + kfree(gr->ctx_vars.ucode.gpccs.inst.l); + kfree(gr->ctx_vars.ucode.gpccs.data.l); + kfree(gr->ctx_vars.sw_bundle_init.l); + kfree(gr->ctx_vars.sw_method_init.l); + kfree(gr->ctx_vars.sw_ctx_load.l); + kfree(gr->ctx_vars.sw_non_ctx_load.l); + kfree(gr->ctx_vars.ctxsw_regs.sys.l); + kfree(gr->ctx_vars.ctxsw_regs.gpc.l); + kfree(gr->ctx_vars.ctxsw_regs.tpc.l); + kfree(gr->ctx_vars.ctxsw_regs.zcull_gpc.l); + kfree(gr->ctx_vars.ctxsw_regs.ppc.l); + kfree(gr->ctx_vars.ctxsw_regs.pm_sys.l); + kfree(gr->ctx_vars.ctxsw_regs.pm_gpc.l); + kfree(gr->ctx_vars.ctxsw_regs.pm_tpc.l); + + kfree(gr->ctx_vars.local_golden_image); + gr->ctx_vars.local_golden_image = NULL; + + gk20a_allocator_destroy(&gr->comp_tags); +} + +static void gr_gk20a_bundle_cb_defaults(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + + gr->bundle_cb_default_size = + gr_scc_bundle_cb_size_div_256b__prod_v(); + gr->min_gpm_fifo_depth = + gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); + gr->bundle_cb_token_limit = + gr_pd_ab_dist_cfg2_token_limit_init_v(); +} + +static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) +{ + u32 gpc_index, pes_index; + u32 pes_tpc_mask; + u32 pes_tpc_count; + u32 pes_heavy_index; + u32 gpc_new_skip_mask; + u32 tmp; + + tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r()); + gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp); + + tmp = gk20a_readl(g, top_num_gpcs_r()); + 
gr->max_gpc_count = top_num_gpcs_value_v(tmp); + + tmp = gk20a_readl(g, top_num_fbps_r()); + gr->max_fbps_count = top_num_fbps_value_v(tmp); + + tmp = gk20a_readl(g, top_tpc_per_gpc_r()); + gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp); + + gr->max_tpc_count = gr->max_gpc_count * gr->max_tpc_per_gpc_count; + + tmp = gk20a_readl(g, top_num_fbps_r()); + gr->sys_count = top_num_fbps_value_v(tmp); + + tmp = gk20a_readl(g, pri_ringmaster_enum_gpc_r()); + gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp); + + gr->pe_count_per_gpc = proj_scal_litter_num_pes_per_gpc_v(); + gr->max_zcull_per_gpc_count = proj_scal_litter_num_zcull_banks_v(); + + if (!gr->gpc_count) { + gk20a_err(dev_from_gk20a(g), "gpc_count==0!"); + goto clean_up; + } + + gr->gpc_tpc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); + gr->gpc_zcb_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); + gr->gpc_ppc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); + gr->pes_tpc_count[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); + gr->pes_tpc_count[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); + gr->pes_tpc_mask[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); + gr->pes_tpc_mask[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); + gr->gpc_skip_mask = + kzalloc(gr_pd_dist_skip_table__size_1_v() * 4 * sizeof(u32), + GFP_KERNEL); + + if (!gr->gpc_tpc_count || !gr->gpc_zcb_count || !gr->gpc_ppc_count || + !gr->pes_tpc_count[0] || !gr->pes_tpc_count[1] || + !gr->pes_tpc_mask[0] || !gr->pes_tpc_mask[1] || !gr->gpc_skip_mask) + goto clean_up; + + gr->ppc_count = 0; + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r()); + + gr->gpc_tpc_count[gpc_index] = + gr_gpc0_fs_gpc_num_available_tpcs_v(tmp); + gr->tpc_count += gr->gpc_tpc_count[gpc_index]; + + gr->gpc_zcb_count[gpc_index] = + gr_gpc0_fs_gpc_num_available_zculls_v(tmp); + gr->zcb_count += gr->gpc_zcb_count[gpc_index]; + + gr->gpc_ppc_count[gpc_index] = gr->pe_count_per_gpc; + gr->ppc_count += gr->gpc_ppc_count[gpc_index]; + for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) { + + tmp = gk20a_readl(g, + gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) + + gpc_index * proj_gpc_stride_v()); + + pes_tpc_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp); + pes_tpc_count = count_bits(pes_tpc_mask); + + gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count; + gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask; + } + + gpc_new_skip_mask = 0; + if (gr->pes_tpc_count[0][gpc_index] + + gr->pes_tpc_count[1][gpc_index] == 5) { + pes_heavy_index = + gr->pes_tpc_count[0][gpc_index] > + gr->pes_tpc_count[1][gpc_index] ? 0 : 1; + + gpc_new_skip_mask = + gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^ + (gr->pes_tpc_mask[pes_heavy_index][gpc_index] & + (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1)); + + } else if ((gr->pes_tpc_count[0][gpc_index] + + gr->pes_tpc_count[1][gpc_index] == 4) && + (gr->pes_tpc_count[0][gpc_index] != + gr->pes_tpc_count[1][gpc_index])) { + pes_heavy_index = + gr->pes_tpc_count[0][gpc_index] > + gr->pes_tpc_count[1][gpc_index] ? 
0 : 1; + + gpc_new_skip_mask = + gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^ + (gr->pes_tpc_mask[pes_heavy_index][gpc_index] & + (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1)); + } + gr->gpc_skip_mask[gpc_index] = gpc_new_skip_mask; + } + + gk20a_dbg_info("fbps: %d", gr->num_fbps); + gk20a_dbg_info("max_gpc_count: %d", gr->max_gpc_count); + gk20a_dbg_info("max_fbps_count: %d", gr->max_fbps_count); + gk20a_dbg_info("max_tpc_per_gpc_count: %d", gr->max_tpc_per_gpc_count); + gk20a_dbg_info("max_zcull_per_gpc_count: %d", gr->max_zcull_per_gpc_count); + gk20a_dbg_info("max_tpc_count: %d", gr->max_tpc_count); + gk20a_dbg_info("sys_count: %d", gr->sys_count); + gk20a_dbg_info("gpc_count: %d", gr->gpc_count); + gk20a_dbg_info("pe_count_per_gpc: %d", gr->pe_count_per_gpc); + gk20a_dbg_info("tpc_count: %d", gr->tpc_count); + gk20a_dbg_info("ppc_count: %d", gr->ppc_count); + + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) + gk20a_dbg_info("gpc_tpc_count[%d] : %d", + gpc_index, gr->gpc_tpc_count[gpc_index]); + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) + gk20a_dbg_info("gpc_zcb_count[%d] : %d", + gpc_index, gr->gpc_zcb_count[gpc_index]); + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) + gk20a_dbg_info("gpc_ppc_count[%d] : %d", + gpc_index, gr->gpc_ppc_count[gpc_index]); + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) + gk20a_dbg_info("gpc_skip_mask[%d] : %d", + gpc_index, gr->gpc_skip_mask[gpc_index]); + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) + for (pes_index = 0; + pes_index < gr->pe_count_per_gpc; + pes_index++) + gk20a_dbg_info("pes_tpc_count[%d][%d] : %d", + pes_index, gpc_index, + gr->pes_tpc_count[pes_index][gpc_index]); + + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) + for (pes_index = 0; + pes_index < gr->pe_count_per_gpc; + pes_index++) + gk20a_dbg_info("pes_tpc_mask[%d][%d] : %d", + pes_index, gpc_index, + gr->pes_tpc_mask[pes_index][gpc_index]); + + g->ops.gr.bundle_cb_defaults(g); + g->ops.gr.cb_size_default(g); + g->ops.gr.calc_global_ctx_buffer_size(g); + gr->timeslice_mode = gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v(); + + gk20a_dbg_info("bundle_cb_default_size: %d", + gr->bundle_cb_default_size); + gk20a_dbg_info("min_gpm_fifo_depth: %d", gr->min_gpm_fifo_depth); + gk20a_dbg_info("bundle_cb_token_limit: %d", gr->bundle_cb_token_limit); + gk20a_dbg_info("attrib_cb_default_size: %d", + gr->attrib_cb_default_size); + gk20a_dbg_info("attrib_cb_size: %d", gr->attrib_cb_size); + gk20a_dbg_info("alpha_cb_default_size: %d", gr->alpha_cb_default_size); + gk20a_dbg_info("alpha_cb_size: %d", gr->alpha_cb_size); + gk20a_dbg_info("timeslice_mode: %d", gr->timeslice_mode); + + return 0; + +clean_up: + return -ENOMEM; +} + +static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr) +{ + struct device *d = dev_from_gk20a(g); + dma_addr_t iova; + + gr->mmu_wr_mem_size = gr->mmu_rd_mem_size = 0x1000; + + gr->mmu_wr_mem.size = gr->mmu_wr_mem_size; + gr->mmu_wr_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_wr_mem_size, + &iova, GFP_KERNEL); + if (!gr->mmu_wr_mem.cpuva) + goto err; + + gr->mmu_wr_mem.iova = iova; + + gr->mmu_rd_mem.size = gr->mmu_rd_mem_size; + gr->mmu_rd_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_rd_mem_size, + &iova, GFP_KERNEL); + if (!gr->mmu_rd_mem.cpuva) + goto err_free_wr_mem; + + gr->mmu_rd_mem.iova = iova; + return 0; + + err_free_wr_mem: + dma_free_coherent(d, gr->mmu_wr_mem.size, + gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova); + gr->mmu_wr_mem.cpuva = NULL; + 
gr->mmu_wr_mem.iova = 0; + err: + return -ENOMEM; +} + +static u32 prime_set[18] = { + 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 }; + +static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr) +{ + s32 comm_denom; + s32 mul_factor; + s32 *init_frac = NULL; + s32 *init_err = NULL; + s32 *run_err = NULL; + s32 *sorted_num_tpcs = NULL; + s32 *sorted_to_unsorted_gpc_map = NULL; + u32 gpc_index; + u32 gpc_mark = 0; + u32 num_tpc; + u32 max_tpc_count = 0; + u32 swap; + u32 tile_count; + u32 index; + bool delete_map = false; + bool gpc_sorted; + int ret = 0; + + init_frac = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL); + init_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL); + run_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL); + sorted_num_tpcs = + kzalloc(proj_scal_max_gpcs_v() * + proj_scal_max_tpc_per_gpc_v() * sizeof(s32), + GFP_KERNEL); + sorted_to_unsorted_gpc_map = + kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL); + + if (!(init_frac && init_err && run_err && sorted_num_tpcs && + sorted_to_unsorted_gpc_map)) { + ret = -ENOMEM; + goto clean_up; + } + + gr->map_row_offset = INVALID_SCREEN_TILE_ROW_OFFSET; + + if (gr->tpc_count == 3) + gr->map_row_offset = 2; + else if (gr->tpc_count < 3) + gr->map_row_offset = 1; + else { + gr->map_row_offset = 3; + + for (index = 1; index < 18; index++) { + u32 prime = prime_set[index]; + if ((gr->tpc_count % prime) != 0) { + gr->map_row_offset = prime; + break; + } + } + } + + switch (gr->tpc_count) { + case 15: + gr->map_row_offset = 6; + break; + case 14: + gr->map_row_offset = 5; + break; + case 13: + gr->map_row_offset = 2; + break; + case 11: + gr->map_row_offset = 7; + break; + case 10: + gr->map_row_offset = 6; + break; + case 7: + case 5: + gr->map_row_offset = 1; + break; + default: + break; + } + + if (gr->map_tiles) { + if (gr->map_tile_count != gr->tpc_count) + delete_map = true; + + for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) { + if ((u32)gr->map_tiles[tile_count] >= gr->tpc_count) + delete_map = true; + } + + if (delete_map) { + kfree(gr->map_tiles); + gr->map_tiles = NULL; + gr->map_tile_count = 0; + } + } + + if (gr->map_tiles == NULL) { + gr->map_tile_count = proj_scal_max_gpcs_v(); + + gr->map_tiles = kzalloc(proj_scal_max_gpcs_v() * sizeof(u8), GFP_KERNEL); + if (gr->map_tiles == NULL) { + ret = -ENOMEM; + goto clean_up; + } + + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + sorted_num_tpcs[gpc_index] = gr->gpc_tpc_count[gpc_index]; + sorted_to_unsorted_gpc_map[gpc_index] = gpc_index; + } + + gpc_sorted = false; + while (!gpc_sorted) { + gpc_sorted = true; + for (gpc_index = 0; gpc_index < gr->gpc_count - 1; gpc_index++) { + if (sorted_num_tpcs[gpc_index + 1] > sorted_num_tpcs[gpc_index]) { + gpc_sorted = false; + swap = sorted_num_tpcs[gpc_index]; + sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1]; + sorted_num_tpcs[gpc_index + 1] = swap; + swap = sorted_to_unsorted_gpc_map[gpc_index]; + sorted_to_unsorted_gpc_map[gpc_index] = + sorted_to_unsorted_gpc_map[gpc_index + 1]; + sorted_to_unsorted_gpc_map[gpc_index + 1] = swap; + } + } + } + + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) + if (gr->gpc_tpc_count[gpc_index] > max_tpc_count) + max_tpc_count = gr->gpc_tpc_count[gpc_index]; + + mul_factor = gr->gpc_count * max_tpc_count; + if (mul_factor & 0x1) + mul_factor = 2; + else + mul_factor = 1; + + comm_denom = gr->gpc_count * max_tpc_count * mul_factor; + + for (gpc_index = 0; 
gpc_index < gr->gpc_count; gpc_index++) { + num_tpc = sorted_num_tpcs[gpc_index]; + + init_frac[gpc_index] = num_tpc * gr->gpc_count * mul_factor; + + if (num_tpc != 0) + init_err[gpc_index] = gpc_index * max_tpc_count * mul_factor - comm_denom/2; + else + init_err[gpc_index] = 0; + + run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index]; + } + + while (gpc_mark < gr->tpc_count) { + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + if ((run_err[gpc_index] * 2) >= comm_denom) { + gr->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index]; + run_err[gpc_index] += init_frac[gpc_index] - comm_denom; + } else + run_err[gpc_index] += init_frac[gpc_index]; + } + } + } + +clean_up: + kfree(init_frac); + kfree(init_err); + kfree(run_err); + kfree(sorted_num_tpcs); + kfree(sorted_to_unsorted_gpc_map); + + if (ret) + gk20a_err(dev_from_gk20a(g), "fail"); + else + gk20a_dbg_fn("done"); + + return ret; +} + +static int gr_gk20a_init_zcull(struct gk20a *g, struct gr_gk20a *gr) +{ + struct gr_zcull_gk20a *zcull = &gr->zcull; + + zcull->aliquot_width = gr->tpc_count * 16; + zcull->aliquot_height = 16; + + zcull->width_align_pixels = gr->tpc_count * 16; + zcull->height_align_pixels = 32; + + zcull->aliquot_size = + zcull->aliquot_width * zcull->aliquot_height; + + /* assume no floor sweeping since we only have 1 tpc in 1 gpc */ + zcull->pixel_squares_by_aliquots = + gr->zcb_count * 16 * 16 * gr->tpc_count / + (gr->gpc_count * gr->gpc_tpc_count[0]); + + zcull->total_aliquots = + gr_gpc0_zcull_total_ram_size_num_aliquots_f( + gk20a_readl(g, gr_gpc0_zcull_total_ram_size_r())); + + return 0; +} + +u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr) +{ + /* assuming gr has already been initialized */ + return gr->ctx_vars.zcull_ctxsw_image_size; +} + +int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, + struct channel_gk20a *c, u64 zcull_va, u32 mode) +{ + struct zcull_ctx_desc *zcull_ctx = &c->ch_ctx.zcull_ctx; + + zcull_ctx->ctx_sw_mode = mode; + zcull_ctx->gpu_va = zcull_va; + + /* TBD: don't disable channel in sw method processing */ + return gr_gk20a_ctx_zcull_setup(g, c, true); +} + +int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, + struct gr_zcull_info *zcull_params) +{ + struct gr_zcull_gk20a *zcull = &gr->zcull; + + zcull_params->width_align_pixels = zcull->width_align_pixels; + zcull_params->height_align_pixels = zcull->height_align_pixels; + zcull_params->pixel_squares_by_aliquots = + zcull->pixel_squares_by_aliquots; + zcull_params->aliquot_total = zcull->total_aliquots; + + zcull_params->region_byte_multiplier = + gr->gpc_count * gr_zcull_bytes_per_aliquot_per_gpu_v(); + zcull_params->region_header_size = + proj_scal_litter_num_gpcs_v() * + gr_zcull_save_restore_header_bytes_per_gpc_v(); + + zcull_params->subregion_header_size = + proj_scal_litter_num_gpcs_v() * + gr_zcull_save_restore_subregion_header_bytes_per_gpc_v(); + + zcull_params->subregion_width_align_pixels = + gr->tpc_count * gr_gpc0_zcull_zcsize_width_subregion__multiple_v(); + zcull_params->subregion_height_align_pixels = + gr_gpc0_zcull_zcsize_height_subregion__multiple_v(); + zcull_params->subregion_count = gr_zcull_subregion_qty_v(); + + return 0; +} + +static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, + struct zbc_entry *color_val, u32 index) +{ + struct fifo_gk20a *f = &g->fifo; + struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A; + u32 i; + unsigned long end_jiffies = jiffies + + 
msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + u32 ret; + + ret = gk20a_fifo_disable_engine_activity(g, gr_info, true); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "failed to disable gr engine activity\n"); + return ret; + } + + ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "failed to idle graphics\n"); + goto clean_up; + } + + /* update l2 table */ + g->ops.ltc.set_zbc_color_entry(g, color_val, index); + + /* update ds table */ + gk20a_writel(g, gr_ds_zbc_color_r_r(), + gr_ds_zbc_color_r_val_f(color_val->color_ds[0])); + gk20a_writel(g, gr_ds_zbc_color_g_r(), + gr_ds_zbc_color_g_val_f(color_val->color_ds[1])); + gk20a_writel(g, gr_ds_zbc_color_b_r(), + gr_ds_zbc_color_b_val_f(color_val->color_ds[2])); + gk20a_writel(g, gr_ds_zbc_color_a_r(), + gr_ds_zbc_color_a_val_f(color_val->color_ds[3])); + + gk20a_writel(g, gr_ds_zbc_color_fmt_r(), + gr_ds_zbc_color_fmt_val_f(color_val->format)); + + gk20a_writel(g, gr_ds_zbc_tbl_index_r(), + gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE)); + + /* trigger the write */ + gk20a_writel(g, gr_ds_zbc_tbl_ld_r(), + gr_ds_zbc_tbl_ld_select_c_f() | + gr_ds_zbc_tbl_ld_action_write_f() | + gr_ds_zbc_tbl_ld_trigger_active_f()); + + /* update local copy */ + for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { + gr->zbc_col_tbl[index].color_l2[i] = color_val->color_l2[i]; + gr->zbc_col_tbl[index].color_ds[i] = color_val->color_ds[i]; + } + gr->zbc_col_tbl[index].format = color_val->format; + gr->zbc_col_tbl[index].ref_cnt++; + +clean_up: + ret = gk20a_fifo_enable_engine_activity(g, gr_info); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "failed to enable gr engine activity\n"); + } + + return ret; +} + +static int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, + struct zbc_entry *depth_val, u32 index) +{ + struct fifo_gk20a *f = &g->fifo; + struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A; + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + u32 ret; + + ret = gk20a_fifo_disable_engine_activity(g, gr_info, true); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "failed to disable gr engine activity\n"); + return ret; + } + + ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "failed to idle graphics\n"); + goto clean_up; + } + + /* update l2 table */ + g->ops.ltc.set_zbc_depth_entry(g, depth_val, index); + + /* update ds table */ + gk20a_writel(g, gr_ds_zbc_z_r(), + gr_ds_zbc_z_val_f(depth_val->depth)); + + gk20a_writel(g, gr_ds_zbc_z_fmt_r(), + gr_ds_zbc_z_fmt_val_f(depth_val->format)); + + gk20a_writel(g, gr_ds_zbc_tbl_index_r(), + gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE)); + + /* trigger the write */ + gk20a_writel(g, gr_ds_zbc_tbl_ld_r(), + gr_ds_zbc_tbl_ld_select_z_f() | + gr_ds_zbc_tbl_ld_action_write_f() | + gr_ds_zbc_tbl_ld_trigger_active_f()); + + /* update local copy */ + gr->zbc_dep_tbl[index].depth = depth_val->depth; + gr->zbc_dep_tbl[index].format = depth_val->format; + gr->zbc_dep_tbl[index].ref_cnt++; + +clean_up: + ret = gk20a_fifo_enable_engine_activity(g, gr_info); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "failed to enable gr engine activity\n"); + } + + return ret; +} + +int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr, + struct zbc_entry *zbc_val) +{ + struct zbc_color_table *c_tbl; + struct zbc_depth_table *d_tbl; + u32 i, ret = -ENOMEM; + bool added = false; + u32 entries; + + /* no endian swap ? 
*/ + + switch (zbc_val->type) { + case GK20A_ZBC_TYPE_COLOR: + /* search existing tables */ + for (i = 0; i < gr->max_used_color_index; i++) { + + c_tbl = &gr->zbc_col_tbl[i]; + + if (c_tbl->ref_cnt && c_tbl->format == zbc_val->format && + memcmp(c_tbl->color_ds, zbc_val->color_ds, + sizeof(zbc_val->color_ds)) == 0) { + + if (memcmp(c_tbl->color_l2, zbc_val->color_l2, + sizeof(zbc_val->color_l2))) { + gk20a_err(dev_from_gk20a(g), + "zbc l2 and ds color don't match with existing entries"); + return -EINVAL; + } + added = true; + c_tbl->ref_cnt++; + ret = 0; + break; + } + } + /* add new table */ + if (!added && + gr->max_used_color_index < GK20A_ZBC_TABLE_SIZE) { + + c_tbl = + &gr->zbc_col_tbl[gr->max_used_color_index]; + WARN_ON(c_tbl->ref_cnt != 0); + + ret = gr_gk20a_add_zbc_color(g, gr, + zbc_val, gr->max_used_color_index); + + if (!ret) + gr->max_used_color_index++; + } + break; + case GK20A_ZBC_TYPE_DEPTH: + /* search existing tables */ + for (i = 0; i < gr->max_used_depth_index; i++) { + + d_tbl = &gr->zbc_dep_tbl[i]; + + if (d_tbl->ref_cnt && + d_tbl->depth == zbc_val->depth && + d_tbl->format == zbc_val->format) { + added = true; + d_tbl->ref_cnt++; + ret = 0; + break; + } + } + /* add new table */ + if (!added && + gr->max_used_depth_index < GK20A_ZBC_TABLE_SIZE) { + + d_tbl = + &gr->zbc_dep_tbl[gr->max_used_depth_index]; + WARN_ON(d_tbl->ref_cnt != 0); + + ret = gr_gk20a_add_zbc_depth(g, gr, + zbc_val, gr->max_used_depth_index); + + if (!ret) + gr->max_used_depth_index++; + } + break; + default: + gk20a_err(dev_from_gk20a(g), + "invalid zbc table type %d", zbc_val->type); + return -EINVAL; + } + + if (!added && ret == 0) { + /* update zbc for elpg only when new entry is added */ + entries = max(gr->max_used_color_index, + gr->max_used_depth_index); + gk20a_pmu_save_zbc(g, entries); + } + + return ret; +} + +int gr_gk20a_clear_zbc_table(struct gk20a *g, struct gr_gk20a *gr) +{ + struct fifo_gk20a *f = &g->fifo; + struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A; + u32 i, j; + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + u32 ret; + + ret = gk20a_fifo_disable_engine_activity(g, gr_info, true); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "failed to disable gr engine activity\n"); + return ret; + } + + ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "failed to idle graphics\n"); + goto clean_up; + } + + for (i = 0; i < GK20A_ZBC_TABLE_SIZE; i++) { + gr->zbc_col_tbl[i].format = 0; + gr->zbc_col_tbl[i].ref_cnt = 0; + + gk20a_writel(g, gr_ds_zbc_color_fmt_r(), + gr_ds_zbc_color_fmt_val_invalid_f()); + gk20a_writel(g, gr_ds_zbc_tbl_index_r(), + gr_ds_zbc_tbl_index_val_f(i + GK20A_STARTOF_ZBC_TABLE)); + + /* trigger the write */ + gk20a_writel(g, gr_ds_zbc_tbl_ld_r(), + gr_ds_zbc_tbl_ld_select_c_f() | + gr_ds_zbc_tbl_ld_action_write_f() | + gr_ds_zbc_tbl_ld_trigger_active_f()); + + /* clear l2 table */ + g->ops.ltc.clear_zbc_color_entry(g, i); + + for (j = 0; j < GK20A_ZBC_COLOR_VALUE_SIZE; j++) { + gr->zbc_col_tbl[i].color_l2[j] = 0; + gr->zbc_col_tbl[i].color_ds[j] = 0; + } + } + gr->max_used_color_index = 0; + gr->max_default_color_index = 0; + + for (i = 0; i < GK20A_ZBC_TABLE_SIZE; i++) { + gr->zbc_dep_tbl[i].depth = 0; + gr->zbc_dep_tbl[i].format = 0; + gr->zbc_dep_tbl[i].ref_cnt = 0; + + gk20a_writel(g, gr_ds_zbc_z_fmt_r(), + gr_ds_zbc_z_fmt_val_invalid_f()); + gk20a_writel(g, gr_ds_zbc_tbl_index_r(), + gr_ds_zbc_tbl_index_val_f(i + 
GK20A_STARTOF_ZBC_TABLE)); + + /* trigger the write */ + gk20a_writel(g, gr_ds_zbc_tbl_ld_r(), + gr_ds_zbc_tbl_ld_select_z_f() | + gr_ds_zbc_tbl_ld_action_write_f() | + gr_ds_zbc_tbl_ld_trigger_active_f()); + + /* clear l2 table */ + g->ops.ltc.clear_zbc_depth_entry(g, i); + } + gr->max_used_depth_index = 0; + gr->max_default_depth_index = 0; + +clean_up: + ret = gk20a_fifo_enable_engine_activity(g, gr_info); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "failed to enable gr engine activity\n"); + } + + /* elpg stuff */ + + return ret; +} + +/* get a zbc table entry specified by index + * return table size when type is invalid */ +int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr, + struct zbc_query_params *query_params) +{ + u32 index = query_params->index_size; + u32 i; + + switch (query_params->type) { + case GK20A_ZBC_TYPE_INVALID: + query_params->index_size = GK20A_ZBC_TABLE_SIZE; + break; + case GK20A_ZBC_TYPE_COLOR: + if (index >= GK20A_ZBC_TABLE_SIZE) { + gk20a_err(dev_from_gk20a(g), + "invalid zbc color table index\n"); + return -EINVAL; + } + for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { + query_params->color_l2[i] = + gr->zbc_col_tbl[index].color_l2[i]; + query_params->color_ds[i] = + gr->zbc_col_tbl[index].color_ds[i]; + } + query_params->format = gr->zbc_col_tbl[index].format; + query_params->ref_cnt = gr->zbc_col_tbl[index].ref_cnt; + break; + case GK20A_ZBC_TYPE_DEPTH: + if (index >= GK20A_ZBC_TABLE_SIZE) { + gk20a_err(dev_from_gk20a(g), + "invalid zbc depth table index\n"); + return -EINVAL; + } + query_params->depth = gr->zbc_dep_tbl[index].depth; + query_params->format = gr->zbc_dep_tbl[index].format; + query_params->ref_cnt = gr->zbc_dep_tbl[index].ref_cnt; + break; + default: + gk20a_err(dev_from_gk20a(g), + "invalid zbc table type\n"); + return -EINVAL; + } + + return 0; +} + +int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr) +{ + struct zbc_entry zbc_val; + u32 i, err; + + /* load default color table */ + zbc_val.type = GK20A_ZBC_TYPE_COLOR; + + zbc_val.format = gr_ds_zbc_color_fmt_val_zero_v(); + for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { + zbc_val.color_ds[i] = 0; + zbc_val.color_l2[i] = 0; + } + err = gr_gk20a_add_zbc(g, gr, &zbc_val); + + zbc_val.format = gr_ds_zbc_color_fmt_val_unorm_one_v(); + for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { + zbc_val.color_ds[i] = 0xffffffff; + zbc_val.color_l2[i] = 0x3f800000; + } + err |= gr_gk20a_add_zbc(g, gr, &zbc_val); + + zbc_val.format = gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v(); + for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { + zbc_val.color_ds[i] = 0; + zbc_val.color_l2[i] = 0; + } + err |= gr_gk20a_add_zbc(g, gr, &zbc_val); + + zbc_val.format = gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v(); + for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { + zbc_val.color_ds[i] = 0x3f800000; + zbc_val.color_l2[i] = 0x3f800000; + } + err |= gr_gk20a_add_zbc(g, gr, &zbc_val); + + if (!err) + gr->max_default_color_index = 4; + else { + gk20a_err(dev_from_gk20a(g), + "fail to load default zbc color table\n"); + return err; + } + + /* load default depth table */ + zbc_val.type = GK20A_ZBC_TYPE_DEPTH; + + zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v(); + zbc_val.depth = 0; + err = gr_gk20a_add_zbc(g, gr, &zbc_val); + + zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v(); + zbc_val.depth = 0x3f800000; + err |= gr_gk20a_add_zbc(g, gr, &zbc_val); + + if (!err) + gr->max_default_depth_index = 2; + else { + gk20a_err(dev_from_gk20a(g), + "fail to load default zbc depth table\n"); + 
return err; + } + + return 0; +} + +int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr, + struct zbc_entry *zbc_val) +{ + gk20a_dbg_fn(""); + + return gr_gk20a_elpg_protected_call(g, + gr_gk20a_add_zbc(g, gr, zbc_val)); +} + +void gr_gk20a_init_blcg_mode(struct gk20a *g, u32 mode, u32 engine) +{ + u32 gate_ctrl; + + gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine)); + + switch (mode) { + case BLCG_RUN: + gate_ctrl = set_field(gate_ctrl, + therm_gate_ctrl_blk_clk_m(), + therm_gate_ctrl_blk_clk_run_f()); + break; + case BLCG_AUTO: + gate_ctrl = set_field(gate_ctrl, + therm_gate_ctrl_blk_clk_m(), + therm_gate_ctrl_blk_clk_auto_f()); + break; + default: + gk20a_err(dev_from_gk20a(g), + "invalid blcg mode %d", mode); + return; + } + + gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl); +} + +void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine) +{ + u32 gate_ctrl, idle_filter; + + gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine)); + + switch (mode) { + case ELCG_RUN: + gate_ctrl = set_field(gate_ctrl, + therm_gate_ctrl_eng_clk_m(), + therm_gate_ctrl_eng_clk_run_f()); + gate_ctrl = set_field(gate_ctrl, + therm_gate_ctrl_eng_pwr_m(), + /* set elpg to auto to meet hw expectation */ + therm_gate_ctrl_eng_pwr_auto_f()); + break; + case ELCG_STOP: + gate_ctrl = set_field(gate_ctrl, + therm_gate_ctrl_eng_clk_m(), + therm_gate_ctrl_eng_clk_stop_f()); + break; + case ELCG_AUTO: + gate_ctrl = set_field(gate_ctrl, + therm_gate_ctrl_eng_clk_m(), + therm_gate_ctrl_eng_clk_auto_f()); + break; + default: + gk20a_err(dev_from_gk20a(g), + "invalid elcg mode %d", mode); + } + + if (tegra_platform_is_linsim()) { + gate_ctrl = set_field(gate_ctrl, + therm_gate_ctrl_eng_delay_after_m(), + therm_gate_ctrl_eng_delay_after_f(4)); + } + + /* 2 * (1 << 9) = 1024 clks */ + gate_ctrl = set_field(gate_ctrl, + therm_gate_ctrl_eng_idle_filt_exp_m(), + therm_gate_ctrl_eng_idle_filt_exp_f(9)); + gate_ctrl = set_field(gate_ctrl, + therm_gate_ctrl_eng_idle_filt_mant_m(), + therm_gate_ctrl_eng_idle_filt_mant_f(2)); + gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl); + + /* default fecs_idle_filter to 0 */ + idle_filter = gk20a_readl(g, therm_fecs_idle_filter_r()); + idle_filter &= ~therm_fecs_idle_filter_value_m(); + gk20a_writel(g, therm_fecs_idle_filter_r(), idle_filter); + /* default hubmmu_idle_filter to 0 */ + idle_filter = gk20a_readl(g, therm_hubmmu_idle_filter_r()); + idle_filter &= ~therm_hubmmu_idle_filter_value_m(); + gk20a_writel(g, therm_hubmmu_idle_filter_r(), idle_filter); +} + +static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) +{ + u32 gpc_index, gpc_tpc_count, gpc_zcull_count; + u32 *zcull_map_tiles, *zcull_bank_counters; + u32 map_counter; + u32 rcp_conserv; + u32 offset; + bool floorsweep = false; + + if (!gr->map_tiles) + return -1; + + zcull_map_tiles = kzalloc(proj_scal_max_gpcs_v() * + proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL); + if (!zcull_map_tiles) { + gk20a_err(dev_from_gk20a(g), + "failed to allocate zcull temp buffers"); + return -ENOMEM; + } + zcull_bank_counters = kzalloc(proj_scal_max_gpcs_v() * + proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL); + + if (!zcull_bank_counters) { + gk20a_err(dev_from_gk20a(g), + "failed to allocate zcull temp buffers"); + kfree(zcull_map_tiles); + return -ENOMEM; + } + + for (map_counter = 0; map_counter < gr->tpc_count; map_counter++) { + zcull_map_tiles[map_counter] = + zcull_bank_counters[gr->map_tiles[map_counter]]; + zcull_bank_counters[gr->map_tiles[map_counter]]++; + } + + 
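/* each entry of zcull_map_tiles[] now holds the SM number of that screen tile within its assigned GPC's zcull banks; the map0..map3 registers below pack these per-tile numbers eight to a register */ +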
gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(), + gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(zcull_map_tiles[0]) | + gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(zcull_map_tiles[1]) | + gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(zcull_map_tiles[2]) | + gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(zcull_map_tiles[3]) | + gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(zcull_map_tiles[4]) | + gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(zcull_map_tiles[5]) | + gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(zcull_map_tiles[6]) | + gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(zcull_map_tiles[7])); + + gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(), + gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(zcull_map_tiles[8]) | + gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(zcull_map_tiles[9]) | + gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(zcull_map_tiles[10]) | + gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(zcull_map_tiles[11]) | + gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(zcull_map_tiles[12]) | + gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(zcull_map_tiles[13]) | + gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(zcull_map_tiles[14]) | + gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(zcull_map_tiles[15])); + + gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(), + gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(zcull_map_tiles[16]) | + gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(zcull_map_tiles[17]) | + gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(zcull_map_tiles[18]) | + gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(zcull_map_tiles[19]) | + gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(zcull_map_tiles[20]) | + gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(zcull_map_tiles[21]) | + gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(zcull_map_tiles[22]) | + gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(zcull_map_tiles[23])); + + gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(), + gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(zcull_map_tiles[24]) | + gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(zcull_map_tiles[25]) | + gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(zcull_map_tiles[26]) | + gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(zcull_map_tiles[27]) | + gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(zcull_map_tiles[28]) | + gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(zcull_map_tiles[29]) | + gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(zcull_map_tiles[30]) | + gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(zcull_map_tiles[31])); + + kfree(zcull_map_tiles); + kfree(zcull_bank_counters); + + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + gpc_tpc_count = gr->gpc_tpc_count[gpc_index]; + gpc_zcull_count = gr->gpc_zcb_count[gpc_index]; + + if (gpc_zcull_count != gr->max_zcull_per_gpc_count && + gpc_zcull_count < gpc_tpc_count) { + gk20a_err(dev_from_gk20a(g), + "zcull_banks (%d) less than tpcs (%d) for gpc (%d)", + gpc_zcull_count, gpc_tpc_count, gpc_index); + return -EINVAL; + } + if (gpc_zcull_count != gr->max_zcull_per_gpc_count && + gpc_zcull_count != 0) + floorsweep = true; + } + + /* 1.0f / 1.0f * gr_gpc0_zcull_sm_num_rcp_conservative__max_v() */ + rcp_conserv = gr_gpc0_zcull_sm_num_rcp_conservative__max_v(); + + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + offset = gpc_index * proj_gpc_stride_v(); + + if (floorsweep) { + gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset, + gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) | + gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f( + gr->max_zcull_per_gpc_count)); + } else { 
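+ /* no zcull floorsweeping detected: program this GPC with its actual TPC count */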
+ gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset, + gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) | + gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f( + gr->gpc_tpc_count[gpc_index])); + } + + gk20a_writel(g, gr_gpc0_zcull_fs_r() + offset, + gr_gpc0_zcull_fs_num_active_banks_f(gr->gpc_zcb_count[gpc_index]) | + gr_gpc0_zcull_fs_num_sms_f(gr->tpc_count)); + + gk20a_writel(g, gr_gpc0_zcull_sm_num_rcp_r() + offset, + gr_gpc0_zcull_sm_num_rcp_conservative_f(rcp_conserv)); + } + + gk20a_writel(g, gr_gpcs_ppcs_wwdx_sm_num_rcp_r(), + gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(rcp_conserv)); + + return 0; +} + +static void gk20a_gr_enable_gpc_exceptions(struct gk20a *g) +{ + /* enable tpc exception forwarding */ + gk20a_writel(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r(), + gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f()); + + /* enable gpc exception forwarding */ + gk20a_writel(g, gr_gpc0_gpccs_gpc_exception_en_r(), + gr_gpc0_gpccs_gpc_exception_en_tpc_0_enabled_f()); +} + +void gr_gk20a_enable_hww_exceptions(struct gk20a *g) +{ + /* enable exceptions */ + gk20a_writel(g, gr_fe_hww_esr_r(), + gr_fe_hww_esr_en_enable_f() | + gr_fe_hww_esr_reset_active_f()); + gk20a_writel(g, gr_memfmt_hww_esr_r(), + gr_memfmt_hww_esr_en_enable_f() | + gr_memfmt_hww_esr_reset_active_f()); + gk20a_writel(g, gr_scc_hww_esr_r(), + gr_scc_hww_esr_en_enable_f() | + gr_scc_hww_esr_reset_active_f()); + gk20a_writel(g, gr_mme_hww_esr_r(), + gr_mme_hww_esr_en_enable_f() | + gr_mme_hww_esr_reset_active_f()); + gk20a_writel(g, gr_pd_hww_esr_r(), + gr_pd_hww_esr_en_enable_f() | + gr_pd_hww_esr_reset_active_f()); + gk20a_writel(g, gr_sked_hww_esr_r(), /* enabled by default */ + gr_sked_hww_esr_reset_active_f()); + gk20a_writel(g, gr_ds_hww_esr_r(), + gr_ds_hww_esr_en_enabled_f() | + gr_ds_hww_esr_reset_task_f()); + gk20a_writel(g, gr_ds_hww_report_mask_r(), + gr_ds_hww_report_mask_sph0_err_report_f() | + gr_ds_hww_report_mask_sph1_err_report_f() | + gr_ds_hww_report_mask_sph2_err_report_f() | + gr_ds_hww_report_mask_sph3_err_report_f() | + gr_ds_hww_report_mask_sph4_err_report_f() | + gr_ds_hww_report_mask_sph5_err_report_f() | + gr_ds_hww_report_mask_sph6_err_report_f() | + gr_ds_hww_report_mask_sph7_err_report_f() | + gr_ds_hww_report_mask_sph8_err_report_f() | + gr_ds_hww_report_mask_sph9_err_report_f() | + gr_ds_hww_report_mask_sph10_err_report_f() | + gr_ds_hww_report_mask_sph11_err_report_f() | + gr_ds_hww_report_mask_sph12_err_report_f() | + gr_ds_hww_report_mask_sph13_err_report_f() | + gr_ds_hww_report_mask_sph14_err_report_f() | + gr_ds_hww_report_mask_sph15_err_report_f() | + gr_ds_hww_report_mask_sph16_err_report_f() | + gr_ds_hww_report_mask_sph17_err_report_f() | + gr_ds_hww_report_mask_sph18_err_report_f() | + gr_ds_hww_report_mask_sph19_err_report_f() | + gr_ds_hww_report_mask_sph20_err_report_f() | + gr_ds_hww_report_mask_sph21_err_report_f() | + gr_ds_hww_report_mask_sph22_err_report_f() | + gr_ds_hww_report_mask_sph23_err_report_f()); +} + +static void gr_gk20a_set_hww_esr_report_mask(struct gk20a *g) +{ + /* setup sm warp esr report masks */ + gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() | + 
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f()); + + /* setup sm global esr report mask */ + gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(), + gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() | + gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f() | + gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f() | + gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f() | + gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f() | + gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f() | + gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f()); +} + +static int gk20a_init_gr_setup_hw(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load; + struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init; + u32 data; + u32 addr_lo, addr_hi; + u64 addr; + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + u32 fe_go_idle_timeout_save; + u32 last_method_data = 0; + u32 i, err; + + gk20a_dbg_fn(""); + + /* slcg prod values */ + g->ops.clock_gating.slcg_gr_load_gating_prod(g, g->slcg_enabled); + g->ops.clock_gating.slcg_perf_load_gating_prod(g, g->slcg_enabled); + + /* init mmu debug buffer */ + addr = NV_MC_SMMU_VADDR_TRANSLATE(gr->mmu_wr_mem.iova); + addr_lo = u64_lo32(addr); + addr_hi = u64_hi32(addr); + addr = (addr_lo >> fb_mmu_debug_wr_addr_alignment_v()) | + (addr_hi << (32 - fb_mmu_debug_wr_addr_alignment_v())); + + gk20a_writel(g, fb_mmu_debug_wr_r(), + fb_mmu_debug_wr_aperture_vid_mem_f() | + fb_mmu_debug_wr_vol_false_f() | + fb_mmu_debug_wr_addr_v(addr)); + + addr = NV_MC_SMMU_VADDR_TRANSLATE(gr->mmu_rd_mem.iova); + addr_lo = u64_lo32(addr); + addr_hi = u64_hi32(addr); + addr = (addr_lo >> fb_mmu_debug_rd_addr_alignment_v()) | + (addr_hi << (32 - fb_mmu_debug_rd_addr_alignment_v())); + + gk20a_writel(g, fb_mmu_debug_rd_r(), + fb_mmu_debug_rd_aperture_vid_mem_f() | + fb_mmu_debug_rd_vol_false_f() | + fb_mmu_debug_rd_addr_v(addr)); + + /* load gr floorsweeping registers */ + data = gk20a_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r()); + data = set_field(data, gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(), + gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f()); + gk20a_writel(g, gr_gpc0_ppc0_pes_vsc_strem_r(), data); + + gr_gk20a_zcull_init_hw(g, gr); + + g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled); + 
g->ops.clock_gating.pg_gr_load_gating_prod(g, true); + + if (g->elcg_enabled) { + gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A); + gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A); + } else { + gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); + gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); + } + + /* Bug 1340570: increase the clock timeout to avoid potential + * operation failure at high gpcclk rate. Default values are 0x400. + */ + gk20a_writel(g, pri_ringstation_sys_master_config_r(0x15), 0x800); + gk20a_writel(g, pri_ringstation_gpc_master_config_r(0xa), 0x800); + gk20a_writel(g, pri_ringstation_fbp_master_config_r(0x8), 0x800); + + /* enable fifo access */ + gk20a_writel(g, gr_gpfifo_ctl_r(), + gr_gpfifo_ctl_access_enabled_f() | + gr_gpfifo_ctl_semaphore_access_enabled_f()); + + /* TBD: reload gr ucode when needed */ + + /* enable interrupts */ + gk20a_writel(g, gr_intr_r(), 0xFFFFFFFF); + gk20a_writel(g, gr_intr_en_r(), 0xFFFFFFFF); + + /* enable fecs error interrupts */ + gk20a_writel(g, gr_fecs_host_int_enable_r(), + gr_fecs_host_int_enable_fault_during_ctxsw_enable_f() | + gr_fecs_host_int_enable_umimp_firmware_method_enable_f() | + gr_fecs_host_int_enable_umimp_illegal_method_enable_f() | + gr_fecs_host_int_enable_watchdog_enable_f()); + + g->ops.gr.enable_hww_exceptions(g); + g->ops.gr.set_hww_esr_report_mask(g); + + /* enable per GPC exceptions */ + gk20a_gr_enable_gpc_exceptions(g); + + /* TBD: ECC for L1/SM */ + /* TBD: enable per BE exceptions */ + + /* reset and enable all exceptions */ + gk20a_writel(g, gr_exception_r(), 0xFFFFFFFF); + gk20a_writel(g, gr_exception_en_r(), 0xFFFFFFFF); + gk20a_writel(g, gr_exception1_r(), 0xFFFFFFFF); + gk20a_writel(g, gr_exception1_en_r(), 0xFFFFFFFF); + gk20a_writel(g, gr_exception2_r(), 0xFFFFFFFF); + gk20a_writel(g, gr_exception2_en_r(), 0xFFFFFFFF); + + /* ignore status from some units */ + data = gk20a_readl(g, gr_status_mask_r()); + gk20a_writel(g, gr_status_mask_r(), data & gr->status_disable_mask); + + g->ops.ltc.init_zbc(g, gr); + g->ops.ltc.init_cbc(g, gr); + + /* load ctx init */ + for (i = 0; i < sw_ctx_load->count; i++) + gk20a_writel(g, sw_ctx_load->l[i].addr, + sw_ctx_load->l[i].value); + + err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); + if (err) + goto out; + + /* save and disable fe_go_idle */ + fe_go_idle_timeout_save = + gk20a_readl(g, gr_fe_go_idle_timeout_r()); + gk20a_writel(g, gr_fe_go_idle_timeout_r(), + (fe_go_idle_timeout_save & gr_fe_go_idle_timeout_count_f(0)) | + gr_fe_go_idle_timeout_count_disabled_f()); + + /* override a few ctx state registers */ + g->ops.gr.commit_global_cb_manager(g, NULL, false); + gr_gk20a_commit_global_timeslice(g, NULL, false); + + /* floorsweep anything left */ + g->ops.gr.init_fs_state(g); + + err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); + if (err) + goto restore_fe_go_idle; + +restore_fe_go_idle: + /* restore fe_go_idle */ + gk20a_writel(g, gr_fe_go_idle_timeout_r(), fe_go_idle_timeout_save); + + if (err || gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT)) + goto out; + + /* load method init */ + if (sw_method_init->count) { + gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(), + sw_method_init->l[0].value); + gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(), + gr_pri_mme_shadow_raw_index_write_trigger_f() | + sw_method_init->l[0].addr); + last_method_data = sw_method_init->l[0].value; + } + for (i = 1; i < sw_method_init->count; i++) { + if (sw_method_init->l[i].value != last_method_data) { + gk20a_writel(g, 
gr_pri_mme_shadow_raw_data_r(), + sw_method_init->l[i].value); + last_method_data = sw_method_init->l[i].value; + } + gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(), + gr_pri_mme_shadow_raw_index_write_trigger_f() | + sw_method_init->l[i].addr); + } + + gk20a_mm_l2_invalidate(g); + + err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); + if (err) + goto out; + +out: + gk20a_dbg_fn("done"); + return err; +} + +static int gk20a_init_gr_prepare(struct gk20a *g) +{ + u32 gpfifo_ctrl, pmc_en; + u32 err = 0; + + /* disable fifo access */ + pmc_en = gk20a_readl(g, mc_enable_r()); + if (pmc_en & mc_enable_pgraph_enabled_f()) { + gpfifo_ctrl = gk20a_readl(g, gr_gpfifo_ctl_r()); + gpfifo_ctrl &= ~gr_gpfifo_ctl_access_enabled_f(); + gk20a_writel(g, gr_gpfifo_ctl_r(), gpfifo_ctrl); + } + + /* reset gr engine */ + gk20a_reset(g, mc_enable_pgraph_enabled_f() + | mc_enable_blg_enabled_f() + | mc_enable_perfmon_enabled_f()); + + /* enable fifo access */ + gk20a_writel(g, gr_gpfifo_ctl_r(), + gr_gpfifo_ctl_access_enabled_f() | + gr_gpfifo_ctl_semaphore_access_enabled_f()); + + if (!g->gr.ctx_vars.valid) { + err = gr_gk20a_init_ctx_vars(g, &g->gr); + if (err) + gk20a_err(dev_from_gk20a(g), + "fail to load gr init ctx"); + } + return err; +} + +static int gr_gk20a_wait_mem_scrubbing(struct gk20a *g) +{ + int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT; + bool fecs_scrubbing; + bool gpccs_scrubbing; + + gk20a_dbg_fn(""); + + do { + fecs_scrubbing = gk20a_readl(g, gr_fecs_dmactl_r()) & + (gr_fecs_dmactl_imem_scrubbing_m() | + gr_fecs_dmactl_dmem_scrubbing_m()); + + gpccs_scrubbing = gk20a_readl(g, gr_gpccs_dmactl_r()) & + (gr_gpccs_dmactl_imem_scrubbing_m() | + gr_gpccs_dmactl_dmem_scrubbing_m()); + + if (!fecs_scrubbing && !gpccs_scrubbing) { + gk20a_dbg_fn("done"); + return 0; + } + + udelay(GR_IDLE_CHECK_DEFAULT); + } while (--retries || !tegra_platform_is_silicon()); + + gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout"); + return -ETIMEDOUT; +} + +static int gk20a_init_gr_reset_enable_hw(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + struct av_list_gk20a *sw_non_ctx_load = &g->gr.ctx_vars.sw_non_ctx_load; + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + u32 i, err = 0; + + gk20a_dbg_fn(""); + + /* enable interrupts */ + gk20a_writel(g, gr_intr_r(), ~0); + gk20a_writel(g, gr_intr_en_r(), ~0); + + /* reset ctx switch state */ + gr_gk20a_ctx_reset(g, 0); + + /* clear scc ram */ + gk20a_writel(g, gr_scc_init_r(), + gr_scc_init_ram_trigger_f()); + + /* load non_ctx init */ + for (i = 0; i < sw_non_ctx_load->count; i++) + gk20a_writel(g, sw_non_ctx_load->l[i].addr, + sw_non_ctx_load->l[i].value); + + err = gr_gk20a_wait_mem_scrubbing(g); + if (err) + goto out; + + err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); + if (err) + goto out; + + err = gr_gk20a_load_ctxsw_ucode(g, gr); + if (err) + goto out; + + /* this appears to query sw state, but fecs actually inits the + ramchain etc., so this is hw init */ + err = gr_gk20a_init_ctx_state(g, gr); + if (err) + goto out; + +out: + if (err) + gk20a_err(dev_from_gk20a(g), "fail"); + else + gk20a_dbg_fn("done"); + + return err; +} + +/* + * XXX Merge this list with the debugger/profiler + * session regops whitelists?
+ */ +static u32 wl_addr_gk20a[] = { + /* this list must be sorted (low to high) */ + 0x404468, /* gr_pri_mme_max_instructions */ + 0x418800, /* gr_pri_gpcs_setup_debug */ + 0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */ + 0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */ + 0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */ + 0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */ +}; + +static int gr_gk20a_init_access_map(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + void *data; + int err = 0; + u32 w, nr_pages = + DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size, + PAGE_SIZE); + + data = vmap(gr->global_ctx_buffer[PRIV_ACCESS_MAP].pages, + PAGE_ALIGN(gr->global_ctx_buffer[PRIV_ACCESS_MAP].size) >> + PAGE_SHIFT, 0, pgprot_dmacoherent(PAGE_KERNEL)); + if (!data) { + gk20a_err(dev_from_gk20a(g), + "failed to map priv access map memory"); + err = -ENOMEM; + goto clean_up; + } + + memset(data, 0x0, PAGE_SIZE * nr_pages); + + for (w = 0; w < ARRAY_SIZE(wl_addr_gk20a); w++) { + u32 map_bit, map_byte, map_shift; + map_bit = wl_addr_gk20a[w] >> 2; + map_byte = map_bit >> 3; + map_shift = map_bit & 0x7; /* i.e. 0-7 */ + gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d", + wl_addr_gk20a[w], map_byte, map_shift); + ((u8 *)data)[map_byte] |= 1 << map_shift; + } + +clean_up: + if (data) + vunmap(data); + return 0; +} + +static int gk20a_init_gr_setup_sw(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + int err; + + gk20a_dbg_fn(""); + + if (gr->sw_ready) { + gk20a_dbg_fn("skip init"); + return 0; + } + + gr->g = g; + + err = gr_gk20a_init_gr_config(g, gr); + if (err) + goto clean_up; + + err = gr_gk20a_init_mmu_sw(g, gr); + if (err) + goto clean_up; + + err = gr_gk20a_init_map_tiles(g, gr); + if (err) + goto clean_up; + + if (tegra_cpu_is_asim()) + gr->max_comptag_mem = 1; /* MBs worth of comptag coverage */ + else { + gk20a_dbg_info("total ram pages : %lu", totalram_pages); + gr->max_comptag_mem = totalram_pages + >> (10 - (PAGE_SHIFT - 10)); + } + err = g->ops.ltc.init_comptags(g, gr); + if (err) + goto clean_up; + + err = gr_gk20a_init_zcull(g, gr); + if (err) + goto clean_up; + + err = gr_gk20a_alloc_global_ctx_buffers(g); + if (err) + goto clean_up; + + err = gr_gk20a_init_access_map(g); + if (err) + goto clean_up; + + mutex_init(&gr->ctx_mutex); + spin_lock_init(&gr->ch_tlb_lock); + + gr->remove_support = gk20a_remove_gr_support; + gr->sw_ready = true; + + gk20a_dbg_fn("done"); + return 0; + +clean_up: + gk20a_err(dev_from_gk20a(g), "fail"); + gk20a_remove_gr_support(gr); + return err; +} + +int gk20a_init_gr_support(struct gk20a *g) +{ + u32 err; + + gk20a_dbg_fn(""); + + err = gk20a_init_gr_prepare(g); + if (err) + return err; + + /* this is required before gr_gk20a_init_ctx_state */ + mutex_init(&g->gr.fecs_mutex); + + err = gk20a_init_gr_reset_enable_hw(g); + if (err) + return err; + + err = gk20a_init_gr_setup_sw(g); + if (err) + return err; + + err = gk20a_init_gr_setup_hw(g); + if (err) + return err; + + return 0; +} + +#define NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE 0x02dc +#define NVA297_SET_CIRCULAR_BUFFER_SIZE 0x1280 +#define NVA297_SET_SHADER_EXCEPTIONS 0x1528 +#define NVA0C0_SET_SHADER_EXCEPTIONS 0x1528 + +#define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0 + +struct gr_isr_data { + u32 addr; + u32 data_lo; + u32 data_hi; + u32 curr_ctx; + u32 chid; + u32 offset; + u32 sub_chan; + u32 class_num; +}; + +void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data) +{ + gk20a_dbg_fn(""); + + if (data == NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE) { + gk20a_writel(g, + 
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), 0); + gk20a_writel(g, + gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(), 0); + } else { + /* setup sm warp esr report masks */ + gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() | + gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f()); + + /* setup sm global esr report mask */ + gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(), + gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() | + gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f() | + gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f() | + gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f() | + gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f() | + gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f() | + gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f()); + } +} + +static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data) +{ + struct gr_gk20a *gr = &g->gr; + u32 gpc_index, ppc_index, stride, val, offset; + u32 cb_size = data * 4; + + gk20a_dbg_fn(""); + + if (cb_size > gr->attrib_cb_size) + cb_size = gr->attrib_cb_size; + + gk20a_writel(g, gr_ds_tga_constraintlogic_r(), + (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) & + ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) | + gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size)); + + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + stride = proj_gpc_stride_v() * gpc_index; + + for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; + ppc_index++) { + + val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg_r() + + stride + + proj_ppc_in_gpc_stride_v() * ppc_index); + + offset = gr_gpc0_ppc0_cbm_cfg_start_offset_v(val); + + val = set_field(val, + gr_gpc0_ppc0_cbm_cfg_size_m(), + gr_gpc0_ppc0_cbm_cfg_size_f(cb_size * + gr->pes_tpc_count[ppc_index][gpc_index])); + val = set_field(val, + gr_gpc0_ppc0_cbm_cfg_start_offset_m(), + (offset + 1)); + + gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() + + stride + + proj_ppc_in_gpc_stride_v() * ppc_index, val); + + val = set_field(val, + gr_gpc0_ppc0_cbm_cfg_start_offset_m(), 
+ offset); + + gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() + + stride + + proj_ppc_in_gpc_stride_v() * ppc_index, val); + } + } +} + +static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) +{ + struct gr_gk20a *gr = &g->gr; + u32 gpc_index, ppc_index, stride, val; + u32 pd_ab_max_output; + u32 alpha_cb_size = data * 4; + + gk20a_dbg_fn(""); + /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF) + return; */ + + if (alpha_cb_size > gr->alpha_cb_size) + alpha_cb_size = gr->alpha_cb_size; + + gk20a_writel(g, gr_ds_tga_constraintlogic_r(), + (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) & + ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) | + gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size)); + + pd_ab_max_output = alpha_cb_size * + gr_gpc0_ppc0_cbm_cfg_size_granularity_v() / + gr_pd_ab_dist_cfg1_max_output_granularity_v(); + + gk20a_writel(g, gr_pd_ab_dist_cfg1_r(), + gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output)); + + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + stride = proj_gpc_stride_v() * gpc_index; + + for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; + ppc_index++) { + + val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg2_r() + + stride + + proj_ppc_in_gpc_stride_v() * ppc_index); + + val = set_field(val, gr_gpc0_ppc0_cbm_cfg2_size_m(), + gr_gpc0_ppc0_cbm_cfg2_size_f(alpha_cb_size * + gr->pes_tpc_count[ppc_index][gpc_index])); + + gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg2_r() + + stride + + proj_ppc_in_gpc_stride_v() * ppc_index, val); + } + } +} + +void gk20a_gr_reset(struct gk20a *g) +{ + int err; + err = gk20a_init_gr_prepare(g); + BUG_ON(err); + err = gk20a_init_gr_reset_enable_hw(g); + BUG_ON(err); + err = gk20a_init_gr_setup_hw(g); + BUG_ON(err); +} + +static int gr_gk20a_handle_sw_method(struct gk20a *g, u32 addr, + u32 class_num, u32 offset, u32 data) +{ + gk20a_dbg_fn(""); + + if (class_num == KEPLER_COMPUTE_A) { + switch (offset << 2) { + case NVA0C0_SET_SHADER_EXCEPTIONS: + gk20a_gr_set_shader_exceptions(g, data); + break; + default: + goto fail; + } + } + + if (class_num == KEPLER_C) { + switch (offset << 2) { + case NVA297_SET_SHADER_EXCEPTIONS: + gk20a_gr_set_shader_exceptions(g, data); + break; + case NVA297_SET_CIRCULAR_BUFFER_SIZE: + g->ops.gr.set_circular_buffer_size(g, data); + break; + case NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE: + g->ops.gr.set_alpha_circular_buffer_size(g, data); + break; + default: + goto fail; + } + } + return 0; + +fail: + return -EINVAL; +} + +static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g, + struct gr_isr_data *isr_data) +{ + struct fifo_gk20a *f = &g->fifo; + struct channel_gk20a *ch = &f->channel[isr_data->chid]; + gk20a_dbg_fn(""); + gk20a_set_error_notifier(ch, + NVHOST_CHANNEL_GR_SEMAPHORE_TIMEOUT); + gk20a_err(dev_from_gk20a(g), + "gr semaphore timeout\n"); + return -EINVAL; +} + +static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g, + struct gr_isr_data *isr_data) +{ + struct fifo_gk20a *f = &g->fifo; + struct channel_gk20a *ch = &f->channel[isr_data->chid]; + gk20a_dbg_fn(""); + gk20a_set_error_notifier(ch, + NVHOST_CHANNEL_GR_ILLEGAL_NOTIFY); + /* This is an unrecoverable error, reset is needed */ + gk20a_err(dev_from_gk20a(g), + "gr illegal notify pending\n"); + return -EINVAL; +} + +static int gk20a_gr_handle_illegal_method(struct gk20a *g, + struct gr_isr_data *isr_data) +{ + int ret = g->ops.gr.handle_sw_method(g, isr_data->addr, + isr_data->class_num, isr_data->offset, + isr_data->data_lo); + if (ret) + gk20a_err(dev_from_gk20a(g), "invalid method class 0x%08x" +
", offset 0x%08x address 0x%08x\n", + isr_data->class_num, isr_data->offset, isr_data->addr); + + return ret; +} + +static int gk20a_gr_handle_illegal_class(struct gk20a *g, + struct gr_isr_data *isr_data) +{ + struct fifo_gk20a *f = &g->fifo; + struct channel_gk20a *ch = &f->channel[isr_data->chid]; + gk20a_dbg_fn(""); + gk20a_set_error_notifier(ch, + NVHOST_CHANNEL_GR_ERROR_SW_NOTIFY); + gk20a_err(dev_from_gk20a(g), + "invalid class 0x%08x, offset 0x%08x", + isr_data->class_num, isr_data->offset); + return -EINVAL; +} + +static int gk20a_gr_handle_class_error(struct gk20a *g, + struct gr_isr_data *isr_data) +{ + struct fifo_gk20a *f = &g->fifo; + struct channel_gk20a *ch = &f->channel[isr_data->chid]; + gk20a_dbg_fn(""); + + gk20a_set_error_notifier(ch, + NVHOST_CHANNEL_GR_ERROR_SW_NOTIFY); + gk20a_err(dev_from_gk20a(g), + "class error 0x%08x, offset 0x%08x", + isr_data->class_num, isr_data->offset); + return -EINVAL; +} + +static int gk20a_gr_handle_semaphore_pending(struct gk20a *g, + struct gr_isr_data *isr_data) +{ + struct fifo_gk20a *f = &g->fifo; + struct channel_gk20a *ch = &f->channel[isr_data->chid]; + + wake_up(&ch->semaphore_wq); + + return 0; +} + +#if defined(CONFIG_GK20A_CYCLE_STATS) +static inline bool is_valid_cyclestats_bar0_offset_gk20a(struct gk20a *g, + u32 offset) +{ + /* support only 24-bit 4-byte aligned offsets */ + bool valid = !(offset & 0xFF000003); + /* whitelist check */ + valid = valid && + is_bar0_global_offset_whitelisted_gk20a(offset); + /* resource size check in case there was a problem + * with allocating the assumed size of bar0 */ + valid = valid && + offset < resource_size(g->reg_mem); + return valid; +} +#endif + +static int gk20a_gr_handle_notify_pending(struct gk20a *g, + struct gr_isr_data *isr_data) +{ + struct fifo_gk20a *f = &g->fifo; + struct channel_gk20a *ch = &f->channel[isr_data->chid]; + +#if defined(CONFIG_GK20A_CYCLE_STATS) + void *virtual_address; + u32 buffer_size; + u32 offset; + u32 new_offset; + bool exit; + struct share_buffer_head *sh_hdr; + u32 raw_reg; + u64 mask_orig; + u64 v = 0; + struct gk20a_cyclestate_buffer_elem *op_elem; + /* GL will never use payload 0 for cycle state */ + if ((ch->cyclestate.cyclestate_buffer == NULL) || (isr_data->data_lo == 0)) + return 0; + + mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex); + + virtual_address = ch->cyclestate.cyclestate_buffer; + buffer_size = ch->cyclestate.cyclestate_buffer_size; + offset = isr_data->data_lo; + exit = false; + while (!exit) { + if (offset >= buffer_size) { + WARN_ON(1); + break; + } + + sh_hdr = (struct share_buffer_head *) + ((char *)virtual_address + offset); + + if (sh_hdr->size < sizeof(struct share_buffer_head)) { + WARN_ON(1); + break; + } + new_offset = offset + sh_hdr->size; + + switch (sh_hdr->operation) { + case OP_END: + exit = true; + break; + + case BAR0_READ32: + case BAR0_WRITE32: + { + bool valid; + op_elem = + (struct gk20a_cyclestate_buffer_elem *) + sh_hdr; + valid = is_valid_cyclestats_bar0_offset_gk20a(g, + op_elem->offset_bar0); + if (!valid) { + gk20a_err(dev_from_gk20a(g), + "invalid cycletstats op offset: 0x%x\n", + op_elem->offset_bar0); + + sh_hdr->failed = exit = true; + break; + } + + + mask_orig = + ((1ULL << + (op_elem->last_bit + 1)) + -1)&~((1ULL << + op_elem->first_bit)-1); + + raw_reg = + gk20a_readl(g, + op_elem->offset_bar0); + + switch (sh_hdr->operation) { + case BAR0_READ32: + op_elem->data = + (raw_reg & mask_orig) + >> op_elem->first_bit; + break; + + case BAR0_WRITE32: + v = 0; + if ((unsigned int)mask_orig != + 
(unsigned int)~0) { + v = (unsigned int) + (raw_reg & ~mask_orig); + } + + v |= ((op_elem->data + << op_elem->first_bit) + & mask_orig); + + gk20a_writel(g, + op_elem->offset_bar0, + (unsigned int)v); + break; + default: + /* nop ok?*/ + break; + } + } + break; + + default: + /* no operation content case */ + exit = true; + break; + } + sh_hdr->completed = true; + offset = new_offset; + } + mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex); +#endif + gk20a_dbg_fn(""); + wake_up(&ch->notifier_wq); + return 0; +} + +/* Used by sw interrupt thread to translate current ctx to chid. + * For performance, we don't want to go through 128 channels every time. + * A small tlb is used here to cache translation */ +static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx) +{ + struct fifo_gk20a *f = &g->fifo; + struct gr_gk20a *gr = &g->gr; + u32 chid = -1; + u32 i; + + spin_lock(&gr->ch_tlb_lock); + + /* check cache first */ + for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) { + if (gr->chid_tlb[i].curr_ctx == curr_ctx) { + chid = gr->chid_tlb[i].hw_chid; + goto unlock; + } + } + + /* slow path */ + for (chid = 0; chid < f->num_channels; chid++) + if (f->channel[chid].in_use) { + if ((u32)(f->channel[chid].inst_block.cpu_pa >> + ram_in_base_shift_v()) == + gr_fecs_current_ctx_ptr_v(curr_ctx)) + break; + } + + if (chid >= f->num_channels) { + chid = -1; + goto unlock; + } + + /* add to free tlb entry */ + for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) { + if (gr->chid_tlb[i].curr_ctx == 0) { + gr->chid_tlb[i].curr_ctx = curr_ctx; + gr->chid_tlb[i].hw_chid = chid; + goto unlock; + } + } + + /* no free entry, flush one */ + gr->chid_tlb[gr->channel_tlb_flush_index].curr_ctx = curr_ctx; + gr->chid_tlb[gr->channel_tlb_flush_index].hw_chid = chid; + + gr->channel_tlb_flush_index = + (gr->channel_tlb_flush_index + 1) & + (GR_CHANNEL_MAP_TLB_SIZE - 1); + +unlock: + spin_unlock(&gr->ch_tlb_lock); + return chid; +} + +static int gk20a_gr_lock_down_sm(struct gk20a *g, u32 global_esr_mask) +{ + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + u32 delay = GR_IDLE_CHECK_DEFAULT; + bool mmu_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled(g); + u32 dbgr_control0; + + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "locking down SM"); + + /* assert stop trigger */ + dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r()); + dbgr_control0 |= gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(); + gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r(), dbgr_control0); + + /* wait for the sm to lock down */ + do { + u32 global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r()); + u32 warp_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_warp_esr_r()); + u32 dbgr_status0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_status0_r()); + bool locked_down = + (gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) == + gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v()); + bool error_pending = + (gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) != + gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) || + ((global_esr & ~global_esr_mask) != 0); + + if (locked_down || !error_pending) { + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "locked down SM"); + + /* de-assert stop trigger */ + dbgr_control0 &= ~gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(); + gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r(), dbgr_control0); + + return 0; + } + + /* if an mmu fault is pending and mmu debug mode is not + * enabled, the sm will never lock down. 
*/ + if (!mmu_debug_mode_enabled && gk20a_fifo_mmu_fault_pending(g)) { + gk20a_err(dev_from_gk20a(g), "mmu fault pending, sm will" + " never lock down!"); + return -EFAULT; + } + + usleep_range(delay, delay * 2); + delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); + + } while (time_before(jiffies, end_jiffies) + || !tegra_platform_is_silicon()); + + gk20a_err(dev_from_gk20a(g), "timed out while trying to lock down SM"); + + return -EAGAIN; +} + +bool gk20a_gr_sm_debugger_attached(struct gk20a *g) +{ + u32 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r()); + + /* check if an sm debugger is attached */ + if (gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(dbgr_control0) == + gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v()) + return true; + + return false; +} + +static void gk20a_gr_clear_sm_hww(struct gk20a *g, u32 global_esr) +{ + gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r(), global_esr); + + /* clear the warp hww */ + gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r(), + gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f()); +} + +static struct channel_gk20a * +channel_from_hw_chid(struct gk20a *g, u32 hw_chid) +{ + return g->fifo.channel+hw_chid; +} + +static int gk20a_gr_handle_sm_exception(struct gk20a *g, + struct gr_isr_data *isr_data) +{ + int ret = 0; + bool do_warp_sync = false; + /* these three interrupts don't require locking down the SM. They can + * be handled by usermode clients as they aren't fatal. Additionally, + * usermode clients may wish to allow some warps to execute while others + * are at breakpoints, as opposed to fatal errors where all warps should + * halt. */ + u32 global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() | + gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() | + gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(); + u32 global_esr, warp_esr; + bool sm_debugger_attached = gk20a_gr_sm_debugger_attached(g); + struct channel_gk20a *fault_ch; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); + + global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r()); + warp_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_warp_esr_r()); + + /* if an sm debugger is attached, disable forwarding of tpc exceptions. + * the debugger will reenable exceptions after servicing them. 
*/ + if (sm_debugger_attached) { + u32 tpc_exception_en = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r()); + tpc_exception_en &= ~gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(); + gk20a_writel(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r(), tpc_exception_en); + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "SM debugger attached"); + } + + /* if a debugger is present and an error has occurred, do a warp sync */ + if (sm_debugger_attached && ((warp_esr != 0) || ((global_esr & ~global_mask) != 0))) { + gk20a_dbg(gpu_dbg_intr, "warp sync needed"); + do_warp_sync = true; + } + + if (do_warp_sync) { + ret = gk20a_gr_lock_down_sm(g, global_mask); + if (ret) { + gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n"); + return ret; + } + } + + /* finally, signal any client waiting on an event */ + fault_ch = channel_from_hw_chid(g, isr_data->chid); + if (fault_ch) + gk20a_dbg_gpu_post_events(fault_ch); + + return ret; +} + +static int gk20a_gr_handle_tpc_exception(struct gk20a *g, + struct gr_isr_data *isr_data) +{ + int ret = 0; + u32 tpc_exception = gk20a_readl(g, gr_gpcs_tpcs_tpccs_tpc_exception_r()); + + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, ""); + + /* check if an sm exeption is pending */ + if (gr_gpcs_tpcs_tpccs_tpc_exception_sm_v(tpc_exception) == + gr_gpcs_tpcs_tpccs_tpc_exception_sm_pending_v()) { + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "SM exception pending"); + ret = gk20a_gr_handle_sm_exception(g, isr_data); + } + + return ret; +} + +static int gk20a_gr_handle_gpc_exception(struct gk20a *g, + struct gr_isr_data *isr_data) +{ + int ret = 0; + u32 gpc_exception = gk20a_readl(g, gr_gpcs_gpccs_gpc_exception_r()); + + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, ""); + + /* check if tpc 0 has an exception */ + if (gr_gpcs_gpccs_gpc_exception_tpc_v(gpc_exception) == + gr_gpcs_gpccs_gpc_exception_tpc_0_pending_v()) { + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "TPC exception pending"); + ret = gk20a_gr_handle_tpc_exception(g, isr_data); + } + + return ret; +} + +int gk20a_gr_isr(struct gk20a *g) +{ + struct gr_isr_data isr_data; + u32 grfifo_ctl; + u32 obj_table; + int need_reset = 0; + u32 gr_intr = gk20a_readl(g, gr_intr_r()); + + gk20a_dbg_fn(""); + gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr); + + if (!gr_intr) + return 0; + + grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r()); + grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1); + grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1); + + gk20a_writel(g, gr_gpfifo_ctl_r(), + grfifo_ctl | gr_gpfifo_ctl_access_f(0) | + gr_gpfifo_ctl_semaphore_access_f(0)); + + isr_data.addr = gk20a_readl(g, gr_trapped_addr_r()); + isr_data.data_lo = gk20a_readl(g, gr_trapped_data_lo_r()); + isr_data.data_hi = gk20a_readl(g, gr_trapped_data_hi_r()); + isr_data.curr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r()); + isr_data.offset = gr_trapped_addr_mthd_v(isr_data.addr); + isr_data.sub_chan = gr_trapped_addr_subch_v(isr_data.addr); + obj_table = gk20a_readl(g, + gr_fe_object_table_r(isr_data.sub_chan)); + isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table); + + isr_data.chid = + gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx); + if (isr_data.chid == -1) { + gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x", + isr_data.curr_ctx); + goto clean_up; + } + + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, + "channel %d: addr 0x%08x, " + "data 0x%08x 0x%08x," + "ctx 0x%08x, offset 0x%08x, " + "subchannel 0x%08x, class 0x%08x", + isr_data.chid, isr_data.addr, + isr_data.data_hi, isr_data.data_lo, + isr_data.curr_ctx, isr_data.offset, + 
isr_data.sub_chan, isr_data.class_num); + + if (gr_intr & gr_intr_notify_pending_f()) { + gk20a_gr_handle_notify_pending(g, &isr_data); + gk20a_writel(g, gr_intr_r(), + gr_intr_notify_reset_f()); + gr_intr &= ~gr_intr_notify_pending_f(); + } + + if (gr_intr & gr_intr_semaphore_pending_f()) { + gk20a_gr_handle_semaphore_pending(g, &isr_data); + gk20a_writel(g, gr_intr_r(), + gr_intr_semaphore_reset_f()); + gr_intr &= ~gr_intr_semaphore_pending_f(); + } + + if (gr_intr & gr_intr_semaphore_timeout_pending_f()) { + need_reset |= gk20a_gr_handle_semaphore_timeout_pending(g, + &isr_data); + gk20a_writel(g, gr_intr_r(), + gr_intr_semaphore_reset_f()); + gr_intr &= ~gr_intr_semaphore_pending_f(); + } + + if (gr_intr & gr_intr_illegal_notify_pending_f()) { + need_reset |= gk20a_gr_intr_illegal_notify_pending(g, + &isr_data); + gk20a_writel(g, gr_intr_r(), + gr_intr_illegal_notify_reset_f()); + gr_intr &= ~gr_intr_illegal_notify_pending_f(); + } + + if (gr_intr & gr_intr_illegal_method_pending_f()) { + need_reset |= gk20a_gr_handle_illegal_method(g, &isr_data); + gk20a_writel(g, gr_intr_r(), + gr_intr_illegal_method_reset_f()); + gr_intr &= ~gr_intr_illegal_method_pending_f(); + } + + if (gr_intr & gr_intr_illegal_class_pending_f()) { + need_reset |= gk20a_gr_handle_illegal_class(g, &isr_data); + gk20a_writel(g, gr_intr_r(), + gr_intr_illegal_class_reset_f()); + gr_intr &= ~gr_intr_illegal_class_pending_f(); + } + + if (gr_intr & gr_intr_class_error_pending_f()) { + need_reset |= gk20a_gr_handle_class_error(g, &isr_data); + gk20a_writel(g, gr_intr_r(), + gr_intr_class_error_reset_f()); + gr_intr &= ~gr_intr_class_error_pending_f(); + } + + /* this one happens if someone tries to hit a non-whitelisted + * register using set_falcon[4] */ + if (gr_intr & gr_intr_firmware_method_pending_f()) { + need_reset |= true; + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "firmware method intr pending\n"); + gk20a_writel(g, gr_intr_r(), + gr_intr_firmware_method_reset_f()); + gr_intr &= ~gr_intr_firmware_method_pending_f(); + } + + if (gr_intr & gr_intr_exception_pending_f()) { + u32 exception = gk20a_readl(g, gr_exception_r()); + struct fifo_gk20a *f = &g->fifo; + struct channel_gk20a *ch = &f->channel[isr_data.chid]; + + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "exception %08x\n", exception); + + if (exception & gr_exception_fe_m()) { + u32 fe = gk20a_readl(g, gr_fe_hww_esr_r()); + gk20a_dbg(gpu_dbg_intr, "fe warning %08x\n", fe); + gk20a_writel(g, gr_fe_hww_esr_r(), fe); + } + + /* check if a gpc exception has occurred */ + if (exception & gr_exception_gpc_m() && need_reset == 0) { + u32 exception1 = gk20a_readl(g, gr_exception1_r()); + u32 global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r()); + + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "GPC exception pending"); + + /* if no sm debugger is present, clean up the channel */ + if (!gk20a_gr_sm_debugger_attached(g)) { + gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, + "SM debugger not attached, clearing interrupt"); + need_reset |= -EFAULT; + } else { + /* check if gpc 0 has an exception */ + if (exception1 & gr_exception1_gpc_0_pending_f()) + need_reset |= gk20a_gr_handle_gpc_exception(g, &isr_data); + /* clear the hwws, also causes tpc and gpc + * exceptions to be cleared */ + gk20a_gr_clear_sm_hww(g, global_esr); + } + + if (need_reset) + gk20a_set_error_notifier(ch, + NVHOST_CHANNEL_GR_ERROR_SW_NOTIFY); + } + + gk20a_writel(g, gr_intr_r(), gr_intr_exception_reset_f()); + gr_intr &= ~gr_intr_exception_pending_f(); + } + + if (need_reset) + 
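Each condition in gk20a_gr_isr() above is handled the same way: test the bit in the latched interrupt word, service it, write that condition's reset value back to the interrupt register, and clear the bit in the local copy so anything still set at the end can be reported as unhandled. Here is a compact sketch of that acknowledge-and-clear dispatch, using a hypothetical handler table instead of the driver's hand-written if blocks.

#include <stdint.h>
#include <stdio.h>

struct intr_handler {
        uint32_t pending_bit;   /* bit in the latched status word */
        uint32_t reset_value;   /* value written back to acknowledge */
        int (*service)(void *ctx);
};

/* Returns non-zero if any handler requested a reset/recovery. */
static int dispatch_intr(uint32_t status,
                         void (*write_ack)(uint32_t),
                         const struct intr_handler *h, unsigned int n,
                         void *ctx)
{
        int need_reset = 0;
        unsigned int i;

        for (i = 0; i < n; i++) {
                if (!(status & h[i].pending_bit))
                        continue;
                need_reset |= h[i].service(ctx);
                write_ack(h[i].reset_value);   /* acknowledge in hardware */
                status &= ~h[i].pending_bit;   /* clear in our local copy */
        }

        if (status)
                fprintf(stderr, "unhandled interrupt bits 0x%08x\n", status);

        return need_reset;
}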
gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), true); + +clean_up: + gk20a_writel(g, gr_gpfifo_ctl_r(), + grfifo_ctl | gr_gpfifo_ctl_access_f(1) | + gr_gpfifo_ctl_semaphore_access_f(1)); + + if (gr_intr) + gk20a_err(dev_from_gk20a(g), + "unhandled gr interrupt 0x%08x", gr_intr); + + return 0; +} + +int gk20a_gr_nonstall_isr(struct gk20a *g) +{ + u32 gr_intr = gk20a_readl(g, gr_intr_nonstall_r()); + u32 clear_intr = 0; + + gk20a_dbg(gpu_dbg_intr, "pgraph nonstall intr %08x", gr_intr); + + if (gr_intr & gr_intr_nonstall_trap_pending_f()) { + gk20a_channel_semaphore_wakeup(g); + clear_intr |= gr_intr_nonstall_trap_pending_f(); + } + + gk20a_writel(g, gr_intr_nonstall_r(), clear_intr); + + return 0; +} + +int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size) +{ + BUG_ON(size == NULL); + return gr_gk20a_submit_fecs_method_op(g, + (struct fecs_method_op_gk20a) { + .mailbox.id = 0, + .mailbox.data = 0, + .mailbox.clr = ~0, + .method.data = 1, + .method.addr = gr_fecs_method_push_adr_discover_reglist_image_size_v(), + .mailbox.ret = size, + .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL, + .mailbox.ok = 0, + .cond.fail = GR_IS_UCODE_OP_SKIP, + .mailbox.fail = 0}); +} + +int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr) +{ + return gr_gk20a_submit_fecs_method_op(g, + (struct fecs_method_op_gk20a){ + .mailbox.id = 4, + .mailbox.data = (gr_fecs_current_ctx_ptr_f(addr >> 12) | + gr_fecs_current_ctx_valid_f(1) | + gr_fecs_current_ctx_target_vid_mem_f()), + .mailbox.clr = ~0, + .method.data = 1, + .method.addr = gr_fecs_method_push_adr_set_reglist_bind_instance_v(), + .mailbox.ret = NULL, + .cond.ok = GR_IS_UCODE_OP_EQUAL, + .mailbox.ok = 1, + .cond.fail = GR_IS_UCODE_OP_SKIP, + .mailbox.fail = 0}); +} + +int gr_gk20a_fecs_set_reglist_virual_addr(struct gk20a *g, u64 pmu_va) +{ + return gr_gk20a_submit_fecs_method_op(g, + (struct fecs_method_op_gk20a) { + .mailbox.id = 4, + .mailbox.data = u64_lo32(pmu_va >> 8), + .mailbox.clr = ~0, + .method.data = 1, + .method.addr = gr_fecs_method_push_adr_set_reglist_virtual_address_v(), + .mailbox.ret = NULL, + .cond.ok = GR_IS_UCODE_OP_EQUAL, + .mailbox.ok = 1, + .cond.fail = GR_IS_UCODE_OP_SKIP, + .mailbox.fail = 0}); +} + +int gk20a_gr_suspend(struct gk20a *g) +{ + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + u32 ret = 0; + + gk20a_dbg_fn(""); + + ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); + if (ret) + return ret; + + gk20a_writel(g, gr_gpfifo_ctl_r(), + gr_gpfifo_ctl_access_disabled_f()); + + /* disable gr intr */ + gk20a_writel(g, gr_intr_r(), 0); + gk20a_writel(g, gr_intr_en_r(), 0); + + /* disable all exceptions */ + gk20a_writel(g, gr_exception_r(), 0); + gk20a_writel(g, gr_exception_en_r(), 0); + gk20a_writel(g, gr_exception1_r(), 0); + gk20a_writel(g, gr_exception1_en_r(), 0); + gk20a_writel(g, gr_exception2_r(), 0); + gk20a_writel(g, gr_exception2_en_r(), 0); + + gk20a_gr_flush_channel_tlb(&g->gr); + + gk20a_dbg_fn("done"); + return ret; +} + +static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, + u32 addr, + bool is_quad, u32 quad, + u32 *context_buffer, + u32 context_buffer_size, + u32 *priv_offset); + +/* This function will decode a priv address and return the partition type and numbers. 
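The reglist helpers above all go through gr_gk20a_submit_fecs_method_op() with a C99 compound literal that bundles the whole request: which mailbox to use, the method address and data, and how to judge the mailbox value for the ok and fail conditions. A reduced sketch of that descriptor style follows; the struct, the stub submit function and the method id are invented for illustration.

#include <stdint.h>

enum op_cond { OP_EQUAL, OP_NOT_EQUAL, OP_SKIP };

struct method_op {
        struct { uint32_t id, data, clr, ok, fail; uint32_t *ret; } mailbox;
        struct { uint32_t addr, data; } method;
        struct { enum op_cond ok, fail; } cond;
};

/* Stand-in for the real submit path. */
static int submit_method_op(const struct method_op *op)
{
        (void)op;       /* a real implementation would program the method
                         * registers and poll the mailbox against cond.ok
                         * and cond.fail */
        return 0;
}

/* Example call in the same single-statement style as the reglist helpers. */
static int query_image_size(uint32_t *size)
{
        return submit_method_op(&(struct method_op) {
                .mailbox.id   = 0,
                .mailbox.clr  = ~0u,
                .mailbox.ret  = size,
                .method.addr  = 0x30,   /* hypothetical method id */
                .method.data  = 1,
                .cond.ok      = OP_NOT_EQUAL,
                .cond.fail    = OP_SKIP,
        });
}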
*/ +int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr, + int *addr_type, /* enum ctxsw_addr_type */ + u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num, + u32 *broadcast_flags) +{ + u32 gpc_addr; + u32 ppc_address; + u32 ppc_broadcast_addr; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); + + /* setup defaults */ + ppc_address = 0; + ppc_broadcast_addr = 0; + *addr_type = CTXSW_ADDR_TYPE_SYS; + *broadcast_flags = PRI_BROADCAST_FLAGS_NONE; + *gpc_num = 0; + *tpc_num = 0; + *ppc_num = 0; + *be_num = 0; + + if (pri_is_gpc_addr(addr)) { + *addr_type = CTXSW_ADDR_TYPE_GPC; + gpc_addr = pri_gpccs_addr_mask(addr); + if (pri_is_gpc_addr_shared(addr)) { + *addr_type = CTXSW_ADDR_TYPE_GPC; + *broadcast_flags |= PRI_BROADCAST_FLAGS_GPC; + } else + *gpc_num = pri_get_gpc_num(addr); + + if (pri_is_tpc_addr(gpc_addr)) { + *addr_type = CTXSW_ADDR_TYPE_TPC; + if (pri_is_tpc_addr_shared(gpc_addr)) { + *broadcast_flags |= PRI_BROADCAST_FLAGS_TPC; + return 0; + } + *tpc_num = pri_get_tpc_num(gpc_addr); + } + return 0; + } else if (pri_is_be_addr(addr)) { + *addr_type = CTXSW_ADDR_TYPE_BE; + if (pri_is_be_addr_shared(addr)) { + *broadcast_flags |= PRI_BROADCAST_FLAGS_BE; + return 0; + } + *be_num = pri_get_be_num(addr); + return 0; + } else { + *addr_type = CTXSW_ADDR_TYPE_SYS; + return 0; + } + /* PPC!?!?!?! */ + + /*NOTREACHED*/ + return -EINVAL; +} + +static int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr, + u32 gpc_num, + u32 *priv_addr_table, u32 *t) +{ + u32 ppc_num; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); + + for (ppc_num = 0; ppc_num < g->gr.pe_count_per_gpc; ppc_num++) + priv_addr_table[(*t)++] = pri_ppc_addr(pri_ppccs_addr_mask(addr), + gpc_num, ppc_num); + + return 0; +} + +/* + * The context buffer is indexed using BE broadcast addresses and GPC/TPC + * unicast addresses. This function will convert a BE unicast address to a BE + * broadcast address and split a GPC/TPC broadcast address into a table of + * GPC/TPC addresses. The addresses generated by this function can be + * successfully processed by gr_gk20a_find_priv_offset_in_buffer + */ +static int gr_gk20a_create_priv_addr_table(struct gk20a *g, + u32 addr, + u32 *priv_addr_table, + u32 *num_registers) +{ + int addr_type; /*enum ctxsw_addr_type */ + u32 gpc_num, tpc_num, ppc_num, be_num; + u32 broadcast_flags; + u32 t; + int err; + + t = 0; + *num_registers = 0; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); + + err = gr_gk20a_decode_priv_addr(g, addr, &addr_type, + &gpc_num, &tpc_num, &ppc_num, &be_num, + &broadcast_flags); + gk20a_dbg(gpu_dbg_gpu_dbg, "addr_type = %d", addr_type); + if (err) + return err; + + if ((addr_type == CTXSW_ADDR_TYPE_SYS) || + (addr_type == CTXSW_ADDR_TYPE_BE)) { + /* The BE broadcast registers are included in the compressed PRI + * table. Convert a BE unicast address to a broadcast address + * so that we can look up the offset. */ + if ((addr_type == CTXSW_ADDR_TYPE_BE) && + !(broadcast_flags & PRI_BROADCAST_FLAGS_BE)) + priv_addr_table[t++] = pri_be_shared_addr(addr); + else + priv_addr_table[t++] = addr; + + *num_registers = t; + return 0; + } + + /* The GPC/TPC unicast registers are included in the compressed PRI + * tables. Convert a GPC/TPC broadcast address to unicast addresses so + * that we can look up the offsets. 
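gr_gk20a_decode_priv_addr() above classifies a register offset purely by which base-plus-stride window it falls into: GPC space (with the TPC sub-window inside it), BE space, or SYS for everything else, and the shared alias windows mark broadcast addresses. The toy decoder below shows the GPC half of that classification; the bases, strides and counts are made up rather than taken from the hw_proj headers.

#include <stdint.h>

/* Illustrative layout only. */
#define GPC_BASE        0x00100000u
#define GPC_STRIDE      0x00001000u
#define NUM_GPCS        4u
#define GPC_SHARED_BASE 0x00180000u

enum addr_type { ADDR_SYS, ADDR_GPC, ADDR_GPC_BCAST };

static enum addr_type decode_addr(uint32_t addr, uint32_t *gpc_num)
{
        *gpc_num = 0;

        if (addr >= GPC_SHARED_BASE && addr < GPC_SHARED_BASE + GPC_STRIDE)
                return ADDR_GPC_BCAST;          /* shared alias: broadcast */

        if (addr >= GPC_BASE && addr < GPC_BASE + NUM_GPCS * GPC_STRIDE) {
                *gpc_num = (addr - GPC_BASE) / GPC_STRIDE;
                return ADDR_GPC;                /* unicast, per-GPC */
        }

        return ADDR_SYS;                        /* everything else */
}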
*/ + if (broadcast_flags & PRI_BROADCAST_FLAGS_GPC) { + for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) { + + if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) + for (tpc_num = 0; + tpc_num < g->gr.gpc_tpc_count[gpc_num]; + tpc_num++) + priv_addr_table[t++] = + pri_tpc_addr(pri_tpccs_addr_mask(addr), + gpc_num, tpc_num); + + else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) { + err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num, + priv_addr_table, &t); + if (err) + return err; + } else + priv_addr_table[t++] = + pri_gpc_addr(pri_gpccs_addr_mask(addr), + gpc_num); + } + } else { + if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) + for (tpc_num = 0; + tpc_num < g->gr.gpc_tpc_count[gpc_num]; + tpc_num++) + priv_addr_table[t++] = + pri_tpc_addr(pri_tpccs_addr_mask(addr), + gpc_num, tpc_num); + else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) + err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num, + priv_addr_table, &t); + else + priv_addr_table[t++] = addr; + } + + *num_registers = t; + return 0; +} + +int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, + u32 addr, + u32 max_offsets, + u32 *offsets, u32 *offset_addrs, + u32 *num_offsets, + bool is_quad, u32 quad) +{ + u32 i; + u32 priv_offset = 0; + u32 *priv_registers; + u32 num_registers = 0; + int err = 0; + u32 potential_offsets = proj_scal_litter_num_gpcs_v() * + proj_scal_litter_num_tpc_per_gpc_v(); + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); + + /* implementation is crossed-up if either of these happen */ + if (max_offsets > potential_offsets) + return -EINVAL; + + if (!g->gr.ctx_vars.golden_image_initialized) + return -ENODEV; + + priv_registers = kzalloc(sizeof(u32) * potential_offsets, GFP_KERNEL); + if (IS_ERR_OR_NULL(priv_registers)) { + gk20a_dbg_fn("failed alloc for potential_offsets=%d", potential_offsets); + err = PTR_ERR(priv_registers); + goto cleanup; + } + memset(offsets, 0, sizeof(u32) * max_offsets); + memset(offset_addrs, 0, sizeof(u32) * max_offsets); + *num_offsets = 0; + + gr_gk20a_create_priv_addr_table(g, addr, &priv_registers[0], &num_registers); + + if ((max_offsets > 1) && (num_registers > max_offsets)) { + err = -EINVAL; + goto cleanup; + } + + if ((max_offsets == 1) && (num_registers > 1)) + num_registers = 1; + + if (!g->gr.ctx_vars.local_golden_image) { + gk20a_dbg_fn("no context switch header info to work with"); + err = -EINVAL; + goto cleanup; + } + + for (i = 0; i < num_registers; i++) { + err = gr_gk20a_find_priv_offset_in_buffer(g, + priv_registers[i], + is_quad, quad, + g->gr.ctx_vars.local_golden_image, + g->gr.ctx_vars.golden_image_size, + &priv_offset); + if (err) { + gk20a_dbg_fn("Could not determine priv_offset for addr:0x%x", + addr); /*, grPriRegStr(addr)));*/ + goto cleanup; + } + + offsets[i] = priv_offset; + offset_addrs[i] = priv_registers[i]; + } + + *num_offsets = num_registers; + + cleanup: + + if (!IS_ERR_OR_NULL(priv_registers)) + kfree(priv_registers); + + return err; +} + +/* Setup some register tables. This looks hacky; our + * register/offset functions are just that, functions. + * So they can't be used as initializers... TBD: fix to + * generate consts at least on an as-needed basis. + */ +static const u32 _num_ovr_perf_regs = 17; +static u32 _ovr_perf_regs[17] = { 0, }; +/* Following are the blocks of registers that the ucode + stores in the extended region.*/ +/* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? 
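gr_gk20a_create_priv_addr_table() above turns one broadcast address into the list of unicast addresses it stands for by looping over every GPC, and every TPC within it, and rebasing the masked offset onto each unit. Here is the core of that expansion as a standalone helper; the base and stride constants are illustrative only.

#include <stddef.h>
#include <stdint.h>

#define GPC_BASE          0x00100000u
#define GPC_STRIDE        0x00001000u
#define TPC_IN_GPC_BASE   0x00000400u
#define TPC_IN_GPC_STRIDE 0x00000100u

/* Rebase a TPC-relative offset onto a specific GPC/TPC pair. */
static uint32_t tpc_unicast_addr(uint32_t tpc_offset, uint32_t gpc, uint32_t tpc)
{
        return GPC_BASE + gpc * GPC_STRIDE +
               TPC_IN_GPC_BASE + tpc * TPC_IN_GPC_STRIDE + tpc_offset;
}

/* Expand a TPC broadcast offset into unicast addresses for every TPC of
 * every GPC.  Returns the number of table entries written. */
static size_t expand_tpc_broadcast(uint32_t tpc_offset,
                                   uint32_t gpc_count,
                                   const uint32_t *tpc_per_gpc,
                                   uint32_t *table, size_t table_len)
{
        size_t t = 0;
        uint32_t gpc, tpc;

        for (gpc = 0; gpc < gpc_count; gpc++)
                for (tpc = 0; tpc < tpc_per_gpc[gpc]; tpc++) {
                        if (t == table_len)
                                return t;       /* caller sized the table too small */
                        table[t++] = tpc_unicast_addr(tpc_offset, gpc, tpc);
                }
        return t;
}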
*/ +static const u32 _num_sm_dsm_perf_regs = 5; +/* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/ +static const u32 _num_sm_dsm_perf_ctrl_regs = 4; +static u32 _sm_dsm_perf_regs[5]; +static u32 _sm_dsm_perf_ctrl_regs[4]; + +static void init_sm_dsm_reg_info(void) +{ + if (_ovr_perf_regs[0] != 0) + return; + + _ovr_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r(); + _ovr_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r(); + _ovr_perf_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r(); + _ovr_perf_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r(); + _ovr_perf_regs[4] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status1_r(); + _ovr_perf_regs[5] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_control_r(); + _ovr_perf_regs[6] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_control_r(); + _ovr_perf_regs[7] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_control_r(); + _ovr_perf_regs[8] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_control_r(); + _ovr_perf_regs[9] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_control_r(); + _ovr_perf_regs[10] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_control_r(); + _ovr_perf_regs[11] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_control_r(); + _ovr_perf_regs[12] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_control_r(); + _ovr_perf_regs[13] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_r(); + _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(); + _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(); + _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(); + + + _sm_dsm_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r(); + _sm_dsm_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r(); + _sm_dsm_perf_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_r(); + _sm_dsm_perf_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_r(); + _sm_dsm_perf_regs[4] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_r(); + + _sm_dsm_perf_ctrl_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control1_r(); + _sm_dsm_perf_ctrl_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control2_r(); + _sm_dsm_perf_ctrl_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control3_r(); + _sm_dsm_perf_ctrl_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control4_r(); + +} + +/* TBD: would like to handle this elsewhere, at a higher level. + * these are currently constructed in a "test-then-write" style + * which makes it impossible to know externally whether a ctx + * write will actually occur. 
so later we should put a lazy, + * map-and-hold system in the patch write state */ +int gr_gk20a_ctx_patch_smpc(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u32 addr, u32 data, + u8 *context) +{ + u32 num_gpc = g->gr.gpc_count; + u32 num_tpc; + u32 tpc, gpc, reg; + u32 chk_addr; + u32 vaddr_lo; + u32 vaddr_hi; + u32 tmp; + + init_sm_dsm_reg_info(); + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); + + for (reg = 0; reg < _num_ovr_perf_regs; reg++) { + for (gpc = 0; gpc < num_gpc; gpc++) { + num_tpc = g->gr.gpc_tpc_count[gpc]; + for (tpc = 0; tpc < num_tpc; tpc++) { + chk_addr = ((proj_gpc_stride_v() * gpc) + + (proj_tpc_in_gpc_stride_v() * tpc) + + _ovr_perf_regs[reg]); + if (chk_addr != addr) + continue; + /* reset the patch count from previous + runs,if ucode has already processed + it */ + tmp = gk20a_mem_rd32(context + + ctxsw_prog_main_image_patch_count_o(), 0); + + if (!tmp) + ch_ctx->patch_ctx.data_count = 0; + + gr_gk20a_ctx_patch_write(g, ch_ctx, + addr, data, true); + + vaddr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va); + vaddr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va); + + gk20a_mem_wr32(context + + ctxsw_prog_main_image_patch_count_o(), + 0, ch_ctx->patch_ctx.data_count); + gk20a_mem_wr32(context + + ctxsw_prog_main_image_patch_adr_lo_o(), + 0, vaddr_lo); + gk20a_mem_wr32(context + + ctxsw_prog_main_image_patch_adr_hi_o(), + 0, vaddr_hi); + + /* we're not caching these on cpu side, + but later watch for it */ + + /* the l2 invalidate in the patch_write + * would be too early for this? */ + gk20a_mm_l2_invalidate(g); + return 0; + } + } + } + + return 0; +} + +static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset) +{ + u32 reg; + u32 quad_ctrl; + u32 half_ctrl; + u32 tpc, gpc; + u32 gpc_tpc_addr; + u32 gpc_tpc_stride; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "offset=0x%x", offset); + + gpc = pri_get_gpc_num(offset); + gpc_tpc_addr = pri_gpccs_addr_mask(offset); + tpc = pri_get_tpc_num(gpc_tpc_addr); + + quad_ctrl = quad & 0x1; /* first bit tells us quad */ + half_ctrl = (quad >> 1) & 0x1; /* second bit tells us half */ + + gpc_tpc_stride = gpc * proj_gpc_stride_v() + + tpc * proj_tpc_in_gpc_stride_v(); + gpc_tpc_addr = gr_gpc0_tpc0_sm_halfctl_ctrl_r() + gpc_tpc_stride; + + reg = gk20a_readl(g, gpc_tpc_addr); + reg = set_field(reg, + gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(), + gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(quad_ctrl)); + + gk20a_writel(g, gpc_tpc_addr, reg); + + gpc_tpc_addr = gr_gpc0_tpc0_sm_debug_sfe_control_r() + gpc_tpc_stride; + reg = gk20a_readl(g, gpc_tpc_addr); + reg = set_field(reg, + gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(), + gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(half_ctrl)); + gk20a_writel(g, gpc_tpc_addr, reg); +} + +#define ILLEGAL_ID (~0) + +static inline bool check_main_image_header_magic(void *context) +{ + u32 magic = gk20a_mem_rd32(context + + ctxsw_prog_main_image_magic_value_o(), 0); + gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic); + return magic == ctxsw_prog_main_image_magic_value_v_value_v(); +} +static inline bool check_local_header_magic(void *context) +{ + u32 magic = gk20a_mem_rd32(context + + ctxsw_prog_local_magic_value_o(), 0); + gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic); + return magic == ctxsw_prog_local_magic_value_v_value_v(); + +} + +/* most likely dupe of ctxsw_gpccs_header__size_1_v() */ +static inline int ctxsw_prog_ucode_header_size_in_bytes(void) +{ + return 256; +} + +void gr_gk20a_get_sm_dsm_perf_regs(struct gk20a *g, 
+ u32 *num_sm_dsm_perf_regs, + u32 **sm_dsm_perf_regs, + u32 *perf_register_stride) +{ + *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs; + *sm_dsm_perf_regs = _sm_dsm_perf_regs; + *perf_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(); +} + +void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, + u32 *num_sm_dsm_perf_ctrl_regs, + u32 **sm_dsm_perf_ctrl_regs, + u32 *ctrl_register_stride) +{ + *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs; + *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs; + *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); +} + +static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, + u32 addr, + bool is_quad, u32 quad, + u32 *context_buffer, + u32 context_buffer_size, + u32 *priv_offset) +{ + u32 i, data32; + u32 gpc_num, tpc_num; + u32 num_gpcs, num_tpcs; + u32 chk_addr; + u32 ext_priv_offset, ext_priv_size; + void *context; + u32 offset_to_segment, offset_to_segment_end; + u32 sm_dsm_perf_reg_id = ILLEGAL_ID; + u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID; + u32 num_ext_gpccs_ext_buffer_segments; + u32 inter_seg_offset; + u32 tpc_gpc_mask = (proj_tpc_in_gpc_stride_v() - 1); + u32 max_tpc_count; + u32 *sm_dsm_perf_ctrl_regs = NULL; + u32 num_sm_dsm_perf_ctrl_regs = 0; + u32 *sm_dsm_perf_regs = NULL; + u32 num_sm_dsm_perf_regs = 0; + u32 buffer_segments_size = 0; + u32 marker_size = 0; + u32 control_register_stride = 0; + u32 perf_register_stride = 0; + + /* Only have TPC registers in extended region, so if not a TPC reg, + then return error so caller can look elsewhere. */ + if (pri_is_gpc_addr(addr)) { + u32 gpc_addr = 0; + gpc_num = pri_get_gpc_num(addr); + gpc_addr = pri_gpccs_addr_mask(addr); + if (pri_is_tpc_addr(gpc_addr)) + tpc_num = pri_get_tpc_num(gpc_addr); + else + return -EINVAL; + + gk20a_dbg_info(" gpc = %d tpc = %d", + gpc_num, tpc_num); + } else + return -EINVAL; + + buffer_segments_size = ctxsw_prog_extended_buffer_segments_size_in_bytes_v(); + /* note below is in words/num_registers */ + marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2; + + context = context_buffer; + /* sanity check main header */ + if (!check_main_image_header_magic(context)) { + gk20a_err(dev_from_gk20a(g), + "Invalid main header: magic value"); + return -EINVAL; + } + num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0); + if (gpc_num >= num_gpcs) { + gk20a_err(dev_from_gk20a(g), + "GPC 0x%08x is greater than total count 0x%08x!\n", + gpc_num, num_gpcs); + return -EINVAL; + } + + data32 = gk20a_mem_rd32(context + ctxsw_prog_main_extended_buffer_ctl_o(), 0); + ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32); + if (0 == ext_priv_size) { + gk20a_dbg_info(" No extended memory in context buffer"); + return -EINVAL; + } + ext_priv_offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(data32); + + offset_to_segment = ext_priv_offset * ctxsw_prog_ucode_header_size_in_bytes(); + offset_to_segment_end = offset_to_segment + + (ext_priv_size * buffer_segments_size); + + /* check local header magic */ + context += ctxsw_prog_ucode_header_size_in_bytes(); + if (!check_local_header_magic(context)) { + gk20a_err(dev_from_gk20a(g), + "Invalid local header: magic value\n"); + return -EINVAL; + } + + /* + * See if the incoming register address is in the first table of + * registers. We check this by decoding only the TPC addr portion. + * If we get a hit on the TPC bit, we then double check the address + * by computing it from the base gpc/tpc strides. 
Then make sure + * it is a real match. + */ + g->ops.gr.get_sm_dsm_perf_regs(g, &num_sm_dsm_perf_regs, + &sm_dsm_perf_regs, + &perf_register_stride); + + init_sm_dsm_reg_info(); + + for (i = 0; i < num_sm_dsm_perf_regs; i++) { + if ((addr & tpc_gpc_mask) == (sm_dsm_perf_regs[i] & tpc_gpc_mask)) { + sm_dsm_perf_reg_id = i; + + gk20a_dbg_info("register match: 0x%08x", + sm_dsm_perf_regs[i]); + + chk_addr = (proj_gpc_base_v() + + (proj_gpc_stride_v() * gpc_num) + + proj_tpc_in_gpc_base_v() + + (proj_tpc_in_gpc_stride_v() * tpc_num) + + (sm_dsm_perf_regs[sm_dsm_perf_reg_id] & tpc_gpc_mask)); + + if (chk_addr != addr) { + gk20a_err(dev_from_gk20a(g), + "Oops addr miss-match! : 0x%08x != 0x%08x\n", + addr, chk_addr); + return -EINVAL; + } + break; + } + } + + /* Didn't find reg in supported group 1. + * so try the second group now */ + g->ops.gr.get_sm_dsm_perf_ctrl_regs(g, &num_sm_dsm_perf_ctrl_regs, + &sm_dsm_perf_ctrl_regs, + &control_register_stride); + + if (ILLEGAL_ID == sm_dsm_perf_reg_id) { + for (i = 0; i < num_sm_dsm_perf_ctrl_regs; i++) { + if ((addr & tpc_gpc_mask) == + (sm_dsm_perf_ctrl_regs[i] & tpc_gpc_mask)) { + sm_dsm_perf_ctrl_reg_id = i; + + gk20a_dbg_info("register match: 0x%08x", + sm_dsm_perf_ctrl_regs[i]); + + chk_addr = (proj_gpc_base_v() + + (proj_gpc_stride_v() * gpc_num) + + proj_tpc_in_gpc_base_v() + + (proj_tpc_in_gpc_stride_v() * tpc_num) + + (sm_dsm_perf_ctrl_regs[sm_dsm_perf_ctrl_reg_id] & + tpc_gpc_mask)); + + if (chk_addr != addr) { + gk20a_err(dev_from_gk20a(g), + "Oops addr miss-match! : 0x%08x != 0x%08x\n", + addr, chk_addr); + return -EINVAL; + + } + + break; + } + } + } + + if ((ILLEGAL_ID == sm_dsm_perf_ctrl_reg_id) && + (ILLEGAL_ID == sm_dsm_perf_reg_id)) + return -EINVAL; + + /* Skip the FECS extended header, nothing there for us now. */ + offset_to_segment += buffer_segments_size; + + /* skip through the GPCCS extended headers until we get to the data for + * our GPC. The size of each gpc extended segment is enough to hold the + * max tpc count for the gpcs,in 256b chunks. + */ + + max_tpc_count = proj_scal_litter_num_tpc_per_gpc_v(); + + num_ext_gpccs_ext_buffer_segments = (u32)((max_tpc_count + 1) / 2); + + offset_to_segment += (num_ext_gpccs_ext_buffer_segments * + buffer_segments_size * gpc_num); + + num_tpcs = g->gr.gpc_tpc_count[gpc_num]; + + /* skip the head marker to start with */ + inter_seg_offset = marker_size; + + if (ILLEGAL_ID != sm_dsm_perf_ctrl_reg_id) { + /* skip over control regs of TPC's before the one we want. + * then skip to the register in this tpc */ + inter_seg_offset = inter_seg_offset + + (tpc_num * control_register_stride) + + sm_dsm_perf_ctrl_reg_id; + } else { + /* skip all the control registers */ + inter_seg_offset = inter_seg_offset + + (num_tpcs * control_register_stride); + + /* skip the marker between control and counter segments */ + inter_seg_offset += marker_size; + + /* skip over counter regs of TPCs before the one we want */ + inter_seg_offset = inter_seg_offset + + (tpc_num * perf_register_stride) * + ctxsw_prog_extended_num_smpc_quadrants_v(); + + /* skip over the register for the quadrants we do not want. + * then skip to the register in this tpc */ + inter_seg_offset = inter_seg_offset + + (perf_register_stride * quad) + + sm_dsm_perf_reg_id; + } + + /* set the offset to the segment offset plus the inter segment offset to + * our register */ + offset_to_segment += (inter_seg_offset * 4); + + /* last sanity check: did we somehow compute an offset outside the + * extended buffer? 
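The walk above boils down to arithmetic over a fixed layout: skip the FECS extended segment, skip the GPCCS segments of lower-numbered GPCs, then within our GPC skip the head marker, the per-TPC control registers, the separator marker, and the counter registers of lower TPCs and unwanted quadrants. Here is that arithmetic condensed for the counter (non-control) case, with the layout parameters passed in explicitly; every name is illustrative and the offset is measured from the start of the extended region.

#include <stdint.h>

struct ext_layout {
        uint32_t segment_bytes;      /* size of one extended-buffer segment */
        uint32_t marker_words;       /* head/separator marker, in 32-bit words */
        uint32_t ctrl_stride_words;  /* control regs per TPC */
        uint32_t perf_stride_words;  /* counter regs per TPC, per quadrant */
        uint32_t num_quadrants;
        uint32_t segments_per_gpc;   /* GPCCS extended segments for one GPC */
};

/* Byte offset of counter register reg_id for (gpc, tpc, quad), measured
 * from the start of the extended region. */
static uint32_t ext_counter_offset(const struct ext_layout *l,
                                   uint32_t gpc, uint32_t tpc, uint32_t quad,
                                   uint32_t num_tpcs, uint32_t reg_id)
{
        uint32_t off = l->segment_bytes;                       /* skip FECS segment */
        uint32_t words;

        off += gpc * l->segments_per_gpc * l->segment_bytes;   /* skip earlier GPCs */

        words  = l->marker_words;                              /* head marker */
        words += num_tpcs * l->ctrl_stride_words;              /* all control regs */
        words += l->marker_words;                              /* ctrl/counter marker */
        words += tpc * l->perf_stride_words * l->num_quadrants;/* earlier TPCs */
        words += quad * l->perf_stride_words;                  /* earlier quadrants */
        words += reg_id;                                       /* our register */

        return off + words * 4;                                /* words -> bytes */
}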
*/ + if (offset_to_segment > offset_to_segment_end) { + gk20a_err(dev_from_gk20a(g), + "Overflow ctxsw buffer! 0x%08x > 0x%08x\n", + offset_to_segment, offset_to_segment_end); + return -EINVAL; + } + + *priv_offset = offset_to_segment; + + return 0; +} + + +static int +gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g, + int addr_type,/* enum ctxsw_addr_type */ + u32 pri_addr, + u32 gpc_num, u32 num_tpcs, + u32 num_ppcs, u32 ppc_mask, + u32 *priv_offset) +{ + u32 i; + u32 address, base_address; + u32 sys_offset, gpc_offset, tpc_offset, ppc_offset; + u32 ppc_num, tpc_num, tpc_addr, gpc_addr, ppc_addr; + struct aiv_gk20a *reg; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr); + + if (!g->gr.ctx_vars.valid) + return -EINVAL; + + /* Process the SYS/BE segment. */ + if ((addr_type == CTXSW_ADDR_TYPE_SYS) || + (addr_type == CTXSW_ADDR_TYPE_BE)) { + for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) { + reg = &g->gr.ctx_vars.ctxsw_regs.sys.l[i]; + address = reg->addr; + sys_offset = reg->index; + + if (pri_addr == address) { + *priv_offset = sys_offset; + return 0; + } + } + } + + /* Process the TPC segment. */ + if (addr_type == CTXSW_ADDR_TYPE_TPC) { + for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) { + for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) { + reg = &g->gr.ctx_vars.ctxsw_regs.tpc.l[i]; + address = reg->addr; + tpc_addr = pri_tpccs_addr_mask(address); + base_address = proj_gpc_base_v() + + (gpc_num * proj_gpc_stride_v()) + + proj_tpc_in_gpc_base_v() + + (tpc_num * proj_tpc_in_gpc_stride_v()); + address = base_address + tpc_addr; + /* + * The data for the TPCs is interleaved in the context buffer. + * Example with num_tpcs = 2 + * 0 1 2 3 4 5 6 7 8 9 10 11 ... + * 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ... + */ + tpc_offset = (reg->index * num_tpcs) + (tpc_num * 4); + + if (pri_addr == address) { + *priv_offset = tpc_offset; + return 0; + } + } + } + } + + /* Process the PPC segment. */ + if (addr_type == CTXSW_ADDR_TYPE_PPC) { + for (ppc_num = 0; ppc_num < num_ppcs; ppc_num++) { + for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) { + reg = &g->gr.ctx_vars.ctxsw_regs.ppc.l[i]; + address = reg->addr; + ppc_addr = pri_ppccs_addr_mask(address); + base_address = proj_gpc_base_v() + + (gpc_num * proj_gpc_stride_v()) + + proj_ppc_in_gpc_base_v() + + (ppc_num * proj_ppc_in_gpc_stride_v()); + address = base_address + ppc_addr; + /* + * The data for the PPCs is interleaved in the context buffer. + * Example with numPpcs = 2 + * 0 1 2 3 4 5 6 7 8 9 10 11 ... + * 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ... + */ + ppc_offset = (reg->index * num_ppcs) + (ppc_num * 4); + + if (pri_addr == address) { + *priv_offset = ppc_offset; + return 0; + } + } + } + } + + + /* Process the GPC segment. 
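The TPC and PPC segment loops above depend on the interleaved save format described in their comments: for N units, the buffer holds register 0 of every unit, then register 1 of every unit, and so on, so register index i of unit u lives at word i*N + u. A tiny helper plus a few asserts makes the formula concrete.

#include <assert.h>
#include <stdint.h>

/* Byte offset of register index reg_idx for unit unit_idx when the save
 * format interleaves num_units copies of each register. */
static uint32_t interleaved_offset(uint32_t reg_idx, uint32_t unit_idx,
                                   uint32_t num_units)
{
        return (reg_idx * num_units + unit_idx) * 4;
}

int main(void)
{
        /* Matches the comment's example with num_tpcs = 2:
         * word order is 0-0, 1-0, 0-1, 1-1, 0-2, 1-2, ... */
        assert(interleaved_offset(0, 0, 2) == 0);
        assert(interleaved_offset(0, 1, 2) == 4);
        assert(interleaved_offset(1, 0, 2) == 8);
        assert(interleaved_offset(5, 1, 2) == 44);
        return 0;
}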
*/ + if (addr_type == CTXSW_ADDR_TYPE_GPC) { + for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) { + reg = &g->gr.ctx_vars.ctxsw_regs.gpc.l[i]; + + address = reg->addr; + gpc_addr = pri_gpccs_addr_mask(address); + gpc_offset = reg->index; + + base_address = proj_gpc_base_v() + + (gpc_num * proj_gpc_stride_v()); + address = base_address + gpc_addr; + + if (pri_addr == address) { + *priv_offset = gpc_offset; + return 0; + } + } + } + + return -EINVAL; +} + +static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, + void *context, + u32 *num_ppcs, u32 *ppc_mask, + u32 *reg_ppc_count) +{ + u32 data32; + u32 litter_num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v(); + + /* + * if there is only 1 PES_PER_GPC, then we put the PES registers + * in the GPC reglist, so we can't error out if ppc.count == 0 + */ + if ((!g->gr.ctx_vars.valid) || + ((g->gr.ctx_vars.ctxsw_regs.ppc.count == 0) && + (litter_num_pes_per_gpc > 1))) + return -EINVAL; + + data32 = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0); + + *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32); + *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32); + + *reg_ppc_count = g->gr.ctx_vars.ctxsw_regs.ppc.count; + + return 0; +} + + + +/* + * This function will return the 32 bit offset for a priv register if it is + * present in the context buffer. + */ +static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, + u32 addr, + bool is_quad, u32 quad, + u32 *context_buffer, + u32 context_buffer_size, + u32 *priv_offset) +{ + struct gr_gk20a *gr = &g->gr; + u32 i, data32; + int err; + int addr_type; /*enum ctxsw_addr_type */ + u32 broadcast_flags; + u32 gpc_num, tpc_num, ppc_num, be_num; + u32 num_gpcs, num_tpcs, num_ppcs; + u32 offset; + u32 sys_priv_offset, gpc_priv_offset; + u32 ppc_mask, reg_list_ppc_count; + void *context; + u32 offset_to_segment; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); + + err = gr_gk20a_decode_priv_addr(g, addr, &addr_type, + &gpc_num, &tpc_num, &ppc_num, &be_num, + &broadcast_flags); + if (err) + return err; + + context = context_buffer; + if (!check_main_image_header_magic(context)) { + gk20a_err(dev_from_gk20a(g), + "Invalid main header: magic value"); + return -EINVAL; + } + num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0); + + /* Parse the FECS local header. */ + context += ctxsw_prog_ucode_header_size_in_bytes(); + if (!check_local_header_magic(context)) { + gk20a_err(dev_from_gk20a(g), + "Invalid FECS local header: magic value\n"); + return -EINVAL; + } + data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0); + sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); + + /* If found in Ext buffer, ok. + * If it failed and we expected to find it there (quad offset) + * then return the error. Otherwise continue on. + */ + err = gr_gk20a_find_priv_offset_in_ext_buffer(g, + addr, is_quad, quad, context_buffer, + context_buffer_size, priv_offset); + if (!err || (err && is_quad)) + return err; + + if ((addr_type == CTXSW_ADDR_TYPE_SYS) || + (addr_type == CTXSW_ADDR_TYPE_BE)) { + /* Find the offset in the FECS segment. 
*/ + offset_to_segment = sys_priv_offset * + ctxsw_prog_ucode_header_size_in_bytes(); + + err = gr_gk20a_process_context_buffer_priv_segment(g, + addr_type, addr, + 0, 0, 0, 0, + &offset); + if (err) + return err; + + *priv_offset = (offset_to_segment + offset); + return 0; + } + + if ((gpc_num + 1) > num_gpcs) { + gk20a_err(dev_from_gk20a(g), + "GPC %d not in this context buffer.\n", + gpc_num); + return -EINVAL; + } + + /* Parse the GPCCS local header(s).*/ + for (i = 0; i < num_gpcs; i++) { + context += ctxsw_prog_ucode_header_size_in_bytes(); + if (!check_local_header_magic(context)) { + gk20a_err(dev_from_gk20a(g), + "Invalid GPCCS local header: magic value\n"); + return -EINVAL; + + } + data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0); + gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); + + err = gr_gk20a_determine_ppc_configuration(g, context, + &num_ppcs, &ppc_mask, + ®_list_ppc_count); + if (err) + return err; + + num_tpcs = gk20a_mem_rd32(context + ctxsw_prog_local_image_num_tpcs_o(), 0); + + if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) { + gk20a_err(dev_from_gk20a(g), + "GPC %d TPC %d not in this context buffer.\n", + gpc_num, tpc_num); + return -EINVAL; + } + + /* Find the offset in the GPCCS segment.*/ + if (i == gpc_num) { + offset_to_segment = gpc_priv_offset * + ctxsw_prog_ucode_header_size_in_bytes(); + + if (addr_type == CTXSW_ADDR_TYPE_TPC) { + /*reg = gr->ctx_vars.ctxsw_regs.tpc.l;*/ + } else if (addr_type == CTXSW_ADDR_TYPE_PPC) { + /* The ucode stores TPC data before PPC data. + * Advance offset past TPC data to PPC data. */ + offset_to_segment += + ((gr->ctx_vars.ctxsw_regs.tpc.count * + num_tpcs) << 2); + } else if (addr_type == CTXSW_ADDR_TYPE_GPC) { + /* The ucode stores TPC/PPC data before GPC data. + * Advance offset past TPC/PPC data to GPC data. */ + /* note 1 PES_PER_GPC case */ + u32 litter_num_pes_per_gpc = + proj_scal_litter_num_pes_per_gpc_v(); + if (litter_num_pes_per_gpc > 1) { + offset_to_segment += + (((gr->ctx_vars.ctxsw_regs.tpc.count * + num_tpcs) << 2) + + ((reg_list_ppc_count * num_ppcs) << 2)); + } else { + offset_to_segment += + ((gr->ctx_vars.ctxsw_regs.tpc.count * + num_tpcs) << 2); + } + } else { + gk20a_err(dev_from_gk20a(g), + " Unknown address type.\n"); + return -EINVAL; + } + err = gr_gk20a_process_context_buffer_priv_segment(g, + addr_type, addr, + i, num_tpcs, + num_ppcs, ppc_mask, + &offset); + if (err) + return -EINVAL; + + *priv_offset = offset_to_segment + offset; + return 0; + } + } + + return -EINVAL; +} + + +int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, + struct nvhost_dbg_gpu_reg_op *ctx_ops, u32 num_ops, + u32 num_ctx_wr_ops, u32 num_ctx_rd_ops) +{ + struct gk20a *g = ch->g; + struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; + void *ctx_ptr = NULL; + int curr_gr_chid, curr_gr_ctx; + bool ch_is_curr_ctx, restart_gr_ctxsw = false; + u32 i, j, offset, v; + u32 max_offsets = proj_scal_litter_num_gpcs_v() * + proj_scal_litter_num_tpc_per_gpc_v(); + u32 *offsets = NULL; + u32 *offset_addrs = NULL; + u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops}; + int err, pass; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d", + num_ctx_wr_ops, num_ctx_rd_ops); + + /* disable channel switching. + * at that point the hardware state can be inspected to + * determine if the context we're interested in is current. 
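gr_gk20a_find_priv_offset_in_buffer() above treats the context image as a chain of 256-byte ucode headers: a main header, then a FECS local header, then one GPCCS local header per GPC, each validated by a magic word before its fields are used. Below is a stripped-down walk over such a layout, standalone and with an invented magic value and field position.

#include <stddef.h>
#include <stdint.h>

#define UCODE_HEADER_BYTES 256u
#define LOCAL_MAGIC        0xad0becabu  /* placeholder magic */

/* Read a little-endian 32-bit word at a byte offset into the image. */
static uint32_t img_rd32(const uint8_t *img, size_t off)
{
        return (uint32_t)img[off] | ((uint32_t)img[off + 1] << 8) |
               ((uint32_t)img[off + 2] << 16) | ((uint32_t)img[off + 3] << 24);
}

/* Return the byte offset of the local header for gpc_num (0-based), or
 * (size_t)-1 if a bound or magic check fails.  The first local header
 * after the main header belongs to FECS; GPC headers follow it. */
static size_t find_gpc_local_header(const uint8_t *img, size_t img_len,
                                    uint32_t gpc_num)
{
        size_t off = UCODE_HEADER_BYTES;        /* skip the main header */
        uint32_t i;

        for (i = 0; i <= gpc_num; i++) {
                off += UCODE_HEADER_BYTES;      /* advance to the next local header */
                if (off + UCODE_HEADER_BYTES > img_len ||
                    img_rd32(img, off) != LOCAL_MAGIC)  /* magic assumed at word 0 */
                        return (size_t)-1;
        }
        return off;
}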
+ */ + err = gr_gk20a_disable_ctxsw(g); + if (err) { + gk20a_err(dev_from_gk20a(g), "unable to stop gr ctxsw"); + /* this should probably be ctx-fatal... */ + goto cleanup; + } + + restart_gr_ctxsw = true; + + curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r()); + curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx); + ch_is_curr_ctx = (curr_gr_chid != -1) && (ch->hw_chid == curr_gr_chid); + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx); + if (ch_is_curr_ctx) { + for (pass = 0; pass < 2; pass++) { + ctx_op_nr = 0; + for (i = 0; (ctx_op_nr < num_ctx_ops[pass]) && (i < num_ops); ++i) { + /* only do ctx ops and only on the right pass */ + if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) || + (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) || + ((pass == 1) && !reg_op_is_read(ctx_ops[i].op)))) + continue; + + /* if this is a quad access, setup for special access*/ + if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD) + && g->ops.gr.access_smpc_reg) + g->ops.gr.access_smpc_reg(g, + ctx_ops[i].quad, + ctx_ops[i].offset); + offset = ctx_ops[i].offset; + + if (pass == 0) { /* write pass */ + v = gk20a_readl(g, offset); + v &= ~ctx_ops[i].and_n_mask_lo; + v |= ctx_ops[i].value_lo; + gk20a_writel(g, offset, v); + + gk20a_dbg(gpu_dbg_gpu_dbg, + "direct wr: offset=0x%x v=0x%x", + offset, v); + + if (ctx_ops[i].op == REGOP(WRITE_64)) { + v = gk20a_readl(g, offset + 4); + v &= ~ctx_ops[i].and_n_mask_hi; + v |= ctx_ops[i].value_hi; + gk20a_writel(g, offset + 4, v); + + gk20a_dbg(gpu_dbg_gpu_dbg, + "direct wr: offset=0x%x v=0x%x", + offset + 4, v); + } + + } else { /* read pass */ + ctx_ops[i].value_lo = + gk20a_readl(g, offset); + + gk20a_dbg(gpu_dbg_gpu_dbg, + "direct rd: offset=0x%x v=0x%x", + offset, ctx_ops[i].value_lo); + + if (ctx_ops[i].op == REGOP(READ_64)) { + ctx_ops[i].value_hi = + gk20a_readl(g, offset + 4); + + gk20a_dbg(gpu_dbg_gpu_dbg, + "direct rd: offset=0x%x v=0x%x", + offset, ctx_ops[i].value_lo); + } else + ctx_ops[i].value_hi = 0; + } + ctx_op_nr++; + } + } + goto cleanup; + } + + /* they're the same size, so just use one alloc for both */ + offsets = kzalloc(2 * sizeof(u32) * max_offsets, GFP_KERNEL); + if (!offsets) { + err = -ENOMEM; + goto cleanup; + } + offset_addrs = offsets + max_offsets; + + /* would have been a variant of gr_gk20a_apply_instmem_overrides */ + /* recoded in-place instead.*/ + ctx_ptr = vmap(ch_ctx->gr_ctx.pages, + PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, + 0, pgprot_dmacoherent(PAGE_KERNEL)); + if (!ctx_ptr) { + err = -ENOMEM; + goto cleanup; + } + + /* Channel gr_ctx buffer is gpu cacheable; so flush and invalidate. + * There should be no on-going/in-flight references by the gpu now. 
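Both the direct-register path above and the context-image path that follows run the op list twice, doing all writes on pass 0 and all reads on pass 1, and skipping ops that do not belong to the current pass. That filtering condition is easy to invert by accident, so here it is isolated; the enums and fields are simplified stand-ins for nvhost_dbg_gpu_reg_op.

#include <stdbool.h>
#include <stdint.h>

enum op_kind { OP_READ_32, OP_READ_64, OP_WRITE_32, OP_WRITE_64 };
enum op_type { TYPE_GLOBAL, TYPE_GR_CTX };

struct reg_op {
        enum op_kind op;
        enum op_type type;
        uint32_t offset;
};

static bool op_is_read(enum op_kind k)
{
        return k == OP_READ_32 || k == OP_READ_64;
}

/* pass 0 handles context writes, pass 1 handles context reads; global
 * (non-context) ops are handled elsewhere and always skipped here. */
static bool skip_on_this_pass(const struct reg_op *op, int pass)
{
        if (op->type == TYPE_GLOBAL)
                return true;
        if (pass == 0 && op_is_read(op->op))
                return true;
        if (pass == 1 && !op_is_read(op->op))
                return true;
        return false;
}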
*/ + gk20a_mm_fb_flush(g); + gk20a_mm_l2_flush(g, true); + + /* write to appropriate place in context image, + * first have to figure out where that really is */ + + /* first pass is writes, second reads */ + for (pass = 0; pass < 2; pass++) { + ctx_op_nr = 0; + for (i = 0; (ctx_op_nr < num_ctx_ops[pass]) && (i < num_ops); ++i) { + u32 num_offsets; + + /* only do ctx ops and only on the right pass */ + if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) || + (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) || + ((pass == 1) && !reg_op_is_read(ctx_ops[i].op)))) + continue; + + err = gr_gk20a_get_ctx_buffer_offsets(g, + ctx_ops[i].offset, + max_offsets, + offsets, offset_addrs, + &num_offsets, + ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD), + ctx_ops[i].quad); + if (err) { + gk20a_dbg(gpu_dbg_gpu_dbg, + "ctx op invalid offset: offset=0x%x", + ctx_ops[i].offset); + ctx_ops[i].status = + NVHOST_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET; + continue; + } + + /* if this is a quad access, setup for special access*/ + if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD) && + g->ops.gr.access_smpc_reg) + g->ops.gr.access_smpc_reg(g, ctx_ops[i].quad, + ctx_ops[i].offset); + + for (j = 0; j < num_offsets; j++) { + /* sanity check, don't write outside, worst case */ + if (offsets[j] >= g->gr.ctx_vars.golden_image_size) + continue; + if (pass == 0) { /* write pass */ + v = gk20a_mem_rd32(ctx_ptr + offsets[j], 0); + v &= ~ctx_ops[i].and_n_mask_lo; + v |= ctx_ops[i].value_lo; + gk20a_mem_wr32(ctx_ptr + offsets[j], 0, v); + + gk20a_dbg(gpu_dbg_gpu_dbg, + "context wr: offset=0x%x v=0x%x", + offsets[j], v); + + if (ctx_ops[i].op == REGOP(WRITE_64)) { + v = gk20a_mem_rd32(ctx_ptr + offsets[j] + 4, 0); + v &= ~ctx_ops[i].and_n_mask_hi; + v |= ctx_ops[i].value_hi; + gk20a_mem_wr32(ctx_ptr + offsets[j] + 4, 0, v); + + gk20a_dbg(gpu_dbg_gpu_dbg, + "context wr: offset=0x%x v=0x%x", + offsets[j] + 4, v); + } + + /* check to see if we need to add a special WAR + for some of the SMPC perf regs */ + gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], + v, ctx_ptr); + + } else { /* read pass */ + ctx_ops[i].value_lo = + gk20a_mem_rd32(ctx_ptr + offsets[0], 0); + + gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x", + offsets[0], ctx_ops[i].value_lo); + + if (ctx_ops[i].op == REGOP(READ_64)) { + ctx_ops[i].value_hi = + gk20a_mem_rd32(ctx_ptr + offsets[0] + 4, 0); + + gk20a_dbg(gpu_dbg_gpu_dbg, + "context rd: offset=0x%x v=0x%x", + offsets[0] + 4, ctx_ops[i].value_hi); + } else + ctx_ops[i].value_hi = 0; + } + } + ctx_op_nr++; + } + } +#if 0 + /* flush cpu caches for the ctx buffer? only if cpu cached, of course. 
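Each context write above is applied as a masked read-modify-write: clear the bits named by the and_n mask, OR in the new value, and for 64-bit ops repeat the step on the following word. A small helper pair captures that update rule against a plain word buffer instead of the mapped context image.

#include <stdint.h>

/* Apply a masked update to one 32-bit word: bits set in and_n_mask are
 * cleared, then value is ORed in. */
static uint32_t apply_masked(uint32_t old, uint32_t and_n_mask, uint32_t value)
{
        return (old & ~and_n_mask) | value;
}

/* 64-bit variant: the low word lives at word_idx, the high word right after. */
static void apply_masked64(uint32_t *buf, uint32_t word_idx,
                           uint32_t and_n_mask_lo, uint32_t value_lo,
                           uint32_t and_n_mask_hi, uint32_t value_hi)
{
        buf[word_idx]     = apply_masked(buf[word_idx], and_n_mask_lo, value_lo);
        buf[word_idx + 1] = apply_masked(buf[word_idx + 1], and_n_mask_hi, value_hi);
}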
+ * they aren't, yet */ + if (cached) { + FLUSH_CPU_DCACHE(ctx_ptr, + sg_phys(ch_ctx->gr_ctx.mem.ref), size); + } +#endif + + cleanup: + if (offsets) + kfree(offsets); + + if (ctx_ptr) + vunmap(ctx_ptr); + + if (restart_gr_ctxsw) { + int tmp_err = gr_gk20a_enable_ctxsw(g); + if (tmp_err) { + gk20a_err(dev_from_gk20a(g), "unable to restart ctxsw!\n"); + err = tmp_err; + } + } + + return err; +} + +static void gr_gk20a_cb_size_default(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + + gr->attrib_cb_default_size = + gr_gpc0_ppc0_cbm_cfg_size_default_v(); + gr->alpha_cb_default_size = + gr_gpc0_ppc0_cbm_cfg2_size_default_v(); +} + +static int gr_gk20a_calc_global_ctx_buffer_size(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + int size; + + gr->attrib_cb_size = gr->attrib_cb_default_size; + gr->alpha_cb_size = gr->alpha_cb_default_size + + (gr->alpha_cb_default_size >> 1); + + size = gr->attrib_cb_size * + gr_gpc0_ppc0_cbm_cfg_size_granularity_v() * + gr->max_tpc_count; + + size += gr->alpha_cb_size * + gr_gpc0_ppc0_cbm_cfg2_size_granularity_v() * + gr->max_tpc_count; + + return size; +} + +void gr_gk20a_commit_global_pagepool(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, u32 size, bool patch) +{ + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(), + gr_scc_pagepool_base_addr_39_8_f(addr), patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(), + gr_scc_pagepool_total_pages_f(size) | + gr_scc_pagepool_valid_true_f(), patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(), + gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(), + gr_gpcs_gcc_pagepool_total_pages_f(size), patch); + + gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_pagepool_r(), + gr_pd_pagepool_total_pages_f(size) | + gr_pd_pagepool_valid_true_f(), patch); +} + +void gk20a_init_gr(struct gpu_ops *gops) +{ + gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; + gops->gr.bundle_cb_defaults = gr_gk20a_bundle_cb_defaults; + gops->gr.cb_size_default = gr_gk20a_cb_size_default; + gops->gr.calc_global_ctx_buffer_size = + gr_gk20a_calc_global_ctx_buffer_size; + gops->gr.commit_global_attrib_cb = gr_gk20a_commit_global_attrib_cb; + gops->gr.commit_global_bundle_cb = gr_gk20a_commit_global_bundle_cb; + gops->gr.commit_global_cb_manager = gr_gk20a_commit_global_cb_manager; + gops->gr.commit_global_pagepool = gr_gk20a_commit_global_pagepool; + gops->gr.handle_sw_method = gr_gk20a_handle_sw_method; + gops->gr.set_alpha_circular_buffer_size = + gk20a_gr_set_circular_buffer_size; + gops->gr.set_circular_buffer_size = + gk20a_gr_set_alpha_circular_buffer_size; + gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions; + gops->gr.is_valid_class = gr_gk20a_is_valid_class; + gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs; + gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs; + gops->gr.init_fs_state = gr_gk20a_ctx_state_floorsweep; + gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask; + gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables; +} diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h new file mode 100644 index 00000000..7eb2923a --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -0,0 +1,406 @@ +/* + * GK20A Graphics Engine + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. 
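gk20a_init_gr() above fills a table of function pointers so later chips can override individual pieces of the graphics code while the common paths only ever call through the table. One detail there may deserve a second look: set_alpha_circular_buffer_size is assigned gk20a_gr_set_circular_buffer_size and set_circular_buffer_size is assigned gk20a_gr_set_alpha_circular_buffer_size, which reads like the two assignments were accidentally swapped. The ops-table pattern itself, in miniature and with invented names:

#include <stdio.h>

struct gr_ops_sketch {
        int  (*init_fs_state)(void *g);
        void (*set_circular_buffer_size)(void *g, unsigned int data);
};

static int chip_a_init_fs_state(void *g)
{
        (void)g;
        return 0;
}

static void chip_a_set_cb_size(void *g, unsigned int data)
{
        (void)g;
        printf("circular buffer size <- %u\n", data);
}

/* Per-chip init fills the table; newer chips override only what differs. */
static void chip_a_init_gr(struct gr_ops_sketch *ops)
{
        ops->init_fs_state = chip_a_init_fs_state;
        ops->set_circular_buffer_size = chip_a_set_cb_size;
}

/* Common code never names the chip, it just calls through the table. */
static int common_init(struct gr_ops_sketch *ops, void *g)
{
        int err = ops->init_fs_state(g);

        if (!err)
                ops->set_circular_buffer_size(g, 0x800);
        return err;
}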
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __GR_GK20A_H__ +#define __GR_GK20A_H__ + +#include + +#include "gr_ctx_gk20a.h" + +#define GR_IDLE_CHECK_DEFAULT 100 /* usec */ +#define GR_IDLE_CHECK_MAX 5000 /* usec */ + +#define INVALID_SCREEN_TILE_ROW_OFFSET 0xFFFFFFFF +#define INVALID_MAX_WAYS 0xFFFFFFFF + +#define GK20A_FECS_UCODE_IMAGE "fecs.bin" +#define GK20A_GPCCS_UCODE_IMAGE "gpccs.bin" + +enum /* global_ctx_buffer */ { + CIRCULAR = 0, + PAGEPOOL = 1, + ATTRIBUTE = 2, + CIRCULAR_VPR = 3, + PAGEPOOL_VPR = 4, + ATTRIBUTE_VPR = 5, + GOLDEN_CTX = 6, + PRIV_ACCESS_MAP = 7, + NR_GLOBAL_CTX_BUF = 8 +}; + +/* either ATTRIBUTE or ATTRIBUTE_VPR maps to ATTRIBUTE_VA */ +enum /*global_ctx_buffer_va */ { + CIRCULAR_VA = 0, + PAGEPOOL_VA = 1, + ATTRIBUTE_VA = 2, + GOLDEN_CTX_VA = 3, + PRIV_ACCESS_MAP_VA = 4, + NR_GLOBAL_CTX_BUF_VA = 5 +}; + +enum { + WAIT_UCODE_LOOP, + WAIT_UCODE_TIMEOUT, + WAIT_UCODE_ERROR, + WAIT_UCODE_OK +}; + +enum { + GR_IS_UCODE_OP_EQUAL, + GR_IS_UCODE_OP_NOT_EQUAL, + GR_IS_UCODE_OP_AND, + GR_IS_UCODE_OP_LESSER, + GR_IS_UCODE_OP_LESSER_EQUAL, + GR_IS_UCODE_OP_SKIP +}; + +enum { + eUcodeHandshakeInitComplete = 1, + eUcodeHandshakeMethodFinished +}; + +enum { + ELCG_RUN, /* clk always run, i.e. disable elcg */ + ELCG_STOP, /* clk is stopped */ + ELCG_AUTO /* clk will run when non-idle, standard elcg mode */ +}; + +enum { + BLCG_RUN, /* clk always run, i.e. 
disable blcg */ + BLCG_AUTO /* clk will run when non-idle, standard blcg mode */ +}; + +#ifndef GR_GO_IDLE_BUNDLE +#define GR_GO_IDLE_BUNDLE 0x0000e100 /* --V-B */ +#endif + +struct gr_channel_map_tlb_entry { + u32 curr_ctx; + u32 hw_chid; +}; + +struct gr_zcull_gk20a { + u32 aliquot_width; + u32 aliquot_height; + u32 aliquot_size; + u32 total_aliquots; + + u32 width_align_pixels; + u32 height_align_pixels; + u32 pixel_squares_by_aliquots; +}; + +struct gr_zcull_info { + u32 width_align_pixels; + u32 height_align_pixels; + u32 pixel_squares_by_aliquots; + u32 aliquot_total; + u32 region_byte_multiplier; + u32 region_header_size; + u32 subregion_header_size; + u32 subregion_width_align_pixels; + u32 subregion_height_align_pixels; + u32 subregion_count; +}; + +#define GK20A_ZBC_COLOR_VALUE_SIZE 4 /* RGBA */ + +#define GK20A_STARTOF_ZBC_TABLE 1 /* index zero reserved to indicate "not ZBCd" */ +#define GK20A_SIZEOF_ZBC_TABLE 16 /* match ltcs_ltss_dstg_zbc_index_address width (4) */ +#define GK20A_ZBC_TABLE_SIZE (16 - 1) + +#define GK20A_ZBC_TYPE_INVALID 0 +#define GK20A_ZBC_TYPE_COLOR 1 +#define GK20A_ZBC_TYPE_DEPTH 2 + +struct zbc_color_table { + u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE]; + u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE]; + u32 format; + u32 ref_cnt; +}; + +struct zbc_depth_table { + u32 depth; + u32 format; + u32 ref_cnt; +}; + +struct zbc_entry { + u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE]; + u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE]; + u32 depth; + u32 type; /* color or depth */ + u32 format; +}; + +struct zbc_query_params { + u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE]; + u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE]; + u32 depth; + u32 ref_cnt; + u32 format; + u32 type; /* color or depth */ + u32 index_size; /* [out] size, [in] index */ +}; + +struct gr_gk20a { + struct gk20a *g; + struct { + bool dynamic; + + u32 buffer_size; + u32 buffer_total_size; + + bool golden_image_initialized; + u32 golden_image_size; + u32 *local_golden_image; + + u32 zcull_ctxsw_image_size; + + u32 buffer_header_size; + + u32 priv_access_map_size; + + struct gr_ucode_gk20a ucode; + + struct av_list_gk20a sw_bundle_init; + struct av_list_gk20a sw_method_init; + struct aiv_list_gk20a sw_ctx_load; + struct av_list_gk20a sw_non_ctx_load; + struct { + struct aiv_list_gk20a sys; + struct aiv_list_gk20a gpc; + struct aiv_list_gk20a tpc; + struct aiv_list_gk20a zcull_gpc; + struct aiv_list_gk20a ppc; + struct aiv_list_gk20a pm_sys; + struct aiv_list_gk20a pm_gpc; + struct aiv_list_gk20a pm_tpc; + } ctxsw_regs; + int regs_base_index; + bool valid; + } ctx_vars; + + struct mutex ctx_mutex; /* protect golden ctx init */ + struct mutex fecs_mutex; /* protect fecs method */ + +#define GR_NETLIST_DYNAMIC -1 +#define GR_NETLIST_STATIC_A 'A' + int netlist; + + int initialized; + u32 num_fbps; + + u32 max_gpc_count; + u32 max_fbps_count; + u32 max_tpc_per_gpc_count; + u32 max_zcull_per_gpc_count; + u32 max_tpc_count; + + u32 sys_count; + u32 gpc_count; + u32 pe_count_per_gpc; + u32 ppc_count; + u32 *gpc_ppc_count; + u32 tpc_count; + u32 *gpc_tpc_count; + u32 zcb_count; + u32 *gpc_zcb_count; + u32 *pes_tpc_count[2]; + u32 *pes_tpc_mask[2]; + u32 *gpc_skip_mask; + + u32 bundle_cb_default_size; + u32 min_gpm_fifo_depth; + u32 bundle_cb_token_limit; + u32 attrib_cb_default_size; + u32 attrib_cb_size; + u32 alpha_cb_default_size; + u32 alpha_cb_size; + u32 timeslice_mode; + + struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF]; + + struct mmu_desc mmu_wr_mem; + u32 mmu_wr_mem_size; + struct mmu_desc mmu_rd_mem; + u32 
mmu_rd_mem_size; + + u8 *map_tiles; + u32 map_tile_count; + u32 map_row_offset; + +#define COMP_TAG_LINE_SIZE_SHIFT (17) /* one tag covers 128K */ +#define COMP_TAG_LINE_SIZE (1 << COMP_TAG_LINE_SIZE_SHIFT) + + u32 max_comptag_mem; /* max memory size (MB) for comptag */ + struct compbit_store_desc compbit_store; + struct gk20a_allocator comp_tags; + + struct gr_zcull_gk20a zcull; + + struct zbc_color_table zbc_col_tbl[GK20A_ZBC_TABLE_SIZE]; + struct zbc_depth_table zbc_dep_tbl[GK20A_ZBC_TABLE_SIZE]; + + s32 max_default_color_index; + s32 max_default_depth_index; + + s32 max_used_color_index; + s32 max_used_depth_index; + + u32 status_disable_mask; + +#define GR_CHANNEL_MAP_TLB_SIZE 2 /* must of power of 2 */ + struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE]; + u32 channel_tlb_flush_index; + spinlock_t ch_tlb_lock; + + void (*remove_support)(struct gr_gk20a *gr); + bool sw_ready; + bool skip_ucode_init; +}; + +void gk20a_fecs_dump_falcon_stats(struct gk20a *g); + +struct gk20a_ctxsw_ucode_segment { + u32 offset; + u32 size; +}; + +struct gk20a_ctxsw_ucode_segments { + u32 boot_entry; + u32 boot_imem_offset; + struct gk20a_ctxsw_ucode_segment boot; + struct gk20a_ctxsw_ucode_segment code; + struct gk20a_ctxsw_ucode_segment data; +}; + +struct gk20a_ctxsw_ucode_info { + u64 *p_va; + struct inst_desc inst_blk_desc; + struct surface_mem_desc surface_desc; + u64 ucode_gpuva; + struct gk20a_ctxsw_ucode_segments fecs; + struct gk20a_ctxsw_ucode_segments gpccs; +}; + +struct gk20a_ctxsw_bootloader_desc { + u32 start_offset; + u32 size; + u32 imem_offset; + u32 entry_point; +}; + +struct gpu_ops; +void gk20a_init_gr(struct gpu_ops *gops); +int gk20a_init_gr_support(struct gk20a *g); +void gk20a_gr_reset(struct gk20a *g); + +int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a); + +int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr); + +struct nvhost_alloc_obj_ctx_args; +struct nvhost_free_obj_ctx_args; + +int gk20a_alloc_obj_ctx(struct channel_gk20a *c, + struct nvhost_alloc_obj_ctx_args *args); +int gk20a_free_obj_ctx(struct channel_gk20a *c, + struct nvhost_free_obj_ctx_args *args); +void gk20a_free_channel_ctx(struct channel_gk20a *c); + +int gk20a_gr_isr(struct gk20a *g); +int gk20a_gr_nonstall_isr(struct gk20a *g); + +/* zcull */ +u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr); +int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, + struct channel_gk20a *c, u64 zcull_va, u32 mode); +int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, + struct gr_zcull_info *zcull_params); +/* zbc */ +int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr, + struct zbc_entry *zbc_val); +int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr, + struct zbc_query_params *query_params); +int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr, + struct zbc_entry *zbc_val); +int gr_gk20a_clear_zbc_table(struct gk20a *g, struct gr_gk20a *gr); +int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr); + +/* pmu */ +int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size); +int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr); +int gr_gk20a_fecs_set_reglist_virual_addr(struct gk20a *g, u64 pmu_va); + +void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine); +void gr_gk20a_init_blcg_mode(struct gk20a *g, u32 mode, u32 engine); + +/* sm */ +bool gk20a_gr_sm_debugger_attached(struct gk20a *g); + +#define gr_gk20a_elpg_protected_call(g, func) \ + ({ \ + int 
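The gr_gk20a_elpg_protected_call() macro being defined at this point uses the GCC statement-expression extension, ({ ... }), so the whole disable/call/re-enable sequence can sit in expression position and still yield the callee's return value. A minimal standalone example of the same construct follows; the elpg_off()/elpg_on() helpers are placeholders for the real ELPG calls.

#include <stdio.h>

static void elpg_off(void) { /* would disable power gating here */ }
static void elpg_on(void)  { /* would re-enable power gating here */ }

/* GCC/clang statement expression: the ({ ... }) block is an expression
 * whose value is that of its last statement, here 'err'. */
#define protected_call(func)            \
        ({                              \
                int err;                \
                elpg_off();             \
                err = (func);           \
                elpg_on();              \
                err;                    \
        })

static int do_work(int x)
{
        return x * 2;
}

int main(void)
{
        int ret = protected_call(do_work(21));

        printf("ret=%d\n", ret);
        return 0;
}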
err; \ + if (support_gk20a_pmu()) \ + gk20a_pmu_disable_elpg(g); \ + err = func; \ + if (support_gk20a_pmu()) \ + gk20a_pmu_enable_elpg(g); \ + err; \ + }) + +int gk20a_gr_suspend(struct gk20a *g); + +struct nvhost_dbg_gpu_reg_op; +int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, + struct nvhost_dbg_gpu_reg_op *ctx_ops, u32 num_ops, + u32 num_ctx_wr_ops, u32 num_ctx_rd_ops); +int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, + u32 addr, + u32 max_offsets, + u32 *offsets, u32 *offset_addrs, + u32 *num_offsets, + bool is_quad, u32 quad); +int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, + struct channel_gk20a *c, + bool enable_smpc_ctxsw); + +struct channel_ctx_gk20a; +int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, + u32 addr, u32 data, bool patch); +int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx); +int gr_gk20a_ctx_patch_write_end(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx); +void gr_gk20a_commit_global_pagepool(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + u64 addr, u32 size, bool patch); +void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); +void gr_gk20a_enable_hww_exceptions(struct gk20a *g); +void gr_gk20a_get_sm_dsm_perf_regs(struct gk20a *g, + u32 *num_sm_dsm_perf_regs, + u32 **sm_dsm_perf_regs, + u32 *perf_register_stride); +void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, + u32 *num_sm_dsm_perf_regs, + u32 **sm_dsm_perf_regs, + u32 *perf_register_stride); +int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); +#endif /*__GR_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h new file mode 100644 index 00000000..a82a1ee7 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h @@ -0,0 +1,179 @@ +/* + * GK20A Graphics Context Pri Register Addressing + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef _NVHOST_GR_PRI_GK20A_H_ +#define _NVHOST_GR_PRI_GK20A_H_ + +/* + * These convenience macros are generally for use in the management/modificaiton + * of the context state store for gr/compute contexts. 
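Editorial aside, not part of the patch: the pri helpers defined just below are normally composed rather than used alone. A minimal sketch of the common pattern in gr_gk20a.c, turning a GPC pri address into the unicast address of one specific GPC; the wrapper name here is hypothetical, and the proj_*_v() litter values are assumed to come from hw_proj_gk20a.h.

/*
 * Sketch only: mask off everything above the per-GPC window, then
 * rebase the remaining offset onto the chosen GPC's unicast window.
 */
static inline u32 example_gpc_unicast_addr(u32 addr, u32 gpc)
{
	u32 gpc_offset = pri_gpccs_addr_mask(addr);	/* offset inside one GPC window */

	return pri_gpc_addr(gpc_offset, gpc);		/* gpc_base + gpc * stride + offset */
}

A broadcast address (one for which pri_is_gpc_addr_shared() is true) would be expanded by calling this once per GPC index up to proj_scal_litter_num_gpcs_v().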
+ */ + +/* + * GPC pri addressing + */ +static inline u32 pri_gpccs_addr_width(void) +{ + return 15; /*from where?*/ +} +static inline u32 pri_gpccs_addr_mask(u32 addr) +{ + return addr & ((1 << pri_gpccs_addr_width()) - 1); +} +static inline u32 pri_gpc_addr(u32 addr, u32 gpc) +{ + return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) + addr; +} +static inline bool pri_is_gpc_addr_shared(u32 addr) +{ + return (addr >= proj_gpc_shared_base_v()) && + (addr < proj_gpc_shared_base_v() + proj_gpc_stride_v()); +} +static inline bool pri_is_gpc_addr(u32 addr) +{ + return ((addr >= proj_gpc_base_v()) && + (addr < proj_gpc_base_v() + + proj_scal_litter_num_gpcs_v() * proj_gpc_stride_v())) || + pri_is_gpc_addr_shared(addr); +} +static inline u32 pri_get_gpc_num(u32 addr) +{ + u32 i, start; + u32 num_gpcs = proj_scal_litter_num_gpcs_v(); + + for (i = 0; i < num_gpcs; i++) { + start = proj_gpc_base_v() + (i * proj_gpc_stride_v()); + if ((addr >= start) && (addr < (start + proj_gpc_stride_v()))) + return i; + } + return 0; +} +/* + * TPC pri addressing + */ +static inline u32 pri_tpccs_addr_width(void) +{ + return 11; /* from where? */ +} +static inline u32 pri_tpccs_addr_mask(u32 addr) +{ + return addr & ((1 << pri_tpccs_addr_width()) - 1); +} +static inline u32 pri_tpc_addr(u32 addr, u32 gpc, u32 tpc) +{ + return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) + + proj_tpc_in_gpc_base_v() + (tpc * proj_tpc_in_gpc_stride_v()) + + addr; +} +static inline bool pri_is_tpc_addr_shared(u32 addr) +{ + return (addr >= proj_tpc_in_gpc_shared_base_v()) && + (addr < (proj_tpc_in_gpc_shared_base_v() + + proj_tpc_in_gpc_stride_v())); +} +static inline bool pri_is_tpc_addr(u32 addr) +{ + return ((addr >= proj_tpc_in_gpc_base_v()) && + (addr < proj_tpc_in_gpc_base_v() + (proj_scal_litter_num_tpc_per_gpc_v() * + proj_tpc_in_gpc_stride_v()))) + || + pri_is_tpc_addr_shared(addr); +} +static inline u32 pri_get_tpc_num(u32 addr) +{ + u32 i, start; + u32 num_tpcs = proj_scal_litter_num_tpc_per_gpc_v(); + + for (i = 0; i < num_tpcs; i++) { + start = proj_tpc_in_gpc_base_v() + (i * proj_tpc_in_gpc_stride_v()); + if ((addr >= start) && (addr < (start + proj_tpc_in_gpc_stride_v()))) + return i; + } + return 0; +} + +/* + * BE pri addressing + */ +static inline u32 pri_becs_addr_width(void) +{ + return 10;/* from where? */ +} +static inline u32 pri_becs_addr_mask(u32 addr) +{ + return addr & ((1 << pri_becs_addr_width()) - 1); +} +static inline bool pri_is_be_addr_shared(u32 addr) +{ + return (addr >= proj_rop_shared_base_v()) && + (addr < proj_rop_shared_base_v() + proj_rop_stride_v()); +} +static inline u32 pri_be_shared_addr(u32 addr) +{ + return proj_rop_shared_base_v() + pri_becs_addr_mask(addr); +} +static inline bool pri_is_be_addr(u32 addr) +{ + return ((addr >= proj_rop_base_v()) && + (addr < proj_rop_base_v()+proj_scal_litter_num_fbps_v() * proj_rop_stride_v())) || + pri_is_be_addr_shared(addr); +} + +static inline u32 pri_get_be_num(u32 addr) +{ + u32 i, start; + u32 num_fbps = proj_scal_litter_num_fbps_v(); + for (i = 0; i < num_fbps; i++) { + start = proj_rop_base_v() + (i * proj_rop_stride_v()); + if ((addr >= start) && (addr < (start + proj_rop_stride_v()))) + return i; + } + return 0; +} + +/* + * PPC pri addressing + */ +static inline u32 pri_ppccs_addr_width(void) +{ + return 9; /* from where? 
*/ +} +static inline u32 pri_ppccs_addr_mask(u32 addr) +{ + return addr & ((1 << pri_ppccs_addr_width()) - 1); +} +static inline u32 pri_ppc_addr(u32 addr, u32 gpc, u32 ppc) +{ + return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) + + proj_ppc_in_gpc_base_v() + (ppc * proj_ppc_in_gpc_stride_v()) + addr; +} + +enum ctxsw_addr_type { + CTXSW_ADDR_TYPE_SYS = 0, + CTXSW_ADDR_TYPE_GPC = 1, + CTXSW_ADDR_TYPE_TPC = 2, + CTXSW_ADDR_TYPE_BE = 3, + CTXSW_ADDR_TYPE_PPC = 4 +}; + +#define PRI_BROADCAST_FLAGS_NONE 0 +#define PRI_BROADCAST_FLAGS_GPC BIT(0) +#define PRI_BROADCAST_FLAGS_TPC BIT(1) +#define PRI_BROADCAST_FLAGS_BE BIT(2) +#define PRI_BROADCAST_FLAGS_PPC BIT(3) + +#endif /*_NVHOST_GR_PRI_GK20A_H_ */ diff --git a/drivers/gpu/nvgpu/gk20a/hal.c b/drivers/gpu/nvgpu/gk20a/hal.c new file mode 100644 index 00000000..dea740c2 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hal.c @@ -0,0 +1,33 @@ +/* + * NVIDIA GPU HAL interface. + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include "gk20a.h" +#include "hal_gk20a.h" + +int gpu_init_hal(struct gk20a *g) +{ + u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl; + switch (ver) { + case GK20A_GPUID_GK20A: + gk20a_dbg_info("gk20a detected"); + gk20a_init_hal(&g->ops); + break; + default: + gk20a_err(&g->dev->dev, "no support for %x", ver); + return -ENODEV; + } + + return 0; +} diff --git a/drivers/gpu/nvgpu/gk20a/hal.h b/drivers/gpu/nvgpu/gk20a/hal.h new file mode 100644 index 00000000..da02cf5f --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hal.h @@ -0,0 +1,25 @@ +/* + * NVIDIA GPU Hardware Abstraction Layer functions definitions. + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __HAL_GPU__ +#define __HAL_GPU__ + +#include + +struct gk20a; + +int gpu_init_hal(struct gk20a *g); + +#endif /* __HAL_GPU__ */ diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c new file mode 100644 index 00000000..b3e9b0e6 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c @@ -0,0 +1,50 @@ +/* + * drivers/video/tegra/host/gk20a/hal_gk20a.c + * + * GK20A Tegra HAL interface. + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
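Editorial aside, not part of the patch: a hedged sketch of how the ops table filled in by gk20a_init_hal() is meant to be consumed after gpu_init_hal() has resolved the chip. It assumes struct gk20a embeds the table as g->ops (as gpu_init_hal() above suggests) and that the clock-gating callbacks take (struct gk20a *, bool) as declared in gk20a_gating_reglist.h; the function name is made up.

/*
 * Sketch: resolve the chip once, then dispatch through g->ops so common
 * code never references gk20a_* functions directly.
 */
static int example_enable_gr_slcg(struct gk20a *g)
{
	int err = gpu_init_hal(g);	/* selects gk20a_ops for GK20A_GPUID_GK20A */

	if (err)
		return err;

	if (g->ops.clock_gating.slcg_gr_load_gating_prod)
		g->ops.clock_gating.slcg_gr_load_gating_prod(g, true);

	return 0;
}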
See the GNU General Public License for + * more details. + */ + +#include "hal_gk20a.h" +#include "ltc_gk20a.h" +#include "fb_gk20a.h" +#include "gk20a.h" +#include "gk20a_gating_reglist.h" +#include "channel_gk20a.h" + +struct gpu_ops gk20a_ops = { + .clock_gating = { + .slcg_gr_load_gating_prod = + gr_gk20a_slcg_gr_load_gating_prod, + .slcg_perf_load_gating_prod = + gr_gk20a_slcg_perf_load_gating_prod, + .blcg_gr_load_gating_prod = + gr_gk20a_blcg_gr_load_gating_prod, + .pg_gr_load_gating_prod = + gr_gk20a_pg_gr_load_gating_prod, + .slcg_therm_load_gating_prod = + gr_gk20a_slcg_therm_load_gating_prod, + } +}; + +int gk20a_init_hal(struct gpu_ops *gops) +{ + *gops = gk20a_ops; + gk20a_init_ltc(gops); + gk20a_init_gr(gops); + gk20a_init_fb(gops); + gk20a_init_fifo(gops); + gops->name = "gk20a"; + + return 0; +} diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.h b/drivers/gpu/nvgpu/gk20a/hal_gk20a.h new file mode 100644 index 00000000..db77a4a7 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.h @@ -0,0 +1,28 @@ +/* + * drivers/video/tegra/host/gk20a/hal_gk20a.h + * + * GK20A Hardware Abstraction Layer functions definitions. + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __HAL_GK20A__ +#define __HAL_GK20A__ + +#include + +struct gpu_ops; +struct gk20a; + +int gk20a_init_hal(struct gpu_ops *gops); + +#endif /* __HAL_GK20A__ */ diff --git a/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h new file mode 100644 index 00000000..ebf8a873 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . 
+ * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ +#ifndef _hw_bus_gk20a_h_ +#define _hw_bus_gk20a_h_ + +static inline u32 bus_bar1_block_r(void) +{ + return 0x00001704; +} +static inline u32 bus_bar1_block_ptr_f(u32 v) +{ + return (v & 0xfffffff) << 0; +} +static inline u32 bus_bar1_block_target_vid_mem_f(void) +{ + return 0x0; +} +static inline u32 bus_bar1_block_mode_virtual_f(void) +{ + return 0x80000000; +} +static inline u32 bus_bar1_block_ptr_shift_v(void) +{ + return 0x0000000c; +} +static inline u32 bus_intr_0_r(void) +{ + return 0x00001100; +} +static inline u32 bus_intr_0_pri_squash_m(void) +{ + return 0x1 << 1; +} +static inline u32 bus_intr_0_pri_fecserr_m(void) +{ + return 0x1 << 2; +} +static inline u32 bus_intr_0_pri_timeout_m(void) +{ + return 0x1 << 3; +} +static inline u32 bus_intr_en_0_r(void) +{ + return 0x00001140; +} +static inline u32 bus_intr_en_0_pri_squash_m(void) +{ + return 0x1 << 1; +} +static inline u32 bus_intr_en_0_pri_fecserr_m(void) +{ + return 0x1 << 2; +} +static inline u32 bus_intr_en_0_pri_timeout_m(void) +{ + return 0x1 << 3; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_ccsr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ccsr_gk20a.h new file mode 100644 index 00000000..573329f1 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_ccsr_gk20a.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . 
This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ +#ifndef _hw_ccsr_gk20a_h_ +#define _hw_ccsr_gk20a_h_ + +static inline u32 ccsr_channel_inst_r(u32 i) +{ + return 0x00800000 + i*8; +} +static inline u32 ccsr_channel_inst__size_1_v(void) +{ + return 0x00000080; +} +static inline u32 ccsr_channel_inst_ptr_f(u32 v) +{ + return (v & 0xfffffff) << 0; +} +static inline u32 ccsr_channel_inst_target_vid_mem_f(void) +{ + return 0x0; +} +static inline u32 ccsr_channel_inst_bind_false_f(void) +{ + return 0x0; +} +static inline u32 ccsr_channel_inst_bind_true_f(void) +{ + return 0x80000000; +} +static inline u32 ccsr_channel_r(u32 i) +{ + return 0x00800004 + i*8; +} +static inline u32 ccsr_channel__size_1_v(void) +{ + return 0x00000080; +} +static inline u32 ccsr_channel_enable_v(u32 r) +{ + return (r >> 0) & 0x1; +} +static inline u32 ccsr_channel_enable_set_f(u32 v) +{ + return (v & 0x1) << 10; +} +static inline u32 ccsr_channel_enable_set_true_f(void) +{ + return 0x400; +} +static inline u32 ccsr_channel_enable_clr_true_f(void) +{ + return 0x800; +} +static inline u32 ccsr_channel_runlist_f(u32 v) +{ + return (v & 0xf) << 16; +} +static inline u32 ccsr_channel_status_v(u32 r) +{ + return (r >> 24) & 0xf; +} +static inline u32 ccsr_channel_busy_v(u32 r) +{ + return (r >> 28) & 0x1; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_chiplet_pwr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_chiplet_pwr_gk20a.h new file mode 100644 index 00000000..66bf01b0 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_chiplet_pwr_gk20a.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . 
This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ +#ifndef _hw_chiplet_pwr_gk20a_h_ +#define _hw_chiplet_pwr_gk20a_h_ + +static inline u32 chiplet_pwr_gpcs_weight_6_r(void) +{ + return 0x0010e018; +} +static inline u32 chiplet_pwr_gpcs_weight_7_r(void) +{ + return 0x0010e01c; +} +static inline u32 chiplet_pwr_gpcs_config_1_r(void) +{ + return 0x0010e03c; +} +static inline u32 chiplet_pwr_gpcs_config_1_ba_enable_yes_f(void) +{ + return 0x1; +} +static inline u32 chiplet_pwr_fbps_weight_0_r(void) +{ + return 0x0010e100; +} +static inline u32 chiplet_pwr_fbps_weight_1_r(void) +{ + return 0x0010e104; +} +static inline u32 chiplet_pwr_fbps_config_1_r(void) +{ + return 0x0010e13c; +} +static inline u32 chiplet_pwr_fbps_config_1_ba_enable_yes_f(void) +{ + return 0x1; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h new file mode 100644 index 00000000..e2a4f2f2 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
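Editorial aside, not part of the patch: the ctxsw_prog offsets below index into a context image in memory rather than MMIO space, so they are used in read-modify-write form. A minimal sketch of the _o()/_m()/_f() idiom, treating the mapped image as a plain u32 array; the function name is hypothetical and only loosely mirrors what gr_gk20a_update_smpc_ctxsw_mode() does.

/* Sketch: flip the SMPC ctxsw mode field inside a mapped context image. */
static void example_set_smpc_mode(u32 *ctx_image, bool enable)
{
	u32 w = ctxsw_prog_main_image_pm_o() >> 2;	/* byte offset -> word index */
	u32 data = ctx_image[w];

	data &= ~ctxsw_prog_main_image_pm_smpc_mode_m();	/* clear the field */
	data |= enable ? ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f()
		       : ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();

	ctx_image[w] = data;
}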
+ */ +#ifndef _hw_ctxsw_prog_gk20a_h_ +#define _hw_ctxsw_prog_gk20a_h_ + +static inline u32 ctxsw_prog_fecs_header_v(void) +{ + return 0x00000100; +} +static inline u32 ctxsw_prog_main_image_num_gpcs_o(void) +{ + return 0x00000008; +} +static inline u32 ctxsw_prog_main_image_patch_count_o(void) +{ + return 0x00000010; +} +static inline u32 ctxsw_prog_main_image_patch_adr_lo_o(void) +{ + return 0x00000014; +} +static inline u32 ctxsw_prog_main_image_patch_adr_hi_o(void) +{ + return 0x00000018; +} +static inline u32 ctxsw_prog_main_image_zcull_o(void) +{ + return 0x0000001c; +} +static inline u32 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v(void) +{ + return 0x00000001; +} +static inline u32 ctxsw_prog_main_image_zcull_mode_separate_buffer_v(void) +{ + return 0x00000002; +} +static inline u32 ctxsw_prog_main_image_zcull_ptr_o(void) +{ + return 0x00000020; +} +static inline u32 ctxsw_prog_main_image_pm_o(void) +{ + return 0x00000028; +} +static inline u32 ctxsw_prog_main_image_pm_mode_m(void) +{ + return 0x7 << 0; +} +static inline u32 ctxsw_prog_main_image_pm_mode_v(u32 r) +{ + return (r >> 0) & 0x7; +} +static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void) +{ + return 0x0; +} +static inline u32 ctxsw_prog_main_image_pm_smpc_mode_m(void) +{ + return 0x7 << 3; +} +static inline u32 ctxsw_prog_main_image_pm_smpc_mode_v(u32 r) +{ + return (r >> 3) & 0x7; +} +static inline u32 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(void) +{ + return 0x0; +} +static inline u32 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f(void) +{ + return 0x8; +} +static inline u32 ctxsw_prog_main_image_pm_ptr_o(void) +{ + return 0x0000002c; +} +static inline u32 ctxsw_prog_main_image_num_save_ops_o(void) +{ + return 0x000000f4; +} +static inline u32 ctxsw_prog_main_image_num_restore_ops_o(void) +{ + return 0x000000f8; +} +static inline u32 ctxsw_prog_main_image_magic_value_o(void) +{ + return 0x000000fc; +} +static inline u32 ctxsw_prog_main_image_magic_value_v_value_v(void) +{ + return 0x600dc0de; +} +static inline u32 ctxsw_prog_main_image_priv_access_map_config_o(void) +{ + return 0x000000a0; +} +static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f(void) +{ + return 0x0; +} +static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_allow_none_f(void) +{ + return 0x1; +} +static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(void) +{ + return 0x2; +} +static inline u32 ctxsw_prog_main_image_priv_access_map_addr_lo_o(void) +{ + return 0x000000a4; +} +static inline u32 ctxsw_prog_main_image_priv_access_map_addr_hi_o(void) +{ + return 0x000000a8; +} +static inline u32 ctxsw_prog_main_image_misc_options_o(void) +{ + return 0x0000003c; +} +static inline u32 ctxsw_prog_main_image_misc_options_verif_features_m(void) +{ + return 0x1 << 3; +} +static inline u32 ctxsw_prog_main_image_misc_options_verif_features_disabled_f(void) +{ + return 0x0; +} +static inline u32 ctxsw_prog_main_image_misc_options_verif_features_enabled_f(void) +{ + return 0x8; +} +static inline u32 ctxsw_prog_local_priv_register_ctl_o(void) +{ + return 0x0000000c; +} +static inline u32 ctxsw_prog_local_priv_register_ctl_offset_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +static inline u32 ctxsw_prog_local_image_ppc_info_o(void) +{ + return 0x000000f4; +} +static inline u32 ctxsw_prog_local_image_ppc_info_num_ppcs_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +static inline u32 ctxsw_prog_local_image_ppc_info_ppc_mask_v(u32 r) +{ + return (r >> 16) & 0xffff; +} +static inline u32 
ctxsw_prog_local_image_num_tpcs_o(void) +{ + return 0x000000f8; +} +static inline u32 ctxsw_prog_local_magic_value_o(void) +{ + return 0x000000fc; +} +static inline u32 ctxsw_prog_local_magic_value_v_value_v(void) +{ + return 0xad0becab; +} +static inline u32 ctxsw_prog_main_extended_buffer_ctl_o(void) +{ + return 0x000000ec; +} +static inline u32 ctxsw_prog_main_extended_buffer_ctl_offset_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +static inline u32 ctxsw_prog_main_extended_buffer_ctl_size_v(u32 r) +{ + return (r >> 16) & 0xff; +} +static inline u32 ctxsw_prog_extended_buffer_segments_size_in_bytes_v(void) +{ + return 0x00000100; +} +static inline u32 ctxsw_prog_extended_marker_size_in_bytes_v(void) +{ + return 0x00000004; +} +static inline u32 ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(void) +{ + return 0x00000005; +} +static inline u32 ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(void) +{ + return 0x00000004; +} +static inline u32 ctxsw_prog_extended_num_smpc_quadrants_v(void) +{ + return 0x00000004; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h new file mode 100644 index 00000000..b7edc29d --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
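Editorial aside, not part of the patch: a hedged sketch of how the fb_mmu_invalidate accessors below are combined, loosely following the TLB invalidate path in mm_gk20a.c. gk20a_readl()/gk20a_writel() are assumed to be the driver's MMIO helpers from gk20a.h, the PDB address is assumed to be pre-shifted to the required alignment by the caller, and the timeout handling real code needs is omitted.

/* Sketch: program the PDB, trigger a full-VA invalidate, poll the trigger. */
static void example_mmu_invalidate(struct gk20a *g, u32 pdb_addr_shifted)
{
	gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
		     fb_mmu_invalidate_pdb_addr_f(pdb_addr_shifted) |
		     fb_mmu_invalidate_pdb_aperture_vid_mem_f());

	gk20a_writel(g, fb_mmu_invalidate_r(),
		     fb_mmu_invalidate_all_va_true_f() |
		     fb_mmu_invalidate_trigger_true_f());

	/* _v() extractors compare directly against the unshifted _v() constants */
	while (fb_mmu_invalidate_trigger_v(gk20a_readl(g, fb_mmu_invalidate_r())) ==
	       fb_mmu_invalidate_trigger_true_v())
		;	/* real code bounds this loop with a retry/timeout */
}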
+ */ +#ifndef _hw_fb_gk20a_h_ +#define _hw_fb_gk20a_h_ + +static inline u32 fb_mmu_ctrl_r(void) +{ + return 0x00100c80; +} +static inline u32 fb_mmu_ctrl_vm_pg_size_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 fb_mmu_ctrl_vm_pg_size_128kb_f(void) +{ + return 0x0; +} +static inline u32 fb_mmu_ctrl_pri_fifo_empty_v(u32 r) +{ + return (r >> 15) & 0x1; +} +static inline u32 fb_mmu_ctrl_pri_fifo_empty_false_f(void) +{ + return 0x0; +} +static inline u32 fb_mmu_ctrl_pri_fifo_space_v(u32 r) +{ + return (r >> 16) & 0xff; +} +static inline u32 fb_mmu_invalidate_pdb_r(void) +{ + return 0x00100cb8; +} +static inline u32 fb_mmu_invalidate_pdb_aperture_vid_mem_f(void) +{ + return 0x0; +} +static inline u32 fb_mmu_invalidate_pdb_addr_f(u32 v) +{ + return (v & 0xfffffff) << 4; +} +static inline u32 fb_mmu_invalidate_r(void) +{ + return 0x00100cbc; +} +static inline u32 fb_mmu_invalidate_all_va_true_f(void) +{ + return 0x1; +} +static inline u32 fb_mmu_invalidate_all_pdb_true_f(void) +{ + return 0x2; +} +static inline u32 fb_mmu_invalidate_trigger_s(void) +{ + return 1; +} +static inline u32 fb_mmu_invalidate_trigger_f(u32 v) +{ + return (v & 0x1) << 31; +} +static inline u32 fb_mmu_invalidate_trigger_m(void) +{ + return 0x1 << 31; +} +static inline u32 fb_mmu_invalidate_trigger_v(u32 r) +{ + return (r >> 31) & 0x1; +} +static inline u32 fb_mmu_invalidate_trigger_true_f(void) +{ + return 0x80000000; +} +static inline u32 fb_mmu_debug_wr_r(void) +{ + return 0x00100cc8; +} +static inline u32 fb_mmu_debug_wr_aperture_s(void) +{ + return 2; +} +static inline u32 fb_mmu_debug_wr_aperture_f(u32 v) +{ + return (v & 0x3) << 0; +} +static inline u32 fb_mmu_debug_wr_aperture_m(void) +{ + return 0x3 << 0; +} +static inline u32 fb_mmu_debug_wr_aperture_v(u32 r) +{ + return (r >> 0) & 0x3; +} +static inline u32 fb_mmu_debug_wr_aperture_vid_mem_f(void) +{ + return 0x0; +} +static inline u32 fb_mmu_debug_wr_vol_false_f(void) +{ + return 0x0; +} +static inline u32 fb_mmu_debug_wr_vol_true_v(void) +{ + return 0x00000001; +} +static inline u32 fb_mmu_debug_wr_vol_true_f(void) +{ + return 0x4; +} +static inline u32 fb_mmu_debug_wr_addr_v(u32 r) +{ + return (r >> 4) & 0xfffffff; +} +static inline u32 fb_mmu_debug_wr_addr_alignment_v(void) +{ + return 0x0000000c; +} +static inline u32 fb_mmu_debug_rd_r(void) +{ + return 0x00100ccc; +} +static inline u32 fb_mmu_debug_rd_aperture_vid_mem_f(void) +{ + return 0x0; +} +static inline u32 fb_mmu_debug_rd_vol_false_f(void) +{ + return 0x0; +} +static inline u32 fb_mmu_debug_rd_addr_v(u32 r) +{ + return (r >> 4) & 0xfffffff; +} +static inline u32 fb_mmu_debug_rd_addr_alignment_v(void) +{ + return 0x0000000c; +} +static inline u32 fb_mmu_debug_ctrl_r(void) +{ + return 0x00100cc4; +} +static inline u32 fb_mmu_debug_ctrl_debug_v(u32 r) +{ + return (r >> 16) & 0x1; +} +static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void) +{ + return 0x00000001; +} +static inline u32 fb_mmu_vpr_info_r(void) +{ + return 0x00100cd0; +} +static inline u32 fb_mmu_vpr_info_fetch_v(u32 r) +{ + return (r >> 2) & 0x1; +} +static inline u32 fb_mmu_vpr_info_fetch_false_v(void) +{ + return 0x00000000; +} +static inline u32 fb_mmu_vpr_info_fetch_true_v(void) +{ + return 0x00000001; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h new file mode 100644 index 00000000..a39d3c51 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h @@ -0,0 +1,565 @@ +/* + * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
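Editorial aside, not part of the patch: the fifo_engine_status fields below are read with _v() extractors and compared against the matching _v() constants. A small sketch of that decode pattern; the helper name is made up, and gk20a_readl() is assumed to be the MMIO read helper from gk20a.h.

/* Sketch: is the engine (or its context switch) still busy? */
static bool example_engine_busy(struct gk20a *g, u32 engine_id)
{
	u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
	u32 ctx_status = fifo_engine_status_ctx_status_v(status);

	if (ctx_status == fifo_engine_status_ctx_status_ctxsw_load_v() ||
	    ctx_status == fifo_engine_status_ctx_status_ctxsw_save_v() ||
	    ctx_status == fifo_engine_status_ctx_status_ctxsw_switch_v())
		return true;	/* a context switch is in flight */

	return fifo_engine_status_engine_v(status) ==
	       fifo_engine_status_engine_busy_v();
}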
+ */ +#ifndef _hw_fifo_gk20a_h_ +#define _hw_fifo_gk20a_h_ + +static inline u32 fifo_bar1_base_r(void) +{ + return 0x00002254; +} +static inline u32 fifo_bar1_base_ptr_f(u32 v) +{ + return (v & 0xfffffff) << 0; +} +static inline u32 fifo_bar1_base_ptr_align_shift_v(void) +{ + return 0x0000000c; +} +static inline u32 fifo_bar1_base_valid_false_f(void) +{ + return 0x0; +} +static inline u32 fifo_bar1_base_valid_true_f(void) +{ + return 0x10000000; +} +static inline u32 fifo_runlist_base_r(void) +{ + return 0x00002270; +} +static inline u32 fifo_runlist_base_ptr_f(u32 v) +{ + return (v & 0xfffffff) << 0; +} +static inline u32 fifo_runlist_base_target_vid_mem_f(void) +{ + return 0x0; +} +static inline u32 fifo_runlist_r(void) +{ + return 0x00002274; +} +static inline u32 fifo_runlist_engine_f(u32 v) +{ + return (v & 0xf) << 20; +} +static inline u32 fifo_eng_runlist_base_r(u32 i) +{ + return 0x00002280 + i*8; +} +static inline u32 fifo_eng_runlist_base__size_1_v(void) +{ + return 0x00000001; +} +static inline u32 fifo_eng_runlist_r(u32 i) +{ + return 0x00002284 + i*8; +} +static inline u32 fifo_eng_runlist__size_1_v(void) +{ + return 0x00000001; +} +static inline u32 fifo_eng_runlist_length_f(u32 v) +{ + return (v & 0xffff) << 0; +} +static inline u32 fifo_eng_runlist_pending_true_f(void) +{ + return 0x100000; +} +static inline u32 fifo_eng_timeslice_r(u32 i) +{ + return 0x00002310 + i*4; +} +static inline u32 fifo_eng_timeslice_timeout_128_f(void) +{ + return 0x80; +} +static inline u32 fifo_eng_timeslice_timescale_3_f(void) +{ + return 0x3000; +} +static inline u32 fifo_eng_timeslice_enable_true_f(void) +{ + return 0x10000000; +} +static inline u32 fifo_pb_timeslice_r(u32 i) +{ + return 0x00002350 + i*4; +} +static inline u32 fifo_pb_timeslice_timeout_16_f(void) +{ + return 0x10; +} +static inline u32 fifo_pb_timeslice_timescale_0_f(void) +{ + return 0x0; +} +static inline u32 fifo_pb_timeslice_enable_true_f(void) +{ + return 0x10000000; +} +static inline u32 fifo_pbdma_map_r(u32 i) +{ + return 0x00002390 + i*4; +} +static inline u32 fifo_intr_0_r(void) +{ + return 0x00002100; +} +static inline u32 fifo_intr_0_bind_error_pending_f(void) +{ + return 0x1; +} +static inline u32 fifo_intr_0_bind_error_reset_f(void) +{ + return 0x1; +} +static inline u32 fifo_intr_0_pio_error_pending_f(void) +{ + return 0x10; +} +static inline u32 fifo_intr_0_pio_error_reset_f(void) +{ + return 0x10; +} +static inline u32 fifo_intr_0_sched_error_pending_f(void) +{ + return 0x100; +} +static inline u32 fifo_intr_0_sched_error_reset_f(void) +{ + return 0x100; +} +static inline u32 fifo_intr_0_chsw_error_pending_f(void) +{ + return 0x10000; +} +static inline u32 fifo_intr_0_chsw_error_reset_f(void) +{ + return 0x10000; +} +static inline u32 fifo_intr_0_fb_flush_timeout_pending_f(void) +{ + return 0x800000; +} +static inline u32 fifo_intr_0_fb_flush_timeout_reset_f(void) +{ + return 0x800000; +} +static inline u32 fifo_intr_0_lb_error_pending_f(void) +{ + return 0x1000000; +} +static inline u32 fifo_intr_0_lb_error_reset_f(void) +{ + return 0x1000000; +} +static inline u32 fifo_intr_0_dropped_mmu_fault_pending_f(void) +{ + return 0x8000000; +} +static inline u32 fifo_intr_0_dropped_mmu_fault_reset_f(void) +{ + return 0x8000000; +} +static inline u32 fifo_intr_0_mmu_fault_pending_f(void) +{ + return 0x10000000; +} +static inline u32 fifo_intr_0_pbdma_intr_pending_f(void) +{ + return 0x20000000; +} +static inline u32 fifo_intr_0_runlist_event_pending_f(void) +{ + return 0x40000000; +} +static inline u32 
fifo_intr_0_channel_intr_pending_f(void) +{ + return 0x80000000; +} +static inline u32 fifo_intr_en_0_r(void) +{ + return 0x00002140; +} +static inline u32 fifo_intr_en_1_r(void) +{ + return 0x00002528; +} +static inline u32 fifo_intr_bind_error_r(void) +{ + return 0x0000252c; +} +static inline u32 fifo_intr_sched_error_r(void) +{ + return 0x0000254c; +} +static inline u32 fifo_intr_sched_error_code_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 fifo_intr_sched_error_code_ctxsw_timeout_v(void) +{ + return 0x0000000a; +} +static inline u32 fifo_intr_chsw_error_r(void) +{ + return 0x0000256c; +} +static inline u32 fifo_intr_mmu_fault_id_r(void) +{ + return 0x0000259c; +} +static inline u32 fifo_intr_mmu_fault_eng_id_graphics_v(void) +{ + return 0x00000000; +} +static inline u32 fifo_intr_mmu_fault_eng_id_graphics_f(void) +{ + return 0x0; +} +static inline u32 fifo_intr_mmu_fault_inst_r(u32 i) +{ + return 0x00002800 + i*16; +} +static inline u32 fifo_intr_mmu_fault_inst_ptr_v(u32 r) +{ + return (r >> 0) & 0xfffffff; +} +static inline u32 fifo_intr_mmu_fault_inst_ptr_align_shift_v(void) +{ + return 0x0000000c; +} +static inline u32 fifo_intr_mmu_fault_lo_r(u32 i) +{ + return 0x00002804 + i*16; +} +static inline u32 fifo_intr_mmu_fault_hi_r(u32 i) +{ + return 0x00002808 + i*16; +} +static inline u32 fifo_intr_mmu_fault_info_r(u32 i) +{ + return 0x0000280c + i*16; +} +static inline u32 fifo_intr_mmu_fault_info_type_v(u32 r) +{ + return (r >> 0) & 0xf; +} +static inline u32 fifo_intr_mmu_fault_info_engine_subid_v(u32 r) +{ + return (r >> 6) & 0x1; +} +static inline u32 fifo_intr_mmu_fault_info_engine_subid_gpc_v(void) +{ + return 0x00000000; +} +static inline u32 fifo_intr_mmu_fault_info_engine_subid_hub_v(void) +{ + return 0x00000001; +} +static inline u32 fifo_intr_mmu_fault_info_client_v(u32 r) +{ + return (r >> 8) & 0x1f; +} +static inline u32 fifo_intr_pbdma_id_r(void) +{ + return 0x000025a0; +} +static inline u32 fifo_intr_pbdma_id_status_f(u32 v, u32 i) +{ + return (v & 0x1) << (0 + i*1); +} +static inline u32 fifo_intr_pbdma_id_status__size_1_v(void) +{ + return 0x00000001; +} +static inline u32 fifo_intr_runlist_r(void) +{ + return 0x00002a00; +} +static inline u32 fifo_fb_timeout_r(void) +{ + return 0x00002a04; +} +static inline u32 fifo_fb_timeout_period_m(void) +{ + return 0x3fffffff << 0; +} +static inline u32 fifo_fb_timeout_period_max_f(void) +{ + return 0x3fffffff; +} +static inline u32 fifo_pb_timeout_r(void) +{ + return 0x00002a08; +} +static inline u32 fifo_pb_timeout_detection_enabled_f(void) +{ + return 0x80000000; +} +static inline u32 fifo_eng_timeout_r(void) +{ + return 0x00002a0c; +} +static inline u32 fifo_eng_timeout_period_m(void) +{ + return 0x7fffffff << 0; +} +static inline u32 fifo_eng_timeout_period_max_f(void) +{ + return 0x7fffffff; +} +static inline u32 fifo_eng_timeout_detection_m(void) +{ + return 0x1 << 31; +} +static inline u32 fifo_eng_timeout_detection_enabled_f(void) +{ + return 0x80000000; +} +static inline u32 fifo_eng_timeout_detection_disabled_f(void) +{ + return 0x0; +} +static inline u32 fifo_error_sched_disable_r(void) +{ + return 0x0000262c; +} +static inline u32 fifo_sched_disable_r(void) +{ + return 0x00002630; +} +static inline u32 fifo_sched_disable_runlist_f(u32 v, u32 i) +{ + return (v & 0x1) << (0 + i*1); +} +static inline u32 fifo_sched_disable_runlist_m(u32 i) +{ + return 0x1 << (0 + i*1); +} +static inline u32 fifo_sched_disable_true_v(void) +{ + return 0x00000001; +} +static inline u32 fifo_preempt_r(void) +{ + return 
0x00002634; +} +static inline u32 fifo_preempt_pending_true_f(void) +{ + return 0x100000; +} +static inline u32 fifo_preempt_type_channel_f(void) +{ + return 0x0; +} +static inline u32 fifo_preempt_chid_f(u32 v) +{ + return (v & 0xfff) << 0; +} +static inline u32 fifo_trigger_mmu_fault_r(u32 i) +{ + return 0x00002a30 + i*4; +} +static inline u32 fifo_trigger_mmu_fault_id_f(u32 v) +{ + return (v & 0x1f) << 0; +} +static inline u32 fifo_trigger_mmu_fault_enable_f(u32 v) +{ + return (v & 0x1) << 8; +} +static inline u32 fifo_engine_status_r(u32 i) +{ + return 0x00002640 + i*8; +} +static inline u32 fifo_engine_status__size_1_v(void) +{ + return 0x00000002; +} +static inline u32 fifo_engine_status_id_v(u32 r) +{ + return (r >> 0) & 0xfff; +} +static inline u32 fifo_engine_status_id_type_v(u32 r) +{ + return (r >> 12) & 0x1; +} +static inline u32 fifo_engine_status_id_type_chid_v(void) +{ + return 0x00000000; +} +static inline u32 fifo_engine_status_ctx_status_v(u32 r) +{ + return (r >> 13) & 0x7; +} +static inline u32 fifo_engine_status_ctx_status_valid_v(void) +{ + return 0x00000001; +} +static inline u32 fifo_engine_status_ctx_status_ctxsw_load_v(void) +{ + return 0x00000005; +} +static inline u32 fifo_engine_status_ctx_status_ctxsw_save_v(void) +{ + return 0x00000006; +} +static inline u32 fifo_engine_status_ctx_status_ctxsw_switch_v(void) +{ + return 0x00000007; +} +static inline u32 fifo_engine_status_next_id_v(u32 r) +{ + return (r >> 16) & 0xfff; +} +static inline u32 fifo_engine_status_next_id_type_v(u32 r) +{ + return (r >> 28) & 0x1; +} +static inline u32 fifo_engine_status_next_id_type_chid_v(void) +{ + return 0x00000000; +} +static inline u32 fifo_engine_status_faulted_v(u32 r) +{ + return (r >> 30) & 0x1; +} +static inline u32 fifo_engine_status_faulted_true_v(void) +{ + return 0x00000001; +} +static inline u32 fifo_engine_status_engine_v(u32 r) +{ + return (r >> 31) & 0x1; +} +static inline u32 fifo_engine_status_engine_idle_v(void) +{ + return 0x00000000; +} +static inline u32 fifo_engine_status_engine_busy_v(void) +{ + return 0x00000001; +} +static inline u32 fifo_engine_status_ctxsw_v(u32 r) +{ + return (r >> 15) & 0x1; +} +static inline u32 fifo_engine_status_ctxsw_in_progress_v(void) +{ + return 0x00000001; +} +static inline u32 fifo_engine_status_ctxsw_in_progress_f(void) +{ + return 0x8000; +} +static inline u32 fifo_pbdma_status_r(u32 i) +{ + return 0x00003080 + i*4; +} +static inline u32 fifo_pbdma_status__size_1_v(void) +{ + return 0x00000001; +} +static inline u32 fifo_pbdma_status_id_v(u32 r) +{ + return (r >> 0) & 0xfff; +} +static inline u32 fifo_pbdma_status_id_type_v(u32 r) +{ + return (r >> 12) & 0x1; +} +static inline u32 fifo_pbdma_status_id_type_chid_v(void) +{ + return 0x00000000; +} +static inline u32 fifo_pbdma_status_chan_status_v(u32 r) +{ + return (r >> 13) & 0x7; +} +static inline u32 fifo_pbdma_status_chan_status_valid_v(void) +{ + return 0x00000001; +} +static inline u32 fifo_pbdma_status_chan_status_chsw_load_v(void) +{ + return 0x00000005; +} +static inline u32 fifo_pbdma_status_chan_status_chsw_save_v(void) +{ + return 0x00000006; +} +static inline u32 fifo_pbdma_status_chan_status_chsw_switch_v(void) +{ + return 0x00000007; +} +static inline u32 fifo_pbdma_status_next_id_v(u32 r) +{ + return (r >> 16) & 0xfff; +} +static inline u32 fifo_pbdma_status_next_id_type_v(u32 r) +{ + return (r >> 28) & 0x1; +} +static inline u32 fifo_pbdma_status_next_id_type_chid_v(void) +{ + return 0x00000000; +} +static inline u32 fifo_pbdma_status_chsw_v(u32 r) +{ + 
return (r >> 15) & 0x1; +} +static inline u32 fifo_pbdma_status_chsw_in_progress_v(void) +{ + return 0x00000001; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_flush_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_flush_gk20a.h new file mode 100644 index 00000000..0aeb11f9 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_flush_gk20a.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
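Editorial aside, not part of the patch: the flush registers below follow a kick-and-poll idiom with separate pending and outstanding status bits. A hedged sketch of the FB flush variant (the L2 dirty flush looks the same); gk20a_readl()/gk20a_writel() are assumed from gk20a.h and the retry limit real code uses is left out.

/* Sketch: kick an FB flush, then wait until it is neither pending nor outstanding. */
static void example_fb_flush(struct gk20a *g)
{
	u32 data;

	gk20a_writel(g, flush_fb_flush_r(), flush_fb_flush_pending_busy_f());

	do {
		data = gk20a_readl(g, flush_fb_flush_r());
	} while (flush_fb_flush_pending_v(data) ==
			flush_fb_flush_pending_busy_v() ||
		 flush_fb_flush_outstanding_v(data) ==
			flush_fb_flush_outstanding_true_v());
}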
+ */ +#ifndef _hw_flush_gk20a_h_ +#define _hw_flush_gk20a_h_ + +static inline u32 flush_l2_system_invalidate_r(void) +{ + return 0x00070004; +} +static inline u32 flush_l2_system_invalidate_pending_v(u32 r) +{ + return (r >> 0) & 0x1; +} +static inline u32 flush_l2_system_invalidate_pending_busy_v(void) +{ + return 0x00000001; +} +static inline u32 flush_l2_system_invalidate_pending_busy_f(void) +{ + return 0x1; +} +static inline u32 flush_l2_system_invalidate_outstanding_v(u32 r) +{ + return (r >> 1) & 0x1; +} +static inline u32 flush_l2_system_invalidate_outstanding_true_v(void) +{ + return 0x00000001; +} +static inline u32 flush_l2_flush_dirty_r(void) +{ + return 0x00070010; +} +static inline u32 flush_l2_flush_dirty_pending_v(u32 r) +{ + return (r >> 0) & 0x1; +} +static inline u32 flush_l2_flush_dirty_pending_empty_v(void) +{ + return 0x00000000; +} +static inline u32 flush_l2_flush_dirty_pending_empty_f(void) +{ + return 0x0; +} +static inline u32 flush_l2_flush_dirty_pending_busy_v(void) +{ + return 0x00000001; +} +static inline u32 flush_l2_flush_dirty_pending_busy_f(void) +{ + return 0x1; +} +static inline u32 flush_l2_flush_dirty_outstanding_v(u32 r) +{ + return (r >> 1) & 0x1; +} +static inline u32 flush_l2_flush_dirty_outstanding_false_v(void) +{ + return 0x00000000; +} +static inline u32 flush_l2_flush_dirty_outstanding_false_f(void) +{ + return 0x0; +} +static inline u32 flush_l2_flush_dirty_outstanding_true_v(void) +{ + return 0x00000001; +} +static inline u32 flush_fb_flush_r(void) +{ + return 0x00070000; +} +static inline u32 flush_fb_flush_pending_v(u32 r) +{ + return (r >> 0) & 0x1; +} +static inline u32 flush_fb_flush_pending_busy_v(void) +{ + return 0x00000001; +} +static inline u32 flush_fb_flush_pending_busy_f(void) +{ + return 0x1; +} +static inline u32 flush_fb_flush_outstanding_v(u32 r) +{ + return (r >> 1) & 0x1; +} +static inline u32 flush_fb_flush_outstanding_true_v(void) +{ + return 0x00000001; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h new file mode 100644 index 00000000..e0118946 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h @@ -0,0 +1,1141 @@ +/* + * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . 
+ * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ +#ifndef _hw_gmmu_gk20a_h_ +#define _hw_gmmu_gk20a_h_ + +static inline u32 gmmu_pde_aperture_big_w(void) +{ + return 0; +} +static inline u32 gmmu_pde_aperture_big_invalid_f(void) +{ + return 0x0; +} +static inline u32 gmmu_pde_aperture_big_video_memory_f(void) +{ + return 0x1; +} +static inline u32 gmmu_pde_size_w(void) +{ + return 0; +} +static inline u32 gmmu_pde_size_full_f(void) +{ + return 0x0; +} +static inline u32 gmmu_pde_address_big_sys_f(u32 v) +{ + return (v & 0xfffffff) << 4; +} +static inline u32 gmmu_pde_address_big_sys_w(void) +{ + return 0; +} +static inline u32 gmmu_pde_aperture_small_w(void) +{ + return 1; +} +static inline u32 gmmu_pde_aperture_small_invalid_f(void) +{ + return 0x0; +} +static inline u32 gmmu_pde_aperture_small_video_memory_f(void) +{ + return 0x1; +} +static inline u32 gmmu_pde_vol_small_w(void) +{ + return 1; +} +static inline u32 gmmu_pde_vol_small_true_f(void) +{ + return 0x4; +} +static inline u32 gmmu_pde_vol_small_false_f(void) +{ + return 0x0; +} +static inline u32 gmmu_pde_vol_big_w(void) +{ + return 1; +} +static inline u32 gmmu_pde_vol_big_true_f(void) +{ + return 0x8; +} +static inline u32 gmmu_pde_vol_big_false_f(void) +{ + return 0x0; +} +static inline u32 gmmu_pde_address_small_sys_f(u32 v) +{ + return (v & 0xfffffff) << 4; +} +static inline u32 gmmu_pde_address_small_sys_w(void) +{ + return 1; +} +static inline u32 gmmu_pde_address_shift_v(void) +{ + return 0x0000000c; +} +static inline u32 gmmu_pde__size_v(void) +{ + return 0x00000008; +} +static inline u32 gmmu_pte__size_v(void) +{ + return 0x00000008; +} +static inline u32 gmmu_pte_valid_w(void) +{ + return 0; +} +static inline u32 gmmu_pte_valid_true_f(void) +{ + return 0x1; +} +static inline u32 gmmu_pte_address_sys_f(u32 v) +{ + return (v & 0xfffffff) << 4; +} +static inline u32 gmmu_pte_address_sys_w(void) +{ + return 0; +} +static inline u32 gmmu_pte_vol_w(void) +{ + return 1; +} +static inline u32 gmmu_pte_vol_true_f(void) +{ + return 0x1; +} +static inline u32 gmmu_pte_vol_false_f(void) +{ + return 0x0; +} +static inline u32 gmmu_pte_aperture_w(void) +{ + return 1; +} +static inline u32 gmmu_pte_aperture_video_memory_f(void) +{ + return 0x0; +} +static inline u32 gmmu_pte_read_only_w(void) +{ + return 0; +} +static inline u32 gmmu_pte_read_only_true_f(void) +{ + return 0x4; +} +static inline u32 gmmu_pte_write_disable_w(void) +{ + return 1; +} +static inline u32 gmmu_pte_write_disable_true_f(void) +{ + return 0x80000000; +} +static inline u32 gmmu_pte_read_disable_w(void) +{ + return 1; +} +static inline u32 gmmu_pte_read_disable_true_f(void) +{ + return 0x40000000; +} +static inline u32 gmmu_pte_comptagline_f(u32 v) +{ + return (v & 0x1ffff) << 12; +} +static inline u32 gmmu_pte_comptagline_w(void) +{ + return 1; +} +static inline u32 gmmu_pte_address_shift_v(void) +{ + return 0x0000000c; +} +static inline u32 gmmu_pte_kind_f(u32 v) +{ + 
return (v & 0xff) << 4; +} +static inline u32 gmmu_pte_kind_w(void) +{ + return 1; +} +static inline u32 gmmu_pte_kind_invalid_v(void) +{ + return 0x000000ff; +} +static inline u32 gmmu_pte_kind_pitch_v(void) +{ + return 0x00000000; +} +static inline u32 gmmu_pte_kind_z16_v(void) +{ + return 0x00000001; +} +static inline u32 gmmu_pte_kind_z16_2c_v(void) +{ + return 0x00000002; +} +static inline u32 gmmu_pte_kind_z16_ms2_2c_v(void) +{ + return 0x00000003; +} +static inline u32 gmmu_pte_kind_z16_ms4_2c_v(void) +{ + return 0x00000004; +} +static inline u32 gmmu_pte_kind_z16_ms8_2c_v(void) +{ + return 0x00000005; +} +static inline u32 gmmu_pte_kind_z16_ms16_2c_v(void) +{ + return 0x00000006; +} +static inline u32 gmmu_pte_kind_z16_2z_v(void) +{ + return 0x00000007; +} +static inline u32 gmmu_pte_kind_z16_ms2_2z_v(void) +{ + return 0x00000008; +} +static inline u32 gmmu_pte_kind_z16_ms4_2z_v(void) +{ + return 0x00000009; +} +static inline u32 gmmu_pte_kind_z16_ms8_2z_v(void) +{ + return 0x0000000a; +} +static inline u32 gmmu_pte_kind_z16_ms16_2z_v(void) +{ + return 0x0000000b; +} +static inline u32 gmmu_pte_kind_z16_4cz_v(void) +{ + return 0x0000000c; +} +static inline u32 gmmu_pte_kind_z16_ms2_4cz_v(void) +{ + return 0x0000000d; +} +static inline u32 gmmu_pte_kind_z16_ms4_4cz_v(void) +{ + return 0x0000000e; +} +static inline u32 gmmu_pte_kind_z16_ms8_4cz_v(void) +{ + return 0x0000000f; +} +static inline u32 gmmu_pte_kind_z16_ms16_4cz_v(void) +{ + return 0x00000010; +} +static inline u32 gmmu_pte_kind_s8z24_v(void) +{ + return 0x00000011; +} +static inline u32 gmmu_pte_kind_s8z24_1z_v(void) +{ + return 0x00000012; +} +static inline u32 gmmu_pte_kind_s8z24_ms2_1z_v(void) +{ + return 0x00000013; +} +static inline u32 gmmu_pte_kind_s8z24_ms4_1z_v(void) +{ + return 0x00000014; +} +static inline u32 gmmu_pte_kind_s8z24_ms8_1z_v(void) +{ + return 0x00000015; +} +static inline u32 gmmu_pte_kind_s8z24_ms16_1z_v(void) +{ + return 0x00000016; +} +static inline u32 gmmu_pte_kind_s8z24_2cz_v(void) +{ + return 0x00000017; +} +static inline u32 gmmu_pte_kind_s8z24_ms2_2cz_v(void) +{ + return 0x00000018; +} +static inline u32 gmmu_pte_kind_s8z24_ms4_2cz_v(void) +{ + return 0x00000019; +} +static inline u32 gmmu_pte_kind_s8z24_ms8_2cz_v(void) +{ + return 0x0000001a; +} +static inline u32 gmmu_pte_kind_s8z24_ms16_2cz_v(void) +{ + return 0x0000001b; +} +static inline u32 gmmu_pte_kind_s8z24_2cs_v(void) +{ + return 0x0000001c; +} +static inline u32 gmmu_pte_kind_s8z24_ms2_2cs_v(void) +{ + return 0x0000001d; +} +static inline u32 gmmu_pte_kind_s8z24_ms4_2cs_v(void) +{ + return 0x0000001e; +} +static inline u32 gmmu_pte_kind_s8z24_ms8_2cs_v(void) +{ + return 0x0000001f; +} +static inline u32 gmmu_pte_kind_s8z24_ms16_2cs_v(void) +{ + return 0x00000020; +} +static inline u32 gmmu_pte_kind_s8z24_4cszv_v(void) +{ + return 0x00000021; +} +static inline u32 gmmu_pte_kind_s8z24_ms2_4cszv_v(void) +{ + return 0x00000022; +} +static inline u32 gmmu_pte_kind_s8z24_ms4_4cszv_v(void) +{ + return 0x00000023; +} +static inline u32 gmmu_pte_kind_s8z24_ms8_4cszv_v(void) +{ + return 0x00000024; +} +static inline u32 gmmu_pte_kind_s8z24_ms16_4cszv_v(void) +{ + return 0x00000025; +} +static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_v(void) +{ + return 0x00000026; +} +static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_v(void) +{ + return 0x00000027; +} +static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_v(void) +{ + return 0x00000028; +} +static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_v(void) +{ + return 0x00000029; +} +static inline u32 
gmmu_pte_kind_v8z24_ms4_vc12_1zv_v(void) +{ + return 0x0000002e; +} +static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_1zv_v(void) +{ + return 0x0000002f; +} +static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_1zv_v(void) +{ + return 0x00000030; +} +static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_1zv_v(void) +{ + return 0x00000031; +} +static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2cs_v(void) +{ + return 0x00000032; +} +static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2cs_v(void) +{ + return 0x00000033; +} +static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2cs_v(void) +{ + return 0x00000034; +} +static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2cs_v(void) +{ + return 0x00000035; +} +static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2czv_v(void) +{ + return 0x0000003a; +} +static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2czv_v(void) +{ + return 0x0000003b; +} +static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2czv_v(void) +{ + return 0x0000003c; +} +static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2czv_v(void) +{ + return 0x0000003d; +} +static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2zv_v(void) +{ + return 0x0000003e; +} +static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2zv_v(void) +{ + return 0x0000003f; +} +static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2zv_v(void) +{ + return 0x00000040; +} +static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2zv_v(void) +{ + return 0x00000041; +} +static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_4cszv_v(void) +{ + return 0x00000042; +} +static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_4cszv_v(void) +{ + return 0x00000043; +} +static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_4cszv_v(void) +{ + return 0x00000044; +} +static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_4cszv_v(void) +{ + return 0x00000045; +} +static inline u32 gmmu_pte_kind_z24s8_v(void) +{ + return 0x00000046; +} +static inline u32 gmmu_pte_kind_z24s8_1z_v(void) +{ + return 0x00000047; +} +static inline u32 gmmu_pte_kind_z24s8_ms2_1z_v(void) +{ + return 0x00000048; +} +static inline u32 gmmu_pte_kind_z24s8_ms4_1z_v(void) +{ + return 0x00000049; +} +static inline u32 gmmu_pte_kind_z24s8_ms8_1z_v(void) +{ + return 0x0000004a; +} +static inline u32 gmmu_pte_kind_z24s8_ms16_1z_v(void) +{ + return 0x0000004b; +} +static inline u32 gmmu_pte_kind_z24s8_2cs_v(void) +{ + return 0x0000004c; +} +static inline u32 gmmu_pte_kind_z24s8_ms2_2cs_v(void) +{ + return 0x0000004d; +} +static inline u32 gmmu_pte_kind_z24s8_ms4_2cs_v(void) +{ + return 0x0000004e; +} +static inline u32 gmmu_pte_kind_z24s8_ms8_2cs_v(void) +{ + return 0x0000004f; +} +static inline u32 gmmu_pte_kind_z24s8_ms16_2cs_v(void) +{ + return 0x00000050; +} +static inline u32 gmmu_pte_kind_z24s8_2cz_v(void) +{ + return 0x00000051; +} +static inline u32 gmmu_pte_kind_z24s8_ms2_2cz_v(void) +{ + return 0x00000052; +} +static inline u32 gmmu_pte_kind_z24s8_ms4_2cz_v(void) +{ + return 0x00000053; +} +static inline u32 gmmu_pte_kind_z24s8_ms8_2cz_v(void) +{ + return 0x00000054; +} +static inline u32 gmmu_pte_kind_z24s8_ms16_2cz_v(void) +{ + return 0x00000055; +} +static inline u32 gmmu_pte_kind_z24s8_4cszv_v(void) +{ + return 0x00000056; +} +static inline u32 gmmu_pte_kind_z24s8_ms2_4cszv_v(void) +{ + return 0x00000057; +} +static inline u32 gmmu_pte_kind_z24s8_ms4_4cszv_v(void) +{ + return 0x00000058; +} +static inline u32 gmmu_pte_kind_z24s8_ms8_4cszv_v(void) +{ + return 0x00000059; +} +static inline u32 gmmu_pte_kind_z24s8_ms16_4cszv_v(void) +{ + return 0x0000005a; +} +static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_v(void) +{ + return 0x0000005b; +} +static inline u32 
gmmu_pte_kind_z24v8_ms4_vc4_v(void) +{ + return 0x0000005c; +} +static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_v(void) +{ + return 0x0000005d; +} +static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_v(void) +{ + return 0x0000005e; +} +static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_1zv_v(void) +{ + return 0x00000063; +} +static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_1zv_v(void) +{ + return 0x00000064; +} +static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_1zv_v(void) +{ + return 0x00000065; +} +static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_1zv_v(void) +{ + return 0x00000066; +} +static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2cs_v(void) +{ + return 0x00000067; +} +static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2cs_v(void) +{ + return 0x00000068; +} +static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2cs_v(void) +{ + return 0x00000069; +} +static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2cs_v(void) +{ + return 0x0000006a; +} +static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2czv_v(void) +{ + return 0x0000006f; +} +static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2czv_v(void) +{ + return 0x00000070; +} +static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2czv_v(void) +{ + return 0x00000071; +} +static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2czv_v(void) +{ + return 0x00000072; +} +static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2zv_v(void) +{ + return 0x00000073; +} +static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2zv_v(void) +{ + return 0x00000074; +} +static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2zv_v(void) +{ + return 0x00000075; +} +static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2zv_v(void) +{ + return 0x00000076; +} +static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_4cszv_v(void) +{ + return 0x00000077; +} +static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_4cszv_v(void) +{ + return 0x00000078; +} +static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_4cszv_v(void) +{ + return 0x00000079; +} +static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_4cszv_v(void) +{ + return 0x0000007a; +} +static inline u32 gmmu_pte_kind_zf32_v(void) +{ + return 0x0000007b; +} +static inline u32 gmmu_pte_kind_zf32_1z_v(void) +{ + return 0x0000007c; +} +static inline u32 gmmu_pte_kind_zf32_ms2_1z_v(void) +{ + return 0x0000007d; +} +static inline u32 gmmu_pte_kind_zf32_ms4_1z_v(void) +{ + return 0x0000007e; +} +static inline u32 gmmu_pte_kind_zf32_ms8_1z_v(void) +{ + return 0x0000007f; +} +static inline u32 gmmu_pte_kind_zf32_ms16_1z_v(void) +{ + return 0x00000080; +} +static inline u32 gmmu_pte_kind_zf32_2cs_v(void) +{ + return 0x00000081; +} +static inline u32 gmmu_pte_kind_zf32_ms2_2cs_v(void) +{ + return 0x00000082; +} +static inline u32 gmmu_pte_kind_zf32_ms4_2cs_v(void) +{ + return 0x00000083; +} +static inline u32 gmmu_pte_kind_zf32_ms8_2cs_v(void) +{ + return 0x00000084; +} +static inline u32 gmmu_pte_kind_zf32_ms16_2cs_v(void) +{ + return 0x00000085; +} +static inline u32 gmmu_pte_kind_zf32_2cz_v(void) +{ + return 0x00000086; +} +static inline u32 gmmu_pte_kind_zf32_ms2_2cz_v(void) +{ + return 0x00000087; +} +static inline u32 gmmu_pte_kind_zf32_ms4_2cz_v(void) +{ + return 0x00000088; +} +static inline u32 gmmu_pte_kind_zf32_ms8_2cz_v(void) +{ + return 0x00000089; +} +static inline u32 gmmu_pte_kind_zf32_ms16_2cz_v(void) +{ + return 0x0000008a; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v(void) +{ + return 0x0000008b; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_v(void) +{ + return 0x0000008c; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_v(void) +{ + return 0x0000008d; +} +static inline u32 
gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v(void) +{ + return 0x0000008e; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1cs_v(void) +{ + return 0x0000008f; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1cs_v(void) +{ + return 0x00000090; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1cs_v(void) +{ + return 0x00000091; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v(void) +{ + return 0x00000092; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1zv_v(void) +{ + return 0x00000097; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1zv_v(void) +{ + return 0x00000098; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1zv_v(void) +{ + return 0x00000099; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1zv_v(void) +{ + return 0x0000009a; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1czv_v(void) +{ + return 0x0000009b; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1czv_v(void) +{ + return 0x0000009c; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1czv_v(void) +{ + return 0x0000009d; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1czv_v(void) +{ + return 0x0000009e; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cs_v(void) +{ + return 0x0000009f; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cs_v(void) +{ + return 0x000000a0; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cs_v(void) +{ + return 0x000000a1; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cs_v(void) +{ + return 0x000000a2; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cszv_v(void) +{ + return 0x000000a3; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cszv_v(void) +{ + return 0x000000a4; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cszv_v(void) +{ + return 0x000000a5; +} +static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v(void) +{ + return 0x000000a6; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_v(void) +{ + return 0x000000a7; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_v(void) +{ + return 0x000000a8; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_v(void) +{ + return 0x000000a9; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v(void) +{ + return 0x000000aa; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1cs_v(void) +{ + return 0x000000ab; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1cs_v(void) +{ + return 0x000000ac; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1cs_v(void) +{ + return 0x000000ad; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v(void) +{ + return 0x000000ae; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1zv_v(void) +{ + return 0x000000b3; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1zv_v(void) +{ + return 0x000000b4; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1zv_v(void) +{ + return 0x000000b5; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1zv_v(void) +{ + return 0x000000b6; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1czv_v(void) +{ + return 0x000000b7; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1czv_v(void) +{ + return 0x000000b8; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1czv_v(void) +{ + return 0x000000b9; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1czv_v(void) +{ + return 0x000000ba; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cs_v(void) +{ + return 0x000000bb; +} 
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cs_v(void) +{ + return 0x000000bc; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cs_v(void) +{ + return 0x000000bd; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cs_v(void) +{ + return 0x000000be; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cszv_v(void) +{ + return 0x000000bf; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cszv_v(void) +{ + return 0x000000c0; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cszv_v(void) +{ + return 0x000000c1; +} +static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v(void) +{ + return 0x000000c2; +} +static inline u32 gmmu_pte_kind_zf32_x24s8_v(void) +{ + return 0x000000c3; +} +static inline u32 gmmu_pte_kind_zf32_x24s8_1cs_v(void) +{ + return 0x000000c4; +} +static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_1cs_v(void) +{ + return 0x000000c5; +} +static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_1cs_v(void) +{ + return 0x000000c6; +} +static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_1cs_v(void) +{ + return 0x000000c7; +} +static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_1cs_v(void) +{ + return 0x000000c8; +} +static inline u32 gmmu_pte_kind_zf32_x24s8_2cszv_v(void) +{ + return 0x000000ce; +} +static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_2cszv_v(void) +{ + return 0x000000cf; +} +static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_2cszv_v(void) +{ + return 0x000000d0; +} +static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_2cszv_v(void) +{ + return 0x000000d1; +} +static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_2cszv_v(void) +{ + return 0x000000d2; +} +static inline u32 gmmu_pte_kind_zf32_x24s8_2cs_v(void) +{ + return 0x000000d3; +} +static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_2cs_v(void) +{ + return 0x000000d4; +} +static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_2cs_v(void) +{ + return 0x000000d5; +} +static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_2cs_v(void) +{ + return 0x000000d6; +} +static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_2cs_v(void) +{ + return 0x000000d7; +} +static inline u32 gmmu_pte_kind_generic_16bx2_v(void) +{ + return 0x000000fe; +} +static inline u32 gmmu_pte_kind_c32_2c_v(void) +{ + return 0x000000d8; +} +static inline u32 gmmu_pte_kind_c32_2cbr_v(void) +{ + return 0x000000d9; +} +static inline u32 gmmu_pte_kind_c32_2cba_v(void) +{ + return 0x000000da; +} +static inline u32 gmmu_pte_kind_c32_2cra_v(void) +{ + return 0x000000db; +} +static inline u32 gmmu_pte_kind_c32_2bra_v(void) +{ + return 0x000000dc; +} +static inline u32 gmmu_pte_kind_c32_ms2_2c_v(void) +{ + return 0x000000dd; +} +static inline u32 gmmu_pte_kind_c32_ms2_2cbr_v(void) +{ + return 0x000000de; +} +static inline u32 gmmu_pte_kind_c32_ms2_2cra_v(void) +{ + return 0x000000cc; +} +static inline u32 gmmu_pte_kind_c32_ms4_2c_v(void) +{ + return 0x000000df; +} +static inline u32 gmmu_pte_kind_c32_ms4_2cbr_v(void) +{ + return 0x000000e0; +} +static inline u32 gmmu_pte_kind_c32_ms4_2cba_v(void) +{ + return 0x000000e1; +} +static inline u32 gmmu_pte_kind_c32_ms4_2cra_v(void) +{ + return 0x000000e2; +} +static inline u32 gmmu_pte_kind_c32_ms4_2bra_v(void) +{ + return 0x000000e3; +} +static inline u32 gmmu_pte_kind_c32_ms8_ms16_2c_v(void) +{ + return 0x000000e4; +} +static inline u32 gmmu_pte_kind_c32_ms8_ms16_2cra_v(void) +{ + return 0x000000e5; +} +static inline u32 gmmu_pte_kind_c64_2c_v(void) +{ + return 0x000000e6; +} +static inline u32 gmmu_pte_kind_c64_2cbr_v(void) +{ + return 0x000000e7; +} +static inline u32 gmmu_pte_kind_c64_2cba_v(void) +{ + return 0x000000e8; +} 
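To make the naming convention described in the file comments concrete, here is a minimal sketch (not part of this patch) of how the generated gmmu_pte_* helpers above are meant to compose into the two 32-bit words of a small-page PTE. The function name example_fill_small_pte, the physical address, the kind and the comptag line below are hypothetical placeholders chosen only for illustration; the driver's real use of these helpers is in mm_gk20a.c.

/*
 * Minimal sketch, not part of this patch: composing the two 32-bit words
 * of a small-page PTE from the generated gmmu_pte_* helpers above.  All
 * concrete values (address, kind, comptag) are hypothetical placeholders.
 */
#include <linux/types.h>	/* u32/u64, as used by these headers */

static void example_fill_small_pte(u32 pte[2])
{
	u64 phys = 0x12345000ULL;	/* hypothetical page-aligned physical address */

	/* _v() helpers return unshifted constants, e.g. the address shift */
	u32 addr = (u32)(phys >> gmmu_pte_address_shift_v());

	/* _w() gives the word index a field lives in; valid and address are in word 0 */
	pte[gmmu_pte_valid_w()] =
		gmmu_pte_valid_true_f() |	/* pre-shifted constant field value */
		gmmu_pte_address_sys_f(addr);	/* 'v' masked and shifted into its field */

	/* aperture, volatility, kind and comptag line are all in word 1 */
	pte[gmmu_pte_aperture_w()] =
		gmmu_pte_aperture_video_memory_f() |
		gmmu_pte_vol_true_f() |
		gmmu_pte_kind_f(gmmu_pte_kind_pitch_v()) |
		gmmu_pte_comptagline_f(0);	/* 0: no compression tag line allocated */
}

The _m() masks and _v(u32 r) decoders work in the opposite direction: a value read back from the hardware is &'d with _m() or decoded with _v() to inspect a single field, which is how the gr_* accessors in the following header are used throughout gr_gk20a.c.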
+static inline u32 gmmu_pte_kind_c64_2cra_v(void) +{ + return 0x000000e9; +} +static inline u32 gmmu_pte_kind_c64_2bra_v(void) +{ + return 0x000000ea; +} +static inline u32 gmmu_pte_kind_c64_ms2_2c_v(void) +{ + return 0x000000eb; +} +static inline u32 gmmu_pte_kind_c64_ms2_2cbr_v(void) +{ + return 0x000000ec; +} +static inline u32 gmmu_pte_kind_c64_ms2_2cra_v(void) +{ + return 0x000000cd; +} +static inline u32 gmmu_pte_kind_c64_ms4_2c_v(void) +{ + return 0x000000ed; +} +static inline u32 gmmu_pte_kind_c64_ms4_2cbr_v(void) +{ + return 0x000000ee; +} +static inline u32 gmmu_pte_kind_c64_ms4_2cba_v(void) +{ + return 0x000000ef; +} +static inline u32 gmmu_pte_kind_c64_ms4_2cra_v(void) +{ + return 0x000000f0; +} +static inline u32 gmmu_pte_kind_c64_ms4_2bra_v(void) +{ + return 0x000000f1; +} +static inline u32 gmmu_pte_kind_c64_ms8_ms16_2c_v(void) +{ + return 0x000000f2; +} +static inline u32 gmmu_pte_kind_c64_ms8_ms16_2cra_v(void) +{ + return 0x000000f3; +} +static inline u32 gmmu_pte_kind_c128_2c_v(void) +{ + return 0x000000f4; +} +static inline u32 gmmu_pte_kind_c128_2cr_v(void) +{ + return 0x000000f5; +} +static inline u32 gmmu_pte_kind_c128_ms2_2c_v(void) +{ + return 0x000000f6; +} +static inline u32 gmmu_pte_kind_c128_ms2_2cr_v(void) +{ + return 0x000000f7; +} +static inline u32 gmmu_pte_kind_c128_ms4_2c_v(void) +{ + return 0x000000f8; +} +static inline u32 gmmu_pte_kind_c128_ms4_2cr_v(void) +{ + return 0x000000f9; +} +static inline u32 gmmu_pte_kind_c128_ms8_ms16_2c_v(void) +{ + return 0x000000fa; +} +static inline u32 gmmu_pte_kind_c128_ms8_ms16_2cr_v(void) +{ + return 0x000000fb; +} +static inline u32 gmmu_pte_kind_x8c24_v(void) +{ + return 0x000000fc; +} +static inline u32 gmmu_pte_kind_pitch_no_swizzle_v(void) +{ + return 0x000000fd; +} +static inline u32 gmmu_pte_kind_smsked_message_v(void) +{ + return 0x000000ca; +} +static inline u32 gmmu_pte_kind_smhost_message_v(void) +{ + return 0x000000cb; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h new file mode 100644 index 00000000..ece7602d --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h @@ -0,0 +1,3173 @@ +/* + * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +/* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_o(void) : Returns the offset for element <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>.
+ * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ +#ifndef _hw_gr_gk20a_h_ +#define _hw_gr_gk20a_h_ + +static inline u32 gr_intr_r(void) +{ + return 0x00400100; +} +static inline u32 gr_intr_notify_pending_f(void) +{ + return 0x1; +} +static inline u32 gr_intr_notify_reset_f(void) +{ + return 0x1; +} +static inline u32 gr_intr_semaphore_pending_f(void) +{ + return 0x2; +} +static inline u32 gr_intr_semaphore_reset_f(void) +{ + return 0x2; +} +static inline u32 gr_intr_semaphore_timeout_not_pending_f(void) +{ + return 0x0; +} +static inline u32 gr_intr_semaphore_timeout_pending_f(void) +{ + return 0x4; +} +static inline u32 gr_intr_semaphore_timeout_reset_f(void) +{ + return 0x4; +} +static inline u32 gr_intr_illegal_method_pending_f(void) +{ + return 0x10; +} +static inline u32 gr_intr_illegal_method_reset_f(void) +{ + return 0x10; +} +static inline u32 gr_intr_illegal_notify_pending_f(void) +{ + return 0x40; +} +static inline u32 gr_intr_illegal_notify_reset_f(void) +{ + return 0x40; +} +static inline u32 gr_intr_illegal_class_pending_f(void) +{ + return 0x20; +} +static inline u32 gr_intr_illegal_class_reset_f(void) +{ + return 0x20; +} +static inline u32 gr_intr_class_error_pending_f(void) +{ + return 0x100000; +} +static inline u32 gr_intr_class_error_reset_f(void) +{ + return 0x100000; +} +static inline u32 gr_intr_exception_pending_f(void) +{ + return 0x200000; +} +static inline u32 gr_intr_exception_reset_f(void) +{ + return 0x200000; +} +static inline u32 gr_intr_firmware_method_pending_f(void) +{ + return 0x100; +} +static inline u32 gr_intr_firmware_method_reset_f(void) +{ + return 0x100; +} +static inline u32 gr_intr_nonstall_r(void) +{ + return 0x00400120; +} +static inline u32 gr_intr_nonstall_trap_pending_f(void) +{ + return 0x2; +} +static inline u32 gr_intr_en_r(void) +{ + return 0x0040013c; +} +static inline u32 gr_exception_r(void) +{ + return 0x00400108; +} +static inline u32 gr_exception_fe_m(void) +{ + return 0x1 << 0; +} +static inline u32 gr_exception_gpc_m(void) +{ + return 0x1 << 24; +} +static inline u32 gr_exception1_r(void) +{ + return 0x00400118; +} +static inline u32 gr_exception1_gpc_0_pending_f(void) +{ + return 0x1; +} +static inline u32 gr_exception2_r(void) +{ + return 0x0040011c; +} +static inline u32 gr_exception_en_r(void) +{ + return 0x00400138; +} +static inline u32 gr_exception_en_fe_m(void) +{ + return 0x1 << 0; +} +static inline u32 gr_exception1_en_r(void) +{ + return 0x00400130; +} +static inline u32 gr_exception2_en_r(void) +{ + return 0x00400134; +} +static inline u32 gr_gpfifo_ctl_r(void) +{ + return 0x00400500; +} +static inline u32 gr_gpfifo_ctl_access_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 gr_gpfifo_ctl_access_disabled_f(void) +{ + return 0x0; +} +static inline u32 gr_gpfifo_ctl_access_enabled_f(void) +{ + return 0x1; +} +static inline u32 gr_gpfifo_ctl_semaphore_access_f(u32 v) +{ + return (v & 0x1) << 16; +} +static inline u32
gr_gpfifo_ctl_semaphore_access_enabled_v(void) +{ + return 0x00000001; +} +static inline u32 gr_gpfifo_ctl_semaphore_access_enabled_f(void) +{ + return 0x10000; +} +static inline u32 gr_trapped_addr_r(void) +{ + return 0x00400704; +} +static inline u32 gr_trapped_addr_mthd_v(u32 r) +{ + return (r >> 2) & 0xfff; +} +static inline u32 gr_trapped_addr_subch_v(u32 r) +{ + return (r >> 16) & 0x7; +} +static inline u32 gr_trapped_data_lo_r(void) +{ + return 0x00400708; +} +static inline u32 gr_trapped_data_hi_r(void) +{ + return 0x0040070c; +} +static inline u32 gr_status_r(void) +{ + return 0x00400700; +} +static inline u32 gr_status_fe_method_lower_v(u32 r) +{ + return (r >> 2) & 0x1; +} +static inline u32 gr_status_fe_method_lower_idle_v(void) +{ + return 0x00000000; +} +static inline u32 gr_status_mask_r(void) +{ + return 0x00400610; +} +static inline u32 gr_engine_status_r(void) +{ + return 0x0040060c; +} +static inline u32 gr_engine_status_value_busy_f(void) +{ + return 0x1; +} +static inline u32 gr_pipe_bundle_address_r(void) +{ + return 0x00400200; +} +static inline u32 gr_pipe_bundle_address_value_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +static inline u32 gr_pipe_bundle_data_r(void) +{ + return 0x00400204; +} +static inline u32 gr_pipe_bundle_config_r(void) +{ + return 0x00400208; +} +static inline u32 gr_pipe_bundle_config_override_pipe_mode_disabled_f(void) +{ + return 0x0; +} +static inline u32 gr_pipe_bundle_config_override_pipe_mode_enabled_f(void) +{ + return 0x80000000; +} +static inline u32 gr_fe_hww_esr_r(void) +{ + return 0x00404000; +} +static inline u32 gr_fe_hww_esr_reset_active_f(void) +{ + return 0x40000000; +} +static inline u32 gr_fe_hww_esr_en_enable_f(void) +{ + return 0x80000000; +} +static inline u32 gr_fe_go_idle_timeout_r(void) +{ + return 0x00404154; +} +static inline u32 gr_fe_go_idle_timeout_count_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 gr_fe_go_idle_timeout_count_disabled_f(void) +{ + return 0x0; +} +static inline u32 gr_fe_object_table_r(u32 i) +{ + return 0x00404200 + i*4; +} +static inline u32 gr_fe_object_table_nvclass_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +static inline u32 gr_pri_mme_shadow_raw_index_r(void) +{ + return 0x00404488; +} +static inline u32 gr_pri_mme_shadow_raw_index_write_trigger_f(void) +{ + return 0x80000000; +} +static inline u32 gr_pri_mme_shadow_raw_data_r(void) +{ + return 0x0040448c; +} +static inline u32 gr_mme_hww_esr_r(void) +{ + return 0x00404490; +} +static inline u32 gr_mme_hww_esr_reset_active_f(void) +{ + return 0x40000000; +} +static inline u32 gr_mme_hww_esr_en_enable_f(void) +{ + return 0x80000000; +} +static inline u32 gr_memfmt_hww_esr_r(void) +{ + return 0x00404600; +} +static inline u32 gr_memfmt_hww_esr_reset_active_f(void) +{ + return 0x40000000; +} +static inline u32 gr_memfmt_hww_esr_en_enable_f(void) +{ + return 0x80000000; +} +static inline u32 gr_fecs_cpuctl_r(void) +{ + return 0x00409100; +} +static inline u32 gr_fecs_cpuctl_startcpu_f(u32 v) +{ + return (v & 0x1) << 1; +} +static inline u32 gr_fecs_dmactl_r(void) +{ + return 0x0040910c; +} +static inline u32 gr_fecs_dmactl_require_ctx_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 gr_fecs_dmactl_dmem_scrubbing_m(void) +{ + return 0x1 << 1; +} +static inline u32 gr_fecs_dmactl_imem_scrubbing_m(void) +{ + return 0x1 << 2; +} +static inline u32 gr_fecs_os_r(void) +{ + return 0x00409080; +} +static inline u32 gr_fecs_idlestate_r(void) +{ + return 0x0040904c; +} +static inline u32 gr_fecs_mailbox0_r(void) +{ + return 
0x00409040; +} +static inline u32 gr_fecs_mailbox1_r(void) +{ + return 0x00409044; +} +static inline u32 gr_fecs_irqstat_r(void) +{ + return 0x00409008; +} +static inline u32 gr_fecs_irqmode_r(void) +{ + return 0x0040900c; +} +static inline u32 gr_fecs_irqmask_r(void) +{ + return 0x00409018; +} +static inline u32 gr_fecs_irqdest_r(void) +{ + return 0x0040901c; +} +static inline u32 gr_fecs_curctx_r(void) +{ + return 0x00409050; +} +static inline u32 gr_fecs_nxtctx_r(void) +{ + return 0x00409054; +} +static inline u32 gr_fecs_engctl_r(void) +{ + return 0x004090a4; +} +static inline u32 gr_fecs_debug1_r(void) +{ + return 0x00409090; +} +static inline u32 gr_fecs_debuginfo_r(void) +{ + return 0x00409094; +} +static inline u32 gr_fecs_icd_cmd_r(void) +{ + return 0x00409200; +} +static inline u32 gr_fecs_icd_cmd_opc_s(void) +{ + return 4; +} +static inline u32 gr_fecs_icd_cmd_opc_f(u32 v) +{ + return (v & 0xf) << 0; +} +static inline u32 gr_fecs_icd_cmd_opc_m(void) +{ + return 0xf << 0; +} +static inline u32 gr_fecs_icd_cmd_opc_v(u32 r) +{ + return (r >> 0) & 0xf; +} +static inline u32 gr_fecs_icd_cmd_opc_rreg_f(void) +{ + return 0x8; +} +static inline u32 gr_fecs_icd_cmd_opc_rstat_f(void) +{ + return 0xe; +} +static inline u32 gr_fecs_icd_cmd_idx_f(u32 v) +{ + return (v & 0x1f) << 8; +} +static inline u32 gr_fecs_icd_rdata_r(void) +{ + return 0x0040920c; +} +static inline u32 gr_fecs_imemc_r(u32 i) +{ + return 0x00409180 + i*16; +} +static inline u32 gr_fecs_imemc_offs_f(u32 v) +{ + return (v & 0x3f) << 2; +} +static inline u32 gr_fecs_imemc_blk_f(u32 v) +{ + return (v & 0xff) << 8; +} +static inline u32 gr_fecs_imemc_aincw_f(u32 v) +{ + return (v & 0x1) << 24; +} +static inline u32 gr_fecs_imemd_r(u32 i) +{ + return 0x00409184 + i*16; +} +static inline u32 gr_fecs_imemt_r(u32 i) +{ + return 0x00409188 + i*16; +} +static inline u32 gr_fecs_imemt_tag_f(u32 v) +{ + return (v & 0xffff) << 0; +} +static inline u32 gr_fecs_dmemc_r(u32 i) +{ + return 0x004091c0 + i*8; +} +static inline u32 gr_fecs_dmemc_offs_s(void) +{ + return 6; +} +static inline u32 gr_fecs_dmemc_offs_f(u32 v) +{ + return (v & 0x3f) << 2; +} +static inline u32 gr_fecs_dmemc_offs_m(void) +{ + return 0x3f << 2; +} +static inline u32 gr_fecs_dmemc_offs_v(u32 r) +{ + return (r >> 2) & 0x3f; +} +static inline u32 gr_fecs_dmemc_blk_f(u32 v) +{ + return (v & 0xff) << 8; +} +static inline u32 gr_fecs_dmemc_aincw_f(u32 v) +{ + return (v & 0x1) << 24; +} +static inline u32 gr_fecs_dmemd_r(u32 i) +{ + return 0x004091c4 + i*8; +} +static inline u32 gr_fecs_dmatrfbase_r(void) +{ + return 0x00409110; +} +static inline u32 gr_fecs_dmatrfmoffs_r(void) +{ + return 0x00409114; +} +static inline u32 gr_fecs_dmatrffboffs_r(void) +{ + return 0x0040911c; +} +static inline u32 gr_fecs_dmatrfcmd_r(void) +{ + return 0x00409118; +} +static inline u32 gr_fecs_dmatrfcmd_imem_f(u32 v) +{ + return (v & 0x1) << 4; +} +static inline u32 gr_fecs_dmatrfcmd_write_f(u32 v) +{ + return (v & 0x1) << 5; +} +static inline u32 gr_fecs_dmatrfcmd_size_f(u32 v) +{ + return (v & 0x7) << 8; +} +static inline u32 gr_fecs_dmatrfcmd_ctxdma_f(u32 v) +{ + return (v & 0x7) << 12; +} +static inline u32 gr_fecs_bootvec_r(void) +{ + return 0x00409104; +} +static inline u32 gr_fecs_bootvec_vec_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 gr_fecs_falcon_hwcfg_r(void) +{ + return 0x00409108; +} +static inline u32 gr_gpcs_gpccs_falcon_hwcfg_r(void) +{ + return 0x0041a108; +} +static inline u32 gr_fecs_falcon_rm_r(void) +{ + return 0x00409084; +} +static inline u32 
gr_fecs_current_ctx_r(void) +{ + return 0x00409b00; +} +static inline u32 gr_fecs_current_ctx_ptr_f(u32 v) +{ + return (v & 0xfffffff) << 0; +} +static inline u32 gr_fecs_current_ctx_ptr_v(u32 r) +{ + return (r >> 0) & 0xfffffff; +} +static inline u32 gr_fecs_current_ctx_target_s(void) +{ + return 2; +} +static inline u32 gr_fecs_current_ctx_target_f(u32 v) +{ + return (v & 0x3) << 28; +} +static inline u32 gr_fecs_current_ctx_target_m(void) +{ + return 0x3 << 28; +} +static inline u32 gr_fecs_current_ctx_target_v(u32 r) +{ + return (r >> 28) & 0x3; +} +static inline u32 gr_fecs_current_ctx_target_vid_mem_f(void) +{ + return 0x0; +} +static inline u32 gr_fecs_current_ctx_valid_s(void) +{ + return 1; +} +static inline u32 gr_fecs_current_ctx_valid_f(u32 v) +{ + return (v & 0x1) << 31; +} +static inline u32 gr_fecs_current_ctx_valid_m(void) +{ + return 0x1 << 31; +} +static inline u32 gr_fecs_current_ctx_valid_v(u32 r) +{ + return (r >> 31) & 0x1; +} +static inline u32 gr_fecs_current_ctx_valid_false_f(void) +{ + return 0x0; +} +static inline u32 gr_fecs_method_data_r(void) +{ + return 0x00409500; +} +static inline u32 gr_fecs_method_push_r(void) +{ + return 0x00409504; +} +static inline u32 gr_fecs_method_push_adr_f(u32 v) +{ + return (v & 0xfff) << 0; +} +static inline u32 gr_fecs_method_push_adr_bind_pointer_v(void) +{ + return 0x00000003; +} +static inline u32 gr_fecs_method_push_adr_bind_pointer_f(void) +{ + return 0x3; +} +static inline u32 gr_fecs_method_push_adr_discover_image_size_v(void) +{ + return 0x00000010; +} +static inline u32 gr_fecs_method_push_adr_wfi_golden_save_v(void) +{ + return 0x00000009; +} +static inline u32 gr_fecs_method_push_adr_restore_golden_v(void) +{ + return 0x00000015; +} +static inline u32 gr_fecs_method_push_adr_discover_zcull_image_size_v(void) +{ + return 0x00000016; +} +static inline u32 gr_fecs_method_push_adr_discover_pm_image_size_v(void) +{ + return 0x00000025; +} +static inline u32 gr_fecs_method_push_adr_discover_reglist_image_size_v(void) +{ + return 0x00000030; +} +static inline u32 gr_fecs_method_push_adr_set_reglist_bind_instance_v(void) +{ + return 0x00000031; +} +static inline u32 gr_fecs_method_push_adr_set_reglist_virtual_address_v(void) +{ + return 0x00000032; +} +static inline u32 gr_fecs_method_push_adr_stop_ctxsw_v(void) +{ + return 0x00000038; +} +static inline u32 gr_fecs_method_push_adr_start_ctxsw_v(void) +{ + return 0x00000039; +} +static inline u32 gr_fecs_method_push_adr_set_watchdog_timeout_f(void) +{ + return 0x21; +} +static inline u32 gr_fecs_host_int_enable_r(void) +{ + return 0x00409c24; +} +static inline u32 gr_fecs_host_int_enable_fault_during_ctxsw_enable_f(void) +{ + return 0x10000; +} +static inline u32 gr_fecs_host_int_enable_umimp_firmware_method_enable_f(void) +{ + return 0x20000; +} +static inline u32 gr_fecs_host_int_enable_umimp_illegal_method_enable_f(void) +{ + return 0x40000; +} +static inline u32 gr_fecs_host_int_enable_watchdog_enable_f(void) +{ + return 0x80000; +} +static inline u32 gr_fecs_ctxsw_reset_ctl_r(void) +{ + return 0x00409614; +} +static inline u32 gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f(void) +{ + return 0x0; +} +static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f(void) +{ + return 0x0; +} +static inline u32 gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f(void) +{ + return 0x0; +} +static inline u32 gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f(void) +{ + return 0x10; +} +static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f(void) +{ + return 0x20; +} +static 
inline u32 gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f(void) +{ + return 0x40; +} +static inline u32 gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f(void) +{ + return 0x0; +} +static inline u32 gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f(void) +{ + return 0x100; +} +static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f(void) +{ + return 0x0; +} +static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f(void) +{ + return 0x200; +} +static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_s(void) +{ + return 1; +} +static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_f(u32 v) +{ + return (v & 0x1) << 10; +} +static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_m(void) +{ + return 0x1 << 10; +} +static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_v(u32 r) +{ + return (r >> 10) & 0x1; +} +static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f(void) +{ + return 0x0; +} +static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f(void) +{ + return 0x400; +} +static inline u32 gr_fecs_ctx_state_store_major_rev_id_r(void) +{ + return 0x0040960c; +} +static inline u32 gr_fecs_ctxsw_mailbox_r(u32 i) +{ + return 0x00409800 + i*4; +} +static inline u32 gr_fecs_ctxsw_mailbox__size_1_v(void) +{ + return 0x00000008; +} +static inline u32 gr_fecs_ctxsw_mailbox_value_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 gr_fecs_ctxsw_mailbox_value_pass_v(void) +{ + return 0x00000001; +} +static inline u32 gr_fecs_ctxsw_mailbox_value_fail_v(void) +{ + return 0x00000002; +} +static inline u32 gr_fecs_ctxsw_mailbox_set_r(u32 i) +{ + return 0x00409820 + i*4; +} +static inline u32 gr_fecs_ctxsw_mailbox_set_value_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 gr_fecs_ctxsw_mailbox_clear_r(u32 i) +{ + return 0x00409840 + i*4; +} +static inline u32 gr_fecs_ctxsw_mailbox_clear_value_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 gr_fecs_fs_r(void) +{ + return 0x00409604; +} +static inline u32 gr_fecs_fs_num_available_gpcs_s(void) +{ + return 5; +} +static inline u32 gr_fecs_fs_num_available_gpcs_f(u32 v) +{ + return (v & 0x1f) << 0; +} +static inline u32 gr_fecs_fs_num_available_gpcs_m(void) +{ + return 0x1f << 0; +} +static inline u32 gr_fecs_fs_num_available_gpcs_v(u32 r) +{ + return (r >> 0) & 0x1f; +} +static inline u32 gr_fecs_fs_num_available_fbps_s(void) +{ + return 5; +} +static inline u32 gr_fecs_fs_num_available_fbps_f(u32 v) +{ + return (v & 0x1f) << 16; +} +static inline u32 gr_fecs_fs_num_available_fbps_m(void) +{ + return 0x1f << 16; +} +static inline u32 gr_fecs_fs_num_available_fbps_v(u32 r) +{ + return (r >> 16) & 0x1f; +} +static inline u32 gr_fecs_cfg_r(void) +{ + return 0x00409620; +} +static inline u32 gr_fecs_cfg_imem_sz_v(u32 r) +{ + return (r >> 0) & 0xff; +} +static inline u32 gr_fecs_rc_lanes_r(void) +{ + return 0x00409880; +} +static inline u32 gr_fecs_rc_lanes_num_chains_s(void) +{ + return 6; +} +static inline u32 gr_fecs_rc_lanes_num_chains_f(u32 v) +{ + return (v & 0x3f) << 0; +} +static inline u32 gr_fecs_rc_lanes_num_chains_m(void) +{ + return 0x3f << 0; +} +static inline u32 gr_fecs_rc_lanes_num_chains_v(u32 r) +{ + return (r >> 0) & 0x3f; +} +static inline u32 gr_fecs_ctxsw_status_1_r(void) +{ + return 0x00409400; +} +static inline u32 gr_fecs_ctxsw_status_1_arb_busy_s(void) +{ + return 1; +} +static inline u32 gr_fecs_ctxsw_status_1_arb_busy_f(u32 v) +{ + return (v & 0x1) << 12; +} +static inline u32 
gr_fecs_ctxsw_status_1_arb_busy_m(void) +{ + return 0x1 << 12; +} +static inline u32 gr_fecs_ctxsw_status_1_arb_busy_v(u32 r) +{ + return (r >> 12) & 0x1; +} +static inline u32 gr_fecs_arb_ctx_adr_r(void) +{ + return 0x00409a24; +} +static inline u32 gr_fecs_new_ctx_r(void) +{ + return 0x00409b04; +} +static inline u32 gr_fecs_new_ctx_ptr_s(void) +{ + return 28; +} +static inline u32 gr_fecs_new_ctx_ptr_f(u32 v) +{ + return (v & 0xfffffff) << 0; +} +static inline u32 gr_fecs_new_ctx_ptr_m(void) +{ + return 0xfffffff << 0; +} +static inline u32 gr_fecs_new_ctx_ptr_v(u32 r) +{ + return (r >> 0) & 0xfffffff; +} +static inline u32 gr_fecs_new_ctx_target_s(void) +{ + return 2; +} +static inline u32 gr_fecs_new_ctx_target_f(u32 v) +{ + return (v & 0x3) << 28; +} +static inline u32 gr_fecs_new_ctx_target_m(void) +{ + return 0x3 << 28; +} +static inline u32 gr_fecs_new_ctx_target_v(u32 r) +{ + return (r >> 28) & 0x3; +} +static inline u32 gr_fecs_new_ctx_valid_s(void) +{ + return 1; +} +static inline u32 gr_fecs_new_ctx_valid_f(u32 v) +{ + return (v & 0x1) << 31; +} +static inline u32 gr_fecs_new_ctx_valid_m(void) +{ + return 0x1 << 31; +} +static inline u32 gr_fecs_new_ctx_valid_v(u32 r) +{ + return (r >> 31) & 0x1; +} +static inline u32 gr_fecs_arb_ctx_ptr_r(void) +{ + return 0x00409a0c; +} +static inline u32 gr_fecs_arb_ctx_ptr_ptr_s(void) +{ + return 28; +} +static inline u32 gr_fecs_arb_ctx_ptr_ptr_f(u32 v) +{ + return (v & 0xfffffff) << 0; +} +static inline u32 gr_fecs_arb_ctx_ptr_ptr_m(void) +{ + return 0xfffffff << 0; +} +static inline u32 gr_fecs_arb_ctx_ptr_ptr_v(u32 r) +{ + return (r >> 0) & 0xfffffff; +} +static inline u32 gr_fecs_arb_ctx_ptr_target_s(void) +{ + return 2; +} +static inline u32 gr_fecs_arb_ctx_ptr_target_f(u32 v) +{ + return (v & 0x3) << 28; +} +static inline u32 gr_fecs_arb_ctx_ptr_target_m(void) +{ + return 0x3 << 28; +} +static inline u32 gr_fecs_arb_ctx_ptr_target_v(u32 r) +{ + return (r >> 28) & 0x3; +} +static inline u32 gr_fecs_arb_ctx_cmd_r(void) +{ + return 0x00409a10; +} +static inline u32 gr_fecs_arb_ctx_cmd_cmd_s(void) +{ + return 5; +} +static inline u32 gr_fecs_arb_ctx_cmd_cmd_f(u32 v) +{ + return (v & 0x1f) << 0; +} +static inline u32 gr_fecs_arb_ctx_cmd_cmd_m(void) +{ + return 0x1f << 0; +} +static inline u32 gr_fecs_arb_ctx_cmd_cmd_v(u32 r) +{ + return (r >> 0) & 0x1f; +} +static inline u32 gr_rstr2d_gpc_map0_r(void) +{ + return 0x0040780c; +} +static inline u32 gr_rstr2d_gpc_map1_r(void) +{ + return 0x00407810; +} +static inline u32 gr_rstr2d_gpc_map2_r(void) +{ + return 0x00407814; +} +static inline u32 gr_rstr2d_gpc_map3_r(void) +{ + return 0x00407818; +} +static inline u32 gr_rstr2d_gpc_map4_r(void) +{ + return 0x0040781c; +} +static inline u32 gr_rstr2d_gpc_map5_r(void) +{ + return 0x00407820; +} +static inline u32 gr_rstr2d_map_table_cfg_r(void) +{ + return 0x004078bc; +} +static inline u32 gr_rstr2d_map_table_cfg_row_offset_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 gr_rstr2d_map_table_cfg_num_entries_f(u32 v) +{ + return (v & 0xff) << 8; +} +static inline u32 gr_pd_hww_esr_r(void) +{ + return 0x00406018; +} +static inline u32 gr_pd_hww_esr_reset_active_f(void) +{ + return 0x40000000; +} +static inline u32 gr_pd_hww_esr_en_enable_f(void) +{ + return 0x80000000; +} +static inline u32 gr_pd_num_tpc_per_gpc_r(u32 i) +{ + return 0x00406028 + i*4; +} +static inline u32 gr_pd_num_tpc_per_gpc__size_1_v(void) +{ + return 0x00000004; +} +static inline u32 gr_pd_num_tpc_per_gpc_count0_f(u32 v) +{ + return (v & 0xf) << 0; +} +static inline 
u32 gr_pd_num_tpc_per_gpc_count1_f(u32 v) +{ + return (v & 0xf) << 4; +} +static inline u32 gr_pd_num_tpc_per_gpc_count2_f(u32 v) +{ + return (v & 0xf) << 8; +} +static inline u32 gr_pd_num_tpc_per_gpc_count3_f(u32 v) +{ + return (v & 0xf) << 12; +} +static inline u32 gr_pd_num_tpc_per_gpc_count4_f(u32 v) +{ + return (v & 0xf) << 16; +} +static inline u32 gr_pd_num_tpc_per_gpc_count5_f(u32 v) +{ + return (v & 0xf) << 20; +} +static inline u32 gr_pd_num_tpc_per_gpc_count6_f(u32 v) +{ + return (v & 0xf) << 24; +} +static inline u32 gr_pd_num_tpc_per_gpc_count7_f(u32 v) +{ + return (v & 0xf) << 28; +} +static inline u32 gr_pd_ab_dist_cfg0_r(void) +{ + return 0x004064c0; +} +static inline u32 gr_pd_ab_dist_cfg0_timeslice_enable_en_f(void) +{ + return 0x80000000; +} +static inline u32 gr_pd_ab_dist_cfg0_timeslice_enable_dis_f(void) +{ + return 0x0; +} +static inline u32 gr_pd_ab_dist_cfg1_r(void) +{ + return 0x004064c4; +} +static inline u32 gr_pd_ab_dist_cfg1_max_batches_init_f(void) +{ + return 0xffff; +} +static inline u32 gr_pd_ab_dist_cfg1_max_output_f(u32 v) +{ + return (v & 0x7ff) << 16; +} +static inline u32 gr_pd_ab_dist_cfg1_max_output_granularity_v(void) +{ + return 0x00000080; +} +static inline u32 gr_pd_ab_dist_cfg2_r(void) +{ + return 0x004064c8; +} +static inline u32 gr_pd_ab_dist_cfg2_token_limit_f(u32 v) +{ + return (v & 0xfff) << 0; +} +static inline u32 gr_pd_ab_dist_cfg2_token_limit_init_v(void) +{ + return 0x00000100; +} +static inline u32 gr_pd_ab_dist_cfg2_state_limit_f(u32 v) +{ + return (v & 0xfff) << 16; +} +static inline u32 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(void) +{ + return 0x00000020; +} +static inline u32 gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(void) +{ + return 0x00000062; +} +static inline u32 gr_pd_pagepool_r(void) +{ + return 0x004064cc; +} +static inline u32 gr_pd_pagepool_total_pages_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 gr_pd_pagepool_valid_true_f(void) +{ + return 0x80000000; +} +static inline u32 gr_pd_dist_skip_table_r(u32 i) +{ + return 0x004064d0 + i*4; +} +static inline u32 gr_pd_dist_skip_table__size_1_v(void) +{ + return 0x00000008; +} +static inline u32 gr_pd_dist_skip_table_gpc_4n0_mask_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 gr_pd_dist_skip_table_gpc_4n1_mask_f(u32 v) +{ + return (v & 0xff) << 8; +} +static inline u32 gr_pd_dist_skip_table_gpc_4n2_mask_f(u32 v) +{ + return (v & 0xff) << 16; +} +static inline u32 gr_pd_dist_skip_table_gpc_4n3_mask_f(u32 v) +{ + return (v & 0xff) << 24; +} +static inline u32 gr_pd_alpha_ratio_table_r(u32 i) +{ + return 0x00406800 + i*4; +} +static inline u32 gr_pd_alpha_ratio_table__size_1_v(void) +{ + return 0x00000100; +} +static inline u32 gr_pd_alpha_ratio_table_gpc_4n0_mask_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 gr_pd_alpha_ratio_table_gpc_4n1_mask_f(u32 v) +{ + return (v & 0xff) << 8; +} +static inline u32 gr_pd_alpha_ratio_table_gpc_4n2_mask_f(u32 v) +{ + return (v & 0xff) << 16; +} +static inline u32 gr_pd_alpha_ratio_table_gpc_4n3_mask_f(u32 v) +{ + return (v & 0xff) << 24; +} +static inline u32 gr_pd_beta_ratio_table_r(u32 i) +{ + return 0x00406c00 + i*4; +} +static inline u32 gr_pd_beta_ratio_table__size_1_v(void) +{ + return 0x00000100; +} +static inline u32 gr_pd_beta_ratio_table_gpc_4n0_mask_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 gr_pd_beta_ratio_table_gpc_4n1_mask_f(u32 v) +{ + return (v & 0xff) << 8; +} +static inline u32 gr_pd_beta_ratio_table_gpc_4n2_mask_f(u32 v) +{ + return (v & 
0xff) << 16; +} +static inline u32 gr_pd_beta_ratio_table_gpc_4n3_mask_f(u32 v) +{ + return (v & 0xff) << 24; +} +static inline u32 gr_ds_debug_r(void) +{ + return 0x00405800; +} +static inline u32 gr_ds_debug_timeslice_mode_disable_f(void) +{ + return 0x0; +} +static inline u32 gr_ds_debug_timeslice_mode_enable_f(void) +{ + return 0x8000000; +} +static inline u32 gr_ds_zbc_color_r_r(void) +{ + return 0x00405804; +} +static inline u32 gr_ds_zbc_color_r_val_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 gr_ds_zbc_color_g_r(void) +{ + return 0x00405808; +} +static inline u32 gr_ds_zbc_color_g_val_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 gr_ds_zbc_color_b_r(void) +{ + return 0x0040580c; +} +static inline u32 gr_ds_zbc_color_b_val_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 gr_ds_zbc_color_a_r(void) +{ + return 0x00405810; +} +static inline u32 gr_ds_zbc_color_a_val_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 gr_ds_zbc_color_fmt_r(void) +{ + return 0x00405814; +} +static inline u32 gr_ds_zbc_color_fmt_val_f(u32 v) +{ + return (v & 0x7f) << 0; +} +static inline u32 gr_ds_zbc_color_fmt_val_invalid_f(void) +{ + return 0x0; +} +static inline u32 gr_ds_zbc_color_fmt_val_zero_v(void) +{ + return 0x00000001; +} +static inline u32 gr_ds_zbc_color_fmt_val_unorm_one_v(void) +{ + return 0x00000002; +} +static inline u32 gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v(void) +{ + return 0x00000004; +} +static inline u32 gr_ds_zbc_z_r(void) +{ + return 0x00405818; +} +static inline u32 gr_ds_zbc_z_val_s(void) +{ + return 32; +} +static inline u32 gr_ds_zbc_z_val_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 gr_ds_zbc_z_val_m(void) +{ + return 0xffffffff << 0; +} +static inline u32 gr_ds_zbc_z_val_v(u32 r) +{ + return (r >> 0) & 0xffffffff; +} +static inline u32 gr_ds_zbc_z_val__init_v(void) +{ + return 0x00000000; +} +static inline u32 gr_ds_zbc_z_val__init_f(void) +{ + return 0x0; +} +static inline u32 gr_ds_zbc_z_fmt_r(void) +{ + return 0x0040581c; +} +static inline u32 gr_ds_zbc_z_fmt_val_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 gr_ds_zbc_z_fmt_val_invalid_f(void) +{ + return 0x0; +} +static inline u32 gr_ds_zbc_z_fmt_val_fp32_v(void) +{ + return 0x00000001; +} +static inline u32 gr_ds_zbc_tbl_index_r(void) +{ + return 0x00405820; +} +static inline u32 gr_ds_zbc_tbl_index_val_f(u32 v) +{ + return (v & 0xf) << 0; +} +static inline u32 gr_ds_zbc_tbl_ld_r(void) +{ + return 0x00405824; +} +static inline u32 gr_ds_zbc_tbl_ld_select_c_f(void) +{ + return 0x0; +} +static inline u32 gr_ds_zbc_tbl_ld_select_z_f(void) +{ + return 0x1; +} +static inline u32 gr_ds_zbc_tbl_ld_action_write_f(void) +{ + return 0x0; +} +static inline u32 gr_ds_zbc_tbl_ld_trigger_active_f(void) +{ + return 0x4; +} +static inline u32 gr_ds_tga_constraintlogic_r(void) +{ + return 0x00405830; +} +static inline u32 gr_ds_tga_constraintlogic_beta_cbsize_f(u32 v) +{ + return (v & 0xfff) << 16; +} +static inline u32 gr_ds_tga_constraintlogic_alpha_cbsize_f(u32 v) +{ + return (v & 0xfff) << 0; +} +static inline u32 gr_ds_hww_esr_r(void) +{ + return 0x00405840; +} +static inline u32 gr_ds_hww_esr_reset_s(void) +{ + return 1; +} +static inline u32 gr_ds_hww_esr_reset_f(u32 v) +{ + return (v & 0x1) << 30; +} +static inline u32 gr_ds_hww_esr_reset_m(void) +{ + return 0x1 << 30; +} +static inline u32 gr_ds_hww_esr_reset_v(u32 r) +{ + return (r >> 30) & 0x1; +} +static inline u32 gr_ds_hww_esr_reset_task_v(void) +{ + return 
0x00000001; +} +static inline u32 gr_ds_hww_esr_reset_task_f(void) +{ + return 0x40000000; +} +static inline u32 gr_ds_hww_esr_en_enabled_f(void) +{ + return 0x80000000; +} +static inline u32 gr_ds_hww_report_mask_r(void) +{ + return 0x00405844; +} +static inline u32 gr_ds_hww_report_mask_sph0_err_report_f(void) +{ + return 0x1; +} +static inline u32 gr_ds_hww_report_mask_sph1_err_report_f(void) +{ + return 0x2; +} +static inline u32 gr_ds_hww_report_mask_sph2_err_report_f(void) +{ + return 0x4; +} +static inline u32 gr_ds_hww_report_mask_sph3_err_report_f(void) +{ + return 0x8; +} +static inline u32 gr_ds_hww_report_mask_sph4_err_report_f(void) +{ + return 0x10; +} +static inline u32 gr_ds_hww_report_mask_sph5_err_report_f(void) +{ + return 0x20; +} +static inline u32 gr_ds_hww_report_mask_sph6_err_report_f(void) +{ + return 0x40; +} +static inline u32 gr_ds_hww_report_mask_sph7_err_report_f(void) +{ + return 0x80; +} +static inline u32 gr_ds_hww_report_mask_sph8_err_report_f(void) +{ + return 0x100; +} +static inline u32 gr_ds_hww_report_mask_sph9_err_report_f(void) +{ + return 0x200; +} +static inline u32 gr_ds_hww_report_mask_sph10_err_report_f(void) +{ + return 0x400; +} +static inline u32 gr_ds_hww_report_mask_sph11_err_report_f(void) +{ + return 0x800; +} +static inline u32 gr_ds_hww_report_mask_sph12_err_report_f(void) +{ + return 0x1000; +} +static inline u32 gr_ds_hww_report_mask_sph13_err_report_f(void) +{ + return 0x2000; +} +static inline u32 gr_ds_hww_report_mask_sph14_err_report_f(void) +{ + return 0x4000; +} +static inline u32 gr_ds_hww_report_mask_sph15_err_report_f(void) +{ + return 0x8000; +} +static inline u32 gr_ds_hww_report_mask_sph16_err_report_f(void) +{ + return 0x10000; +} +static inline u32 gr_ds_hww_report_mask_sph17_err_report_f(void) +{ + return 0x20000; +} +static inline u32 gr_ds_hww_report_mask_sph18_err_report_f(void) +{ + return 0x40000; +} +static inline u32 gr_ds_hww_report_mask_sph19_err_report_f(void) +{ + return 0x80000; +} +static inline u32 gr_ds_hww_report_mask_sph20_err_report_f(void) +{ + return 0x100000; +} +static inline u32 gr_ds_hww_report_mask_sph21_err_report_f(void) +{ + return 0x200000; +} +static inline u32 gr_ds_hww_report_mask_sph22_err_report_f(void) +{ + return 0x400000; +} +static inline u32 gr_ds_hww_report_mask_sph23_err_report_f(void) +{ + return 0x800000; +} +static inline u32 gr_ds_num_tpc_per_gpc_r(u32 i) +{ + return 0x00405870 + i*4; +} +static inline u32 gr_scc_bundle_cb_base_r(void) +{ + return 0x00408004; +} +static inline u32 gr_scc_bundle_cb_base_addr_39_8_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 gr_scc_bundle_cb_base_addr_39_8_align_bits_v(void) +{ + return 0x00000008; +} +static inline u32 gr_scc_bundle_cb_size_r(void) +{ + return 0x00408008; +} +static inline u32 gr_scc_bundle_cb_size_div_256b_f(u32 v) +{ + return (v & 0x7ff) << 0; +} +static inline u32 gr_scc_bundle_cb_size_div_256b__prod_v(void) +{ + return 0x00000018; +} +static inline u32 gr_scc_bundle_cb_size_div_256b_byte_granularity_v(void) +{ + return 0x00000100; +} +static inline u32 gr_scc_bundle_cb_size_valid_false_v(void) +{ + return 0x00000000; +} +static inline u32 gr_scc_bundle_cb_size_valid_false_f(void) +{ + return 0x0; +} +static inline u32 gr_scc_bundle_cb_size_valid_true_f(void) +{ + return 0x80000000; +} +static inline u32 gr_scc_pagepool_base_r(void) +{ + return 0x0040800c; +} +static inline u32 gr_scc_pagepool_base_addr_39_8_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 
gr_scc_pagepool_base_addr_39_8_align_bits_v(void) +{ + return 0x00000008; +} +static inline u32 gr_scc_pagepool_r(void) +{ + return 0x00408010; +} +static inline u32 gr_scc_pagepool_total_pages_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 gr_scc_pagepool_total_pages_hwmax_v(void) +{ + return 0x00000000; +} +static inline u32 gr_scc_pagepool_total_pages_hwmax_value_v(void) +{ + return 0x00000080; +} +static inline u32 gr_scc_pagepool_total_pages_byte_granularity_v(void) +{ + return 0x00000100; +} +static inline u32 gr_scc_pagepool_max_valid_pages_s(void) +{ + return 8; +} +static inline u32 gr_scc_pagepool_max_valid_pages_f(u32 v) +{ + return (v & 0xff) << 8; +} +static inline u32 gr_scc_pagepool_max_valid_pages_m(void) +{ + return 0xff << 8; +} +static inline u32 gr_scc_pagepool_max_valid_pages_v(u32 r) +{ + return (r >> 8) & 0xff; +} +static inline u32 gr_scc_pagepool_valid_true_f(void) +{ + return 0x80000000; +} +static inline u32 gr_scc_init_r(void) +{ + return 0x0040802c; +} +static inline u32 gr_scc_init_ram_trigger_f(void) +{ + return 0x1; +} +static inline u32 gr_scc_hww_esr_r(void) +{ + return 0x00408030; +} +static inline u32 gr_scc_hww_esr_reset_active_f(void) +{ + return 0x40000000; +} +static inline u32 gr_scc_hww_esr_en_enable_f(void) +{ + return 0x80000000; +} +static inline u32 gr_sked_hww_esr_r(void) +{ + return 0x00407020; +} +static inline u32 gr_sked_hww_esr_reset_active_f(void) +{ + return 0x40000000; +} +static inline u32 gr_cwd_fs_r(void) +{ + return 0x00405b00; +} +static inline u32 gr_cwd_fs_num_gpcs_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 gr_cwd_fs_num_tpcs_f(u32 v) +{ + return (v & 0xff) << 8; +} +static inline u32 gr_gpc0_fs_gpc_r(void) +{ + return 0x00502608; +} +static inline u32 gr_gpc0_fs_gpc_num_available_tpcs_v(u32 r) +{ + return (r >> 0) & 0x1f; +} +static inline u32 gr_gpc0_fs_gpc_num_available_zculls_v(u32 r) +{ + return (r >> 16) & 0x1f; +} +static inline u32 gr_gpc0_cfg_r(void) +{ + return 0x00502620; +} +static inline u32 gr_gpc0_cfg_imem_sz_v(u32 r) +{ + return (r >> 0) & 0xff; +} +static inline u32 gr_gpccs_rc_lanes_r(void) +{ + return 0x00502880; +} +static inline u32 gr_gpccs_rc_lanes_num_chains_s(void) +{ + return 6; +} +static inline u32 gr_gpccs_rc_lanes_num_chains_f(u32 v) +{ + return (v & 0x3f) << 0; +} +static inline u32 gr_gpccs_rc_lanes_num_chains_m(void) +{ + return 0x3f << 0; +} +static inline u32 gr_gpccs_rc_lanes_num_chains_v(u32 r) +{ + return (r >> 0) & 0x3f; +} +static inline u32 gr_gpccs_rc_lane_size_r(u32 i) +{ + return 0x00502910 + i*0; +} +static inline u32 gr_gpccs_rc_lane_size__size_1_v(void) +{ + return 0x00000010; +} +static inline u32 gr_gpccs_rc_lane_size_v_s(void) +{ + return 24; +} +static inline u32 gr_gpccs_rc_lane_size_v_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +static inline u32 gr_gpccs_rc_lane_size_v_m(void) +{ + return 0xffffff << 0; +} +static inline u32 gr_gpccs_rc_lane_size_v_v(u32 r) +{ + return (r >> 0) & 0xffffff; +} +static inline u32 gr_gpccs_rc_lane_size_v_0_v(void) +{ + return 0x00000000; +} +static inline u32 gr_gpccs_rc_lane_size_v_0_f(void) +{ + return 0x0; +} +static inline u32 gr_gpc0_zcull_fs_r(void) +{ + return 0x00500910; +} +static inline u32 gr_gpc0_zcull_fs_num_sms_f(u32 v) +{ + return (v & 0x1ff) << 0; +} +static inline u32 gr_gpc0_zcull_fs_num_active_banks_f(u32 v) +{ + return (v & 0xf) << 16; +} +static inline u32 gr_gpc0_zcull_ram_addr_r(void) +{ + return 0x00500914; +} +static inline u32 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(u32 v) 
+{ + return (v & 0xf) << 0; +} +static inline u32 gr_gpc0_zcull_ram_addr_row_offset_f(u32 v) +{ + return (v & 0xf) << 8; +} +static inline u32 gr_gpc0_zcull_sm_num_rcp_r(void) +{ + return 0x00500918; +} +static inline u32 gr_gpc0_zcull_sm_num_rcp_conservative_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +static inline u32 gr_gpc0_zcull_sm_num_rcp_conservative__max_v(void) +{ + return 0x00800000; +} +static inline u32 gr_gpc0_zcull_total_ram_size_r(void) +{ + return 0x00500920; +} +static inline u32 gr_gpc0_zcull_total_ram_size_num_aliquots_f(u32 v) +{ + return (v & 0xffff) << 0; +} +static inline u32 gr_gpc0_zcull_zcsize_r(u32 i) +{ + return 0x00500a04 + i*32; +} +static inline u32 gr_gpc0_zcull_zcsize_height_subregion__multiple_v(void) +{ + return 0x00000040; +} +static inline u32 gr_gpc0_zcull_zcsize_width_subregion__multiple_v(void) +{ + return 0x00000010; +} +static inline u32 gr_gpc0_gpm_pd_active_tpcs_r(void) +{ + return 0x00500c08; +} +static inline u32 gr_gpc0_gpm_pd_active_tpcs_num_f(u32 v) +{ + return (v & 0x7) << 0; +} +static inline u32 gr_gpc0_gpm_pd_sm_id_r(u32 i) +{ + return 0x00500c10 + i*4; +} +static inline u32 gr_gpc0_gpm_pd_sm_id_id_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 gr_gpc0_gpm_pd_pes_tpc_id_mask_r(u32 i) +{ + return 0x00500c30 + i*4; +} +static inline u32 gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(u32 r) +{ + return (r >> 0) & 0xff; +} +static inline u32 gr_gpc0_gpm_sd_active_tpcs_r(void) +{ + return 0x00500c8c; +} +static inline u32 gr_gpc0_gpm_sd_active_tpcs_num_f(u32 v) +{ + return (v & 0x7) << 0; +} +static inline u32 gr_gpc0_tpc0_pe_cfg_smid_r(void) +{ + return 0x00504088; +} +static inline u32 gr_gpc0_tpc0_pe_cfg_smid_value_f(u32 v) +{ + return (v & 0xffff) << 0; +} +static inline u32 gr_gpc0_tpc0_l1c_cfg_smid_r(void) +{ + return 0x005044e8; +} +static inline u32 gr_gpc0_tpc0_l1c_cfg_smid_value_f(u32 v) +{ + return (v & 0xffff) << 0; +} +static inline u32 gr_gpc0_tpc0_sm_cfg_r(void) +{ + return 0x00504698; +} +static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v) +{ + return (v & 0xffff) << 0; +} +static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void) +{ + return 0x00503018; +} +static inline u32 gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(void) +{ + return 0x1 << 0; +} +static inline u32 gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f(void) +{ + return 0x1; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg_r(void) +{ + return 0x005030c0; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg_start_offset_f(u32 v) +{ + return (v & 0xffff) << 0; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg_start_offset_m(void) +{ + return 0xffff << 0; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg_start_offset_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg_size_f(u32 v) +{ + return (v & 0xfff) << 16; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg_size_m(void) +{ + return 0xfff << 16; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg_size_v(u32 r) +{ + return (r >> 16) & 0xfff; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg_size_default_v(void) +{ + return 0x00000240; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg_size_granularity_v(void) +{ + return 0x00000020; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(u32 v) +{ + return (v & 0x1) << 28; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg2_r(void) +{ + return 0x005030e4; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg2_start_offset_f(u32 v) +{ + return (v & 0xffff) << 0; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_f(u32 v) +{ + return (v & 0xfff) << 16; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_m(void) +{ + return 
0xfff << 16; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_v(u32 r) +{ + return (r >> 16) & 0xfff; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_default_v(void) +{ + return 0x00000648; +} +static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_granularity_v(void) +{ + return 0x00000020; +} +static inline u32 gr_gpccs_falcon_addr_r(void) +{ + return 0x0041a0ac; +} +static inline u32 gr_gpccs_falcon_addr_lsb_s(void) +{ + return 6; +} +static inline u32 gr_gpccs_falcon_addr_lsb_f(u32 v) +{ + return (v & 0x3f) << 0; +} +static inline u32 gr_gpccs_falcon_addr_lsb_m(void) +{ + return 0x3f << 0; +} +static inline u32 gr_gpccs_falcon_addr_lsb_v(u32 r) +{ + return (r >> 0) & 0x3f; +} +static inline u32 gr_gpccs_falcon_addr_lsb_init_v(void) +{ + return 0x00000000; +} +static inline u32 gr_gpccs_falcon_addr_lsb_init_f(void) +{ + return 0x0; +} +static inline u32 gr_gpccs_falcon_addr_msb_s(void) +{ + return 6; +} +static inline u32 gr_gpccs_falcon_addr_msb_f(u32 v) +{ + return (v & 0x3f) << 6; +} +static inline u32 gr_gpccs_falcon_addr_msb_m(void) +{ + return 0x3f << 6; +} +static inline u32 gr_gpccs_falcon_addr_msb_v(u32 r) +{ + return (r >> 6) & 0x3f; +} +static inline u32 gr_gpccs_falcon_addr_msb_init_v(void) +{ + return 0x00000000; +} +static inline u32 gr_gpccs_falcon_addr_msb_init_f(void) +{ + return 0x0; +} +static inline u32 gr_gpccs_falcon_addr_ext_s(void) +{ + return 12; +} +static inline u32 gr_gpccs_falcon_addr_ext_f(u32 v) +{ + return (v & 0xfff) << 0; +} +static inline u32 gr_gpccs_falcon_addr_ext_m(void) +{ + return 0xfff << 0; +} +static inline u32 gr_gpccs_falcon_addr_ext_v(u32 r) +{ + return (r >> 0) & 0xfff; +} +static inline u32 gr_gpccs_cpuctl_r(void) +{ + return 0x0041a100; +} +static inline u32 gr_gpccs_cpuctl_startcpu_f(u32 v) +{ + return (v & 0x1) << 1; +} +static inline u32 gr_gpccs_dmactl_r(void) +{ + return 0x0041a10c; +} +static inline u32 gr_gpccs_dmactl_require_ctx_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 gr_gpccs_dmactl_dmem_scrubbing_m(void) +{ + return 0x1 << 1; +} +static inline u32 gr_gpccs_dmactl_imem_scrubbing_m(void) +{ + return 0x1 << 2; +} +static inline u32 gr_gpccs_imemc_r(u32 i) +{ + return 0x0041a180 + i*16; +} +static inline u32 gr_gpccs_imemc_offs_f(u32 v) +{ + return (v & 0x3f) << 2; +} +static inline u32 gr_gpccs_imemc_blk_f(u32 v) +{ + return (v & 0xff) << 8; +} +static inline u32 gr_gpccs_imemc_aincw_f(u32 v) +{ + return (v & 0x1) << 24; +} +static inline u32 gr_gpccs_imemd_r(u32 i) +{ + return 0x0041a184 + i*16; +} +static inline u32 gr_gpccs_imemt_r(u32 i) +{ + return 0x0041a188 + i*16; +} +static inline u32 gr_gpccs_imemt__size_1_v(void) +{ + return 0x00000004; +} +static inline u32 gr_gpccs_imemt_tag_f(u32 v) +{ + return (v & 0xffff) << 0; +} +static inline u32 gr_gpccs_dmemc_r(u32 i) +{ + return 0x0041a1c0 + i*8; +} +static inline u32 gr_gpccs_dmemc_offs_f(u32 v) +{ + return (v & 0x3f) << 2; +} +static inline u32 gr_gpccs_dmemc_blk_f(u32 v) +{ + return (v & 0xff) << 8; +} +static inline u32 gr_gpccs_dmemc_aincw_f(u32 v) +{ + return (v & 0x1) << 24; +} +static inline u32 gr_gpccs_dmemd_r(u32 i) +{ + return 0x0041a1c4 + i*8; +} +static inline u32 gr_gpccs_ctxsw_mailbox_r(u32 i) +{ + return 0x0041a800 + i*4; +} +static inline u32 gr_gpccs_ctxsw_mailbox_value_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 gr_gpcs_setup_bundle_cb_base_r(void) +{ + return 0x00418808; +} +static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_s(void) +{ + return 32; +} +static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_f(u32 v) 
+{ + return (v & 0xffffffff) << 0; +} +static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_m(void) +{ + return 0xffffffff << 0; +} +static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_v(u32 r) +{ + return (r >> 0) & 0xffffffff; +} +static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_init_v(void) +{ + return 0x00000000; +} +static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_init_f(void) +{ + return 0x0; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_r(void) +{ + return 0x0041880c; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_s(void) +{ + return 11; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_f(u32 v) +{ + return (v & 0x7ff) << 0; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_m(void) +{ + return 0x7ff << 0; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_v(u32 r) +{ + return (r >> 0) & 0x7ff; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_init_v(void) +{ + return 0x00000000; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_init_f(void) +{ + return 0x0; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b__prod_v(void) +{ + return 0x00000018; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b__prod_f(void) +{ + return 0x18; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_valid_s(void) +{ + return 1; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_valid_f(u32 v) +{ + return (v & 0x1) << 31; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_valid_m(void) +{ + return 0x1 << 31; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_valid_v(u32 r) +{ + return (r >> 31) & 0x1; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_valid_false_v(void) +{ + return 0x00000000; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_valid_false_f(void) +{ + return 0x0; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_valid_true_v(void) +{ + return 0x00000001; +} +static inline u32 gr_gpcs_setup_bundle_cb_size_valid_true_f(void) +{ + return 0x80000000; +} +static inline u32 gr_gpcs_setup_attrib_cb_base_r(void) +{ + return 0x00418810; +} +static inline u32 gr_gpcs_setup_attrib_cb_base_addr_39_12_f(u32 v) +{ + return (v & 0xfffffff) << 0; +} +static inline u32 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v(void) +{ + return 0x0000000c; +} +static inline u32 gr_gpcs_setup_attrib_cb_base_valid_true_f(void) +{ + return 0x80000000; +} +static inline u32 gr_crstr_gpc_map0_r(void) +{ + return 0x00418b08; +} +static inline u32 gr_crstr_gpc_map0_tile0_f(u32 v) +{ + return (v & 0x7) << 0; +} +static inline u32 gr_crstr_gpc_map0_tile1_f(u32 v) +{ + return (v & 0x7) << 5; +} +static inline u32 gr_crstr_gpc_map0_tile2_f(u32 v) +{ + return (v & 0x7) << 10; +} +static inline u32 gr_crstr_gpc_map0_tile3_f(u32 v) +{ + return (v & 0x7) << 15; +} +static inline u32 gr_crstr_gpc_map0_tile4_f(u32 v) +{ + return (v & 0x7) << 20; +} +static inline u32 gr_crstr_gpc_map0_tile5_f(u32 v) +{ + return (v & 0x7) << 25; +} +static inline u32 gr_crstr_gpc_map1_r(void) +{ + return 0x00418b0c; +} +static inline u32 gr_crstr_gpc_map1_tile6_f(u32 v) +{ + return (v & 0x7) << 0; +} +static inline u32 gr_crstr_gpc_map1_tile7_f(u32 v) +{ + return (v & 0x7) << 5; +} +static inline u32 gr_crstr_gpc_map1_tile8_f(u32 v) +{ + return (v & 0x7) << 10; +} +static inline u32 gr_crstr_gpc_map1_tile9_f(u32 v) +{ + return (v & 0x7) << 15; +} +static inline u32 gr_crstr_gpc_map1_tile10_f(u32 v) +{ + return (v & 0x7) << 20; +} +static inline u32 gr_crstr_gpc_map1_tile11_f(u32 v) +{ + return (v & 0x7) << 25; +} +static inline u32 
gr_crstr_gpc_map2_r(void) +{ + return 0x00418b10; +} +static inline u32 gr_crstr_gpc_map2_tile12_f(u32 v) +{ + return (v & 0x7) << 0; +} +static inline u32 gr_crstr_gpc_map2_tile13_f(u32 v) +{ + return (v & 0x7) << 5; +} +static inline u32 gr_crstr_gpc_map2_tile14_f(u32 v) +{ + return (v & 0x7) << 10; +} +static inline u32 gr_crstr_gpc_map2_tile15_f(u32 v) +{ + return (v & 0x7) << 15; +} +static inline u32 gr_crstr_gpc_map2_tile16_f(u32 v) +{ + return (v & 0x7) << 20; +} +static inline u32 gr_crstr_gpc_map2_tile17_f(u32 v) +{ + return (v & 0x7) << 25; +} +static inline u32 gr_crstr_gpc_map3_r(void) +{ + return 0x00418b14; +} +static inline u32 gr_crstr_gpc_map3_tile18_f(u32 v) +{ + return (v & 0x7) << 0; +} +static inline u32 gr_crstr_gpc_map3_tile19_f(u32 v) +{ + return (v & 0x7) << 5; +} +static inline u32 gr_crstr_gpc_map3_tile20_f(u32 v) +{ + return (v & 0x7) << 10; +} +static inline u32 gr_crstr_gpc_map3_tile21_f(u32 v) +{ + return (v & 0x7) << 15; +} +static inline u32 gr_crstr_gpc_map3_tile22_f(u32 v) +{ + return (v & 0x7) << 20; +} +static inline u32 gr_crstr_gpc_map3_tile23_f(u32 v) +{ + return (v & 0x7) << 25; +} +static inline u32 gr_crstr_gpc_map4_r(void) +{ + return 0x00418b18; +} +static inline u32 gr_crstr_gpc_map4_tile24_f(u32 v) +{ + return (v & 0x7) << 0; +} +static inline u32 gr_crstr_gpc_map4_tile25_f(u32 v) +{ + return (v & 0x7) << 5; +} +static inline u32 gr_crstr_gpc_map4_tile26_f(u32 v) +{ + return (v & 0x7) << 10; +} +static inline u32 gr_crstr_gpc_map4_tile27_f(u32 v) +{ + return (v & 0x7) << 15; +} +static inline u32 gr_crstr_gpc_map4_tile28_f(u32 v) +{ + return (v & 0x7) << 20; +} +static inline u32 gr_crstr_gpc_map4_tile29_f(u32 v) +{ + return (v & 0x7) << 25; +} +static inline u32 gr_crstr_gpc_map5_r(void) +{ + return 0x00418b1c; +} +static inline u32 gr_crstr_gpc_map5_tile30_f(u32 v) +{ + return (v & 0x7) << 0; +} +static inline u32 gr_crstr_gpc_map5_tile31_f(u32 v) +{ + return (v & 0x7) << 5; +} +static inline u32 gr_crstr_gpc_map5_tile32_f(u32 v) +{ + return (v & 0x7) << 10; +} +static inline u32 gr_crstr_gpc_map5_tile33_f(u32 v) +{ + return (v & 0x7) << 15; +} +static inline u32 gr_crstr_gpc_map5_tile34_f(u32 v) +{ + return (v & 0x7) << 20; +} +static inline u32 gr_crstr_gpc_map5_tile35_f(u32 v) +{ + return (v & 0x7) << 25; +} +static inline u32 gr_crstr_map_table_cfg_r(void) +{ + return 0x00418bb8; +} +static inline u32 gr_crstr_map_table_cfg_row_offset_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 gr_crstr_map_table_cfg_num_entries_f(u32 v) +{ + return (v & 0xff) << 8; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_r(void) +{ + return 0x00418980; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(u32 v) +{ + return (v & 0x7) << 0; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(u32 v) +{ + return (v & 0x7) << 4; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(u32 v) +{ + return (v & 0x7) << 8; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(u32 v) +{ + return (v & 0x7) << 12; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(u32 v) +{ + return (v & 0x7) << 16; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(u32 v) +{ + return (v & 0x7) << 20; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(u32 v) +{ + return (v & 0x7) << 24; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(u32 v) +{ + return (v & 0x7) << 28; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_r(void) +{ + return 
0x00418984; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(u32 v) +{ + return (v & 0x7) << 0; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(u32 v) +{ + return (v & 0x7) << 4; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(u32 v) +{ + return (v & 0x7) << 8; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(u32 v) +{ + return (v & 0x7) << 12; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(u32 v) +{ + return (v & 0x7) << 16; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(u32 v) +{ + return (v & 0x7) << 20; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(u32 v) +{ + return (v & 0x7) << 24; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(u32 v) +{ + return (v & 0x7) << 28; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_r(void) +{ + return 0x00418988; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(u32 v) +{ + return (v & 0x7) << 0; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(u32 v) +{ + return (v & 0x7) << 4; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(u32 v) +{ + return (v & 0x7) << 8; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(u32 v) +{ + return (v & 0x7) << 12; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(u32 v) +{ + return (v & 0x7) << 16; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(u32 v) +{ + return (v & 0x7) << 20; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(u32 v) +{ + return (v & 0x7) << 24; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_s(void) +{ + return 3; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(u32 v) +{ + return (v & 0x7) << 28; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_m(void) +{ + return 0x7 << 28; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_v(u32 r) +{ + return (r >> 28) & 0x7; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_r(void) +{ + return 0x0041898c; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(u32 v) +{ + return (v & 0x7) << 0; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(u32 v) +{ + return (v & 0x7) << 4; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(u32 v) +{ + return (v & 0x7) << 8; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(u32 v) +{ + return (v & 0x7) << 12; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(u32 v) +{ + return (v & 0x7) << 16; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(u32 v) +{ + return (v & 0x7) << 20; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(u32 v) +{ + return (v & 0x7) << 24; +} +static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(u32 v) +{ + return (v & 0x7) << 28; +} +static inline u32 gr_gpcs_gpm_pd_cfg_r(void) +{ + return 0x00418c6c; +} +static inline u32 gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f(void) +{ + return 0x0; +} +static inline u32 gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f(void) +{ + return 0x1; +} +static inline u32 gr_gpcs_gcc_pagepool_base_r(void) +{ + return 0x00419004; +} +static inline u32 gr_gpcs_gcc_pagepool_base_addr_39_8_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 gr_gpcs_gcc_pagepool_r(void) +{ + return 0x00419008; +} +static inline u32 gr_gpcs_gcc_pagepool_total_pages_f(u32 v) +{ + return (v & 
0xff) << 0; +} +static inline u32 gr_gpcs_tpcs_pe_vaf_r(void) +{ + return 0x0041980c; +} +static inline u32 gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f(void) +{ + return 0x10; +} +static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(void) +{ + return 0x00419848; +} +static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(u32 v) +{ + return (v & 0xfffffff) << 0; +} +static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_f(u32 v) +{ + return (v & 0x1) << 28; +} +static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(void) +{ + return 0x10000000; +} +static inline u32 gr_gpcs_tpcs_l1c_pm_r(void) +{ + return 0x00419ca8; +} +static inline u32 gr_gpcs_tpcs_l1c_pm_enable_m(void) +{ + return 0x1 << 31; +} +static inline u32 gr_gpcs_tpcs_l1c_pm_enable_enable_f(void) +{ + return 0x80000000; +} +static inline u32 gr_gpcs_tpcs_l1c_cfg_r(void) +{ + return 0x00419cb8; +} +static inline u32 gr_gpcs_tpcs_l1c_cfg_blkactivity_enable_m(void) +{ + return 0x1 << 31; +} +static inline u32 gr_gpcs_tpcs_l1c_cfg_blkactivity_enable_enable_f(void) +{ + return 0x80000000; +} +static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_r(void) +{ + return 0x00419c00; +} +static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f(void) +{ + return 0x0; +} +static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f(void) +{ + return 0x8; +} +static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_r(void) +{ + return 0x00419e00; +} +static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_core_enable_m(void) +{ + return 0x1 << 7; +} +static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_core_enable_enable_f(void) +{ + return 0x80; +} +static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_m(void) +{ + return 0x1 << 15; +} +static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_enable_f(void) +{ + return 0x8000; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(void) +{ + return 0x00419e44; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f(void) +{ + return 0x2; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f(void) +{ + return 0x4; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f(void) +{ + return 0x8; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f(void) +{ + return 0x10; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f(void) +{ + return 0x20; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f(void) +{ + return 0x40; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f(void) +{ + return 0x80; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f(void) +{ + return 0x100; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f(void) +{ + return 0x200; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f(void) +{ + return 0x400; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f(void) +{ + return 0x800; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f(void) +{ + return 0x1000; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f(void) +{ + return 0x2000; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f(void) +{ + return 0x4000; +} +static inline u32 
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f(void) +{ + return 0x8000; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f(void) +{ + return 0x10000; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f(void) +{ + return 0x20000; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f(void) +{ + return 0x40000; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f(void) +{ + return 0x80000; +} +static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f(void) +{ + return 0x100000; +} +static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(void) +{ + return 0x00419e4c; +} +static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f(void) +{ + return 0x1; +} +static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f(void) +{ + return 0x2; +} +static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f(void) +{ + return 0x4; +} +static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f(void) +{ + return 0x8; +} +static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f(void) +{ + return 0x10; +} +static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f(void) +{ + return 0x20; +} +static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f(void) +{ + return 0x40; +} +static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void) +{ + return 0x0050450c; +} +static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void) +{ + return 0x2; +} +static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_disabled_f(void) +{ + return 0x0; +} +static inline u32 gr_gpc0_gpccs_gpc_exception_en_r(void) +{ + return 0x00502c94; +} +static inline u32 gr_gpc0_gpccs_gpc_exception_en_tpc_0_enabled_f(void) +{ + return 0x10000; +} +static inline u32 gr_gpc0_gpccs_gpc_exception_en_tpc_0_disabled_f(void) +{ + return 0x0; +} +static inline u32 gr_gpcs_gpccs_gpc_exception_r(void) +{ + return 0x0041ac90; +} +static inline u32 gr_gpcs_gpccs_gpc_exception_tpc_v(u32 r) +{ + return (r >> 16) & 0xff; +} +static inline u32 gr_gpcs_gpccs_gpc_exception_tpc_0_pending_v(void) +{ + return 0x00000001; +} +static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_r(void) +{ + return 0x00419d08; +} +static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_sm_v(u32 r) +{ + return (r >> 1) & 0x1; +} +static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_sm_pending_v(void) +{ + return 0x00000001; +} +static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_r(void) +{ + return 0x00504610; +} +static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(u32 r) +{ + return (r >> 0) & 0x1; +} +static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v(void) +{ + return 0x00000001; +} +static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(void) +{ + return 0x80000000; +} +static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void) +{ + return 0x0050460c; +} +static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(u32 r) +{ + return (r >> 4) & 0x1; +} +static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v(void) +{ + return 0x00000001; +} +static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_r(void) +{ + return 0x00504650; +} +static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f(void) +{ + return 0x10; +} +static inline u32 
gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f(void) +{ + return 0x20; +} +static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(void) +{ + return 0x40; +} +static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_r(void) +{ + return 0x00504648; +} +static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v(void) +{ + return 0x00000000; +} +static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f(void) +{ + return 0x0; +} +static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void) +{ + return 0x00504770; +} +static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_r(void) +{ + return 0x00419f70; +} +static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_blkactivity_enable_m(void) +{ + return 0x1 << 1; +} +static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_blkactivity_enable_enable_f(void) +{ + return 0x2; +} +static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(void) +{ + return 0x1 << 4; +} +static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(u32 v) +{ + return (v & 0x1) << 4; +} +static inline u32 gr_gpc0_tpc0_sm_debug_sfe_control_r(void) +{ + return 0x0050477c; +} +static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_r(void) +{ + return 0x00419f7c; +} +static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(void) +{ + return 0x1 << 0; +} +static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_blkactivity_enable_m(void) +{ + return 0x1 << 16; +} +static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_blkactivity_enable_enable_f(void) +{ + return 0x10000; +} +static inline u32 gr_gpcs_tpcs_sm_power_throttle_r(void) +{ + return 0x00419ed0; +} +static inline u32 gr_gpcs_tpcs_pes_vsc_vpc_r(void) +{ + return 0x0041be08; +} +static inline u32 gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f(void) +{ + return 0x4; +} +static inline u32 gr_ppcs_wwdx_map_gpc_map0_r(void) +{ + return 0x0041bf00; +} +static inline u32 gr_ppcs_wwdx_map_gpc_map1_r(void) +{ + return 0x0041bf04; +} +static inline u32 gr_ppcs_wwdx_map_gpc_map2_r(void) +{ + return 0x0041bf08; +} +static inline u32 gr_ppcs_wwdx_map_gpc_map3_r(void) +{ + return 0x0041bf0c; +} +static inline u32 gr_ppcs_wwdx_map_gpc_map4_r(void) +{ + return 0x0041bf10; +} +static inline u32 gr_ppcs_wwdx_map_gpc_map5_r(void) +{ + return 0x0041bf14; +} +static inline u32 gr_ppcs_wwdx_map_table_cfg_r(void) +{ + return 0x0041bfd0; +} +static inline u32 gr_ppcs_wwdx_map_table_cfg_row_offset_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 gr_ppcs_wwdx_map_table_cfg_num_entries_f(u32 v) +{ + return (v & 0xff) << 8; +} +static inline u32 gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(u32 v) +{ + return (v & 0x1f) << 16; +} +static inline u32 gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(u32 v) +{ + return (v & 0x7) << 21; +} +static inline u32 gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(u32 v) +{ + return (v & 0x1f) << 24; +} +static inline u32 gr_gpcs_ppcs_wwdx_sm_num_rcp_r(void) +{ + return 0x0041bfd4; +} +static inline u32 gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +static inline u32 gr_ppcs_wwdx_map_table_cfg2_r(void) +{ + return 0x0041bfe4; +} +static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(u32 v) +{ + return (v & 0x1f) << 0; +} +static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(u32 v) +{ + return (v & 0x1f) << 5; +} 
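As an illustration of how the generated helpers above compose (a sketch only, not code from this patch): the _f() helpers return pre-shifted field values that are OR'd together into a full register word, while the _v() helpers shift a read-back value down to bit 0 so it can be compared against the matching constant _v() helper. The example_* functions below are hypothetical and assume only the inline accessors defined earlier in this header.

static inline u32 example_wwdx_map_table_cfg(u32 row_offset, u32 num_entries)
{
	/* OR pre-shifted _f() fields together to form a register value
	 * suitable for writing to gr_ppcs_wwdx_map_table_cfg_r(). */
	return gr_ppcs_wwdx_map_table_cfg_row_offset_f(row_offset) |
	       gr_ppcs_wwdx_map_table_cfg_num_entries_f(num_entries);
}

static inline u32 example_sm_is_locked_down(u32 dbgr_status0)
{
	/* _v() extracts the field from a read-back value so it can be
	 * compared with the unshifted _true_v() constant. */
	return gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) ==
	       gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v();
}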
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(u32 v) +{ + return (v & 0x1f) << 10; +} +static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(u32 v) +{ + return (v & 0x1f) << 15; +} +static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(u32 v) +{ + return (v & 0x1f) << 20; +} +static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(u32 v) +{ + return (v & 0x1f) << 25; +} +static inline u32 gr_gpcs_ppcs_cbm_cfg_r(void) +{ + return 0x0041bec0; +} +static inline u32 gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v(void) +{ + return 0x00000001; +} +static inline u32 gr_bes_zrop_settings_r(void) +{ + return 0x00408850; +} +static inline u32 gr_bes_zrop_settings_num_active_fbps_f(u32 v) +{ + return (v & 0xf) << 0; +} +static inline u32 gr_bes_crop_settings_r(void) +{ + return 0x00408958; +} +static inline u32 gr_bes_crop_settings_num_active_fbps_f(u32 v) +{ + return (v & 0xf) << 0; +} +static inline u32 gr_zcull_bytes_per_aliquot_per_gpu_v(void) +{ + return 0x00000020; +} +static inline u32 gr_zcull_save_restore_header_bytes_per_gpc_v(void) +{ + return 0x00000020; +} +static inline u32 gr_zcull_save_restore_subregion_header_bytes_per_gpc_v(void) +{ + return 0x000000c0; +} +static inline u32 gr_zcull_subregion_qty_v(void) +{ + return 0x00000010; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r(void) +{ + return 0x00504604; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r(void) +{ + return 0x00504608; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r(void) +{ + return 0x0050465c; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control1_r(void) +{ + return 0x00504660; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control2_r(void) +{ + return 0x00504664; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control3_r(void) +{ + return 0x00504668; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control4_r(void) +{ + return 0x0050466c; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r(void) +{ + return 0x00504658; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r(void) +{ + return 0x00504670; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status1_r(void) +{ + return 0x00504694; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_control_r(void) +{ + return 0x00504730; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_control_r(void) +{ + return 0x00504734; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_control_r(void) +{ + return 0x00504738; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_control_r(void) +{ + return 0x0050473c; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_control_r(void) +{ + return 0x00504740; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_control_r(void) +{ + return 0x00504744; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_control_r(void) +{ + return 0x00504748; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_control_r(void) +{ + return 0x0050474c; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r(void) +{ + return 0x00504674; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_r(void) +{ + return 0x00504678; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_r(void) +{ + return 0x0050467c; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_r(void) +{ + return 0x00504680; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_r(void) +{ + 
return 0x00504684; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(void) +{ + return 0x00504688; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(void) +{ + return 0x0050468c; +} +static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(void) +{ + return 0x00504690; +} +static inline u32 gr_fe_pwr_mode_r(void) +{ + return 0x00404170; +} +static inline u32 gr_fe_pwr_mode_mode_auto_f(void) +{ + return 0x0; +} +static inline u32 gr_fe_pwr_mode_mode_force_on_f(void) +{ + return 0x2; +} +static inline u32 gr_fe_pwr_mode_req_v(u32 r) +{ + return (r >> 4) & 0x1; +} +static inline u32 gr_fe_pwr_mode_req_send_f(void) +{ + return 0x10; +} +static inline u32 gr_fe_pwr_mode_req_done_v(void) +{ + return 0x00000000; +} +static inline u32 gr_gpc0_tpc0_l1c_dbg_r(void) +{ + return 0x005044b0; +} +static inline u32 gr_gpc0_tpc0_l1c_dbg_cya15_en_f(void) +{ + return 0x8000000; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h new file mode 100644 index 00000000..65221b59 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
+ */ +#ifndef _hw_ltc_gk20a_h_ +#define _hw_ltc_gk20a_h_ + +static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void) +{ + return 0x001410c8; +} +static inline u32 ltc_ltc0_lts0_tstg_cfg1_r(void) +{ + return 0x00141104; +} +static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_ways_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_v(u32 r) +{ + return (r >> 16) & 0x3; +} +static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v(void) +{ + return 0x00000000; +} +static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v(void) +{ + return 0x00000001; +} +static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v(void) +{ + return 0x00000002; +} +static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void) +{ + return 0x0017e8c8; +} +static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r) +{ + return (r >> 2) & 0x1; +} +static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_v(void) +{ + return 0x00000001; +} +static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(void) +{ + return 0x4; +} +static inline u32 ltc_ltc0_lts0_cbc_ctrl1_r(void) +{ + return 0x0017e8c8; +} +static inline u32 ltc_ltcs_ltss_cbc_ctrl2_r(void) +{ + return 0x0017e8cc; +} +static inline u32 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(u32 v) +{ + return (v & 0x1ffff) << 0; +} +static inline u32 ltc_ltcs_ltss_cbc_ctrl3_r(void) +{ + return 0x0017e8d0; +} +static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(u32 v) +{ + return (v & 0x1ffff) << 0; +} +static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(void) +{ + return 0x0001ffff; +} +static inline u32 ltc_ltcs_ltss_cbc_base_r(void) +{ + return 0x0017e8d4; +} +static inline u32 ltc_ltcs_ltss_cbc_base_alignment_shift_v(void) +{ + return 0x0000000b; +} +static inline u32 ltc_ltcs_ltss_cbc_base_address_v(u32 r) +{ + return (r >> 0) & 0x3ffffff; +} +static inline u32 ltc_ltcs_ltss_cbc_param_r(void) +{ + return 0x0017e8dc; +} +static inline u32 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +static inline u32 ltc_ltcs_ltss_cbc_param_cache_line_size_v(u32 r) +{ + return (r >> 24) & 0xf; +} +static inline u32 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(u32 r) +{ + return (r >> 28) & 0xf; +} +static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_r(void) +{ + return 0x0017e91c; +} +static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(u32 v) +{ + return (v & 0x1f) << 16; +} +static inline u32 ltc_ltcs_ltss_dstg_zbc_index_r(void) +{ + return 0x0017ea44; +} +static inline u32 ltc_ltcs_ltss_dstg_zbc_index_address_f(u32 v) +{ + return (v & 0xf) << 0; +} +static inline u32 ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(u32 i) +{ + return 0x0017ea48 + i*4; +} +static inline u32 ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(void) +{ + return 0x00000004; +} +static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(void) +{ + return 0x0017ea58; +} +static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_s(void) +{ + return 32; +} +static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_m(void) +{ + return 0xffffffff << 0; +} +static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_v(u32 r) +{ + return (r >> 0) & 0xffffffff; +} +static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_2_r(void) +{ + return 0x0017e924; +} +static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(void) +{ + return 0x10000000; +} +static inline u32 
ltc_ltss_g_elpg_r(void) +{ + return 0x0017e828; +} +static inline u32 ltc_ltss_g_elpg_flush_v(u32 r) +{ + return (r >> 0) & 0x1; +} +static inline u32 ltc_ltss_g_elpg_flush_pending_v(void) +{ + return 0x00000001; +} +static inline u32 ltc_ltss_g_elpg_flush_pending_f(void) +{ + return 0x1; +} +static inline u32 ltc_ltc0_ltss_intr_r(void) +{ + return 0x00140820; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_mc_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_mc_gk20a.h new file mode 100644 index 00000000..1692bb54 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_mc_gk20a.h @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
+ */ +#ifndef _hw_mc_gk20a_h_ +#define _hw_mc_gk20a_h_ + +static inline u32 mc_boot_0_r(void) +{ + return 0x00000000; +} +static inline u32 mc_boot_0_architecture_v(u32 r) +{ + return (r >> 24) & 0x1f; +} +static inline u32 mc_boot_0_implementation_v(u32 r) +{ + return (r >> 20) & 0xf; +} +static inline u32 mc_boot_0_major_revision_v(u32 r) +{ + return (r >> 4) & 0xf; +} +static inline u32 mc_boot_0_minor_revision_v(u32 r) +{ + return (r >> 0) & 0xf; +} +static inline u32 mc_intr_0_r(void) +{ + return 0x00000100; +} +static inline u32 mc_intr_0_pfifo_pending_f(void) +{ + return 0x100; +} +static inline u32 mc_intr_0_pgraph_pending_f(void) +{ + return 0x1000; +} +static inline u32 mc_intr_0_pmu_pending_f(void) +{ + return 0x1000000; +} +static inline u32 mc_intr_0_ltc_pending_f(void) +{ + return 0x2000000; +} +static inline u32 mc_intr_0_priv_ring_pending_f(void) +{ + return 0x40000000; +} +static inline u32 mc_intr_0_pbus_pending_f(void) +{ + return 0x10000000; +} +static inline u32 mc_intr_1_r(void) +{ + return 0x00000104; +} +static inline u32 mc_intr_mask_0_r(void) +{ + return 0x00000640; +} +static inline u32 mc_intr_mask_0_pmu_enabled_f(void) +{ + return 0x1000000; +} +static inline u32 mc_intr_mask_1_r(void) +{ + return 0x00000644; +} +static inline u32 mc_intr_mask_1_pmu_enabled_f(void) +{ + return 0x1000000; +} +static inline u32 mc_intr_en_0_r(void) +{ + return 0x00000140; +} +static inline u32 mc_intr_en_0_inta_disabled_f(void) +{ + return 0x0; +} +static inline u32 mc_intr_en_0_inta_hardware_f(void) +{ + return 0x1; +} +static inline u32 mc_intr_en_1_r(void) +{ + return 0x00000144; +} +static inline u32 mc_intr_en_1_inta_disabled_f(void) +{ + return 0x0; +} +static inline u32 mc_intr_en_1_inta_hardware_f(void) +{ + return 0x1; +} +static inline u32 mc_enable_r(void) +{ + return 0x00000200; +} +static inline u32 mc_enable_xbar_enabled_f(void) +{ + return 0x4; +} +static inline u32 mc_enable_l2_enabled_f(void) +{ + return 0x8; +} +static inline u32 mc_enable_pmedia_s(void) +{ + return 1; +} +static inline u32 mc_enable_pmedia_f(u32 v) +{ + return (v & 0x1) << 4; +} +static inline u32 mc_enable_pmedia_m(void) +{ + return 0x1 << 4; +} +static inline u32 mc_enable_pmedia_v(u32 r) +{ + return (r >> 4) & 0x1; +} +static inline u32 mc_enable_priv_ring_enabled_f(void) +{ + return 0x20; +} +static inline u32 mc_enable_ce0_m(void) +{ + return 0x1 << 6; +} +static inline u32 mc_enable_pfifo_enabled_f(void) +{ + return 0x100; +} +static inline u32 mc_enable_pgraph_enabled_f(void) +{ + return 0x1000; +} +static inline u32 mc_enable_pwr_v(u32 r) +{ + return (r >> 13) & 0x1; +} +static inline u32 mc_enable_pwr_disabled_v(void) +{ + return 0x00000000; +} +static inline u32 mc_enable_pwr_enabled_f(void) +{ + return 0x2000; +} +static inline u32 mc_enable_pfb_enabled_f(void) +{ + return 0x100000; +} +static inline u32 mc_enable_ce2_m(void) +{ + return 0x1 << 21; +} +static inline u32 mc_enable_ce2_enabled_f(void) +{ + return 0x200000; +} +static inline u32 mc_enable_blg_enabled_f(void) +{ + return 0x8000000; +} +static inline u32 mc_enable_perfmon_enabled_f(void) +{ + return 0x10000000; +} +static inline u32 mc_enable_hub_enabled_f(void) +{ + return 0x20000000; +} +static inline u32 mc_enable_pb_r(void) +{ + return 0x00000204; +} +static inline u32 mc_enable_pb_0_s(void) +{ + return 1; +} +static inline u32 mc_enable_pb_0_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 mc_enable_pb_0_m(void) +{ + return 0x1 << 0; +} +static inline u32 mc_enable_pb_0_v(u32 r) +{ + return (r >> 0) & 0x1; 
+} +static inline u32 mc_enable_pb_0_enabled_v(void) +{ + return 0x00000001; +} +static inline u32 mc_enable_pb_sel_f(u32 v, u32 i) +{ + return (v & 0x1) << (0 + i*1); +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_pbdma_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pbdma_gk20a.h new file mode 100644 index 00000000..df1a6d48 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_pbdma_gk20a.h @@ -0,0 +1,469 @@ +/* + * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
+ */ +#ifndef _hw_pbdma_gk20a_h_ +#define _hw_pbdma_gk20a_h_ + +static inline u32 pbdma_gp_entry1_r(void) +{ + return 0x10000004; +} +static inline u32 pbdma_gp_entry1_get_hi_v(u32 r) +{ + return (r >> 0) & 0xff; +} +static inline u32 pbdma_gp_entry1_length_f(u32 v) +{ + return (v & 0x1fffff) << 10; +} +static inline u32 pbdma_gp_entry1_length_v(u32 r) +{ + return (r >> 10) & 0x1fffff; +} +static inline u32 pbdma_gp_base_r(u32 i) +{ + return 0x00040048 + i*8192; +} +static inline u32 pbdma_gp_base__size_1_v(void) +{ + return 0x00000001; +} +static inline u32 pbdma_gp_base_offset_f(u32 v) +{ + return (v & 0x1fffffff) << 3; +} +static inline u32 pbdma_gp_base_rsvd_s(void) +{ + return 3; +} +static inline u32 pbdma_gp_base_hi_r(u32 i) +{ + return 0x0004004c + i*8192; +} +static inline u32 pbdma_gp_base_hi_offset_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 pbdma_gp_base_hi_limit2_f(u32 v) +{ + return (v & 0x1f) << 16; +} +static inline u32 pbdma_gp_fetch_r(u32 i) +{ + return 0x00040050 + i*8192; +} +static inline u32 pbdma_gp_get_r(u32 i) +{ + return 0x00040014 + i*8192; +} +static inline u32 pbdma_gp_put_r(u32 i) +{ + return 0x00040000 + i*8192; +} +static inline u32 pbdma_pb_fetch_r(u32 i) +{ + return 0x00040054 + i*8192; +} +static inline u32 pbdma_pb_fetch_hi_r(u32 i) +{ + return 0x00040058 + i*8192; +} +static inline u32 pbdma_get_r(u32 i) +{ + return 0x00040018 + i*8192; +} +static inline u32 pbdma_get_hi_r(u32 i) +{ + return 0x0004001c + i*8192; +} +static inline u32 pbdma_put_r(u32 i) +{ + return 0x0004005c + i*8192; +} +static inline u32 pbdma_put_hi_r(u32 i) +{ + return 0x00040060 + i*8192; +} +static inline u32 pbdma_formats_r(u32 i) +{ + return 0x0004009c + i*8192; +} +static inline u32 pbdma_formats_gp_fermi0_f(void) +{ + return 0x0; +} +static inline u32 pbdma_formats_pb_fermi1_f(void) +{ + return 0x100; +} +static inline u32 pbdma_formats_mp_fermi0_f(void) +{ + return 0x0; +} +static inline u32 pbdma_syncpointa_r(u32 i) +{ + return 0x000400a4 + i*8192; +} +static inline u32 pbdma_syncpointa_payload_v(u32 r) +{ + return (r >> 0) & 0xffffffff; +} +static inline u32 pbdma_syncpointb_r(u32 i) +{ + return 0x000400a8 + i*8192; +} +static inline u32 pbdma_syncpointb_op_v(u32 r) +{ + return (r >> 0) & 0x3; +} +static inline u32 pbdma_syncpointb_op_wait_v(void) +{ + return 0x00000000; +} +static inline u32 pbdma_syncpointb_wait_switch_v(u32 r) +{ + return (r >> 4) & 0x1; +} +static inline u32 pbdma_syncpointb_wait_switch_en_v(void) +{ + return 0x00000001; +} +static inline u32 pbdma_syncpointb_syncpt_index_v(u32 r) +{ + return (r >> 8) & 0xff; +} +static inline u32 pbdma_pb_header_r(u32 i) +{ + return 0x00040084 + i*8192; +} +static inline u32 pbdma_pb_header_priv_user_f(void) +{ + return 0x0; +} +static inline u32 pbdma_pb_header_method_zero_f(void) +{ + return 0x0; +} +static inline u32 pbdma_pb_header_subchannel_zero_f(void) +{ + return 0x0; +} +static inline u32 pbdma_pb_header_level_main_f(void) +{ + return 0x0; +} +static inline u32 pbdma_pb_header_first_true_f(void) +{ + return 0x400000; +} +static inline u32 pbdma_pb_header_type_inc_f(void) +{ + return 0x20000000; +} +static inline u32 pbdma_subdevice_r(u32 i) +{ + return 0x00040094 + i*8192; +} +static inline u32 pbdma_subdevice_id_f(u32 v) +{ + return (v & 0xfff) << 0; +} +static inline u32 pbdma_subdevice_status_active_f(void) +{ + return 0x10000000; +} +static inline u32 pbdma_subdevice_channel_dma_enable_f(void) +{ + return 0x20000000; +} +static inline u32 pbdma_method0_r(u32 i) +{ + return 0x000400c0 + 
i*8192; +} +static inline u32 pbdma_data0_r(u32 i) +{ + return 0x000400c4 + i*8192; +} +static inline u32 pbdma_target_r(u32 i) +{ + return 0x000400ac + i*8192; +} +static inline u32 pbdma_target_engine_sw_f(void) +{ + return 0x1f; +} +static inline u32 pbdma_acquire_r(u32 i) +{ + return 0x00040030 + i*8192; +} +static inline u32 pbdma_acquire_retry_man_2_f(void) +{ + return 0x2; +} +static inline u32 pbdma_acquire_retry_exp_2_f(void) +{ + return 0x100; +} +static inline u32 pbdma_acquire_timeout_exp_max_f(void) +{ + return 0x7800; +} +static inline u32 pbdma_acquire_timeout_man_max_f(void) +{ + return 0x7fff8000; +} +static inline u32 pbdma_acquire_timeout_en_disable_f(void) +{ + return 0x0; +} +static inline u32 pbdma_status_r(u32 i) +{ + return 0x00040100 + i*8192; +} +static inline u32 pbdma_channel_r(u32 i) +{ + return 0x00040120 + i*8192; +} +static inline u32 pbdma_signature_r(u32 i) +{ + return 0x00040010 + i*8192; +} +static inline u32 pbdma_signature_hw_valid_f(void) +{ + return 0xface; +} +static inline u32 pbdma_signature_sw_zero_f(void) +{ + return 0x0; +} +static inline u32 pbdma_userd_r(u32 i) +{ + return 0x00040008 + i*8192; +} +static inline u32 pbdma_userd_target_vid_mem_f(void) +{ + return 0x0; +} +static inline u32 pbdma_userd_addr_f(u32 v) +{ + return (v & 0x7fffff) << 9; +} +static inline u32 pbdma_userd_hi_r(u32 i) +{ + return 0x0004000c + i*8192; +} +static inline u32 pbdma_userd_hi_addr_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 pbdma_hce_ctrl_r(u32 i) +{ + return 0x000400e4 + i*8192; +} +static inline u32 pbdma_hce_ctrl_hce_priv_mode_yes_f(void) +{ + return 0x20; +} +static inline u32 pbdma_intr_0_r(u32 i) +{ + return 0x00040108 + i*8192; +} +static inline u32 pbdma_intr_0_memreq_v(u32 r) +{ + return (r >> 0) & 0x1; +} +static inline u32 pbdma_intr_0_memreq_pending_f(void) +{ + return 0x1; +} +static inline u32 pbdma_intr_0_memack_timeout_pending_f(void) +{ + return 0x2; +} +static inline u32 pbdma_intr_0_memack_extra_pending_f(void) +{ + return 0x4; +} +static inline u32 pbdma_intr_0_memdat_timeout_pending_f(void) +{ + return 0x8; +} +static inline u32 pbdma_intr_0_memdat_extra_pending_f(void) +{ + return 0x10; +} +static inline u32 pbdma_intr_0_memflush_pending_f(void) +{ + return 0x20; +} +static inline u32 pbdma_intr_0_memop_pending_f(void) +{ + return 0x40; +} +static inline u32 pbdma_intr_0_lbconnect_pending_f(void) +{ + return 0x80; +} +static inline u32 pbdma_intr_0_lbreq_pending_f(void) +{ + return 0x100; +} +static inline u32 pbdma_intr_0_lback_timeout_pending_f(void) +{ + return 0x200; +} +static inline u32 pbdma_intr_0_lback_extra_pending_f(void) +{ + return 0x400; +} +static inline u32 pbdma_intr_0_lbdat_timeout_pending_f(void) +{ + return 0x800; +} +static inline u32 pbdma_intr_0_lbdat_extra_pending_f(void) +{ + return 0x1000; +} +static inline u32 pbdma_intr_0_gpfifo_pending_f(void) +{ + return 0x2000; +} +static inline u32 pbdma_intr_0_gpptr_pending_f(void) +{ + return 0x4000; +} +static inline u32 pbdma_intr_0_gpentry_pending_f(void) +{ + return 0x8000; +} +static inline u32 pbdma_intr_0_gpcrc_pending_f(void) +{ + return 0x10000; +} +static inline u32 pbdma_intr_0_pbptr_pending_f(void) +{ + return 0x20000; +} +static inline u32 pbdma_intr_0_pbentry_pending_f(void) +{ + return 0x40000; +} +static inline u32 pbdma_intr_0_pbcrc_pending_f(void) +{ + return 0x80000; +} +static inline u32 pbdma_intr_0_xbarconnect_pending_f(void) +{ + return 0x100000; +} +static inline u32 pbdma_intr_0_method_pending_f(void) +{ + return 0x200000; +} 
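The interrupt definitions above follow the same convention: pbdma_intr_0_r(i) supplies the per-PBDMA offset (each instance is spaced 8192 bytes apart), and each single-bit _pending_f() constant is tested with a bitwise AND against the word read back from that register. A minimal sketch, assuming only the helpers defined above; example_pbdma_gp_errors() is a hypothetical name used purely for illustration.

static inline u32 example_pbdma_gp_errors(u32 intr_0)
{
	/* Mask the GPFIFO-related error bits out of a value read from
	 * pbdma_intr_0_r(i); nonzero means one of them is pending. */
	return intr_0 & (pbdma_intr_0_gpfifo_pending_f() |
			 pbdma_intr_0_gpptr_pending_f() |
			 pbdma_intr_0_gpentry_pending_f() |
			 pbdma_intr_0_gpcrc_pending_f());
}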
+static inline u32 pbdma_intr_0_methodcrc_pending_f(void) +{ + return 0x400000; +} +static inline u32 pbdma_intr_0_device_pending_f(void) +{ + return 0x800000; +} +static inline u32 pbdma_intr_0_semaphore_pending_f(void) +{ + return 0x2000000; +} +static inline u32 pbdma_intr_0_acquire_pending_f(void) +{ + return 0x4000000; +} +static inline u32 pbdma_intr_0_pri_pending_f(void) +{ + return 0x8000000; +} +static inline u32 pbdma_intr_0_no_ctxsw_seg_pending_f(void) +{ + return 0x20000000; +} +static inline u32 pbdma_intr_0_pbseg_pending_f(void) +{ + return 0x40000000; +} +static inline u32 pbdma_intr_0_signature_pending_f(void) +{ + return 0x80000000; +} +static inline u32 pbdma_intr_1_r(u32 i) +{ + return 0x00040148 + i*8192; +} +static inline u32 pbdma_intr_en_0_r(u32 i) +{ + return 0x0004010c + i*8192; +} +static inline u32 pbdma_intr_en_0_lbreq_enabled_f(void) +{ + return 0x100; +} +static inline u32 pbdma_intr_en_1_r(u32 i) +{ + return 0x0004014c + i*8192; +} +static inline u32 pbdma_intr_stall_r(u32 i) +{ + return 0x0004013c + i*8192; +} +static inline u32 pbdma_intr_stall_lbreq_enabled_f(void) +{ + return 0x100; +} +static inline u32 pbdma_udma_nop_r(void) +{ + return 0x00000008; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_pri_ringmaster_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pri_ringmaster_gk20a.h new file mode 100644 index 00000000..d4007613 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_pri_ringmaster_gk20a.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
+ */ +#ifndef _hw_pri_ringmaster_gk20a_h_ +#define _hw_pri_ringmaster_gk20a_h_ + +static inline u32 pri_ringmaster_command_r(void) +{ + return 0x0012004c; +} +static inline u32 pri_ringmaster_command_cmd_m(void) +{ + return 0x3f << 0; +} +static inline u32 pri_ringmaster_command_cmd_v(u32 r) +{ + return (r >> 0) & 0x3f; +} +static inline u32 pri_ringmaster_command_cmd_no_cmd_v(void) +{ + return 0x00000000; +} +static inline u32 pri_ringmaster_command_cmd_start_ring_f(void) +{ + return 0x1; +} +static inline u32 pri_ringmaster_command_cmd_ack_interrupt_f(void) +{ + return 0x2; +} +static inline u32 pri_ringmaster_command_cmd_enumerate_stations_f(void) +{ + return 0x3; +} +static inline u32 pri_ringmaster_command_cmd_enumerate_stations_bc_grp_all_f(void) +{ + return 0x0; +} +static inline u32 pri_ringmaster_command_data_r(void) +{ + return 0x00120048; +} +static inline u32 pri_ringmaster_start_results_r(void) +{ + return 0x00120050; +} +static inline u32 pri_ringmaster_start_results_connectivity_v(u32 r) +{ + return (r >> 0) & 0x1; +} +static inline u32 pri_ringmaster_start_results_connectivity_pass_v(void) +{ + return 0x00000001; +} +static inline u32 pri_ringmaster_intr_status0_r(void) +{ + return 0x00120058; +} +static inline u32 pri_ringmaster_intr_status1_r(void) +{ + return 0x0012005c; +} +static inline u32 pri_ringmaster_global_ctl_r(void) +{ + return 0x00120060; +} +static inline u32 pri_ringmaster_global_ctl_ring_reset_asserted_f(void) +{ + return 0x1; +} +static inline u32 pri_ringmaster_global_ctl_ring_reset_deasserted_f(void) +{ + return 0x0; +} +static inline u32 pri_ringmaster_enum_fbp_r(void) +{ + return 0x00120074; +} +static inline u32 pri_ringmaster_enum_fbp_count_v(u32 r) +{ + return (r >> 0) & 0x1f; +} +static inline u32 pri_ringmaster_enum_gpc_r(void) +{ + return 0x00120078; +} +static inline u32 pri_ringmaster_enum_gpc_count_v(u32 r) +{ + return (r >> 0) & 0x1f; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_fbp_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_fbp_gk20a.h new file mode 100644 index 00000000..db16a8de --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_fbp_gk20a.h @@ -0,0 +1,226 @@ +/* + * drivers/video/tegra/host/gk20a/hw_pri_ringstation_fbp_gk20a.h + * + * Copyright (c) 2012-2013, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + + /* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . 
+ * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ + +#ifndef __hw_pri_ringstation_fbp_gk20a_h__ +#define __hw_pri_ringstation_fbp_gk20a_h__ +/*This file is autogenerated. Do not edit. */ + +static inline u32 pri_ringstation_fbp_master_config_r(u32 i) +{ + return 0x00124300+((i)*4); +} +static inline u32 pri_ringstation_fbp_master_config__size_1_v(void) +{ + return 64; +} +static inline u32 pri_ringstation_fbp_master_config_timeout_s(void) +{ + return 18; +} +static inline u32 pri_ringstation_fbp_master_config_timeout_f(u32 v) +{ + return (v & 0x3ffff) << 0; +} +static inline u32 pri_ringstation_fbp_master_config_timeout_m(void) +{ + return 0x3ffff << 0; +} +static inline u32 pri_ringstation_fbp_master_config_timeout_v(u32 r) +{ + return (r >> 0) & 0x3ffff; +} +static inline u32 pri_ringstation_fbp_master_config_timeout_i_v(void) +{ + return 0x00000064; +} +static inline u32 pri_ringstation_fbp_master_config_timeout_i_f(void) +{ + return 0x64; +} +static inline u32 pri_ringstation_fbp_master_config_fs_action_s(void) +{ + return 1; +} +static inline u32 pri_ringstation_fbp_master_config_fs_action_f(u32 v) +{ + return (v & 0x1) << 30; +} +static inline u32 pri_ringstation_fbp_master_config_fs_action_m(void) +{ + return 0x1 << 30; +} +static inline u32 pri_ringstation_fbp_master_config_fs_action_v(u32 r) +{ + return (r >> 30) & 0x1; +} +static inline u32 pri_ringstation_fbp_master_config_fs_action_error_v(void) +{ + return 0x00000000; +} +static inline u32 pri_ringstation_fbp_master_config_fs_action_error_f(void) +{ + return 0x0; +} +static inline u32 pri_ringstation_fbp_master_config_fs_action_soldier_on_v(void) +{ + return 0x00000001; +} +static inline u32 pri_ringstation_fbp_master_config_fs_action_soldier_on_f(void) +{ + return 0x40000000; +} +static inline u32 pri_ringstation_fbp_master_config_reset_action_s(void) +{ + return 1; +} +static inline u32 pri_ringstation_fbp_master_config_reset_action_f(u32 v) +{ + return (v & 0x1) << 31; +} +static inline u32 pri_ringstation_fbp_master_config_reset_action_m(void) +{ + return 0x1 << 31; +} +static inline u32 pri_ringstation_fbp_master_config_reset_action_v(u32 r) +{ + return (r >> 31) & 0x1; +} +static inline u32 pri_ringstation_fbp_master_config_reset_action_error_v(void) +{ + return 0x00000000; +} +static inline u32 pri_ringstation_fbp_master_config_reset_action_error_f(void) +{ + return 0x0; +} +static inline u32 pri_ringstation_fbp_master_config_reset_action_soldier_on_v(void) +{ + return 0x00000001; +} +static inline u32 pri_ringstation_fbp_master_config_reset_action_soldier_on_f(void) +{ + return 0x80000000; +} +static inline u32 pri_ringstation_fbp_master_config_setup_clocks_s(void) +{ + return 3; +} +static inline u32 pri_ringstation_fbp_master_config_setup_clocks_f(u32 v) +{ + return (v & 0x7) << 20; +} +static inline u32 pri_ringstation_fbp_master_config_setup_clocks_m(void) +{ + return 0x7 << 20; +} +static inline u32 
pri_ringstation_fbp_master_config_setup_clocks_v(u32 r) +{ + return (r >> 20) & 0x7; +} +static inline u32 pri_ringstation_fbp_master_config_setup_clocks_i_v(void) +{ + return 0x00000000; +} +static inline u32 pri_ringstation_fbp_master_config_setup_clocks_i_f(void) +{ + return 0x0; +} +static inline u32 pri_ringstation_fbp_master_config_wait_clocks_s(void) +{ + return 3; +} +static inline u32 pri_ringstation_fbp_master_config_wait_clocks_f(u32 v) +{ + return (v & 0x7) << 24; +} +static inline u32 pri_ringstation_fbp_master_config_wait_clocks_m(void) +{ + return 0x7 << 24; +} +static inline u32 pri_ringstation_fbp_master_config_wait_clocks_v(u32 r) +{ + return (r >> 24) & 0x7; +} +static inline u32 pri_ringstation_fbp_master_config_wait_clocks_i_v(void) +{ + return 0x00000000; +} +static inline u32 pri_ringstation_fbp_master_config_wait_clocks_i_f(void) +{ + return 0x0; +} +static inline u32 pri_ringstation_fbp_master_config_hold_clocks_s(void) +{ + return 3; +} +static inline u32 pri_ringstation_fbp_master_config_hold_clocks_f(u32 v) +{ + return (v & 0x7) << 27; +} +static inline u32 pri_ringstation_fbp_master_config_hold_clocks_m(void) +{ + return 0x7 << 27; +} +static inline u32 pri_ringstation_fbp_master_config_hold_clocks_v(u32 r) +{ + return (r >> 27) & 0x7; +} +static inline u32 pri_ringstation_fbp_master_config_hold_clocks_i_v(void) +{ + return 0x00000000; +} +static inline u32 pri_ringstation_fbp_master_config_hold_clocks_i_f(void) +{ + return 0x0; +} + +#endif /* __hw_pri_ringstation_fbp_gk20a_h__ */ diff --git a/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_gpc_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_gpc_gk20a.h new file mode 100644 index 00000000..e8aad933 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_gpc_gk20a.h @@ -0,0 +1,226 @@ +/* + * drivers/video/tegra/host/gk20a/hw_pri_ringstation_gpc_gk20a.h + * + * Copyright (c) 2012-2013, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + + /* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. 
+ * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ + +#ifndef __hw_pri_ringstation_gpc_gk20a_h__ +#define __hw_pri_ringstation_gpc_gk20a_h__ +/*This file is autogenerated. Do not edit. */ + +static inline u32 pri_ringstation_gpc_master_config_r(u32 i) +{ + return 0x00128300+((i)*4); +} +static inline u32 pri_ringstation_gpc_master_config__size_1_v(void) +{ + return 64; +} +static inline u32 pri_ringstation_gpc_master_config_timeout_s(void) +{ + return 18; +} +static inline u32 pri_ringstation_gpc_master_config_timeout_f(u32 v) +{ + return (v & 0x3ffff) << 0; +} +static inline u32 pri_ringstation_gpc_master_config_timeout_m(void) +{ + return 0x3ffff << 0; +} +static inline u32 pri_ringstation_gpc_master_config_timeout_v(u32 r) +{ + return (r >> 0) & 0x3ffff; +} +static inline u32 pri_ringstation_gpc_master_config_timeout_i_v(void) +{ + return 0x00000064; +} +static inline u32 pri_ringstation_gpc_master_config_timeout_i_f(void) +{ + return 0x64; +} +static inline u32 pri_ringstation_gpc_master_config_fs_action_s(void) +{ + return 1; +} +static inline u32 pri_ringstation_gpc_master_config_fs_action_f(u32 v) +{ + return (v & 0x1) << 30; +} +static inline u32 pri_ringstation_gpc_master_config_fs_action_m(void) +{ + return 0x1 << 30; +} +static inline u32 pri_ringstation_gpc_master_config_fs_action_v(u32 r) +{ + return (r >> 30) & 0x1; +} +static inline u32 pri_ringstation_gpc_master_config_fs_action_error_v(void) +{ + return 0x00000000; +} +static inline u32 pri_ringstation_gpc_master_config_fs_action_error_f(void) +{ + return 0x0; +} +static inline u32 pri_ringstation_gpc_master_config_fs_action_soldier_on_v(void) +{ + return 0x00000001; +} +static inline u32 pri_ringstation_gpc_master_config_fs_action_soldier_on_f(void) +{ + return 0x40000000; +} +static inline u32 pri_ringstation_gpc_master_config_reset_action_s(void) +{ + return 1; +} +static inline u32 pri_ringstation_gpc_master_config_reset_action_f(u32 v) +{ + return (v & 0x1) << 31; +} +static inline u32 pri_ringstation_gpc_master_config_reset_action_m(void) +{ + return 0x1 << 31; +} +static inline u32 pri_ringstation_gpc_master_config_reset_action_v(u32 r) +{ + return (r >> 31) & 0x1; +} +static inline u32 pri_ringstation_gpc_master_config_reset_action_error_v(void) +{ + return 0x00000000; +} +static inline u32 pri_ringstation_gpc_master_config_reset_action_error_f(void) +{ + return 0x0; +} +static inline u32 pri_ringstation_gpc_master_config_reset_action_soldier_on_v(void) +{ + return 0x00000001; +} +static inline u32 pri_ringstation_gpc_master_config_reset_action_soldier_on_f(void) +{ + return 0x80000000; +} +static inline u32 pri_ringstation_gpc_master_config_setup_clocks_s(void) +{ + return 3; +} +static inline u32 pri_ringstation_gpc_master_config_setup_clocks_f(u32 v) +{ + return (v & 0x7) << 20; +} +static inline u32 pri_ringstation_gpc_master_config_setup_clocks_m(void) +{ + return 0x7 << 20; +} +static inline u32 pri_ringstation_gpc_master_config_setup_clocks_v(u32 r) +{ + return (r >> 20) & 0x7; +} +static inline u32 pri_ringstation_gpc_master_config_setup_clocks_i_v(void) +{ + return 0x00000000; +} +static inline u32 pri_ringstation_gpc_master_config_setup_clocks_i_f(void) +{ + return 0x0; +} +static inline u32 
pri_ringstation_gpc_master_config_wait_clocks_s(void) +{ + return 3; +} +static inline u32 pri_ringstation_gpc_master_config_wait_clocks_f(u32 v) +{ + return (v & 0x7) << 24; +} +static inline u32 pri_ringstation_gpc_master_config_wait_clocks_m(void) +{ + return 0x7 << 24; +} +static inline u32 pri_ringstation_gpc_master_config_wait_clocks_v(u32 r) +{ + return (r >> 24) & 0x7; +} +static inline u32 pri_ringstation_gpc_master_config_wait_clocks_i_v(void) +{ + return 0x00000000; +} +static inline u32 pri_ringstation_gpc_master_config_wait_clocks_i_f(void) +{ + return 0x0; +} +static inline u32 pri_ringstation_gpc_master_config_hold_clocks_s(void) +{ + return 3; +} +static inline u32 pri_ringstation_gpc_master_config_hold_clocks_f(u32 v) +{ + return (v & 0x7) << 27; +} +static inline u32 pri_ringstation_gpc_master_config_hold_clocks_m(void) +{ + return 0x7 << 27; +} +static inline u32 pri_ringstation_gpc_master_config_hold_clocks_v(u32 r) +{ + return (r >> 27) & 0x7; +} +static inline u32 pri_ringstation_gpc_master_config_hold_clocks_i_v(void) +{ + return 0x00000000; +} +static inline u32 pri_ringstation_gpc_master_config_hold_clocks_i_f(void) +{ + return 0x0; +} + +#endif /* __hw_pri_ringstation_gpc_gk20a_h__ */ diff --git a/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_sys_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_sys_gk20a.h new file mode 100644 index 00000000..c281dd54 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_sys_gk20a.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
+ */ +#ifndef _hw_pri_ringstation_sys_gk20a_h_ +#define _hw_pri_ringstation_sys_gk20a_h_ + +static inline u32 pri_ringstation_sys_master_config_r(u32 i) +{ + return 0x00122300 + i*4; +} +static inline u32 pri_ringstation_sys_decode_config_r(void) +{ + return 0x00122204; +} +static inline u32 pri_ringstation_sys_decode_config_ring_m(void) +{ + return 0x7 << 0; +} +static inline u32 pri_ringstation_sys_decode_config_ring_drop_on_ring_not_started_f(void) +{ + return 0x1; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h new file mode 100644 index 00000000..93c55c30 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
+ */ +#ifndef _hw_proj_gk20a_h_ +#define _hw_proj_gk20a_h_ + +static inline u32 proj_gpc_base_v(void) +{ + return 0x00500000; +} +static inline u32 proj_gpc_shared_base_v(void) +{ + return 0x00418000; +} +static inline u32 proj_gpc_stride_v(void) +{ + return 0x00008000; +} +static inline u32 proj_ltc_stride_v(void) +{ + return 0x00002000; +} +static inline u32 proj_lts_stride_v(void) +{ + return 0x00000400; +} +static inline u32 proj_ppc_in_gpc_base_v(void) +{ + return 0x00003000; +} +static inline u32 proj_ppc_in_gpc_stride_v(void) +{ + return 0x00000200; +} +static inline u32 proj_rop_base_v(void) +{ + return 0x00410000; +} +static inline u32 proj_rop_shared_base_v(void) +{ + return 0x00408800; +} +static inline u32 proj_rop_stride_v(void) +{ + return 0x00000400; +} +static inline u32 proj_tpc_in_gpc_base_v(void) +{ + return 0x00004000; +} +static inline u32 proj_tpc_in_gpc_stride_v(void) +{ + return 0x00000800; +} +static inline u32 proj_tpc_in_gpc_shared_base_v(void) +{ + return 0x00001800; +} +static inline u32 proj_host_num_pbdma_v(void) +{ + return 0x00000001; +} +static inline u32 proj_scal_litter_num_tpc_per_gpc_v(void) +{ + return 0x00000001; +} +static inline u32 proj_scal_litter_num_fbps_v(void) +{ + return 0x00000001; +} +static inline u32 proj_scal_litter_num_gpcs_v(void) +{ + return 0x00000001; +} +static inline u32 proj_scal_litter_num_pes_per_gpc_v(void) +{ + return 0x00000001; +} +static inline u32 proj_scal_litter_num_tpcs_per_pes_v(void) +{ + return 0x00000001; +} +static inline u32 proj_scal_litter_num_zcull_banks_v(void) +{ + return 0x00000004; +} +static inline u32 proj_scal_max_gpcs_v(void) +{ + return 0x00000020; +} +static inline u32 proj_scal_max_tpc_per_gpc_v(void) +{ + return 0x00000008; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_pwr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pwr_gk20a.h new file mode 100644 index 00000000..d7d26b80 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_pwr_gk20a.h @@ -0,0 +1,737 @@ +/* + * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . 
+ * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ +#ifndef _hw_pwr_gk20a_h_ +#define _hw_pwr_gk20a_h_ + +static inline u32 pwr_falcon_irqsset_r(void) +{ + return 0x0010a000; +} +static inline u32 pwr_falcon_irqsset_swgen0_set_f(void) +{ + return 0x40; +} +static inline u32 pwr_falcon_irqsclr_r(void) +{ + return 0x0010a004; +} +static inline u32 pwr_falcon_irqstat_r(void) +{ + return 0x0010a008; +} +static inline u32 pwr_falcon_irqstat_halt_true_f(void) +{ + return 0x10; +} +static inline u32 pwr_falcon_irqstat_exterr_true_f(void) +{ + return 0x20; +} +static inline u32 pwr_falcon_irqstat_swgen0_true_f(void) +{ + return 0x40; +} +static inline u32 pwr_falcon_irqmode_r(void) +{ + return 0x0010a00c; +} +static inline u32 pwr_falcon_irqmset_r(void) +{ + return 0x0010a010; +} +static inline u32 pwr_falcon_irqmset_gptmr_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 pwr_falcon_irqmset_wdtmr_f(u32 v) +{ + return (v & 0x1) << 1; +} +static inline u32 pwr_falcon_irqmset_mthd_f(u32 v) +{ + return (v & 0x1) << 2; +} +static inline u32 pwr_falcon_irqmset_ctxsw_f(u32 v) +{ + return (v & 0x1) << 3; +} +static inline u32 pwr_falcon_irqmset_halt_f(u32 v) +{ + return (v & 0x1) << 4; +} +static inline u32 pwr_falcon_irqmset_exterr_f(u32 v) +{ + return (v & 0x1) << 5; +} +static inline u32 pwr_falcon_irqmset_swgen0_f(u32 v) +{ + return (v & 0x1) << 6; +} +static inline u32 pwr_falcon_irqmset_swgen1_f(u32 v) +{ + return (v & 0x1) << 7; +} +static inline u32 pwr_falcon_irqmclr_r(void) +{ + return 0x0010a014; +} +static inline u32 pwr_falcon_irqmclr_gptmr_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 pwr_falcon_irqmclr_wdtmr_f(u32 v) +{ + return (v & 0x1) << 1; +} +static inline u32 pwr_falcon_irqmclr_mthd_f(u32 v) +{ + return (v & 0x1) << 2; +} +static inline u32 pwr_falcon_irqmclr_ctxsw_f(u32 v) +{ + return (v & 0x1) << 3; +} +static inline u32 pwr_falcon_irqmclr_halt_f(u32 v) +{ + return (v & 0x1) << 4; +} +static inline u32 pwr_falcon_irqmclr_exterr_f(u32 v) +{ + return (v & 0x1) << 5; +} +static inline u32 pwr_falcon_irqmclr_swgen0_f(u32 v) +{ + return (v & 0x1) << 6; +} +static inline u32 pwr_falcon_irqmclr_swgen1_f(u32 v) +{ + return (v & 0x1) << 7; +} +static inline u32 pwr_falcon_irqmclr_ext_f(u32 v) +{ + return (v & 0xff) << 8; +} +static inline u32 pwr_falcon_irqmask_r(void) +{ + return 0x0010a018; +} +static inline u32 pwr_falcon_irqdest_r(void) +{ + return 0x0010a01c; +} +static inline u32 pwr_falcon_irqdest_host_gptmr_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 pwr_falcon_irqdest_host_wdtmr_f(u32 v) +{ + return (v & 0x1) << 1; +} +static inline u32 pwr_falcon_irqdest_host_mthd_f(u32 v) +{ + return (v & 0x1) << 2; +} +static inline u32 pwr_falcon_irqdest_host_ctxsw_f(u32 v) +{ + return (v & 0x1) << 3; +} +static inline u32 pwr_falcon_irqdest_host_halt_f(u32 v) +{ + return (v & 0x1) << 4; +} +static inline u32 pwr_falcon_irqdest_host_exterr_f(u32 v) +{ + return (v & 0x1) << 5; +} +static inline u32 pwr_falcon_irqdest_host_swgen0_f(u32 v) +{ + return (v & 0x1) << 6; +} +static inline u32 pwr_falcon_irqdest_host_swgen1_f(u32 v) +{ + return (v & 0x1) << 7; 
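/*
 * Usage sketch: the pwr_falcon_irq*_f() helpers above are single-bit field
 * encoders meant to be OR'd together into one full register value before it
 * is written out. A minimal example, assuming a 32-bit register write helper
 * in the style of gk20a_writel(g, reg, val):
 *
 *   gk20a_writel(g, pwr_falcon_irqmset_r(),
 *                pwr_falcon_irqmset_gptmr_f(1)  |
 *                pwr_falcon_irqmset_wdtmr_f(1)  |
 *                pwr_falcon_irqmset_halt_f(1)   |
 *                pwr_falcon_irqmset_exterr_f(1) |
 *                pwr_falcon_irqmset_swgen0_f(1) |
 *                pwr_falcon_irqmset_swgen1_f(1));
 */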
+} +static inline u32 pwr_falcon_irqdest_host_ext_f(u32 v) +{ + return (v & 0xff) << 8; +} +static inline u32 pwr_falcon_irqdest_target_gptmr_f(u32 v) +{ + return (v & 0x1) << 16; +} +static inline u32 pwr_falcon_irqdest_target_wdtmr_f(u32 v) +{ + return (v & 0x1) << 17; +} +static inline u32 pwr_falcon_irqdest_target_mthd_f(u32 v) +{ + return (v & 0x1) << 18; +} +static inline u32 pwr_falcon_irqdest_target_ctxsw_f(u32 v) +{ + return (v & 0x1) << 19; +} +static inline u32 pwr_falcon_irqdest_target_halt_f(u32 v) +{ + return (v & 0x1) << 20; +} +static inline u32 pwr_falcon_irqdest_target_exterr_f(u32 v) +{ + return (v & 0x1) << 21; +} +static inline u32 pwr_falcon_irqdest_target_swgen0_f(u32 v) +{ + return (v & 0x1) << 22; +} +static inline u32 pwr_falcon_irqdest_target_swgen1_f(u32 v) +{ + return (v & 0x1) << 23; +} +static inline u32 pwr_falcon_irqdest_target_ext_f(u32 v) +{ + return (v & 0xff) << 24; +} +static inline u32 pwr_falcon_curctx_r(void) +{ + return 0x0010a050; +} +static inline u32 pwr_falcon_nxtctx_r(void) +{ + return 0x0010a054; +} +static inline u32 pwr_falcon_mailbox0_r(void) +{ + return 0x0010a040; +} +static inline u32 pwr_falcon_mailbox1_r(void) +{ + return 0x0010a044; +} +static inline u32 pwr_falcon_itfen_r(void) +{ + return 0x0010a048; +} +static inline u32 pwr_falcon_itfen_ctxen_enable_f(void) +{ + return 0x1; +} +static inline u32 pwr_falcon_idlestate_r(void) +{ + return 0x0010a04c; +} +static inline u32 pwr_falcon_idlestate_falcon_busy_v(u32 r) +{ + return (r >> 0) & 0x1; +} +static inline u32 pwr_falcon_idlestate_ext_busy_v(u32 r) +{ + return (r >> 1) & 0x7fff; +} +static inline u32 pwr_falcon_os_r(void) +{ + return 0x0010a080; +} +static inline u32 pwr_falcon_engctl_r(void) +{ + return 0x0010a0a4; +} +static inline u32 pwr_falcon_cpuctl_r(void) +{ + return 0x0010a100; +} +static inline u32 pwr_falcon_cpuctl_startcpu_f(u32 v) +{ + return (v & 0x1) << 1; +} +static inline u32 pwr_falcon_bootvec_r(void) +{ + return 0x0010a104; +} +static inline u32 pwr_falcon_bootvec_vec_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 pwr_falcon_dmactl_r(void) +{ + return 0x0010a10c; +} +static inline u32 pwr_falcon_dmactl_dmem_scrubbing_m(void) +{ + return 0x1 << 1; +} +static inline u32 pwr_falcon_dmactl_imem_scrubbing_m(void) +{ + return 0x1 << 2; +} +static inline u32 pwr_falcon_hwcfg_r(void) +{ + return 0x0010a108; +} +static inline u32 pwr_falcon_hwcfg_imem_size_v(u32 r) +{ + return (r >> 0) & 0x1ff; +} +static inline u32 pwr_falcon_hwcfg_dmem_size_v(u32 r) +{ + return (r >> 9) & 0x1ff; +} +static inline u32 pwr_falcon_dmatrfbase_r(void) +{ + return 0x0010a110; +} +static inline u32 pwr_falcon_dmatrfmoffs_r(void) +{ + return 0x0010a114; +} +static inline u32 pwr_falcon_dmatrfcmd_r(void) +{ + return 0x0010a118; +} +static inline u32 pwr_falcon_dmatrfcmd_imem_f(u32 v) +{ + return (v & 0x1) << 4; +} +static inline u32 pwr_falcon_dmatrfcmd_write_f(u32 v) +{ + return (v & 0x1) << 5; +} +static inline u32 pwr_falcon_dmatrfcmd_size_f(u32 v) +{ + return (v & 0x7) << 8; +} +static inline u32 pwr_falcon_dmatrfcmd_ctxdma_f(u32 v) +{ + return (v & 0x7) << 12; +} +static inline u32 pwr_falcon_dmatrffboffs_r(void) +{ + return 0x0010a11c; +} +static inline u32 pwr_falcon_exterraddr_r(void) +{ + return 0x0010a168; +} +static inline u32 pwr_falcon_exterrstat_r(void) +{ + return 0x0010a16c; +} +static inline u32 pwr_falcon_exterrstat_valid_m(void) +{ + return 0x1 << 31; +} +static inline u32 pwr_falcon_exterrstat_valid_v(u32 r) +{ + return (r >> 31) & 0x1; +} +static inline 
u32 pwr_falcon_exterrstat_valid_true_v(void) +{ + return 0x00000001; +} +static inline u32 pwr_pmu_falcon_icd_cmd_r(void) +{ + return 0x0010a200; +} +static inline u32 pwr_pmu_falcon_icd_cmd_opc_s(void) +{ + return 4; +} +static inline u32 pwr_pmu_falcon_icd_cmd_opc_f(u32 v) +{ + return (v & 0xf) << 0; +} +static inline u32 pwr_pmu_falcon_icd_cmd_opc_m(void) +{ + return 0xf << 0; +} +static inline u32 pwr_pmu_falcon_icd_cmd_opc_v(u32 r) +{ + return (r >> 0) & 0xf; +} +static inline u32 pwr_pmu_falcon_icd_cmd_opc_rreg_f(void) +{ + return 0x8; +} +static inline u32 pwr_pmu_falcon_icd_cmd_opc_rstat_f(void) +{ + return 0xe; +} +static inline u32 pwr_pmu_falcon_icd_cmd_idx_f(u32 v) +{ + return (v & 0x1f) << 8; +} +static inline u32 pwr_pmu_falcon_icd_rdata_r(void) +{ + return 0x0010a20c; +} +static inline u32 pwr_falcon_dmemc_r(u32 i) +{ + return 0x0010a1c0 + i*8; +} +static inline u32 pwr_falcon_dmemc_offs_f(u32 v) +{ + return (v & 0x3f) << 2; +} +static inline u32 pwr_falcon_dmemc_offs_m(void) +{ + return 0x3f << 2; +} +static inline u32 pwr_falcon_dmemc_blk_f(u32 v) +{ + return (v & 0xff) << 8; +} +static inline u32 pwr_falcon_dmemc_blk_m(void) +{ + return 0xff << 8; +} +static inline u32 pwr_falcon_dmemc_aincw_f(u32 v) +{ + return (v & 0x1) << 24; +} +static inline u32 pwr_falcon_dmemc_aincr_f(u32 v) +{ + return (v & 0x1) << 25; +} +static inline u32 pwr_falcon_dmemd_r(u32 i) +{ + return 0x0010a1c4 + i*8; +} +static inline u32 pwr_pmu_new_instblk_r(void) +{ + return 0x0010a480; +} +static inline u32 pwr_pmu_new_instblk_ptr_f(u32 v) +{ + return (v & 0xfffffff) << 0; +} +static inline u32 pwr_pmu_new_instblk_target_fb_f(void) +{ + return 0x0; +} +static inline u32 pwr_pmu_new_instblk_target_sys_coh_f(void) +{ + return 0x20000000; +} +static inline u32 pwr_pmu_new_instblk_valid_f(u32 v) +{ + return (v & 0x1) << 30; +} +static inline u32 pwr_pmu_mutex_id_r(void) +{ + return 0x0010a488; +} +static inline u32 pwr_pmu_mutex_id_value_v(u32 r) +{ + return (r >> 0) & 0xff; +} +static inline u32 pwr_pmu_mutex_id_value_init_v(void) +{ + return 0x00000000; +} +static inline u32 pwr_pmu_mutex_id_value_not_avail_v(void) +{ + return 0x000000ff; +} +static inline u32 pwr_pmu_mutex_id_release_r(void) +{ + return 0x0010a48c; +} +static inline u32 pwr_pmu_mutex_id_release_value_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 pwr_pmu_mutex_id_release_value_m(void) +{ + return 0xff << 0; +} +static inline u32 pwr_pmu_mutex_id_release_value_init_v(void) +{ + return 0x00000000; +} +static inline u32 pwr_pmu_mutex_id_release_value_init_f(void) +{ + return 0x0; +} +static inline u32 pwr_pmu_mutex_r(u32 i) +{ + return 0x0010a580 + i*4; +} +static inline u32 pwr_pmu_mutex__size_1_v(void) +{ + return 0x00000010; +} +static inline u32 pwr_pmu_mutex_value_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 pwr_pmu_mutex_value_v(u32 r) +{ + return (r >> 0) & 0xff; +} +static inline u32 pwr_pmu_mutex_value_initial_lock_f(void) +{ + return 0x0; +} +static inline u32 pwr_pmu_queue_head_r(u32 i) +{ + return 0x0010a4a0 + i*4; +} +static inline u32 pwr_pmu_queue_head__size_1_v(void) +{ + return 0x00000004; +} +static inline u32 pwr_pmu_queue_head_address_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 pwr_pmu_queue_head_address_v(u32 r) +{ + return (r >> 0) & 0xffffffff; +} +static inline u32 pwr_pmu_queue_tail_r(u32 i) +{ + return 0x0010a4b0 + i*4; +} +static inline u32 pwr_pmu_queue_tail__size_1_v(void) +{ + return 0x00000004; +} +static inline u32 pwr_pmu_queue_tail_address_f(u32 v) +{ + 
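/*
 * Usage sketch: indexed registers such as pwr_pmu_queue_head_r(i) pair an
 * _address_f() encoder for writes with an _address_v() decoder for reads.
 * A minimal example, assuming gk20a_readl()/gk20a_writel() style helpers
 * and a hypothetical queue index 'qid':
 *
 *   u32 head = pwr_pmu_queue_head_address_v(
 *                  gk20a_readl(g, pwr_pmu_queue_head_r(qid)));
 *   gk20a_writel(g, pwr_pmu_queue_head_r(qid),
 *                pwr_pmu_queue_head_address_f(head));
 */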
return (v & 0xffffffff) << 0; +} +static inline u32 pwr_pmu_queue_tail_address_v(u32 r) +{ + return (r >> 0) & 0xffffffff; +} +static inline u32 pwr_pmu_msgq_head_r(void) +{ + return 0x0010a4c8; +} +static inline u32 pwr_pmu_msgq_head_val_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 pwr_pmu_msgq_head_val_v(u32 r) +{ + return (r >> 0) & 0xffffffff; +} +static inline u32 pwr_pmu_msgq_tail_r(void) +{ + return 0x0010a4cc; +} +static inline u32 pwr_pmu_msgq_tail_val_f(u32 v) +{ + return (v & 0xffffffff) << 0; +} +static inline u32 pwr_pmu_msgq_tail_val_v(u32 r) +{ + return (r >> 0) & 0xffffffff; +} +static inline u32 pwr_pmu_idle_mask_r(u32 i) +{ + return 0x0010a504 + i*16; +} +static inline u32 pwr_pmu_idle_mask_gr_enabled_f(void) +{ + return 0x1; +} +static inline u32 pwr_pmu_idle_mask_ce_2_enabled_f(void) +{ + return 0x200000; +} +static inline u32 pwr_pmu_idle_count_r(u32 i) +{ + return 0x0010a508 + i*16; +} +static inline u32 pwr_pmu_idle_count_value_f(u32 v) +{ + return (v & 0x7fffffff) << 0; +} +static inline u32 pwr_pmu_idle_count_value_v(u32 r) +{ + return (r >> 0) & 0x7fffffff; +} +static inline u32 pwr_pmu_idle_count_reset_f(u32 v) +{ + return (v & 0x1) << 31; +} +static inline u32 pwr_pmu_idle_ctrl_r(u32 i) +{ + return 0x0010a50c + i*16; +} +static inline u32 pwr_pmu_idle_ctrl_value_m(void) +{ + return 0x3 << 0; +} +static inline u32 pwr_pmu_idle_ctrl_value_busy_f(void) +{ + return 0x2; +} +static inline u32 pwr_pmu_idle_ctrl_value_always_f(void) +{ + return 0x3; +} +static inline u32 pwr_pmu_idle_ctrl_filter_m(void) +{ + return 0x1 << 2; +} +static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void) +{ + return 0x0; +} +static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) +{ + return 0x0010a9f0 + i*8; +} +static inline u32 pwr_pmu_idle_mask_1_supp_r(u32 i) +{ + return 0x0010a9f4 + i*8; +} +static inline u32 pwr_pmu_idle_ctrl_supp_r(u32 i) +{ + return 0x0010aa30 + i*8; +} +static inline u32 pwr_pmu_debug_r(u32 i) +{ + return 0x0010a5c0 + i*4; +} +static inline u32 pwr_pmu_debug__size_1_v(void) +{ + return 0x00000004; +} +static inline u32 pwr_pmu_mailbox_r(u32 i) +{ + return 0x0010a450 + i*4; +} +static inline u32 pwr_pmu_mailbox__size_1_v(void) +{ + return 0x0000000c; +} +static inline u32 pwr_pmu_bar0_addr_r(void) +{ + return 0x0010a7a0; +} +static inline u32 pwr_pmu_bar0_data_r(void) +{ + return 0x0010a7a4; +} +static inline u32 pwr_pmu_bar0_ctl_r(void) +{ + return 0x0010a7ac; +} +static inline u32 pwr_pmu_bar0_timeout_r(void) +{ + return 0x0010a7a8; +} +static inline u32 pwr_pmu_bar0_fecs_error_r(void) +{ + return 0x0010a988; +} +static inline u32 pwr_pmu_bar0_error_status_r(void) +{ + return 0x0010a7b0; +} +static inline u32 pwr_pmu_pg_idlefilth_r(u32 i) +{ + return 0x0010a6c0 + i*4; +} +static inline u32 pwr_pmu_pg_ppuidlefilth_r(u32 i) +{ + return 0x0010a6e8 + i*4; +} +static inline u32 pwr_pmu_pg_idle_cnt_r(u32 i) +{ + return 0x0010a710 + i*4; +} +static inline u32 pwr_pmu_pg_intren_r(u32 i) +{ + return 0x0010a760 + i*4; +} +static inline u32 pwr_fbif_transcfg_r(u32 i) +{ + return 0x0010a600 + i*4; +} +static inline u32 pwr_fbif_transcfg_target_local_fb_f(void) +{ + return 0x0; +} +static inline u32 pwr_fbif_transcfg_target_coherent_sysmem_f(void) +{ + return 0x1; +} +static inline u32 pwr_fbif_transcfg_target_noncoherent_sysmem_f(void) +{ + return 0x2; +} +static inline u32 pwr_fbif_transcfg_mem_type_s(void) +{ + return 1; +} +static inline u32 pwr_fbif_transcfg_mem_type_f(u32 v) +{ + return (v & 0x1) << 2; +} +static inline u32 
pwr_fbif_transcfg_mem_type_m(void) +{ + return 0x1 << 2; +} +static inline u32 pwr_fbif_transcfg_mem_type_v(u32 r) +{ + return (r >> 2) & 0x1; +} +static inline u32 pwr_fbif_transcfg_mem_type_virtual_f(void) +{ + return 0x0; +} +static inline u32 pwr_fbif_transcfg_mem_type_physical_f(void) +{ + return 0x4; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_ram_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ram_gk20a.h new file mode 100644 index 00000000..7eff3881 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_ram_gk20a.h @@ -0,0 +1,389 @@ +/* + * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
+ */ +#ifndef _hw_ram_gk20a_h_ +#define _hw_ram_gk20a_h_ + +static inline u32 ram_in_ramfc_s(void) +{ + return 4096; +} +static inline u32 ram_in_ramfc_w(void) +{ + return 0; +} +static inline u32 ram_in_page_dir_base_target_f(u32 v) +{ + return (v & 0x3) << 0; +} +static inline u32 ram_in_page_dir_base_target_w(void) +{ + return 128; +} +static inline u32 ram_in_page_dir_base_target_vid_mem_f(void) +{ + return 0x0; +} +static inline u32 ram_in_page_dir_base_vol_w(void) +{ + return 128; +} +static inline u32 ram_in_page_dir_base_vol_true_f(void) +{ + return 0x4; +} +static inline u32 ram_in_page_dir_base_lo_f(u32 v) +{ + return (v & 0xfffff) << 12; +} +static inline u32 ram_in_page_dir_base_lo_w(void) +{ + return 128; +} +static inline u32 ram_in_page_dir_base_hi_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 ram_in_page_dir_base_hi_w(void) +{ + return 129; +} +static inline u32 ram_in_adr_limit_lo_f(u32 v) +{ + return (v & 0xfffff) << 12; +} +static inline u32 ram_in_adr_limit_lo_w(void) +{ + return 130; +} +static inline u32 ram_in_adr_limit_hi_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 ram_in_adr_limit_hi_w(void) +{ + return 131; +} +static inline u32 ram_in_engine_cs_w(void) +{ + return 132; +} +static inline u32 ram_in_engine_cs_wfi_v(void) +{ + return 0x00000000; +} +static inline u32 ram_in_engine_cs_wfi_f(void) +{ + return 0x0; +} +static inline u32 ram_in_engine_cs_fg_v(void) +{ + return 0x00000001; +} +static inline u32 ram_in_engine_cs_fg_f(void) +{ + return 0x8; +} +static inline u32 ram_in_gr_cs_w(void) +{ + return 132; +} +static inline u32 ram_in_gr_cs_wfi_f(void) +{ + return 0x0; +} +static inline u32 ram_in_gr_wfi_target_w(void) +{ + return 132; +} +static inline u32 ram_in_gr_wfi_mode_w(void) +{ + return 132; +} +static inline u32 ram_in_gr_wfi_mode_physical_v(void) +{ + return 0x00000000; +} +static inline u32 ram_in_gr_wfi_mode_physical_f(void) +{ + return 0x0; +} +static inline u32 ram_in_gr_wfi_mode_virtual_v(void) +{ + return 0x00000001; +} +static inline u32 ram_in_gr_wfi_mode_virtual_f(void) +{ + return 0x4; +} +static inline u32 ram_in_gr_wfi_ptr_lo_f(u32 v) +{ + return (v & 0xfffff) << 12; +} +static inline u32 ram_in_gr_wfi_ptr_lo_w(void) +{ + return 132; +} +static inline u32 ram_in_gr_wfi_ptr_hi_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 ram_in_gr_wfi_ptr_hi_w(void) +{ + return 133; +} +static inline u32 ram_in_base_shift_v(void) +{ + return 0x0000000c; +} +static inline u32 ram_in_alloc_size_v(void) +{ + return 0x00001000; +} +static inline u32 ram_fc_size_val_v(void) +{ + return 0x00000200; +} +static inline u32 ram_fc_gp_put_w(void) +{ + return 0; +} +static inline u32 ram_fc_userd_w(void) +{ + return 2; +} +static inline u32 ram_fc_userd_hi_w(void) +{ + return 3; +} +static inline u32 ram_fc_signature_w(void) +{ + return 4; +} +static inline u32 ram_fc_gp_get_w(void) +{ + return 5; +} +static inline u32 ram_fc_pb_get_w(void) +{ + return 6; +} +static inline u32 ram_fc_pb_get_hi_w(void) +{ + return 7; +} +static inline u32 ram_fc_pb_top_level_get_w(void) +{ + return 8; +} +static inline u32 ram_fc_pb_top_level_get_hi_w(void) +{ + return 9; +} +static inline u32 ram_fc_acquire_w(void) +{ + return 12; +} +static inline u32 ram_fc_semaphorea_w(void) +{ + return 14; +} +static inline u32 ram_fc_semaphoreb_w(void) +{ + return 15; +} +static inline u32 ram_fc_semaphorec_w(void) +{ + return 16; +} +static inline u32 ram_fc_semaphored_w(void) +{ + return 17; +} +static inline u32 ram_fc_gp_base_w(void) +{ + return 18; 
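/*
 * Usage sketch: unlike the *_r() register offsets, the ram_in_*_w() and
 * ram_fc_*_w() helpers in this file return word (4-byte) indices into a
 * memory-backed instance block / RAMFC rather than BAR0 offsets. A minimal
 * example showing only the word-indexing pattern, assuming the RAMFC has
 * been CPU-mapped as a hypothetical 'u32 *ramfc' array and 'gpfifo_base'
 * is a 64-bit value (the actual field encodings for these words come from
 * the pbdma headers):
 *
 *   ramfc[ram_fc_gp_base_w()]    = (u32)(gpfifo_base & 0xffffffff);
 *   ramfc[ram_fc_gp_base_hi_w()] = (u32)(gpfifo_base >> 32);
 */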
+} +static inline u32 ram_fc_gp_base_hi_w(void) +{ + return 19; +} +static inline u32 ram_fc_gp_fetch_w(void) +{ + return 20; +} +static inline u32 ram_fc_pb_fetch_w(void) +{ + return 21; +} +static inline u32 ram_fc_pb_fetch_hi_w(void) +{ + return 22; +} +static inline u32 ram_fc_pb_put_w(void) +{ + return 23; +} +static inline u32 ram_fc_pb_put_hi_w(void) +{ + return 24; +} +static inline u32 ram_fc_pb_header_w(void) +{ + return 33; +} +static inline u32 ram_fc_pb_count_w(void) +{ + return 34; +} +static inline u32 ram_fc_subdevice_w(void) +{ + return 37; +} +static inline u32 ram_fc_formats_w(void) +{ + return 39; +} +static inline u32 ram_fc_syncpointa_w(void) +{ + return 41; +} +static inline u32 ram_fc_syncpointb_w(void) +{ + return 42; +} +static inline u32 ram_fc_target_w(void) +{ + return 43; +} +static inline u32 ram_fc_hce_ctrl_w(void) +{ + return 57; +} +static inline u32 ram_fc_chid_w(void) +{ + return 58; +} +static inline u32 ram_fc_chid_id_f(u32 v) +{ + return (v & 0xfff) << 0; +} +static inline u32 ram_fc_chid_id_w(void) +{ + return 0; +} +static inline u32 ram_fc_eng_timeslice_w(void) +{ + return 62; +} +static inline u32 ram_fc_pb_timeslice_w(void) +{ + return 63; +} +static inline u32 ram_userd_base_shift_v(void) +{ + return 0x00000009; +} +static inline u32 ram_userd_chan_size_v(void) +{ + return 0x00000200; +} +static inline u32 ram_userd_put_w(void) +{ + return 16; +} +static inline u32 ram_userd_get_w(void) +{ + return 17; +} +static inline u32 ram_userd_ref_w(void) +{ + return 18; +} +static inline u32 ram_userd_put_hi_w(void) +{ + return 19; +} +static inline u32 ram_userd_ref_threshold_w(void) +{ + return 20; +} +static inline u32 ram_userd_top_level_get_w(void) +{ + return 22; +} +static inline u32 ram_userd_top_level_get_hi_w(void) +{ + return 23; +} +static inline u32 ram_userd_get_hi_w(void) +{ + return 24; +} +static inline u32 ram_userd_gp_get_w(void) +{ + return 34; +} +static inline u32 ram_userd_gp_put_w(void) +{ + return 35; +} +static inline u32 ram_userd_gp_top_level_get_w(void) +{ + return 22; +} +static inline u32 ram_userd_gp_top_level_get_hi_w(void) +{ + return 23; +} +static inline u32 ram_rl_entry_size_v(void) +{ + return 0x00000008; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h new file mode 100644 index 00000000..b1e6658d --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h @@ -0,0 +1,2150 @@ +/* + * drivers/video/tegra/host/gk20a/hw_sim_gk20a.h + * + * Copyright (c) 2012, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + + /* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . 
This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ + +#ifndef __hw_sim_gk20a_h__ +#define __hw_sim_gk20a_h__ +/*This file is autogenerated. Do not edit. */ + +static inline u32 sim_send_ring_r(void) +{ + return 0x00000000; +} +static inline u32 sim_send_ring_target_s(void) +{ + return 2; +} +static inline u32 sim_send_ring_target_f(u32 v) +{ + return (v & 0x3) << 0; +} +static inline u32 sim_send_ring_target_m(void) +{ + return 0x3 << 0; +} +static inline u32 sim_send_ring_target_v(u32 r) +{ + return (r >> 0) & 0x3; +} +static inline u32 sim_send_ring_target_phys_init_v(void) +{ + return 0x00000001; +} +static inline u32 sim_send_ring_target_phys_init_f(void) +{ + return 0x1; +} +static inline u32 sim_send_ring_target_phys__init_v(void) +{ + return 0x00000001; +} +static inline u32 sim_send_ring_target_phys__init_f(void) +{ + return 0x1; +} +static inline u32 sim_send_ring_target_phys__prod_v(void) +{ + return 0x00000001; +} +static inline u32 sim_send_ring_target_phys__prod_f(void) +{ + return 0x1; +} +static inline u32 sim_send_ring_target_phys_nvm_v(void) +{ + return 0x00000001; +} +static inline u32 sim_send_ring_target_phys_nvm_f(void) +{ + return 0x1; +} +static inline u32 sim_send_ring_target_phys_pci_v(void) +{ + return 0x00000002; +} +static inline u32 sim_send_ring_target_phys_pci_f(void) +{ + return 0x2; +} +static inline u32 sim_send_ring_target_phys_pci_coherent_v(void) +{ + return 0x00000003; +} +static inline u32 sim_send_ring_target_phys_pci_coherent_f(void) +{ + return 0x3; +} +static inline u32 sim_send_ring_status_s(void) +{ + return 1; +} +static inline u32 sim_send_ring_status_f(u32 v) +{ + return (v & 0x1) << 3; +} +static inline u32 sim_send_ring_status_m(void) +{ + return 0x1 << 3; +} +static inline u32 sim_send_ring_status_v(u32 r) +{ + return (r >> 3) & 0x1; +} +static inline u32 sim_send_ring_status_init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_send_ring_status_init_f(void) +{ + return 0x0; +} +static inline u32 sim_send_ring_status__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_send_ring_status__init_f(void) +{ + return 0x0; +} +static inline u32 sim_send_ring_status__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_send_ring_status__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_send_ring_status_invalid_v(void) +{ + return 0x00000000; +} +static inline u32 sim_send_ring_status_invalid_f(void) +{ + return 0x0; +} +static inline u32 sim_send_ring_status_valid_v(void) +{ + return 0x00000001; +} +static inline u32 sim_send_ring_status_valid_f(void) +{ + return 0x8; +} +static inline u32 sim_send_ring_size_s(void) +{ + return 2; +} +static inline u32 sim_send_ring_size_f(u32 v) +{ + return (v 
& 0x3) << 4; +} +static inline u32 sim_send_ring_size_m(void) +{ + return 0x3 << 4; +} +static inline u32 sim_send_ring_size_v(u32 r) +{ + return (r >> 4) & 0x3; +} +static inline u32 sim_send_ring_size_init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_send_ring_size_init_f(void) +{ + return 0x0; +} +static inline u32 sim_send_ring_size__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_send_ring_size__init_f(void) +{ + return 0x0; +} +static inline u32 sim_send_ring_size__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_send_ring_size__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_send_ring_size_4kb_v(void) +{ + return 0x00000000; +} +static inline u32 sim_send_ring_size_4kb_f(void) +{ + return 0x0; +} +static inline u32 sim_send_ring_size_8kb_v(void) +{ + return 0x00000001; +} +static inline u32 sim_send_ring_size_8kb_f(void) +{ + return 0x10; +} +static inline u32 sim_send_ring_size_12kb_v(void) +{ + return 0x00000002; +} +static inline u32 sim_send_ring_size_12kb_f(void) +{ + return 0x20; +} +static inline u32 sim_send_ring_size_16kb_v(void) +{ + return 0x00000003; +} +static inline u32 sim_send_ring_size_16kb_f(void) +{ + return 0x30; +} +static inline u32 sim_send_ring_gp_in_ring_s(void) +{ + return 1; +} +static inline u32 sim_send_ring_gp_in_ring_f(u32 v) +{ + return (v & 0x1) << 11; +} +static inline u32 sim_send_ring_gp_in_ring_m(void) +{ + return 0x1 << 11; +} +static inline u32 sim_send_ring_gp_in_ring_v(u32 r) +{ + return (r >> 11) & 0x1; +} +static inline u32 sim_send_ring_gp_in_ring__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_send_ring_gp_in_ring__init_f(void) +{ + return 0x0; +} +static inline u32 sim_send_ring_gp_in_ring__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_send_ring_gp_in_ring__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_send_ring_gp_in_ring_no_v(void) +{ + return 0x00000000; +} +static inline u32 sim_send_ring_gp_in_ring_no_f(void) +{ + return 0x0; +} +static inline u32 sim_send_ring_gp_in_ring_yes_v(void) +{ + return 0x00000001; +} +static inline u32 sim_send_ring_gp_in_ring_yes_f(void) +{ + return 0x800; +} +static inline u32 sim_send_ring_addr_lo_s(void) +{ + return 20; +} +static inline u32 sim_send_ring_addr_lo_f(u32 v) +{ + return (v & 0xfffff) << 12; +} +static inline u32 sim_send_ring_addr_lo_m(void) +{ + return 0xfffff << 12; +} +static inline u32 sim_send_ring_addr_lo_v(u32 r) +{ + return (r >> 12) & 0xfffff; +} +static inline u32 sim_send_ring_addr_lo__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_send_ring_addr_lo__init_f(void) +{ + return 0x0; +} +static inline u32 sim_send_ring_addr_lo__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_send_ring_addr_lo__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_send_ring_hi_r(void) +{ + return 0x00000004; +} +static inline u32 sim_send_ring_hi_addr_s(void) +{ + return 20; +} +static inline u32 sim_send_ring_hi_addr_f(u32 v) +{ + return (v & 0xfffff) << 0; +} +static inline u32 sim_send_ring_hi_addr_m(void) +{ + return 0xfffff << 0; +} +static inline u32 sim_send_ring_hi_addr_v(u32 r) +{ + return (r >> 0) & 0xfffff; +} +static inline u32 sim_send_ring_hi_addr__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_send_ring_hi_addr__init_f(void) +{ + return 0x0; +} +static inline u32 sim_send_ring_hi_addr__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_send_ring_hi_addr__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_send_put_r(void) +{ + 
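/*
 * Usage sketch: a full sim ring descriptor is composed by OR'ing the
 * constant *_f() values with the shifted address bits. A minimal example
 * for the send ring, assuming a hypothetical sim register write helper
 * sim_writel(g, reg, val) and a DMA address 'phys':
 *
 *   sim_writel(g, sim_send_ring_hi_r(),
 *              sim_send_ring_hi_addr_f((u32)(phys >> 32)));
 *   sim_writel(g, sim_send_ring_r(),
 *              sim_send_ring_target_phys_pci_coherent_f() |
 *              sim_send_ring_size_4kb_f()                 |
 *              sim_send_ring_addr_lo_f((u32)(phys >> 12)) |
 *              sim_send_ring_status_valid_f());
 */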
return 0x00000008; +} +static inline u32 sim_send_put_pointer_s(void) +{ + return 29; +} +static inline u32 sim_send_put_pointer_f(u32 v) +{ + return (v & 0x1fffffff) << 3; +} +static inline u32 sim_send_put_pointer_m(void) +{ + return 0x1fffffff << 3; +} +static inline u32 sim_send_put_pointer_v(u32 r) +{ + return (r >> 3) & 0x1fffffff; +} +static inline u32 sim_send_get_r(void) +{ + return 0x0000000c; +} +static inline u32 sim_send_get_pointer_s(void) +{ + return 29; +} +static inline u32 sim_send_get_pointer_f(u32 v) +{ + return (v & 0x1fffffff) << 3; +} +static inline u32 sim_send_get_pointer_m(void) +{ + return 0x1fffffff << 3; +} +static inline u32 sim_send_get_pointer_v(u32 r) +{ + return (r >> 3) & 0x1fffffff; +} +static inline u32 sim_recv_ring_r(void) +{ + return 0x00000010; +} +static inline u32 sim_recv_ring_target_s(void) +{ + return 2; +} +static inline u32 sim_recv_ring_target_f(u32 v) +{ + return (v & 0x3) << 0; +} +static inline u32 sim_recv_ring_target_m(void) +{ + return 0x3 << 0; +} +static inline u32 sim_recv_ring_target_v(u32 r) +{ + return (r >> 0) & 0x3; +} +static inline u32 sim_recv_ring_target_phys_init_v(void) +{ + return 0x00000001; +} +static inline u32 sim_recv_ring_target_phys_init_f(void) +{ + return 0x1; +} +static inline u32 sim_recv_ring_target_phys__init_v(void) +{ + return 0x00000001; +} +static inline u32 sim_recv_ring_target_phys__init_f(void) +{ + return 0x1; +} +static inline u32 sim_recv_ring_target_phys__prod_v(void) +{ + return 0x00000001; +} +static inline u32 sim_recv_ring_target_phys__prod_f(void) +{ + return 0x1; +} +static inline u32 sim_recv_ring_target_phys_nvm_v(void) +{ + return 0x00000001; +} +static inline u32 sim_recv_ring_target_phys_nvm_f(void) +{ + return 0x1; +} +static inline u32 sim_recv_ring_target_phys_pci_v(void) +{ + return 0x00000002; +} +static inline u32 sim_recv_ring_target_phys_pci_f(void) +{ + return 0x2; +} +static inline u32 sim_recv_ring_target_phys_pci_coherent_v(void) +{ + return 0x00000003; +} +static inline u32 sim_recv_ring_target_phys_pci_coherent_f(void) +{ + return 0x3; +} +static inline u32 sim_recv_ring_status_s(void) +{ + return 1; +} +static inline u32 sim_recv_ring_status_f(u32 v) +{ + return (v & 0x1) << 3; +} +static inline u32 sim_recv_ring_status_m(void) +{ + return 0x1 << 3; +} +static inline u32 sim_recv_ring_status_v(u32 r) +{ + return (r >> 3) & 0x1; +} +static inline u32 sim_recv_ring_status_init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_recv_ring_status_init_f(void) +{ + return 0x0; +} +static inline u32 sim_recv_ring_status__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_recv_ring_status__init_f(void) +{ + return 0x0; +} +static inline u32 sim_recv_ring_status__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_recv_ring_status__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_recv_ring_status_invalid_v(void) +{ + return 0x00000000; +} +static inline u32 sim_recv_ring_status_invalid_f(void) +{ + return 0x0; +} +static inline u32 sim_recv_ring_status_valid_v(void) +{ + return 0x00000001; +} +static inline u32 sim_recv_ring_status_valid_f(void) +{ + return 0x8; +} +static inline u32 sim_recv_ring_size_s(void) +{ + return 2; +} +static inline u32 sim_recv_ring_size_f(u32 v) +{ + return (v & 0x3) << 4; +} +static inline u32 sim_recv_ring_size_m(void) +{ + return 0x3 << 4; +} +static inline u32 sim_recv_ring_size_v(u32 r) +{ + return (r >> 4) & 0x3; +} +static inline u32 sim_recv_ring_size_init_v(void) +{ + return 0x00000000; +} +static inline 
u32 sim_recv_ring_size_init_f(void) +{ + return 0x0; +} +static inline u32 sim_recv_ring_size__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_recv_ring_size__init_f(void) +{ + return 0x0; +} +static inline u32 sim_recv_ring_size__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_recv_ring_size__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_recv_ring_size_4kb_v(void) +{ + return 0x00000000; +} +static inline u32 sim_recv_ring_size_4kb_f(void) +{ + return 0x0; +} +static inline u32 sim_recv_ring_size_8kb_v(void) +{ + return 0x00000001; +} +static inline u32 sim_recv_ring_size_8kb_f(void) +{ + return 0x10; +} +static inline u32 sim_recv_ring_size_12kb_v(void) +{ + return 0x00000002; +} +static inline u32 sim_recv_ring_size_12kb_f(void) +{ + return 0x20; +} +static inline u32 sim_recv_ring_size_16kb_v(void) +{ + return 0x00000003; +} +static inline u32 sim_recv_ring_size_16kb_f(void) +{ + return 0x30; +} +static inline u32 sim_recv_ring_gp_in_ring_s(void) +{ + return 1; +} +static inline u32 sim_recv_ring_gp_in_ring_f(u32 v) +{ + return (v & 0x1) << 11; +} +static inline u32 sim_recv_ring_gp_in_ring_m(void) +{ + return 0x1 << 11; +} +static inline u32 sim_recv_ring_gp_in_ring_v(u32 r) +{ + return (r >> 11) & 0x1; +} +static inline u32 sim_recv_ring_gp_in_ring__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_recv_ring_gp_in_ring__init_f(void) +{ + return 0x0; +} +static inline u32 sim_recv_ring_gp_in_ring__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_recv_ring_gp_in_ring__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_recv_ring_gp_in_ring_no_v(void) +{ + return 0x00000000; +} +static inline u32 sim_recv_ring_gp_in_ring_no_f(void) +{ + return 0x0; +} +static inline u32 sim_recv_ring_gp_in_ring_yes_v(void) +{ + return 0x00000001; +} +static inline u32 sim_recv_ring_gp_in_ring_yes_f(void) +{ + return 0x800; +} +static inline u32 sim_recv_ring_addr_lo_s(void) +{ + return 20; +} +static inline u32 sim_recv_ring_addr_lo_f(u32 v) +{ + return (v & 0xfffff) << 12; +} +static inline u32 sim_recv_ring_addr_lo_m(void) +{ + return 0xfffff << 12; +} +static inline u32 sim_recv_ring_addr_lo_v(u32 r) +{ + return (r >> 12) & 0xfffff; +} +static inline u32 sim_recv_ring_addr_lo__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_recv_ring_addr_lo__init_f(void) +{ + return 0x0; +} +static inline u32 sim_recv_ring_addr_lo__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_recv_ring_addr_lo__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_recv_ring_hi_r(void) +{ + return 0x00000014; +} +static inline u32 sim_recv_ring_hi_addr_s(void) +{ + return 20; +} +static inline u32 sim_recv_ring_hi_addr_f(u32 v) +{ + return (v & 0xfffff) << 0; +} +static inline u32 sim_recv_ring_hi_addr_m(void) +{ + return 0xfffff << 0; +} +static inline u32 sim_recv_ring_hi_addr_v(u32 r) +{ + return (r >> 0) & 0xfffff; +} +static inline u32 sim_recv_ring_hi_addr__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_recv_ring_hi_addr__init_f(void) +{ + return 0x0; +} +static inline u32 sim_recv_ring_hi_addr__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_recv_ring_hi_addr__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_recv_put_r(void) +{ + return 0x00000018; +} +static inline u32 sim_recv_put_pointer_s(void) +{ + return 11; +} +static inline u32 sim_recv_put_pointer_f(u32 v) +{ + return (v & 0x7ff) << 3; +} +static inline u32 sim_recv_put_pointer_m(void) +{ + return 0x7ff << 3; +} +static inline 
u32 sim_recv_put_pointer_v(u32 r) +{ + return (r >> 3) & 0x7ff; +} +static inline u32 sim_recv_get_r(void) +{ + return 0x0000001c; +} +static inline u32 sim_recv_get_pointer_s(void) +{ + return 11; +} +static inline u32 sim_recv_get_pointer_f(u32 v) +{ + return (v & 0x7ff) << 3; +} +static inline u32 sim_recv_get_pointer_m(void) +{ + return 0x7ff << 3; +} +static inline u32 sim_recv_get_pointer_v(u32 r) +{ + return (r >> 3) & 0x7ff; +} +static inline u32 sim_config_r(void) +{ + return 0x00000020; +} +static inline u32 sim_config_mode_s(void) +{ + return 1; +} +static inline u32 sim_config_mode_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 sim_config_mode_m(void) +{ + return 0x1 << 0; +} +static inline u32 sim_config_mode_v(u32 r) +{ + return (r >> 0) & 0x1; +} +static inline u32 sim_config_mode_disabled_v(void) +{ + return 0x00000000; +} +static inline u32 sim_config_mode_disabled_f(void) +{ + return 0x0; +} +static inline u32 sim_config_mode_enabled_v(void) +{ + return 0x00000001; +} +static inline u32 sim_config_mode_enabled_f(void) +{ + return 0x1; +} +static inline u32 sim_config_channels_s(void) +{ + return 7; +} +static inline u32 sim_config_channels_f(u32 v) +{ + return (v & 0x7f) << 1; +} +static inline u32 sim_config_channels_m(void) +{ + return 0x7f << 1; +} +static inline u32 sim_config_channels_v(u32 r) +{ + return (r >> 1) & 0x7f; +} +static inline u32 sim_config_channels_none_v(void) +{ + return 0x00000000; +} +static inline u32 sim_config_channels_none_f(void) +{ + return 0x0; +} +static inline u32 sim_config_cached_only_s(void) +{ + return 1; +} +static inline u32 sim_config_cached_only_f(u32 v) +{ + return (v & 0x1) << 8; +} +static inline u32 sim_config_cached_only_m(void) +{ + return 0x1 << 8; +} +static inline u32 sim_config_cached_only_v(u32 r) +{ + return (r >> 8) & 0x1; +} +static inline u32 sim_config_cached_only_disabled_v(void) +{ + return 0x00000000; +} +static inline u32 sim_config_cached_only_disabled_f(void) +{ + return 0x0; +} +static inline u32 sim_config_cached_only_enabled_v(void) +{ + return 0x00000001; +} +static inline u32 sim_config_cached_only_enabled_f(void) +{ + return 0x100; +} +static inline u32 sim_config_validity_s(void) +{ + return 2; +} +static inline u32 sim_config_validity_f(u32 v) +{ + return (v & 0x3) << 9; +} +static inline u32 sim_config_validity_m(void) +{ + return 0x3 << 9; +} +static inline u32 sim_config_validity_v(u32 r) +{ + return (r >> 9) & 0x3; +} +static inline u32 sim_config_validity__init_v(void) +{ + return 0x00000001; +} +static inline u32 sim_config_validity__init_f(void) +{ + return 0x200; +} +static inline u32 sim_config_validity_valid_v(void) +{ + return 0x00000001; +} +static inline u32 sim_config_validity_valid_f(void) +{ + return 0x200; +} +static inline u32 sim_config_simulation_s(void) +{ + return 2; +} +static inline u32 sim_config_simulation_f(u32 v) +{ + return (v & 0x3) << 12; +} +static inline u32 sim_config_simulation_m(void) +{ + return 0x3 << 12; +} +static inline u32 sim_config_simulation_v(u32 r) +{ + return (r >> 12) & 0x3; +} +static inline u32 sim_config_simulation_disabled_v(void) +{ + return 0x00000000; +} +static inline u32 sim_config_simulation_disabled_f(void) +{ + return 0x0; +} +static inline u32 sim_config_simulation_fmodel_v(void) +{ + return 0x00000001; +} +static inline u32 sim_config_simulation_fmodel_f(void) +{ + return 0x1000; +} +static inline u32 sim_config_simulation_rtlsim_v(void) +{ + return 0x00000002; +} +static inline u32 sim_config_simulation_rtlsim_f(void) +{ + 
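/*
 * Usage sketch: the _v() decoders return unshifted field values that can be
 * compared directly against the corresponding *_v() constants. A minimal
 * example, assuming a hypothetical sim register read helper sim_readl(g, reg):
 *
 *   u32 cfg = sim_readl(g, sim_config_r());
 *   if (sim_config_simulation_v(cfg) == sim_config_simulation_fmodel_v())
 *           handle_fmodel();    (hypothetical handler)
 */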
return 0x2000; +} +static inline u32 sim_config_secondary_display_s(void) +{ + return 1; +} +static inline u32 sim_config_secondary_display_f(u32 v) +{ + return (v & 0x1) << 14; +} +static inline u32 sim_config_secondary_display_m(void) +{ + return 0x1 << 14; +} +static inline u32 sim_config_secondary_display_v(u32 r) +{ + return (r >> 14) & 0x1; +} +static inline u32 sim_config_secondary_display_disabled_v(void) +{ + return 0x00000000; +} +static inline u32 sim_config_secondary_display_disabled_f(void) +{ + return 0x0; +} +static inline u32 sim_config_secondary_display_enabled_v(void) +{ + return 0x00000001; +} +static inline u32 sim_config_secondary_display_enabled_f(void) +{ + return 0x4000; +} +static inline u32 sim_config_num_heads_s(void) +{ + return 8; +} +static inline u32 sim_config_num_heads_f(u32 v) +{ + return (v & 0xff) << 17; +} +static inline u32 sim_config_num_heads_m(void) +{ + return 0xff << 17; +} +static inline u32 sim_config_num_heads_v(u32 r) +{ + return (r >> 17) & 0xff; +} +static inline u32 sim_event_ring_r(void) +{ + return 0x00000030; +} +static inline u32 sim_event_ring_target_s(void) +{ + return 2; +} +static inline u32 sim_event_ring_target_f(u32 v) +{ + return (v & 0x3) << 0; +} +static inline u32 sim_event_ring_target_m(void) +{ + return 0x3 << 0; +} +static inline u32 sim_event_ring_target_v(u32 r) +{ + return (r >> 0) & 0x3; +} +static inline u32 sim_event_ring_target_phys_init_v(void) +{ + return 0x00000001; +} +static inline u32 sim_event_ring_target_phys_init_f(void) +{ + return 0x1; +} +static inline u32 sim_event_ring_target_phys__init_v(void) +{ + return 0x00000001; +} +static inline u32 sim_event_ring_target_phys__init_f(void) +{ + return 0x1; +} +static inline u32 sim_event_ring_target_phys__prod_v(void) +{ + return 0x00000001; +} +static inline u32 sim_event_ring_target_phys__prod_f(void) +{ + return 0x1; +} +static inline u32 sim_event_ring_target_phys_nvm_v(void) +{ + return 0x00000001; +} +static inline u32 sim_event_ring_target_phys_nvm_f(void) +{ + return 0x1; +} +static inline u32 sim_event_ring_target_phys_pci_v(void) +{ + return 0x00000002; +} +static inline u32 sim_event_ring_target_phys_pci_f(void) +{ + return 0x2; +} +static inline u32 sim_event_ring_target_phys_pci_coherent_v(void) +{ + return 0x00000003; +} +static inline u32 sim_event_ring_target_phys_pci_coherent_f(void) +{ + return 0x3; +} +static inline u32 sim_event_ring_status_s(void) +{ + return 1; +} +static inline u32 sim_event_ring_status_f(u32 v) +{ + return (v & 0x1) << 3; +} +static inline u32 sim_event_ring_status_m(void) +{ + return 0x1 << 3; +} +static inline u32 sim_event_ring_status_v(u32 r) +{ + return (r >> 3) & 0x1; +} +static inline u32 sim_event_ring_status_init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_event_ring_status_init_f(void) +{ + return 0x0; +} +static inline u32 sim_event_ring_status__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_event_ring_status__init_f(void) +{ + return 0x0; +} +static inline u32 sim_event_ring_status__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_event_ring_status__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_event_ring_status_invalid_v(void) +{ + return 0x00000000; +} +static inline u32 sim_event_ring_status_invalid_f(void) +{ + return 0x0; +} +static inline u32 sim_event_ring_status_valid_v(void) +{ + return 0x00000001; +} +static inline u32 sim_event_ring_status_valid_f(void) +{ + return 0x8; +} +static inline u32 sim_event_ring_size_s(void) +{ + return 2; +} +static 
inline u32 sim_event_ring_size_f(u32 v) +{ + return (v & 0x3) << 4; +} +static inline u32 sim_event_ring_size_m(void) +{ + return 0x3 << 4; +} +static inline u32 sim_event_ring_size_v(u32 r) +{ + return (r >> 4) & 0x3; +} +static inline u32 sim_event_ring_size_init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_event_ring_size_init_f(void) +{ + return 0x0; +} +static inline u32 sim_event_ring_size__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_event_ring_size__init_f(void) +{ + return 0x0; +} +static inline u32 sim_event_ring_size__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_event_ring_size__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_event_ring_size_4kb_v(void) +{ + return 0x00000000; +} +static inline u32 sim_event_ring_size_4kb_f(void) +{ + return 0x0; +} +static inline u32 sim_event_ring_size_8kb_v(void) +{ + return 0x00000001; +} +static inline u32 sim_event_ring_size_8kb_f(void) +{ + return 0x10; +} +static inline u32 sim_event_ring_size_12kb_v(void) +{ + return 0x00000002; +} +static inline u32 sim_event_ring_size_12kb_f(void) +{ + return 0x20; +} +static inline u32 sim_event_ring_size_16kb_v(void) +{ + return 0x00000003; +} +static inline u32 sim_event_ring_size_16kb_f(void) +{ + return 0x30; +} +static inline u32 sim_event_ring_gp_in_ring_s(void) +{ + return 1; +} +static inline u32 sim_event_ring_gp_in_ring_f(u32 v) +{ + return (v & 0x1) << 11; +} +static inline u32 sim_event_ring_gp_in_ring_m(void) +{ + return 0x1 << 11; +} +static inline u32 sim_event_ring_gp_in_ring_v(u32 r) +{ + return (r >> 11) & 0x1; +} +static inline u32 sim_event_ring_gp_in_ring__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_event_ring_gp_in_ring__init_f(void) +{ + return 0x0; +} +static inline u32 sim_event_ring_gp_in_ring__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_event_ring_gp_in_ring__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_event_ring_gp_in_ring_no_v(void) +{ + return 0x00000000; +} +static inline u32 sim_event_ring_gp_in_ring_no_f(void) +{ + return 0x0; +} +static inline u32 sim_event_ring_gp_in_ring_yes_v(void) +{ + return 0x00000001; +} +static inline u32 sim_event_ring_gp_in_ring_yes_f(void) +{ + return 0x800; +} +static inline u32 sim_event_ring_addr_lo_s(void) +{ + return 20; +} +static inline u32 sim_event_ring_addr_lo_f(u32 v) +{ + return (v & 0xfffff) << 12; +} +static inline u32 sim_event_ring_addr_lo_m(void) +{ + return 0xfffff << 12; +} +static inline u32 sim_event_ring_addr_lo_v(u32 r) +{ + return (r >> 12) & 0xfffff; +} +static inline u32 sim_event_ring_addr_lo__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_event_ring_addr_lo__init_f(void) +{ + return 0x0; +} +static inline u32 sim_event_ring_addr_lo__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_event_ring_addr_lo__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_event_ring_hi_v(void) +{ + return 0x00000034; +} +static inline u32 sim_event_ring_hi_addr_s(void) +{ + return 20; +} +static inline u32 sim_event_ring_hi_addr_f(u32 v) +{ + return (v & 0xfffff) << 0; +} +static inline u32 sim_event_ring_hi_addr_m(void) +{ + return 0xfffff << 0; +} +static inline u32 sim_event_ring_hi_addr_v(u32 r) +{ + return (r >> 0) & 0xfffff; +} +static inline u32 sim_event_ring_hi_addr__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_event_ring_hi_addr__init_f(void) +{ + return 0x0; +} +static inline u32 sim_event_ring_hi_addr__prod_v(void) +{ + return 0x00000000; +} +static inline u32 
sim_event_ring_hi_addr__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_event_put_r(void) +{ + return 0x00000038; +} +static inline u32 sim_event_put_pointer_s(void) +{ + return 30; +} +static inline u32 sim_event_put_pointer_f(u32 v) +{ + return (v & 0x3fffffff) << 2; +} +static inline u32 sim_event_put_pointer_m(void) +{ + return 0x3fffffff << 2; +} +static inline u32 sim_event_put_pointer_v(u32 r) +{ + return (r >> 2) & 0x3fffffff; +} +static inline u32 sim_event_get_r(void) +{ + return 0x0000003c; +} +static inline u32 sim_event_get_pointer_s(void) +{ + return 30; +} +static inline u32 sim_event_get_pointer_f(u32 v) +{ + return (v & 0x3fffffff) << 2; +} +static inline u32 sim_event_get_pointer_m(void) +{ + return 0x3fffffff << 2; +} +static inline u32 sim_event_get_pointer_v(u32 r) +{ + return (r >> 2) & 0x3fffffff; +} +static inline u32 sim_status_r(void) +{ + return 0x00000028; +} +static inline u32 sim_status_send_put_s(void) +{ + return 1; +} +static inline u32 sim_status_send_put_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 sim_status_send_put_m(void) +{ + return 0x1 << 0; +} +static inline u32 sim_status_send_put_v(u32 r) +{ + return (r >> 0) & 0x1; +} +static inline u32 sim_status_send_put__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_status_send_put__init_f(void) +{ + return 0x0; +} +static inline u32 sim_status_send_put_idle_v(void) +{ + return 0x00000000; +} +static inline u32 sim_status_send_put_idle_f(void) +{ + return 0x0; +} +static inline u32 sim_status_send_put_pending_v(void) +{ + return 0x00000001; +} +static inline u32 sim_status_send_put_pending_f(void) +{ + return 0x1; +} +static inline u32 sim_status_send_get_s(void) +{ + return 1; +} +static inline u32 sim_status_send_get_f(u32 v) +{ + return (v & 0x1) << 1; +} +static inline u32 sim_status_send_get_m(void) +{ + return 0x1 << 1; +} +static inline u32 sim_status_send_get_v(u32 r) +{ + return (r >> 1) & 0x1; +} +static inline u32 sim_status_send_get__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_status_send_get__init_f(void) +{ + return 0x0; +} +static inline u32 sim_status_send_get_idle_v(void) +{ + return 0x00000000; +} +static inline u32 sim_status_send_get_idle_f(void) +{ + return 0x0; +} +static inline u32 sim_status_send_get_pending_v(void) +{ + return 0x00000001; +} +static inline u32 sim_status_send_get_pending_f(void) +{ + return 0x2; +} +static inline u32 sim_status_send_get_clear_v(void) +{ + return 0x00000001; +} +static inline u32 sim_status_send_get_clear_f(void) +{ + return 0x2; +} +static inline u32 sim_status_recv_put_s(void) +{ + return 1; +} +static inline u32 sim_status_recv_put_f(u32 v) +{ + return (v & 0x1) << 2; +} +static inline u32 sim_status_recv_put_m(void) +{ + return 0x1 << 2; +} +static inline u32 sim_status_recv_put_v(u32 r) +{ + return (r >> 2) & 0x1; +} +static inline u32 sim_status_recv_put__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_status_recv_put__init_f(void) +{ + return 0x0; +} +static inline u32 sim_status_recv_put_idle_v(void) +{ + return 0x00000000; +} +static inline u32 sim_status_recv_put_idle_f(void) +{ + return 0x0; +} +static inline u32 sim_status_recv_put_pending_v(void) +{ + return 0x00000001; +} +static inline u32 sim_status_recv_put_pending_f(void) +{ + return 0x4; +} +static inline u32 sim_status_recv_put_clear_v(void) +{ + return 0x00000001; +} +static inline u32 sim_status_recv_put_clear_f(void) +{ + return 0x4; +} +static inline u32 sim_status_recv_get_s(void) +{ + return 1; +} +static 
inline u32 sim_status_recv_get_f(u32 v) +{ + return (v & 0x1) << 3; +} +static inline u32 sim_status_recv_get_m(void) +{ + return 0x1 << 3; +} +static inline u32 sim_status_recv_get_v(u32 r) +{ + return (r >> 3) & 0x1; +} +static inline u32 sim_status_recv_get__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_status_recv_get__init_f(void) +{ + return 0x0; +} +static inline u32 sim_status_recv_get_idle_v(void) +{ + return 0x00000000; +} +static inline u32 sim_status_recv_get_idle_f(void) +{ + return 0x0; +} +static inline u32 sim_status_recv_get_pending_v(void) +{ + return 0x00000001; +} +static inline u32 sim_status_recv_get_pending_f(void) +{ + return 0x8; +} +static inline u32 sim_status_event_put_s(void) +{ + return 1; +} +static inline u32 sim_status_event_put_f(u32 v) +{ + return (v & 0x1) << 4; +} +static inline u32 sim_status_event_put_m(void) +{ + return 0x1 << 4; +} +static inline u32 sim_status_event_put_v(u32 r) +{ + return (r >> 4) & 0x1; +} +static inline u32 sim_status_event_put__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_status_event_put__init_f(void) +{ + return 0x0; +} +static inline u32 sim_status_event_put_idle_v(void) +{ + return 0x00000000; +} +static inline u32 sim_status_event_put_idle_f(void) +{ + return 0x0; +} +static inline u32 sim_status_event_put_pending_v(void) +{ + return 0x00000001; +} +static inline u32 sim_status_event_put_pending_f(void) +{ + return 0x10; +} +static inline u32 sim_status_event_put_clear_v(void) +{ + return 0x00000001; +} +static inline u32 sim_status_event_put_clear_f(void) +{ + return 0x10; +} +static inline u32 sim_status_event_get_s(void) +{ + return 1; +} +static inline u32 sim_status_event_get_f(u32 v) +{ + return (v & 0x1) << 5; +} +static inline u32 sim_status_event_get_m(void) +{ + return 0x1 << 5; +} +static inline u32 sim_status_event_get_v(u32 r) +{ + return (r >> 5) & 0x1; +} +static inline u32 sim_status_event_get__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_status_event_get__init_f(void) +{ + return 0x0; +} +static inline u32 sim_status_event_get_idle_v(void) +{ + return 0x00000000; +} +static inline u32 sim_status_event_get_idle_f(void) +{ + return 0x0; +} +static inline u32 sim_status_event_get_pending_v(void) +{ + return 0x00000001; +} +static inline u32 sim_status_event_get_pending_f(void) +{ + return 0x20; +} +static inline u32 sim_control_r(void) +{ + return 0x0000002c; +} +static inline u32 sim_control_send_put_s(void) +{ + return 1; +} +static inline u32 sim_control_send_put_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 sim_control_send_put_m(void) +{ + return 0x1 << 0; +} +static inline u32 sim_control_send_put_v(u32 r) +{ + return (r >> 0) & 0x1; +} +static inline u32 sim_control_send_put__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_control_send_put__init_f(void) +{ + return 0x0; +} +static inline u32 sim_control_send_put_disabled_v(void) +{ + return 0x00000000; +} +static inline u32 sim_control_send_put_disabled_f(void) +{ + return 0x0; +} +static inline u32 sim_control_send_put_enabled_v(void) +{ + return 0x00000001; +} +static inline u32 sim_control_send_put_enabled_f(void) +{ + return 0x1; +} +static inline u32 sim_control_send_get_s(void) +{ + return 1; +} +static inline u32 sim_control_send_get_f(u32 v) +{ + return (v & 0x1) << 1; +} +static inline u32 sim_control_send_get_m(void) +{ + return 0x1 << 1; +} +static inline u32 sim_control_send_get_v(u32 r) +{ + return (r >> 1) & 0x1; +} +static inline u32 
sim_control_send_get__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_control_send_get__init_f(void) +{ + return 0x0; +} +static inline u32 sim_control_send_get_disabled_v(void) +{ + return 0x00000000; +} +static inline u32 sim_control_send_get_disabled_f(void) +{ + return 0x0; +} +static inline u32 sim_control_send_get_enabled_v(void) +{ + return 0x00000001; +} +static inline u32 sim_control_send_get_enabled_f(void) +{ + return 0x2; +} +static inline u32 sim_control_recv_put_s(void) +{ + return 1; +} +static inline u32 sim_control_recv_put_f(u32 v) +{ + return (v & 0x1) << 2; +} +static inline u32 sim_control_recv_put_m(void) +{ + return 0x1 << 2; +} +static inline u32 sim_control_recv_put_v(u32 r) +{ + return (r >> 2) & 0x1; +} +static inline u32 sim_control_recv_put__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_control_recv_put__init_f(void) +{ + return 0x0; +} +static inline u32 sim_control_recv_put_disabled_v(void) +{ + return 0x00000000; +} +static inline u32 sim_control_recv_put_disabled_f(void) +{ + return 0x0; +} +static inline u32 sim_control_recv_put_enabled_v(void) +{ + return 0x00000001; +} +static inline u32 sim_control_recv_put_enabled_f(void) +{ + return 0x4; +} +static inline u32 sim_control_recv_get_s(void) +{ + return 1; +} +static inline u32 sim_control_recv_get_f(u32 v) +{ + return (v & 0x1) << 3; +} +static inline u32 sim_control_recv_get_m(void) +{ + return 0x1 << 3; +} +static inline u32 sim_control_recv_get_v(u32 r) +{ + return (r >> 3) & 0x1; +} +static inline u32 sim_control_recv_get__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_control_recv_get__init_f(void) +{ + return 0x0; +} +static inline u32 sim_control_recv_get_disabled_v(void) +{ + return 0x00000000; +} +static inline u32 sim_control_recv_get_disabled_f(void) +{ + return 0x0; +} +static inline u32 sim_control_recv_get_enabled_v(void) +{ + return 0x00000001; +} +static inline u32 sim_control_recv_get_enabled_f(void) +{ + return 0x8; +} +static inline u32 sim_control_event_put_s(void) +{ + return 1; +} +static inline u32 sim_control_event_put_f(u32 v) +{ + return (v & 0x1) << 4; +} +static inline u32 sim_control_event_put_m(void) +{ + return 0x1 << 4; +} +static inline u32 sim_control_event_put_v(u32 r) +{ + return (r >> 4) & 0x1; +} +static inline u32 sim_control_event_put__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_control_event_put__init_f(void) +{ + return 0x0; +} +static inline u32 sim_control_event_put_disabled_v(void) +{ + return 0x00000000; +} +static inline u32 sim_control_event_put_disabled_f(void) +{ + return 0x0; +} +static inline u32 sim_control_event_put_enabled_v(void) +{ + return 0x00000001; +} +static inline u32 sim_control_event_put_enabled_f(void) +{ + return 0x10; +} +static inline u32 sim_control_event_get_s(void) +{ + return 1; +} +static inline u32 sim_control_event_get_f(u32 v) +{ + return (v & 0x1) << 5; +} +static inline u32 sim_control_event_get_m(void) +{ + return 0x1 << 5; +} +static inline u32 sim_control_event_get_v(u32 r) +{ + return (r >> 5) & 0x1; +} +static inline u32 sim_control_event_get__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_control_event_get__init_f(void) +{ + return 0x0; +} +static inline u32 sim_control_event_get_disabled_v(void) +{ + return 0x00000000; +} +static inline u32 sim_control_event_get_disabled_f(void) +{ + return 0x0; +} +static inline u32 sim_control_event_get_enabled_v(void) +{ + return 0x00000001; +} +static inline u32 sim_control_event_get_enabled_f(void) 
+{ + return 0x20; +} +static inline u32 sim_dma_r(void) +{ + return 0x00000000; +} +static inline u32 sim_dma_target_s(void) +{ + return 2; +} +static inline u32 sim_dma_target_f(u32 v) +{ + return (v & 0x3) << 0; +} +static inline u32 sim_dma_target_m(void) +{ + return 0x3 << 0; +} +static inline u32 sim_dma_target_v(u32 r) +{ + return (r >> 0) & 0x3; +} +static inline u32 sim_dma_target_phys_init_v(void) +{ + return 0x00000001; +} +static inline u32 sim_dma_target_phys_init_f(void) +{ + return 0x1; +} +static inline u32 sim_dma_target_phys__init_v(void) +{ + return 0x00000001; +} +static inline u32 sim_dma_target_phys__init_f(void) +{ + return 0x1; +} +static inline u32 sim_dma_target_phys__prod_v(void) +{ + return 0x00000001; +} +static inline u32 sim_dma_target_phys__prod_f(void) +{ + return 0x1; +} +static inline u32 sim_dma_target_phys_nvm_v(void) +{ + return 0x00000001; +} +static inline u32 sim_dma_target_phys_nvm_f(void) +{ + return 0x1; +} +static inline u32 sim_dma_target_phys_pci_v(void) +{ + return 0x00000002; +} +static inline u32 sim_dma_target_phys_pci_f(void) +{ + return 0x2; +} +static inline u32 sim_dma_target_phys_pci_coherent_v(void) +{ + return 0x00000003; +} +static inline u32 sim_dma_target_phys_pci_coherent_f(void) +{ + return 0x3; +} +static inline u32 sim_dma_status_s(void) +{ + return 1; +} +static inline u32 sim_dma_status_f(u32 v) +{ + return (v & 0x1) << 3; +} +static inline u32 sim_dma_status_m(void) +{ + return 0x1 << 3; +} +static inline u32 sim_dma_status_v(u32 r) +{ + return (r >> 3) & 0x1; +} +static inline u32 sim_dma_status_init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_dma_status_init_f(void) +{ + return 0x0; +} +static inline u32 sim_dma_status__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_dma_status__init_f(void) +{ + return 0x0; +} +static inline u32 sim_dma_status__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_dma_status__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_dma_status_invalid_v(void) +{ + return 0x00000000; +} +static inline u32 sim_dma_status_invalid_f(void) +{ + return 0x0; +} +static inline u32 sim_dma_status_valid_v(void) +{ + return 0x00000001; +} +static inline u32 sim_dma_status_valid_f(void) +{ + return 0x8; +} +static inline u32 sim_dma_size_s(void) +{ + return 2; +} +static inline u32 sim_dma_size_f(u32 v) +{ + return (v & 0x3) << 4; +} +static inline u32 sim_dma_size_m(void) +{ + return 0x3 << 4; +} +static inline u32 sim_dma_size_v(u32 r) +{ + return (r >> 4) & 0x3; +} +static inline u32 sim_dma_size_init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_dma_size_init_f(void) +{ + return 0x0; +} +static inline u32 sim_dma_size__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_dma_size__init_f(void) +{ + return 0x0; +} +static inline u32 sim_dma_size__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_dma_size__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_dma_size_4kb_v(void) +{ + return 0x00000000; +} +static inline u32 sim_dma_size_4kb_f(void) +{ + return 0x0; +} +static inline u32 sim_dma_size_8kb_v(void) +{ + return 0x00000001; +} +static inline u32 sim_dma_size_8kb_f(void) +{ + return 0x10; +} +static inline u32 sim_dma_size_12kb_v(void) +{ + return 0x00000002; +} +static inline u32 sim_dma_size_12kb_f(void) +{ + return 0x20; +} +static inline u32 sim_dma_size_16kb_v(void) +{ + return 0x00000003; +} +static inline u32 sim_dma_size_16kb_f(void) +{ + return 0x30; +} +static inline u32 sim_dma_addr_lo_s(void) +{ + 
return 20; +} +static inline u32 sim_dma_addr_lo_f(u32 v) +{ + return (v & 0xfffff) << 12; +} +static inline u32 sim_dma_addr_lo_m(void) +{ + return 0xfffff << 12; +} +static inline u32 sim_dma_addr_lo_v(u32 r) +{ + return (r >> 12) & 0xfffff; +} +static inline u32 sim_dma_addr_lo__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_dma_addr_lo__init_f(void) +{ + return 0x0; +} +static inline u32 sim_dma_addr_lo__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_dma_addr_lo__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_dma_hi_r(void) +{ + return 0x00000004; +} +static inline u32 sim_dma_hi_addr_s(void) +{ + return 20; +} +static inline u32 sim_dma_hi_addr_f(u32 v) +{ + return (v & 0xfffff) << 0; +} +static inline u32 sim_dma_hi_addr_m(void) +{ + return 0xfffff << 0; +} +static inline u32 sim_dma_hi_addr_v(u32 r) +{ + return (r >> 0) & 0xfffff; +} +static inline u32 sim_dma_hi_addr__init_v(void) +{ + return 0x00000000; +} +static inline u32 sim_dma_hi_addr__init_f(void) +{ + return 0x0; +} +static inline u32 sim_dma_hi_addr__prod_v(void) +{ + return 0x00000000; +} +static inline u32 sim_dma_hi_addr__prod_f(void) +{ + return 0x0; +} +static inline u32 sim_msg_signature_r(void) +{ + return 0x00000000; +} +static inline u32 sim_msg_signature_valid_v(void) +{ + return 0x43505256; +} +static inline u32 sim_msg_length_r(void) +{ + return 0x00000004; +} +static inline u32 sim_msg_function_r(void) +{ + return 0x00000008; +} +static inline u32 sim_msg_function_sim_escape_read_v(void) +{ + return 0x00000023; +} +static inline u32 sim_msg_function_sim_escape_write_v(void) +{ + return 0x00000024; +} +static inline u32 sim_msg_result_r(void) +{ + return 0x0000000c; +} +static inline u32 sim_msg_result_success_v(void) +{ + return 0x00000000; +} +static inline u32 sim_msg_result_rpc_pending_v(void) +{ + return 0xFFFFFFFF; +} +static inline u32 sim_msg_sequence_r(void) +{ + return 0x00000010; +} +static inline u32 sim_msg_spare_r(void) +{ + return 0x00000014; +} +static inline u32 sim_msg_spare__init_v(void) +{ + return 0x00000000; +} + +#endif /* __hw_sim_gk20a_h__ */ diff --git a/drivers/gpu/nvgpu/gk20a/hw_therm_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_therm_gk20a.h new file mode 100644 index 00000000..5d6397b4 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_therm_gk20a.h @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . 
+ * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ +#ifndef _hw_therm_gk20a_h_ +#define _hw_therm_gk20a_h_ + +static inline u32 therm_use_a_r(void) +{ + return 0x00020798; +} +static inline u32 therm_evt_ext_therm_0_r(void) +{ + return 0x00020700; +} +static inline u32 therm_evt_ext_therm_1_r(void) +{ + return 0x00020704; +} +static inline u32 therm_evt_ext_therm_2_r(void) +{ + return 0x00020708; +} +static inline u32 therm_evt_ba_w0_t1h_r(void) +{ + return 0x00020750; +} +static inline u32 therm_weight_1_r(void) +{ + return 0x00020024; +} +static inline u32 therm_peakpower_config1_r(u32 i) +{ + return 0x00020154 + i*4; +} +static inline u32 therm_peakpower_config1_window_period_2m_v(void) +{ + return 0x0000000f; +} +static inline u32 therm_peakpower_config1_window_period_2m_f(void) +{ + return 0xf; +} +static inline u32 therm_peakpower_config1_ba_sum_shift_s(void) +{ + return 6; +} +static inline u32 therm_peakpower_config1_ba_sum_shift_f(u32 v) +{ + return (v & 0x3f) << 8; +} +static inline u32 therm_peakpower_config1_ba_sum_shift_m(void) +{ + return 0x3f << 8; +} +static inline u32 therm_peakpower_config1_ba_sum_shift_v(u32 r) +{ + return (r >> 8) & 0x3f; +} +static inline u32 therm_peakpower_config1_ba_sum_shift_20_f(void) +{ + return 0x1400; +} +static inline u32 therm_peakpower_config1_window_en_enabled_f(void) +{ + return 0x80000000; +} +static inline u32 therm_peakpower_config2_r(u32 i) +{ + return 0x00020170 + i*4; +} +static inline u32 therm_peakpower_config4_r(u32 i) +{ + return 0x000201c0 + i*4; +} +static inline u32 therm_peakpower_config6_r(u32 i) +{ + return 0x00020270 + i*4; +} +static inline u32 therm_peakpower_config8_r(u32 i) +{ + return 0x000202e8 + i*4; +} +static inline u32 therm_peakpower_config9_r(u32 i) +{ + return 0x000202f4 + i*4; +} +static inline u32 therm_config1_r(void) +{ + return 0x00020050; +} +static inline u32 therm_gate_ctrl_r(u32 i) +{ + return 0x00020200 + i*4; +} +static inline u32 therm_gate_ctrl_eng_clk_m(void) +{ + return 0x3 << 0; +} +static inline u32 therm_gate_ctrl_eng_clk_run_f(void) +{ + return 0x0; +} +static inline u32 therm_gate_ctrl_eng_clk_auto_f(void) +{ + return 0x1; +} +static inline u32 therm_gate_ctrl_eng_clk_stop_f(void) +{ + return 0x2; +} +static inline u32 therm_gate_ctrl_blk_clk_m(void) +{ + return 0x3 << 2; +} +static inline u32 therm_gate_ctrl_blk_clk_run_f(void) +{ + return 0x0; +} +static inline u32 therm_gate_ctrl_blk_clk_auto_f(void) +{ + return 0x4; +} +static inline u32 therm_gate_ctrl_eng_pwr_m(void) +{ + return 0x3 << 4; +} +static inline u32 therm_gate_ctrl_eng_pwr_auto_f(void) +{ + return 0x10; +} +static inline u32 therm_gate_ctrl_eng_pwr_off_v(void) +{ + return 0x00000002; +} +static inline u32 therm_gate_ctrl_eng_pwr_off_f(void) +{ + return 0x20; +} +static inline u32 
therm_gate_ctrl_eng_idle_filt_exp_f(u32 v) +{ + return (v & 0x1f) << 8; +} +static inline u32 therm_gate_ctrl_eng_idle_filt_exp_m(void) +{ + return 0x1f << 8; +} +static inline u32 therm_gate_ctrl_eng_idle_filt_mant_f(u32 v) +{ + return (v & 0x7) << 13; +} +static inline u32 therm_gate_ctrl_eng_idle_filt_mant_m(void) +{ + return 0x7 << 13; +} +static inline u32 therm_gate_ctrl_eng_delay_after_f(u32 v) +{ + return (v & 0xf) << 20; +} +static inline u32 therm_gate_ctrl_eng_delay_after_m(void) +{ + return 0xf << 20; +} +static inline u32 therm_fecs_idle_filter_r(void) +{ + return 0x00020288; +} +static inline u32 therm_fecs_idle_filter_value_m(void) +{ + return 0xffffffff << 0; +} +static inline u32 therm_hubmmu_idle_filter_r(void) +{ + return 0x0002028c; +} +static inline u32 therm_hubmmu_idle_filter_value_m(void) +{ + return 0xffffffff << 0; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h new file mode 100644 index 00000000..22bc50ac --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
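The naming convention described above is easiest to see in a read-modify-write sequence. What follows is a minimal illustrative sketch, not part of the patch itself: it assumes the struct gk20a device handle and the gk20a_readl()/gk20a_writel() MMIO accessors declared elsewhere in this series, and reuses the therm_gate_ctrl accessors defined just above.

/*
 * Illustrative sketch (assumed usage, not from this patch): switch one
 * engine's clock gating to AUTO with the generated accessors.
 */
static void example_set_eng_clk_auto(struct gk20a *g, u32 engine_id)
{
	u32 gate = gk20a_readl(g, therm_gate_ctrl_r(engine_id));

	gate &= ~therm_gate_ctrl_eng_clk_m();     /* clear the eng_clk field (bits 1:0) */
	gate |= therm_gate_ctrl_eng_clk_auto_f(); /* place the AUTO constant in that field */

	gk20a_writel(g, therm_gate_ctrl_r(engine_id), gate);
}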
+ */ +#ifndef _hw_timer_gk20a_h_ +#define _hw_timer_gk20a_h_ + +static inline u32 timer_pri_timeout_r(void) +{ + return 0x00009080; +} +static inline u32 timer_pri_timeout_period_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +static inline u32 timer_pri_timeout_period_m(void) +{ + return 0xffffff << 0; +} +static inline u32 timer_pri_timeout_period_v(u32 r) +{ + return (r >> 0) & 0xffffff; +} +static inline u32 timer_pri_timeout_en_f(u32 v) +{ + return (v & 0x1) << 31; +} +static inline u32 timer_pri_timeout_en_m(void) +{ + return 0x1 << 31; +} +static inline u32 timer_pri_timeout_en_v(u32 r) +{ + return (r >> 31) & 0x1; +} +static inline u32 timer_pri_timeout_en_en_enabled_f(void) +{ + return 0x80000000; +} +static inline u32 timer_pri_timeout_en_en_disabled_f(void) +{ + return 0x0; +} +static inline u32 timer_pri_timeout_save_0_r(void) +{ + return 0x00009084; +} +static inline u32 timer_pri_timeout_save_1_r(void) +{ + return 0x00009088; +} +static inline u32 timer_pri_timeout_fecs_errcode_r(void) +{ + return 0x0000908c; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h new file mode 100644 index 00000000..c2922814 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
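In the other direction, the _v() helpers extract a field from a value already read back from a register. A small sketch, again illustrative only and assuming the gk20a_readl() helper, using the timer_pri_timeout accessors defined above:

/*
 * Illustrative sketch (assumed usage, not from this patch): decode the
 * PRI timeout register with the _v() extractors.
 */
static void example_report_pri_timeout(struct gk20a *g)
{
	u32 reg = gk20a_readl(g, timer_pri_timeout_r());
	u32 period = timer_pri_timeout_period_v(reg);     /* bits 23:0 */
	bool enabled = timer_pri_timeout_en_v(reg) != 0U; /* bit 31 */

	pr_info("pri timeout: period=0x%x, %s\n",
		period, enabled ? "enabled" : "disabled");
}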
+ */ +#ifndef _hw_top_gk20a_h_ +#define _hw_top_gk20a_h_ + +static inline u32 top_num_gpcs_r(void) +{ + return 0x00022430; +} +static inline u32 top_num_gpcs_value_v(u32 r) +{ + return (r >> 0) & 0x1f; +} +static inline u32 top_tpc_per_gpc_r(void) +{ + return 0x00022434; +} +static inline u32 top_tpc_per_gpc_value_v(u32 r) +{ + return (r >> 0) & 0x1f; +} +static inline u32 top_num_fbps_r(void) +{ + return 0x00022438; +} +static inline u32 top_num_fbps_value_v(u32 r) +{ + return (r >> 0) & 0x1f; +} +static inline u32 top_fs_status_r(void) +{ + return 0x00022500; +} +static inline u32 top_device_info_r(u32 i) +{ + return 0x00022700 + i*4; +} +static inline u32 top_device_info__size_1_v(void) +{ + return 0x00000040; +} +static inline u32 top_device_info_chain_v(u32 r) +{ + return (r >> 31) & 0x1; +} +static inline u32 top_device_info_chain_enable_v(void) +{ + return 0x00000001; +} +static inline u32 top_device_info_engine_enum_v(u32 r) +{ + return (r >> 26) & 0xf; +} +static inline u32 top_device_info_runlist_enum_v(u32 r) +{ + return (r >> 21) & 0xf; +} +static inline u32 top_device_info_type_enum_v(u32 r) +{ + return (r >> 2) & 0x1fffffff; +} +static inline u32 top_device_info_type_enum_graphics_v(void) +{ + return 0x00000000; +} +static inline u32 top_device_info_type_enum_graphics_f(void) +{ + return 0x0; +} +static inline u32 top_device_info_type_enum_copy0_v(void) +{ + return 0x00000001; +} +static inline u32 top_device_info_type_enum_copy0_f(void) +{ + return 0x4; +} +static inline u32 top_device_info_entry_v(u32 r) +{ + return (r >> 0) & 0x3; +} +static inline u32 top_device_info_entry_not_valid_v(void) +{ + return 0x00000000; +} +static inline u32 top_device_info_entry_enum_v(void) +{ + return 0x00000002; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/hw_trim_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_trim_gk20a.h new file mode 100644 index 00000000..826e9bd1 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_trim_gk20a.h @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . 
+ * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ +#ifndef _hw_trim_gk20a_h_ +#define _hw_trim_gk20a_h_ + +static inline u32 trim_sys_gpcpll_cfg_r(void) +{ + return 0x00137000; +} +static inline u32 trim_sys_gpcpll_cfg_enable_m(void) +{ + return 0x1 << 0; +} +static inline u32 trim_sys_gpcpll_cfg_enable_v(u32 r) +{ + return (r >> 0) & 0x1; +} +static inline u32 trim_sys_gpcpll_cfg_enable_no_f(void) +{ + return 0x0; +} +static inline u32 trim_sys_gpcpll_cfg_enable_yes_f(void) +{ + return 0x1; +} +static inline u32 trim_sys_gpcpll_cfg_iddq_m(void) +{ + return 0x1 << 1; +} +static inline u32 trim_sys_gpcpll_cfg_iddq_v(u32 r) +{ + return (r >> 1) & 0x1; +} +static inline u32 trim_sys_gpcpll_cfg_iddq_power_on_v(void) +{ + return 0x00000000; +} +static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_m(void) +{ + return 0x1 << 4; +} +static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_power_on_f(void) +{ + return 0x0; +} +static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_power_off_f(void) +{ + return 0x10; +} +static inline u32 trim_sys_gpcpll_cfg_pll_lock_v(u32 r) +{ + return (r >> 17) & 0x1; +} +static inline u32 trim_sys_gpcpll_cfg_pll_lock_true_f(void) +{ + return 0x20000; +} +static inline u32 trim_sys_gpcpll_coeff_r(void) +{ + return 0x00137004; +} +static inline u32 trim_sys_gpcpll_coeff_mdiv_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 trim_sys_gpcpll_coeff_mdiv_v(u32 r) +{ + return (r >> 0) & 0xff; +} +static inline u32 trim_sys_gpcpll_coeff_ndiv_f(u32 v) +{ + return (v & 0xff) << 8; +} +static inline u32 trim_sys_gpcpll_coeff_ndiv_m(void) +{ + return 0xff << 8; +} +static inline u32 trim_sys_gpcpll_coeff_ndiv_v(u32 r) +{ + return (r >> 8) & 0xff; +} +static inline u32 trim_sys_gpcpll_coeff_pldiv_f(u32 v) +{ + return (v & 0x3f) << 16; +} +static inline u32 trim_sys_gpcpll_coeff_pldiv_v(u32 r) +{ + return (r >> 16) & 0x3f; +} +static inline u32 trim_sys_sel_vco_r(void) +{ + return 0x00137100; +} +static inline u32 trim_sys_sel_vco_gpc2clk_out_m(void) +{ + return 0x1 << 0; +} +static inline u32 trim_sys_sel_vco_gpc2clk_out_init_v(void) +{ + return 0x00000000; +} +static inline u32 trim_sys_sel_vco_gpc2clk_out_init_f(void) +{ + return 0x0; +} +static inline u32 trim_sys_sel_vco_gpc2clk_out_bypass_f(void) +{ + return 0x0; +} +static inline u32 trim_sys_sel_vco_gpc2clk_out_vco_f(void) +{ + return 0x1; +} +static inline u32 trim_sys_gpc2clk_out_r(void) +{ + return 0x00137250; +} +static inline u32 trim_sys_gpc2clk_out_bypdiv_s(void) +{ + return 6; +} +static inline u32 trim_sys_gpc2clk_out_bypdiv_f(u32 v) +{ + return (v & 0x3f) << 0; +} +static inline u32 trim_sys_gpc2clk_out_bypdiv_m(void) +{ + return 0x3f << 0; +} +static inline u32 trim_sys_gpc2clk_out_bypdiv_v(u32 r) +{ + return (r >> 0) & 0x3f; +} +static inline u32 trim_sys_gpc2clk_out_bypdiv_by31_f(void) +{ + return 0x3c; +} +static inline u32 trim_sys_gpc2clk_out_vcodiv_s(void) +{ + return 6; +} +static inline u32 trim_sys_gpc2clk_out_vcodiv_f(u32 v) +{ + return (v & 0x3f) << 8; +} +static inline u32 trim_sys_gpc2clk_out_vcodiv_m(void) +{ + return 0x3f << 8; +} +static inline u32 trim_sys_gpc2clk_out_vcodiv_v(u32 r) +{ + 
return (r >> 8) & 0x3f; +} +static inline u32 trim_sys_gpc2clk_out_vcodiv_by1_f(void) +{ + return 0x0; +} +static inline u32 trim_sys_gpc2clk_out_sdiv14_m(void) +{ + return 0x1 << 31; +} +static inline u32 trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f(void) +{ + return 0x80000000; +} +static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_r(u32 i) +{ + return 0x00134124 + i*512; +} +static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(u32 v) +{ + return (v & 0x3fff) << 0; +} +static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f(void) +{ + return 0x10000; +} +static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f(void) +{ + return 0x100000; +} +static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f(void) +{ + return 0x1000000; +} +static inline u32 trim_gpc_clk_cntr_ncgpcclk_cnt_r(u32 i) +{ + return 0x00134128 + i*512; +} +static inline u32 trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(u32 r) +{ + return (r >> 0) & 0xfffff; +} +static inline u32 trim_sys_gpcpll_cfg2_r(void) +{ + return 0x0013700c; +} +static inline u32 trim_sys_gpcpll_cfg2_pll_stepa_f(u32 v) +{ + return (v & 0xff) << 24; +} +static inline u32 trim_sys_gpcpll_cfg2_pll_stepa_m(void) +{ + return 0xff << 24; +} +static inline u32 trim_sys_gpcpll_cfg3_r(void) +{ + return 0x00137018; +} +static inline u32 trim_sys_gpcpll_cfg3_pll_stepb_f(u32 v) +{ + return (v & 0xff) << 16; +} +static inline u32 trim_sys_gpcpll_cfg3_pll_stepb_m(void) +{ + return 0xff << 16; +} +static inline u32 trim_sys_gpcpll_ndiv_slowdown_r(void) +{ + return 0x0013701c; +} +static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(void) +{ + return 0x1 << 22; +} +static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f(void) +{ + return 0x400000; +} +static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f(void) +{ + return 0x0; +} +static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(void) +{ + return 0x1 << 31; +} +static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f(void) +{ + return 0x80000000; +} +static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f(void) +{ + return 0x0; +} +static inline u32 trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r(void) +{ + return 0x001328a0; +} +static inline u32 trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(u32 r) +{ + return (r >> 24) & 0x1; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/kind_gk20a.c b/drivers/gpu/nvgpu/gk20a/kind_gk20a.c new file mode 100644 index 00000000..b0a74056 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/kind_gk20a.c @@ -0,0 +1,424 @@ +/* + * drivers/video/tegra/host/gk20a/kind_gk20a.c + * + * GK20A memory kind management + * + * Copyright (c) 2011, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ +#include + +#include "hw_gmmu_gk20a.h" +#include "kind_gk20a.h" + +/* TBD: generate these from kind_macros.h */ + +/* TBD: not sure on the work creation for gk20a, doubtful */ +static inline bool gk20a_kind_work_creation_sked(u8 k) +{ + return false; +} +static inline bool gk20a_kind_work_creation_host(u8 k) +{ + return false; +} + +static inline bool gk20a_kind_work_creation(u8 k) +{ + return gk20a_kind_work_creation_sked(k) || + gk20a_kind_work_creation_host(k); +} + +/* note: taken from the !2cs_compression case */ +static inline bool gk20a_kind_supported(u8 k) +{ + return gk20a_kind_work_creation(k) || + (k == gmmu_pte_kind_invalid_v()) || + (k == gmmu_pte_kind_pitch_v()) || + (k >= gmmu_pte_kind_z16_v() && + k <= gmmu_pte_kind_z16_ms8_2c_v()) || + (k >= gmmu_pte_kind_z16_2z_v() && + k <= gmmu_pte_kind_z16_ms8_2z_v()) || + (k == gmmu_pte_kind_s8z24_v()) || + (k >= gmmu_pte_kind_s8z24_2cz_v() && + k <= gmmu_pte_kind_s8z24_ms8_2cz_v()) || + (k >= gmmu_pte_kind_v8z24_ms4_vc12_v() && + k <= gmmu_pte_kind_v8z24_ms8_vc24_v()) || + (k >= gmmu_pte_kind_v8z24_ms4_vc12_2czv_v() && + k <= gmmu_pte_kind_v8z24_ms8_vc24_2zv_v()) || + (k == gmmu_pte_kind_z24s8_v()) || + (k >= gmmu_pte_kind_z24s8_2cz_v() && + k <= gmmu_pte_kind_z24s8_ms8_2cz_v()) || + (k == gmmu_pte_kind_zf32_v()) || + (k >= gmmu_pte_kind_zf32_2cz_v() && + k <= gmmu_pte_kind_zf32_ms8_2cz_v()) || + (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v() && + k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v()) || + (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cszv_v() && + k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v()) || + (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cszv_v() && + k <= gmmu_pte_kind_zf32_x24s8_v()) || + (k >= gmmu_pte_kind_zf32_x24s8_2cszv_v() && + k <= gmmu_pte_kind_zf32_x24s8_ms8_2cszv_v()) || + (k == gmmu_pte_kind_generic_16bx2_v()) || + (k == gmmu_pte_kind_c32_2c_v()) || + (k == gmmu_pte_kind_c32_2cra_v()) || + (k == gmmu_pte_kind_c32_ms2_2c_v()) || + (k == gmmu_pte_kind_c32_ms2_2cra_v()) || + (k >= gmmu_pte_kind_c32_ms4_2c_v() && + k <= gmmu_pte_kind_c32_ms4_2cbr_v()) || + (k >= gmmu_pte_kind_c32_ms4_2cra_v() && + k <= gmmu_pte_kind_c64_2c_v()) || + (k == gmmu_pte_kind_c64_2cra_v()) || + (k == gmmu_pte_kind_c64_ms2_2c_v()) || + (k == gmmu_pte_kind_c64_ms2_2cra_v()) || + (k >= gmmu_pte_kind_c64_ms4_2c_v() && + k <= gmmu_pte_kind_c64_ms4_2cbr_v()) || + (k >= gmmu_pte_kind_c64_ms4_2cra_v() && + k <= gmmu_pte_kind_c128_ms8_ms16_2cr_v()) || + (k == gmmu_pte_kind_pitch_no_swizzle_v()); + } + +static inline bool gk20a_kind_z(u8 k) +{ + return (k >= gmmu_pte_kind_z16_v() && + k <= gmmu_pte_kind_v8z24_ms8_vc24_v()) || + (k >= gmmu_pte_kind_v8z24_ms4_vc12_1zv_v() && + k <= gmmu_pte_kind_v8z24_ms8_vc24_2cs_v()) || + (k >= gmmu_pte_kind_v8z24_ms4_vc12_2czv_v() && + k <= gmmu_pte_kind_z24v8_ms8_vc24_v()) || + (k >= gmmu_pte_kind_z24v8_ms4_vc12_1zv_v() && + k <= gmmu_pte_kind_z24v8_ms8_vc24_2cs_v()) || + (k >= gmmu_pte_kind_z24v8_ms4_vc12_2czv_v() && + k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v()) || + (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1zv_v() && + k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v()) || + (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1zv_v() && + k <= gmmu_pte_kind_zf32_x24s8_ms16_1cs_v()) + /* || + (k >= gmmu_pte_kind_zv32_x24s8_2cszv_v() && + k <= gmmu_pte_kind_xf32_x24s8_ms16_2cs_v())*/; +} + +static inline bool gk20a_kind_c(u8 k) +{ + return gk20a_kind_work_creation(k) || + (k == gmmu_pte_kind_pitch_v()) || + (k == gmmu_pte_kind_generic_16bx2_v()) || + (k >= gmmu_pte_kind_c32_2c_v() && + k <= 
gmmu_pte_kind_c32_ms2_2cbr_v()) || + (k == gmmu_pte_kind_c32_ms2_2cra_v()) || + (k >= gmmu_pte_kind_c32_ms4_2c_v() && + k <= gmmu_pte_kind_c64_ms2_2cbr_v()) || + (k == gmmu_pte_kind_c64_ms2_2cra_v()) || + (k >= gmmu_pte_kind_c64_ms4_2c_v() && + k <= gmmu_pte_kind_pitch_no_swizzle_v()); +} + +static inline bool gk20a_kind_compressible(u8 k) +{ + return (k >= gmmu_pte_kind_z16_2c_v() && + k <= gmmu_pte_kind_z16_ms16_4cz_v()) || + (k >= gmmu_pte_kind_s8z24_1z_v() && + k <= gmmu_pte_kind_s8z24_ms16_4cszv_v()) || + (k >= gmmu_pte_kind_v8z24_ms4_vc12_1zv_v() && + k <= gmmu_pte_kind_v8z24_ms8_vc24_2cs_v()) || + (k >= gmmu_pte_kind_v8z24_ms4_vc12_2czv_v() && + k <= gmmu_pte_kind_v8z24_ms8_vc24_4cszv_v()) || + (k >= gmmu_pte_kind_z24s8_1z_v() && + k <= gmmu_pte_kind_z24s8_ms16_4cszv_v()) || + (k >= gmmu_pte_kind_z24v8_ms4_vc12_1zv_v() && + k <= gmmu_pte_kind_z24v8_ms8_vc24_2cs_v()) || + (k >= gmmu_pte_kind_z24v8_ms4_vc12_2czv_v() && + k <= gmmu_pte_kind_z24v8_ms8_vc24_4cszv_v()) || + (k >= gmmu_pte_kind_zf32_1z_v() && + k <= gmmu_pte_kind_zf32_ms16_2cz_v()) || + (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1cs_v() && + k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v()) || + (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1zv_v() && + k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v()) || + (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1cs_v() && + k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v()) || + (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1zv_v() && + k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v()) || + (k >= gmmu_pte_kind_zf32_x24s8_1cs_v() && + k <= gmmu_pte_kind_zf32_x24s8_ms16_1cs_v()) || + (k >= gmmu_pte_kind_zf32_x24s8_2cszv_v() && + k <= gmmu_pte_kind_c32_ms2_2cbr_v()) || + (k == gmmu_pte_kind_c32_ms2_2cra_v()) || + (k >= gmmu_pte_kind_c32_ms4_2c_v() && + k <= gmmu_pte_kind_c64_ms2_2cbr_v()) || + (k == gmmu_pte_kind_c64_ms2_2cra_v()) || + (k >= gmmu_pte_kind_c64_ms4_2c_v() && + k <= gmmu_pte_kind_c128_ms8_ms16_2cr_v()); +} + +static inline bool gk20a_kind_zbc(u8 k) +{ + return (k >= gmmu_pte_kind_z16_2c_v() && + k <= gmmu_pte_kind_z16_ms16_2c_v()) || + (k >= gmmu_pte_kind_z16_4cz_v() && + k <= gmmu_pte_kind_z16_ms16_4cz_v()) || + (k >= gmmu_pte_kind_s8z24_2cz_v() && + k <= gmmu_pte_kind_s8z24_ms16_4cszv_v()) || + (k >= gmmu_pte_kind_v8z24_ms4_vc12_2cs_v() && + k <= gmmu_pte_kind_v8z24_ms8_vc24_2cs_v()) || + (k >= gmmu_pte_kind_v8z24_ms4_vc12_2czv_v() && + k <= gmmu_pte_kind_v8z24_ms8_vc24_2czv_v()) || + (k >= gmmu_pte_kind_v8z24_ms4_vc12_4cszv_v() && + k <= gmmu_pte_kind_v8z24_ms8_vc24_4cszv_v()) || + (k >= gmmu_pte_kind_z24s8_2cs_v() && + k <= gmmu_pte_kind_z24s8_ms16_4cszv_v()) || + (k >= gmmu_pte_kind_z24v8_ms4_vc12_2cs_v() && + k <= gmmu_pte_kind_z24v8_ms8_vc24_2cs_v()) || + (k >= gmmu_pte_kind_z24v8_ms4_vc12_2czv_v() && + k <= gmmu_pte_kind_z24v8_ms8_vc24_2czv_v()) || + (k >= gmmu_pte_kind_z24v8_ms4_vc12_4cszv_v() && + k <= gmmu_pte_kind_z24v8_ms8_vc24_4cszv_v()) || + (k >= gmmu_pte_kind_zf32_2cs_v() && + k <= gmmu_pte_kind_zf32_ms16_2cz_v()) || + (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1cs_v() && + k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v()) || + (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1czv_v() && + k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v()) || + (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1cs_v() && + k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v()) || + (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1czv_v() && + k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v()) || + (k >= gmmu_pte_kind_zf32_x24s8_1cs_v() && + k <= gmmu_pte_kind_zf32_x24s8_ms16_1cs_v()) || + (k >= 
gmmu_pte_kind_zf32_x24s8_2cszv_v() && + k <= gmmu_pte_kind_c32_2cra_v()) || + (k >= gmmu_pte_kind_c32_ms2_2c_v() && + k <= gmmu_pte_kind_c32_ms2_2cbr_v()) || + (k == gmmu_pte_kind_c32_ms2_2cra_v()) || + (k >= gmmu_pte_kind_c32_ms4_2c_v() && + k <= gmmu_pte_kind_c32_ms4_2cra_v()) || + (k >= gmmu_pte_kind_c32_ms8_ms16_2c_v() && + k <= gmmu_pte_kind_c64_2cra_v()) || + (k >= gmmu_pte_kind_c64_ms2_2c_v() && + k <= gmmu_pte_kind_c64_ms2_2cbr_v()) || + (k == gmmu_pte_kind_c64_ms2_2cra_v()) || + (k >= gmmu_pte_kind_c64_ms4_2c_v() && + k <= gmmu_pte_kind_c64_ms4_2cra_v()) || + (k >= gmmu_pte_kind_c64_ms8_ms16_2c_v() && + k <= gmmu_pte_kind_c128_ms8_ms16_2cr_v()); +} + +u8 gk20a_uc_kind_map[256]; +void gk20a_init_uncompressed_kind_map(void) +{ + int i; + for (i = 0; i < 256; i++) + gk20a_uc_kind_map[i] = gmmu_pte_kind_invalid_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_z16_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z16_2c_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z16_ms2_2c_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z16_ms4_2c_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z16_ms8_2c_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z16_2z_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z16_ms2_2z_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z16_ms4_2z_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z16_ms8_2z_v()] = + gmmu_pte_kind_z16_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_s8z24_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_s8z24_2cz_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms2_2cz_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms4_2cz_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms8_2cz_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_s8z24_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms2_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms4_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms8_2cs_v()] = + gmmu_pte_kind_s8z24_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc4_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc4_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc4_2czv_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc4_2zv_v()] = + gmmu_pte_kind_v8z24_ms4_vc4_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc8_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc8_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc8_2czv_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc8_2zv_v()] = + gmmu_pte_kind_v8z24_ms8_vc8_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc12_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc12_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc12_2czv_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc12_2zv_v()] = + gmmu_pte_kind_v8z24_ms4_vc12_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc24_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc24_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc24_2czv_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc24_2zv_v()] = + gmmu_pte_kind_v8z24_ms8_vc24_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_z24s8_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z24s8_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms2_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms4_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms8_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z24s8_2cz_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms2_2cz_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms4_2cz_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms8_2cz_v()] = + gmmu_pte_kind_z24s8_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_zf32_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_2cs_v()] = + 
gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms2_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms4_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms8_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_2cz_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms2_2cz_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms4_2cz_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms8_2cz_v()] = + gmmu_pte_kind_zf32_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cszv_v()] = + gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cszv_v()] = + gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cszv_v()] = + gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v()] = + gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cszv_v()] = + gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cszv_v()] = + gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cszv_v()] = + gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v()] = + gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_2cszv_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms2_2cszv_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms4_2cszv_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms8_2cszv_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms2_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms4_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms8_2cs_v()] = + gmmu_pte_kind_zf32_x24s8_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_c32_2c_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c32_2cba_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c32_2cra_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c32_2bra_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c32_ms2_2c_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c32_ms2_2cra_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2c_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2cbr_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2cba_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2cra_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2bra_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c32_ms8_ms16_2c_v()] = + 
gk20a_uc_kind_map[gmmu_pte_kind_c32_ms8_ms16_2cra_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c64_2c_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c64_2cbr_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c64_2cba_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c64_2cra_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c64_2bra_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c64_ms2_2c_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c64_ms2_2cra_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2c_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2cbr_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2cba_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2cra_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2bra_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c64_ms8_ms16_2c_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c64_ms8_ms16_2cra_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c128_2c_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c128_2cr_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c128_ms2_2c_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c128_ms2_2cr_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c128_ms4_2c_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c128_ms4_2cr_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c128_ms8_ms16_2c_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_c128_ms8_ms16_2cr_v()] = + gmmu_pte_kind_generic_16bx2_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc4_2czv_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc4_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc4_2zv_v()] = + gmmu_pte_kind_z24v8_ms4_vc4_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc12_2czv_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc12_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc12_2zv_v()] = + gmmu_pte_kind_z24v8_ms4_vc12_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc8_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc8_2czv_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc8_2zv_v()] = + gmmu_pte_kind_z24v8_ms8_vc8_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc24_2cs_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc24_2czv_v()] = + gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc24_2zv_v()] = + gmmu_pte_kind_z24v8_ms8_vc24_v(); + + gk20a_uc_kind_map[gmmu_pte_kind_x8c24_v()] = + gmmu_pte_kind_x8c24_v(); +} + +u16 gk20a_kind_attr[256]; +void gk20a_init_kind_attr(void) +{ + u16 k; + for (k = 0; k < 256; k++) { + gk20a_kind_attr[k] = 0; + if (gk20a_kind_supported((u8)k)) + gk20a_kind_attr[k] |= GK20A_KIND_ATTR_SUPPORTED; + if (gk20a_kind_compressible((u8)k)) + gk20a_kind_attr[k] |= GK20A_KIND_ATTR_COMPRESSIBLE; + if (gk20a_kind_z((u8)k)) + gk20a_kind_attr[k] |= GK20A_KIND_ATTR_Z; + if (gk20a_kind_c((u8)k)) + gk20a_kind_attr[k] |= GK20A_KIND_ATTR_C; + if (gk20a_kind_zbc((u8)k)) + gk20a_kind_attr[k] |= GK20A_KIND_ATTR_ZBC; + } +} diff --git a/drivers/gpu/nvgpu/gk20a/kind_gk20a.h b/drivers/gpu/nvgpu/gk20a/kind_gk20a.h new file mode 100644 index 00000000..93f011d4 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/kind_gk20a.h @@ -0,0 +1,67 @@ +/* + * drivers/video/tegra/host/gk20a/kind_gk20a.h + * + * GK20A memory kind management + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef __KIND_GK20A_H__ +#define __KIND_GK20A_H__ + + +void gk20a_init_uncompressed_kind_map(void); +void gk20a_init_kind_attr(void); + +extern u16 gk20a_kind_attr[]; +#define NV_KIND_DEFAULT -1 + +#define GK20A_KIND_ATTR_SUPPORTED BIT(0) +#define GK20A_KIND_ATTR_COMPRESSIBLE BIT(1) +#define GK20A_KIND_ATTR_Z BIT(2) +#define GK20A_KIND_ATTR_C BIT(3) +#define GK20A_KIND_ATTR_ZBC BIT(4) + +static inline bool gk20a_kind_is_supported(u8 k) +{ + return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_SUPPORTED); +} +static inline bool gk20a_kind_is_compressible(u8 k) +{ + return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_COMPRESSIBLE); +} + +static inline bool gk20a_kind_is_z(u8 k) +{ + return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_Z); +} + +static inline bool gk20a_kind_is_c(u8 k) +{ + return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_C); +} +static inline bool gk20a_kind_is_zbc(u8 k) +{ + return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_ZBC); +} + +/* maps kind to its uncompressed version */ +extern u8 gk20a_uc_kind_map[]; +static inline u8 gk20a_get_uncompressed_kind(u8 k) +{ + return gk20a_uc_kind_map[k]; +} + +#endif /* __KIND_GK20A_H__ */ diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c new file mode 100644 index 00000000..cbb27cc7 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c @@ -0,0 +1,243 @@ +/* + * drivers/video/tegra/host/gk20a/ltc_common.c + * + * GK20A Graphics + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include + +#include "gk20a.h" +#include "gr_gk20a.h" + +static int gk20a_determine_L2_size_bytes(struct gk20a *g) +{ + const u32 gpuid = GK20A_GPUID(g->gpu_characteristics.arch, + g->gpu_characteristics.impl); + u32 lts_per_ltc; + u32 ways; + u32 sets; + u32 bytes_per_line; + u32 active_ltcs; + u32 cache_size; + + u32 tmp; + u32 active_sets_value; + + tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r()); + ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp)); + + active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp); + if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) { + sets = 64; + } else if (active_sets_value == + ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) { + sets = 32; + } else if (active_sets_value == + ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) { + sets = 16; + } else { + dev_err(dev_from_gk20a(g), + "Unknown constant %u for active sets", + (unsigned)active_sets_value); + sets = 0; + } + + active_ltcs = g->gr.num_fbps; + + /* chip-specific values */ + switch (gpuid) { + case GK20A_GPUID_GK20A: + lts_per_ltc = 1; + bytes_per_line = 128; + break; + + default: + dev_err(dev_from_gk20a(g), "Unknown GPU id 0x%02x\n", + (unsigned)gpuid); + lts_per_ltc = 0; + bytes_per_line = 0; + } + + cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line; + + return cache_size; +} + +/* + * Set the maximum number of ways that can have the "EVIST_LAST" class. + */ +static void gk20a_ltc_set_max_ways_evict_last(struct gk20a *g, u32 max_ways) +{ + u32 mgmt_reg; + + mgmt_reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_r()) & + ~ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(~0); + mgmt_reg |= ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(max_ways); + + gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_r(), mgmt_reg); +} + +/* + * Sets the ZBC color for the passed index. + */ +static void gk20a_ltc_set_zbc_color_entry(struct gk20a *g, + struct zbc_entry *color_val, + u32 index) +{ + u32 i; + u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; + + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), + ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); + + for (i = 0; + i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i), + color_val->color_l2[i]); +} + +/* + * Sets the ZBC depth for the passed index. + */ +static void gk20a_ltc_set_zbc_depth_entry(struct gk20a *g, + struct zbc_entry *depth_val, + u32 index) +{ + u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; + + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), + ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); + + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), + depth_val->depth); +} + +/* + * Clear the L2 ZBC color table for the passed index. + */ +static void gk20a_ltc_clear_zbc_color_entry(struct gk20a *g, u32 index) +{ + u32 i; + u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; + + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), + ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); + + for (i = 0; + i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) + gk20a_writel(g, + ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i), 0); +} + +/* + * Clear the L2 ZBC depth entry for the passed index. 
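+ * Like the set/clear helpers above, the index written to the hardware is
+ * biased by GK20A_STARTOF_ZBC_TABLE; entries below that offset are presumably
+ * reserved for the default (no-ZBC) value.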
+ */ +static void gk20a_ltc_clear_zbc_depth_entry(struct gk20a *g, u32 index) +{ + u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; + + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), + ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); + + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), 0); +} + +static int gk20a_ltc_init_zbc(struct gk20a *g, struct gr_gk20a *gr) +{ + u32 i, j; + + /* reset zbc clear */ + for (i = 0; i < GK20A_SIZEOF_ZBC_TABLE - + GK20A_STARTOF_ZBC_TABLE; i++) { + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), + (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) & + ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) | + ltc_ltcs_ltss_dstg_zbc_index_address_f( + i + GK20A_STARTOF_ZBC_TABLE)); + for (j = 0; j < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); j++) + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(j), 0); + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), 0); + } + + gr_gk20a_clear_zbc_table(g, gr); + gr_gk20a_load_zbc_default_table(g, gr); + + return 0; +} + +static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) +{ + u32 compbit_base_post_divide; + u64 compbit_base_post_multiply64; + u64 compbit_store_base_iova = + NV_MC_SMMU_VADDR_TRANSLATE(gr->compbit_store.base_iova); + u64 compbit_base_post_divide64 = (compbit_store_base_iova >> + ltc_ltcs_ltss_cbc_base_alignment_shift_v()); + + do_div(compbit_base_post_divide64, gr->num_fbps); + compbit_base_post_divide = u64_lo32(compbit_base_post_divide64); + + compbit_base_post_multiply64 = ((u64)compbit_base_post_divide * + gr->num_fbps) << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + + if (compbit_base_post_multiply64 < compbit_store_base_iova) + compbit_base_post_divide++; + + gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(), + compbit_base_post_divide); + + gk20a_dbg(gpu_dbg_info | gpu_dbg_map | gpu_dbg_pte, + "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n", + (u32)(compbit_store_base_iova >> 32), + (u32)(compbit_store_base_iova & 0xffffffff), + compbit_base_post_divide); +} + +/* Flushes the compression bit cache as well as "data". + * Note: the name here is a bit of a misnomer. ELPG uses this + * internally... but ELPG doesn't have to be on to do it manually. + */ +static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g) +{ + u32 data; + s32 retry = 100; + + gk20a_dbg_fn(""); + + /* Make sure all previous writes are committed to the L2. There's no + guarantee that writes are to DRAM. This will be a sysmembar internal + to the L2. */ + gk20a_writel(g, ltc_ltss_g_elpg_r(), + ltc_ltss_g_elpg_flush_pending_f()); + do { + data = gk20a_readl(g, ltc_ltss_g_elpg_r()); + + if (ltc_ltss_g_elpg_flush_v(data) == + ltc_ltss_g_elpg_flush_pending_v()) { + gk20a_dbg_info("g_elpg_flush 0x%x", data); + retry--; + usleep_range(20, 40); + } else + break; + } while (retry >= 0 || !tegra_platform_is_silicon()); + + if (retry < 0) + gk20a_warn(dev_from_gk20a(g), + "g_elpg_flush too many retries"); + +} diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c new file mode 100644 index 00000000..08aedecd --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c @@ -0,0 +1,203 @@ +/* + * drivers/video/tegra/host/gk20a/ltc_gk20a.c + * + * GK20A Graphics + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include + +#include "hw_ltc_gk20a.h" +#include "hw_proj_gk20a.h" + +#include "ltc_common.c" + +static int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) +{ + struct device *d = dev_from_gk20a(g); + DEFINE_DMA_ATTRS(attrs); + dma_addr_t iova; + + /* max memory size (MB) to cover */ + u32 max_size = gr->max_comptag_mem; + /* one tag line covers 128KB */ + u32 max_comptag_lines = max_size << 3; + + u32 hw_max_comptag_lines = + ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); + + u32 cbc_param = + gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); + u32 comptags_per_cacheline = + ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); + u32 slices_per_fbp = + ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(cbc_param); + u32 cacheline_size = + 512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param); + + u32 compbit_backing_size; + + gk20a_dbg_fn(""); + + if (max_comptag_lines == 0) { + gr->compbit_store.size = 0; + return 0; + } + + if (max_comptag_lines > hw_max_comptag_lines) + max_comptag_lines = hw_max_comptag_lines; + + /* no hybrid fb */ + compbit_backing_size = + DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * + cacheline_size * slices_per_fbp * gr->num_fbps; + + /* aligned to 2KB * num_fbps */ + compbit_backing_size += + gr->num_fbps << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + + /* must be a multiple of 64KB */ + compbit_backing_size = roundup(compbit_backing_size, 64*1024); + + max_comptag_lines = + (compbit_backing_size * comptags_per_cacheline) / + cacheline_size * slices_per_fbp * gr->num_fbps; + + if (max_comptag_lines > hw_max_comptag_lines) + max_comptag_lines = hw_max_comptag_lines; + + gk20a_dbg_info("compbit backing store size : %d", + compbit_backing_size); + gk20a_dbg_info("max comptag lines : %d", + max_comptag_lines); + + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); + gr->compbit_store.size = compbit_backing_size; + gr->compbit_store.pages = dma_alloc_attrs(d, gr->compbit_store.size, + &iova, GFP_KERNEL, &attrs); + if (!gr->compbit_store.pages) { + gk20a_err(dev_from_gk20a(g), "failed to allocate " + "backing store for compbit : size %d", + compbit_backing_size); + return -ENOMEM; + } + gr->compbit_store.base_iova = iova; + + gk20a_allocator_init(&gr->comp_tags, "comptag", + 1, /* start */ + max_comptag_lines - 1, /* length */ + 1); /* align */ + + return 0; +} + +static int gk20a_ltc_clear_comptags(struct gk20a *g, u32 min, u32 max) +{ + struct gr_gk20a *gr = &g->gr; + u32 fbp, slice, ctrl1, val; + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); + u32 delay = GR_IDLE_CHECK_DEFAULT; + u32 slices_per_fbp = + ltc_ltcs_ltss_cbc_param_slices_per_fbp_v( + gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); + + gk20a_dbg_fn(""); + + if (gr->compbit_store.size == 0) + return 0; + + gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(), + ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min)); + gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(), + ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max)); + gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), + gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | + ltc_ltcs_ltss_cbc_ctrl1_clear_active_f()); + + for (fbp = 0; fbp <
gr->num_fbps; fbp++) { + for (slice = 0; slice < slices_per_fbp; slice++) { + + delay = GR_IDLE_CHECK_DEFAULT; + + ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + + fbp * proj_ltc_stride_v() + + slice * proj_lts_stride_v(); + + do { + val = gk20a_readl(g, ctrl1); + if (ltc_ltcs_ltss_cbc_ctrl1_clear_v(val) != + ltc_ltcs_ltss_cbc_ctrl1_clear_active_v()) + break; + + usleep_range(delay, delay * 2); + delay = min_t(u32, delay << 1, + GR_IDLE_CHECK_MAX); + + } while (time_before(jiffies, end_jiffies) || + !tegra_platform_is_silicon()); + + if (!time_before(jiffies, end_jiffies)) { + gk20a_err(dev_from_gk20a(g), + "comp tag clear timeout\n"); + return -EBUSY; + } + } + } + + return 0; +} + + +#ifdef CONFIG_DEBUG_FS +static void gk20a_ltc_sync_debugfs(struct gk20a *g) +{ + u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(); + + spin_lock(&g->debugfs_lock); + if (g->mm.ltc_enabled != g->mm.ltc_enabled_debug) { + u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r()); + if (g->mm.ltc_enabled_debug) + /* bypass disabled (normal caching ops)*/ + reg &= ~reg_f; + else + /* bypass enabled (no caching) */ + reg |= reg_f; + + gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg); + g->mm.ltc_enabled = g->mm.ltc_enabled_debug; + } + spin_unlock(&g->debugfs_lock); +} +#endif + +void gk20a_init_ltc(struct gpu_ops *gops) +{ + gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes; + gops->ltc.set_max_ways_evict_last = gk20a_ltc_set_max_ways_evict_last; + gops->ltc.init_comptags = gk20a_ltc_init_comptags; + gops->ltc.clear_comptags = gk20a_ltc_clear_comptags; + gops->ltc.set_zbc_color_entry = gk20a_ltc_set_zbc_color_entry; + gops->ltc.set_zbc_depth_entry = gk20a_ltc_set_zbc_depth_entry; + gops->ltc.clear_zbc_color_entry = gk20a_ltc_clear_zbc_color_entry; + gops->ltc.clear_zbc_depth_entry = gk20a_ltc_clear_zbc_depth_entry; + gops->ltc.init_zbc = gk20a_ltc_init_zbc; + gops->ltc.init_cbc = gk20a_ltc_init_cbc; +#ifdef CONFIG_DEBUG_FS + gops->ltc.sync_debugfs = gk20a_ltc_sync_debugfs; +#endif + gops->ltc.elpg_flush = gk20a_mm_g_elpg_flush_locked; +} diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h new file mode 100644 index 00000000..208811b2 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h @@ -0,0 +1,21 @@ +/* + * GK20A L2 + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _NVHOST_GK20A_LTC +#define _NVHOST_GK20A_LTC +struct gk20a; + +void gk20a_init_ltc(struct gpu_ops *gops); +#endif diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c new file mode 100644 index 00000000..b22df5e8 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -0,0 +1,2984 @@ +/* + * drivers/video/tegra/host/gk20a/mm_gk20a.c + * + * GK20A memory management + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gk20a.h" +#include "mm_gk20a.h" +#include "hw_gmmu_gk20a.h" +#include "hw_fb_gk20a.h" +#include "hw_bus_gk20a.h" +#include "hw_ram_gk20a.h" +#include "hw_mc_gk20a.h" +#include "hw_flush_gk20a.h" +#include "hw_ltc_gk20a.h" + +#include "kind_gk20a.h" + +#ifdef CONFIG_ARM64 +#define outer_flush_range(a, b) +#define __cpuc_flush_dcache_area __flush_dcache_area +#endif + +/* + * GPU mapping life cycle + * ====================== + * + * Kernel mappings + * --------------- + * + * Kernel mappings are created through vm.map(..., false): + * + * - Mappings to the same allocations are reused and refcounted. + * - This path does not support deferred unmapping (i.e. kernel must wait for + * all hw operations on the buffer to complete before unmapping). + * - References to dmabuf are owned and managed by the (kernel) clients of + * the gk20a_vm layer. + * + * + * User space mappings + * ------------------- + * + * User space mappings are created through as.map_buffer -> vm.map(..., true): + * + * - Mappings to the same allocations are reused and refcounted. + * - This path supports deferred unmapping (i.e. we delay the actual unmapping + * until all hw operations have completed). + * - References to dmabuf are owned and managed by the vm_gk20a + * layer itself. vm.map acquires these refs, and sets + * mapped_buffer->own_mem_ref to record that we must release the refs when we + * actually unmap. + * + */ + +static inline int vm_aspace_id(struct vm_gk20a *vm) +{ + /* -1 is bar1 or pmu, etc. */ + return vm->as_share ? 
vm->as_share->id : -1; +} +static inline u32 hi32(u64 f) +{ + return (u32)(f >> 32); +} +static inline u32 lo32(u64 f) +{ + return (u32)(f & 0xffffffff); +} + +#define FLUSH_CPU_DCACHE(va, pa, size) \ + do { \ + __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \ + outer_flush_range(pa, pa + (size_t)(size)); \ + } while (0) + +static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer); +static struct mapped_buffer_node *find_mapped_buffer_locked( + struct rb_root *root, u64 addr); +static struct mapped_buffer_node *find_mapped_buffer_reverse_locked( + struct rb_root *root, struct dma_buf *dmabuf, + u32 kind); +static int update_gmmu_ptes_locked(struct vm_gk20a *vm, + enum gmmu_pgsz_gk20a pgsz_idx, + struct sg_table *sgt, + u64 first_vaddr, u64 last_vaddr, + u8 kind_v, u32 ctag_offset, bool cacheable, + int rw_flag); +static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i); +static void gk20a_vm_remove_support(struct vm_gk20a *vm); + + +/* note: keep the page sizes sorted lowest to highest here */ +static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K }; +static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 }; +static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL, + 0x1ffffLL }; +static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL }; + +struct gk20a_comptags { + u32 offset; + u32 lines; +}; + +struct gk20a_dmabuf_priv { + struct mutex lock; + + struct gk20a_allocator *comptag_allocator; + struct gk20a_comptags comptags; + + struct dma_buf_attachment *attach; + struct sg_table *sgt; + + int pin_count; +}; + +static void gk20a_mm_delete_priv(void *_priv) +{ + struct gk20a_dmabuf_priv *priv = _priv; + if (!priv) + return; + + if (priv->comptags.lines) { + BUG_ON(!priv->comptag_allocator); + priv->comptag_allocator->free(priv->comptag_allocator, + priv->comptags.offset, + priv->comptags.lines); + } + + kfree(priv); +} + +struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf) +{ + struct gk20a_dmabuf_priv *priv; + + priv = dma_buf_get_drvdata(dmabuf, dev); + if (WARN_ON(!priv)) + return ERR_PTR(-EINVAL); + + mutex_lock(&priv->lock); + + if (priv->pin_count == 0) { + priv->attach = dma_buf_attach(dmabuf, dev); + if (IS_ERR(priv->attach)) { + mutex_unlock(&priv->lock); + return (struct sg_table *)priv->attach; + } + + priv->sgt = dma_buf_map_attachment(priv->attach, + DMA_BIDIRECTIONAL); + if (IS_ERR(priv->sgt)) { + dma_buf_detach(dmabuf, priv->attach); + mutex_unlock(&priv->lock); + return priv->sgt; + } + } + + priv->pin_count++; + mutex_unlock(&priv->lock); + return priv->sgt; +} + +void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, + struct sg_table *sgt) +{ + struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); + dma_addr_t dma_addr; + + if (IS_ERR(priv) || !priv) + return; + + mutex_lock(&priv->lock); + WARN_ON(priv->sgt != sgt); + priv->pin_count--; + WARN_ON(priv->pin_count < 0); + dma_addr = sg_dma_address(priv->sgt->sgl); + if (priv->pin_count == 0) { + dma_buf_unmap_attachment(priv->attach, priv->sgt, + DMA_BIDIRECTIONAL); + dma_buf_detach(dmabuf, priv->attach); + } + mutex_unlock(&priv->lock); +} + + +static void gk20a_get_comptags(struct device *dev, + struct dma_buf *dmabuf, + struct gk20a_comptags *comptags) +{ + struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); + + if (!comptags) + return; + + if (!priv) { + comptags->lines = 0; + comptags->offset = 0; + return; + } + + *comptags = priv->comptags; +} + +static int 
gk20a_alloc_comptags(struct device *dev, + struct dma_buf *dmabuf, + struct gk20a_allocator *allocator, + int lines) +{ + struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); + u32 offset = 0; + int err; + + if (!priv) + return -ENOSYS; + + if (!lines) + return -EINVAL; + + /* store the allocator so we can use it when we free the ctags */ + priv->comptag_allocator = allocator; + err = allocator->alloc(allocator, &offset, lines); + if (!err) { + priv->comptags.lines = lines; + priv->comptags.offset = offset; + } + return err; +} + + + + +static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) +{ + gk20a_dbg_fn(""); + if (g->ops.fb.reset) + g->ops.fb.reset(g); + + if (g->ops.fb.init_fs_state) + g->ops.fb.init_fs_state(g); + + return 0; +} + +void gk20a_remove_mm_support(struct mm_gk20a *mm) +{ + struct gk20a *g = mm->g; + struct device *d = dev_from_gk20a(g); + struct vm_gk20a *vm = &mm->bar1.vm; + struct inst_desc *inst_block = &mm->bar1.inst_block; + + gk20a_dbg_fn(""); + + if (inst_block->cpuva) + dma_free_coherent(d, inst_block->size, + inst_block->cpuva, inst_block->iova); + inst_block->cpuva = NULL; + inst_block->iova = 0; + + gk20a_vm_remove_support(vm); +} + +int gk20a_init_mm_setup_sw(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + int i; + + gk20a_dbg_fn(""); + + if (mm->sw_ready) { + gk20a_dbg_fn("skip init"); + return 0; + } + + mm->g = g; + mutex_init(&mm->tlb_lock); + mutex_init(&mm->l2_op_lock); + mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big]; + mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big]; + mm->pde_stride = mm->big_page_size << 10; + mm->pde_stride_shift = ilog2(mm->pde_stride); + BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */ + + for (i = 0; i < ARRAY_SIZE(gmmu_page_sizes); i++) { + + u32 num_ptes, pte_space, num_pages; + + /* assuming "full" page tables */ + num_ptes = mm->pde_stride / gmmu_page_sizes[i]; + + pte_space = num_ptes * gmmu_pte__size_v(); + /* allocate whole pages */ + pte_space = roundup(pte_space, PAGE_SIZE); + + num_pages = pte_space / PAGE_SIZE; + /* make sure "order" is viable */ + BUG_ON(!is_power_of_2(num_pages)); + + mm->page_table_sizing[i].num_ptes = num_ptes; + mm->page_table_sizing[i].order = ilog2(num_pages); + } + + /*TBD: make channel vm size configurable */ + mm->channel.size = 1ULL << NV_GMMU_VA_RANGE; + + gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20)); + + gk20a_dbg_info("small page-size (%dKB) pte array: %dKB", + gmmu_page_sizes[gmmu_page_size_small] >> 10, + (mm->page_table_sizing[gmmu_page_size_small].num_ptes * + gmmu_pte__size_v()) >> 10); + + gk20a_dbg_info("big page-size (%dKB) pte array: %dKB", + gmmu_page_sizes[gmmu_page_size_big] >> 10, + (mm->page_table_sizing[gmmu_page_size_big].num_ptes * + gmmu_pte__size_v()) >> 10); + + + gk20a_init_bar1_vm(mm); + + mm->remove_support = gk20a_remove_mm_support; + mm->sw_ready = true; + + gk20a_dbg_fn("done"); + return 0; +} + +/* make sure gk20a_init_mm_support is called before */ +static int gk20a_init_mm_setup_hw(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + struct inst_desc *inst_block = &mm->bar1.inst_block; + phys_addr_t inst_pa = inst_block->cpu_pa; + + gk20a_dbg_fn(""); + + /* set large page size in fb + * note this is very early on, can we defer it ? 
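+ * Only the 128KB big-page size is handled below; any other configured size
+ * trips the BUG_ON.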
*/ + { + u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r()); + + if (gmmu_page_sizes[gmmu_page_size_big] == SZ_128K) + fb_mmu_ctrl = (fb_mmu_ctrl & + ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) | + fb_mmu_ctrl_vm_pg_size_128kb_f(); + else + BUG_ON(1); /* no support/testing for larger ones yet */ + + gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl); + } + + inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a()); + gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa); + + /* this is very early in init... can we defer this? */ + { + gk20a_writel(g, bus_bar1_block_r(), + bus_bar1_block_target_vid_mem_f() | + bus_bar1_block_mode_virtual_f() | + bus_bar1_block_ptr_f(inst_pa)); + } + + gk20a_dbg_fn("done"); + return 0; +} + +int gk20a_init_mm_support(struct gk20a *g) +{ + u32 err; + + err = gk20a_init_mm_reset_enable_hw(g); + if (err) + return err; + + err = gk20a_init_mm_setup_sw(g); + if (err) + return err; + + err = gk20a_init_mm_setup_hw(g); + if (err) + return err; + + return err; +} + +#ifdef CONFIG_GK20A_PHYS_PAGE_TABLES +static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order, + void **handle, + struct sg_table **sgt, + size_t *size) +{ + u32 num_pages = 1 << order; + u32 len = num_pages * PAGE_SIZE; + int err; + struct page *pages; + + gk20a_dbg_fn(""); + + pages = alloc_pages(GFP_KERNEL, order); + if (!pages) { + gk20a_dbg(gpu_dbg_pte, "alloc_pages failed\n"); + goto err_out; + } + *sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { + gk20a_dbg(gpu_dbg_pte, "cannot allocate sg table"); + goto err_alloced; + } + err = sg_alloc_table(*sgt, 1, GFP_KERNEL); + if (err) { + gk20a_dbg(gpu_dbg_pte, "sg_alloc_table failed\n"); + goto err_sg_table; + } + sg_set_page((*sgt)->sgl, pages, len, 0); + *handle = page_address(pages); + memset(*handle, 0, len); + *size = len; + FLUSH_CPU_DCACHE(*handle, sg_phys((*sgt)->sgl), len); + + return 0; + +err_sg_table: + kfree(*sgt); +err_alloced: + __free_pages(pages, order); +err_out: + return -ENOMEM; +} + +static void free_gmmu_pages(struct vm_gk20a *vm, void *handle, + struct sg_table *sgt, u32 order, + size_t size) +{ + gk20a_dbg_fn(""); + BUG_ON(sgt == NULL); + free_pages((unsigned long)handle, order); + sg_free_table(sgt); + kfree(sgt); +} + +static int map_gmmu_pages(void *handle, struct sg_table *sgt, + void **va, size_t size) +{ + FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length); + *va = handle; + return 0; +} + +static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va) +{ + FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length); +} +#else +static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order, + void **handle, + struct sg_table **sgt, + size_t *size) +{ + struct device *d = dev_from_vm(vm); + u32 num_pages = 1 << order; + u32 len = num_pages * PAGE_SIZE; + dma_addr_t iova; + DEFINE_DMA_ATTRS(attrs); + struct page **pages; + int err = 0; + + gk20a_dbg_fn(""); + + *size = len; + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); + pages = dma_alloc_attrs(d, len, &iova, GFP_KERNEL, &attrs); + if (!pages) { + gk20a_err(d, "memory allocation failed\n"); + goto err_out; + } + + err = gk20a_get_sgtable_from_pages(d, sgt, pages, + iova, len); + if (err) { + gk20a_err(d, "sgt allocation failed\n"); + goto err_free; + } + + *handle = (void *)pages; + + return 0; + +err_free: + dma_free_attrs(d, len, pages, iova, &attrs); + pages = NULL; + iova = 0; +err_out: + return -ENOMEM; +} + +static void free_gmmu_pages(struct vm_gk20a *vm, void *handle, + struct sg_table *sgt, u32 order, + size_t size) +{ + struct device *d 
= dev_from_vm(vm); + u64 iova; + DEFINE_DMA_ATTRS(attrs); + struct page **pages = (struct page **)handle; + + gk20a_dbg_fn(""); + BUG_ON(sgt == NULL); + + iova = sg_dma_address(sgt->sgl); + + gk20a_free_sgtable(&sgt); + + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); + dma_free_attrs(d, size, pages, iova, &attrs); + pages = NULL; + iova = 0; +} + +static int map_gmmu_pages(void *handle, struct sg_table *sgt, + void **kva, size_t size) +{ + int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct page **pages = (struct page **)handle; + gk20a_dbg_fn(""); + + *kva = vmap(pages, count, 0, pgprot_dmacoherent(PAGE_KERNEL)); + if (!(*kva)) + return -ENOMEM; + + return 0; +} + +static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va) +{ + gk20a_dbg_fn(""); + vunmap(va); +} +#endif + +/* allocate a phys contig region big enough for a full + * sized gmmu page table for the given gmmu_page_size. + * the whole range is zeroed so it's "invalid"/will fault + */ + +static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm, + enum gmmu_pgsz_gk20a gmmu_pgsz_idx, + struct page_table_gk20a *pte) +{ + int err; + u32 pte_order; + void *handle = NULL; + struct sg_table *sgt; + size_t size; + + gk20a_dbg_fn(""); + + /* allocate enough pages for the table */ + pte_order = vm->mm->page_table_sizing[gmmu_pgsz_idx].order; + + err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt, &size); + if (err) + return err; + + gk20a_dbg(gpu_dbg_pte, "pte = 0x%p, addr=%08llx, size %d", + pte, gk20a_mm_iova_addr(sgt->sgl), pte_order); + + pte->ref = handle; + pte->sgt = sgt; + pte->size = size; + + return 0; +} + +/* given address range (inclusive) determine the pdes crossed */ +static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm, + u64 addr_lo, u64 addr_hi, + u32 *pde_lo, u32 *pde_hi) +{ + *pde_lo = (u32)(addr_lo >> vm->mm->pde_stride_shift); + *pde_hi = (u32)(addr_hi >> vm->mm->pde_stride_shift); + gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d", + addr_lo, addr_hi, vm->mm->pde_stride_shift); + gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d", + *pde_lo, *pde_hi); +} + +static inline u32 *pde_from_index(struct vm_gk20a *vm, u32 i) +{ + return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v()); +} + +static inline u32 pte_index_from_vaddr(struct vm_gk20a *vm, + u64 addr, enum gmmu_pgsz_gk20a pgsz_idx) +{ + u32 ret; + /* mask off pde part */ + addr = addr & ((((u64)1) << vm->mm->pde_stride_shift) - ((u64)1)); + /* shift over to get pte index. note assumption that pte index + * doesn't leak over into the high 32b */ + ret = (u32)(addr >> gmmu_page_shifts[pgsz_idx]); + + gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret); + return ret; +} + +static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page, + u32 *pte_offset) +{ + /* ptes are 8B regardless of pagesize */ + /* pte space pages are 4KB. 
so 512 ptes per 4KB page*/ + *pte_page = i >> 9; + + /* this offset is a pte offset, not a byte offset */ + *pte_offset = i & ((1<<9)-1); + + gk20a_dbg(gpu_dbg_pte, "i=0x%x pte_page=0x%x pte_offset=0x%x", + i, *pte_page, *pte_offset); +} + + +/* + * given a pde index/page table number make sure it has + * backing store and if not go ahead allocate it and + * record it in the appropriate pde + */ +static int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm, + u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx) +{ + int err; + struct page_table_gk20a *pte = + vm->pdes.ptes[gmmu_pgsz_idx] + i; + + gk20a_dbg_fn(""); + + /* if it's already in place it's valid */ + if (pte->ref) + return 0; + + gk20a_dbg(gpu_dbg_pte, "alloc %dKB ptes for pde %d", + gmmu_page_sizes[gmmu_pgsz_idx]/1024, i); + + err = zalloc_gmmu_page_table_gk20a(vm, gmmu_pgsz_idx, pte); + if (err) + return err; + + /* rewrite pde */ + update_gmmu_pde_locked(vm, i); + + return 0; +} + +static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm, + u64 addr) +{ + struct vm_reserved_va_node *va_node; + list_for_each_entry(va_node, &vm->reserved_va_list, reserved_va_list) + if (addr >= va_node->vaddr_start && + addr < (u64)va_node->vaddr_start + (u64)va_node->size) + return va_node; + + return NULL; +} + +int gk20a_vm_get_buffers(struct vm_gk20a *vm, + struct mapped_buffer_node ***mapped_buffers, + int *num_buffers) +{ + struct mapped_buffer_node *mapped_buffer; + struct mapped_buffer_node **buffer_list; + struct rb_node *node; + int i = 0; + + mutex_lock(&vm->update_gmmu_lock); + + buffer_list = kzalloc(sizeof(*buffer_list) * + vm->num_user_mapped_buffers, GFP_KERNEL); + if (!buffer_list) { + mutex_unlock(&vm->update_gmmu_lock); + return -ENOMEM; + } + + node = rb_first(&vm->mapped_buffers); + while (node) { + mapped_buffer = + container_of(node, struct mapped_buffer_node, node); + if (mapped_buffer->user_mapped) { + buffer_list[i] = mapped_buffer; + kref_get(&mapped_buffer->ref); + i++; + } + node = rb_next(&mapped_buffer->node); + } + + BUG_ON(i != vm->num_user_mapped_buffers); + + *num_buffers = vm->num_user_mapped_buffers; + *mapped_buffers = buffer_list; + + mutex_unlock(&vm->update_gmmu_lock); + + return 0; +} + +static void gk20a_vm_unmap_locked_kref(struct kref *ref) +{ + struct mapped_buffer_node *mapped_buffer = + container_of(ref, struct mapped_buffer_node, ref); + gk20a_vm_unmap_locked(mapped_buffer); +} + +void gk20a_vm_put_buffers(struct vm_gk20a *vm, + struct mapped_buffer_node **mapped_buffers, + int num_buffers) +{ + int i; + + mutex_lock(&vm->update_gmmu_lock); + + for (i = 0; i < num_buffers; ++i) + kref_put(&mapped_buffers[i]->ref, + gk20a_vm_unmap_locked_kref); + + mutex_unlock(&vm->update_gmmu_lock); + + kfree(mapped_buffers); +} + +static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset) +{ + struct device *d = dev_from_vm(vm); + int retries; + struct mapped_buffer_node *mapped_buffer; + + mutex_lock(&vm->update_gmmu_lock); + + mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset); + if (!mapped_buffer) { + mutex_unlock(&vm->update_gmmu_lock); + gk20a_err(d, "invalid addr to unmap 0x%llx", offset); + return; + } + + if (mapped_buffer->flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { + mutex_unlock(&vm->update_gmmu_lock); + + retries = 1000; + while (retries) { + if (atomic_read(&mapped_buffer->ref.refcount) == 1) + break; + retries--; + udelay(50); + } + if (!retries) + gk20a_err(d, "sync-unmap failed on 0x%llx", + offset); + mutex_lock(&vm->update_gmmu_lock); + } + + 
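+ /* Drop this buffer's user-mapping count and release the reference taken at
+ * map time; the final kref_put() tears down the GMMU mapping via
+ * gk20a_vm_unmap_locked_kref(). */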
mapped_buffer->user_mapped--; + if (mapped_buffer->user_mapped == 0) + vm->num_user_mapped_buffers--; + kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref); + + mutex_unlock(&vm->update_gmmu_lock); +} + +static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, + u64 size, + enum gmmu_pgsz_gk20a gmmu_pgsz_idx) + +{ + struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx]; + int err; + u64 offset; + u32 start_page_nr = 0, num_pages; + u64 gmmu_page_size = gmmu_page_sizes[gmmu_pgsz_idx]; + + if (gmmu_pgsz_idx >= ARRAY_SIZE(gmmu_page_sizes)) { + dev_warn(dev_from_vm(vm), + "invalid page size requested in gk20a vm alloc"); + return -EINVAL; + } + + if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) { + dev_warn(dev_from_vm(vm), + "unsupported page size requested"); + return -EINVAL; + + } + + /* be certain we round up to gmmu_page_size if needed */ + /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */ + size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1); + + gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, + gmmu_page_sizes[gmmu_pgsz_idx]>>10); + + /* The vma allocator represents page accounting. */ + num_pages = size >> gmmu_page_shifts[gmmu_pgsz_idx]; + + err = vma->alloc(vma, &start_page_nr, num_pages); + + if (err) { + gk20a_err(dev_from_vm(vm), + "%s oom: sz=0x%llx", vma->name, size); + return 0; + } + + offset = (u64)start_page_nr << gmmu_page_shifts[gmmu_pgsz_idx]; + gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset); + + return offset; +} + +static int gk20a_vm_free_va(struct vm_gk20a *vm, + u64 offset, u64 size, + enum gmmu_pgsz_gk20a pgsz_idx) +{ + struct gk20a_allocator *vma = &vm->vma[pgsz_idx]; + u32 page_size = gmmu_page_sizes[pgsz_idx]; + u32 page_shift = gmmu_page_shifts[pgsz_idx]; + u32 start_page_nr, num_pages; + int err; + + gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", + vma->name, offset, size); + + start_page_nr = (u32)(offset >> page_shift); + num_pages = (u32)((size + page_size - 1) >> page_shift); + + err = vma->free(vma, start_page_nr, num_pages); + if (err) { + gk20a_err(dev_from_vm(vm), + "not found: offset=0x%llx, sz=0x%llx", + offset, size); + } + + return err; +} + +static int insert_mapped_buffer(struct rb_root *root, + struct mapped_buffer_node *mapped_buffer) +{ + struct rb_node **new_node = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new_node) { + struct mapped_buffer_node *cmp_with = + container_of(*new_node, struct mapped_buffer_node, + node); + + parent = *new_node; + + if (cmp_with->addr > mapped_buffer->addr) /* u64 cmp */ + new_node = &((*new_node)->rb_left); + else if (cmp_with->addr != mapped_buffer->addr) /* u64 cmp */ + new_node = &((*new_node)->rb_right); + else + return -EINVAL; /* no fair dup'ing */ + } + + /* Add new node and rebalance tree.
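+ * The tree is keyed by GPU virtual address (mapped_buffer->addr), so the
+ * duplicate check above rejects re-insertion of the same address and later
+ * lookups by offset stay O(log n).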
*/ + rb_link_node(&mapped_buffer->node, parent, new_node); + rb_insert_color(&mapped_buffer->node, root); + + return 0; +} + +static struct mapped_buffer_node *find_mapped_buffer_reverse_locked( + struct rb_root *root, struct dma_buf *dmabuf, + u32 kind) +{ + struct rb_node *node = rb_first(root); + while (node) { + struct mapped_buffer_node *mapped_buffer = + container_of(node, struct mapped_buffer_node, node); + if (mapped_buffer->dmabuf == dmabuf && + kind == mapped_buffer->kind) + return mapped_buffer; + node = rb_next(&mapped_buffer->node); + } + return 0; +} + +static struct mapped_buffer_node *find_mapped_buffer_locked( + struct rb_root *root, u64 addr) +{ + + struct rb_node *node = root->rb_node; + while (node) { + struct mapped_buffer_node *mapped_buffer = + container_of(node, struct mapped_buffer_node, node); + if (mapped_buffer->addr > addr) /* u64 cmp */ + node = node->rb_left; + else if (mapped_buffer->addr != addr) /* u64 cmp */ + node = node->rb_right; + else + return mapped_buffer; + } + return 0; +} + +static struct mapped_buffer_node *find_mapped_buffer_range_locked( + struct rb_root *root, u64 addr) +{ + struct rb_node *node = root->rb_node; + while (node) { + struct mapped_buffer_node *m = + container_of(node, struct mapped_buffer_node, node); + if (m->addr <= addr && m->addr + m->size > addr) + return m; + else if (m->addr > addr) /* u64 cmp */ + node = node->rb_left; + else + node = node->rb_right; + } + return 0; +} + +#define BFR_ATTRS (sizeof(nvmap_bfr_param)/sizeof(nvmap_bfr_param[0])) + +struct buffer_attrs { + struct sg_table *sgt; + u64 size; + u64 align; + u32 ctag_offset; + u32 ctag_lines; + int pgsz_idx; + u8 kind_v; + u8 uc_kind_v; +}; + +static void gmmu_select_page_size(struct buffer_attrs *bfr) +{ + int i; + /* choose the biggest first (top->bottom) */ + for (i = (gmmu_nr_page_sizes-1); i >= 0; i--) + if (!(gmmu_page_offset_masks[i] & bfr->align)) { + /* would like to add this too but nvmap returns the + * original requested size not the allocated size. 
+ * (!(gmmu_page_offset_masks[i] & bfr->size)) */ + bfr->pgsz_idx = i; + break; + } +} + +static int setup_buffer_kind_and_compression(struct device *d, + u32 flags, + struct buffer_attrs *bfr, + enum gmmu_pgsz_gk20a pgsz_idx) +{ + bool kind_compressible; + + if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v())) + bfr->kind_v = gmmu_pte_kind_pitch_v(); + + if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) { + gk20a_err(d, "kind 0x%x not supported", bfr->kind_v); + return -EINVAL; + } + + bfr->uc_kind_v = gmmu_pte_kind_invalid_v(); + /* find a suitable uncompressed kind if it becomes necessary later */ + kind_compressible = gk20a_kind_is_compressible(bfr->kind_v); + if (kind_compressible) { + bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v); + if (unlikely(bfr->uc_kind_v == gmmu_pte_kind_invalid_v())) { + /* shouldn't happen, but it is worth cross-checking */ + gk20a_err(d, "comptag kind 0x%x can't be" + " downgraded to uncompressed kind", + bfr->kind_v); + return -EINVAL; + } + } + /* comptags only supported for suitable kinds, 128KB pagesize */ + if (unlikely(kind_compressible && + (gmmu_page_sizes[pgsz_idx] != 128*1024))) { + /* + gk20a_warn(d, "comptags specified" + " but pagesize being used doesn't support it");*/ + /* it is safe to fall back to uncompressed as + functionality is not harmed */ + bfr->kind_v = bfr->uc_kind_v; + kind_compressible = false; + } + if (kind_compressible) + bfr->ctag_lines = ALIGN(bfr->size, COMP_TAG_LINE_SIZE) >> + COMP_TAG_LINE_SIZE_SHIFT; + else + bfr->ctag_lines = 0; + + return 0; +} + +static int validate_fixed_buffer(struct vm_gk20a *vm, + struct buffer_attrs *bfr, + u64 map_offset) +{ + struct device *dev = dev_from_vm(vm); + struct vm_reserved_va_node *va_node; + struct mapped_buffer_node *buffer; + + if (map_offset & gmmu_page_offset_masks[bfr->pgsz_idx]) { + gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx", + map_offset); + return -EINVAL; + } + + /* find the space reservation */ + va_node = addr_to_reservation(vm, map_offset); + if (!va_node) { + gk20a_warn(dev, "fixed offset mapping without space allocation"); + return -EINVAL; + } + + /* check that this mappings does not collide with existing + * mappings by checking the overlapping area between the current + * buffer and all other mapped buffers */ + + list_for_each_entry(buffer, + &va_node->va_buffers_list, va_buffers_list) { + s64 begin = max(buffer->addr, map_offset); + s64 end = min(buffer->addr + + buffer->size, map_offset + bfr->size); + if (end - begin > 0) { + gk20a_warn(dev, "overlapping buffer map requested"); + return -EINVAL; + } + } + + return 0; +} + +static u64 __locked_gmmu_map(struct vm_gk20a *vm, + u64 map_offset, + struct sg_table *sgt, + u64 size, + int pgsz_idx, + u8 kind_v, + u32 ctag_offset, + u32 flags, + int rw_flag) +{ + int err = 0, i = 0; + u32 pde_lo, pde_hi; + struct device *d = dev_from_vm(vm); + + /* Allocate (or validate when map_offset != 0) the virtual address. 
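+ * A zero map_offset means a range is carved out of the per-page-size VA
+ * allocator below; a non-zero (fixed) offset is used as-is and is assumed to
+ * have been validated by the caller (see validate_fixed_buffer()).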
*/ + if (!map_offset) { + map_offset = gk20a_vm_alloc_va(vm, size, + pgsz_idx); + if (!map_offset) { + gk20a_err(d, "failed to allocate va space"); + err = -ENOMEM; + goto fail; + } + } + + pde_range_from_vaddr_range(vm, + map_offset, + map_offset + size - 1, + &pde_lo, &pde_hi); + + /* mark the addr range valid (but with 0 phys addr, which will fault) */ + for (i = pde_lo; i <= pde_hi; i++) { + err = validate_gmmu_page_table_gk20a_locked(vm, i, + pgsz_idx); + if (err) { + gk20a_err(d, "failed to validate page table %d: %d", + i, err); + goto fail; + } + } + + err = update_gmmu_ptes_locked(vm, pgsz_idx, + sgt, + map_offset, map_offset + size - 1, + kind_v, + ctag_offset, + flags & + NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, + rw_flag); + if (err) { + gk20a_err(d, "failed to update ptes on map"); + goto fail; + } + + return map_offset; + fail: + gk20a_err(d, "%s: failed with err=%d\n", __func__, err); + return 0; +} + +static void __locked_gmmu_unmap(struct vm_gk20a *vm, + u64 vaddr, + u64 size, + int pgsz_idx, + bool va_allocated, + int rw_flag) +{ + int err = 0; + struct gk20a *g = gk20a_from_vm(vm); + + if (va_allocated) { + err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx); + if (err) { + dev_err(dev_from_vm(vm), + "failed to free va"); + return; + } + } + + /* unmap here needs to know the page size we assigned at mapping */ + err = update_gmmu_ptes_locked(vm, + pgsz_idx, + 0, /* n/a for unmap */ + vaddr, + vaddr + size - 1, + 0, 0, false /* n/a for unmap */, + rw_flag); + if (err) + dev_err(dev_from_vm(vm), + "failed to update gmmu ptes on unmap"); + + /* detect which if any pdes/ptes can now be released */ + + /* flush l2 so any dirty lines are written out *now*. + * also as we could potentially be switching this buffer + * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at + * some point in the future we need to invalidate l2. e.g. switching + * from a render buffer unmap (here) to later using the same memory + * for gmmu ptes. note the positioning of this relative to any smmu + * unmapping (below). */ + + gk20a_mm_l2_flush(g, true); +} + +static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm, + struct dma_buf *dmabuf, + u64 offset_align, + u32 flags, + int kind, + struct sg_table **sgt, + bool user_mapped, + int rw_flag) +{ + struct mapped_buffer_node *mapped_buffer = 0; + + mapped_buffer = + find_mapped_buffer_reverse_locked(&vm->mapped_buffers, + dmabuf, kind); + if (!mapped_buffer) + return 0; + + if (mapped_buffer->flags != flags) + return 0; + + if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET && + mapped_buffer->addr != offset_align) + return 0; + + BUG_ON(mapped_buffer->vm != vm); + + /* mark the buffer as used */ + if (user_mapped) { + if (mapped_buffer->user_mapped == 0) + vm->num_user_mapped_buffers++; + mapped_buffer->user_mapped++; + + /* If the mapping comes from user space, we own + * the handle ref. Since we reuse an + * existing mapping here, we need to give back those + * refs once in order not to leak. 
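+ * (That extra dma_buf reference was presumably taken by the caller, e.g. via
+ * dma_buf_get() on the map ioctl path, so dropping it below keeps the count
+ * balanced at one reference per live mapping.)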
+ */ + if (mapped_buffer->own_mem_ref) + dma_buf_put(mapped_buffer->dmabuf); + else + mapped_buffer->own_mem_ref = true; + } + kref_get(&mapped_buffer->ref); + + gk20a_dbg(gpu_dbg_map, + "reusing as=%d pgsz=%d flags=0x%x ctags=%d " + "start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x " + "own_mem_ref=%d user_mapped=%d", + vm_aspace_id(vm), mapped_buffer->pgsz_idx, + mapped_buffer->flags, + mapped_buffer->ctag_lines, + mapped_buffer->ctag_offset, + hi32(mapped_buffer->addr), lo32(mapped_buffer->addr), + hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)), + lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)), + hi32((u64)sg_phys(mapped_buffer->sgt->sgl)), + lo32((u64)sg_phys(mapped_buffer->sgt->sgl)), + mapped_buffer->own_mem_ref, user_mapped); + + if (sgt) + *sgt = mapped_buffer->sgt; + return mapped_buffer->addr; +} + +u64 gk20a_vm_map(struct vm_gk20a *vm, + struct dma_buf *dmabuf, + u64 offset_align, + u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/, + int kind, + struct sg_table **sgt, + bool user_mapped, + int rw_flag) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags; + struct device *d = dev_from_vm(vm); + struct mapped_buffer_node *mapped_buffer = 0; + bool inserted = false, va_allocated = false; + u32 gmmu_page_size = 0; + u64 map_offset = 0; + int err = 0; + struct buffer_attrs bfr = {0}; + struct gk20a_comptags comptags; + + mutex_lock(&vm->update_gmmu_lock); + + /* check if this buffer is already mapped */ + map_offset = gk20a_vm_map_duplicate_locked(vm, dmabuf, offset_align, + flags, kind, sgt, + user_mapped, rw_flag); + if (map_offset) { + mutex_unlock(&vm->update_gmmu_lock); + return map_offset; + } + + /* pin buffer to get phys/iovmm addr */ + bfr.sgt = gk20a_mm_pin(d, dmabuf); + if (IS_ERR(bfr.sgt)) { + /* Falling back to physical is actually possible + * here in many cases if we use 4K phys pages in the + * gmmu. However we have some regions which require + * contig regions to work properly (either phys-contig + * or contig through smmu io_vaspace). Until we can + * track the difference between those two cases we have + * to fail the mapping when we run out of SMMU space. + */ + gk20a_warn(d, "oom allocating tracking buffer"); + goto clean_up; + } + + if (sgt) + *sgt = bfr.sgt; + + bfr.kind_v = kind; + bfr.size = dmabuf->size; + bfr.align = 1 << __ffs((u64)sg_dma_address(bfr.sgt->sgl)); + bfr.pgsz_idx = -1; + + /* If FIX_OFFSET is set, pgsz is determined. Otherwise, select + * page size according to memory alignment */ + if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { + bfr.pgsz_idx = NV_GMMU_VA_IS_UPPER(offset_align) ? 
+ gmmu_page_size_big : gmmu_page_size_small; + } else { + gmmu_select_page_size(&bfr); + } + + /* validate/adjust bfr attributes */ + if (unlikely(bfr.pgsz_idx == -1)) { + gk20a_err(d, "unsupported page size detected"); + goto clean_up; + } + + if (unlikely(bfr.pgsz_idx < gmmu_page_size_small || + bfr.pgsz_idx > gmmu_page_size_big)) { + BUG_ON(1); + err = -EINVAL; + goto clean_up; + } + gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx]; + + /* Check if we should use a fixed offset for mapping this buffer */ + if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { + err = validate_fixed_buffer(vm, &bfr, offset_align); + if (err) + goto clean_up; + + map_offset = offset_align; + va_allocated = false; + } else + va_allocated = true; + + if (sgt) + *sgt = bfr.sgt; + + err = setup_buffer_kind_and_compression(d, flags, &bfr, bfr.pgsz_idx); + if (unlikely(err)) { + gk20a_err(d, "failure setting up kind and compression"); + goto clean_up; + } + + /* bar1 and pmu vm don't need ctag */ + if (!vm->enable_ctag) + bfr.ctag_lines = 0; + + gk20a_get_comptags(d, dmabuf, &comptags); + + if (bfr.ctag_lines && !comptags.lines) { + /* allocate compression resources if needed */ + err = gk20a_alloc_comptags(d, dmabuf, ctag_allocator, + bfr.ctag_lines); + if (err) { + /* ok to fall back here if we ran out */ + /* TBD: we can partially alloc ctags as well... */ + bfr.ctag_lines = bfr.ctag_offset = 0; + bfr.kind_v = bfr.uc_kind_v; + } else { + gk20a_get_comptags(d, dmabuf, &comptags); + + /* init/clear the ctag buffer */ + g->ops.ltc.clear_comptags(g, + comptags.offset, + comptags.offset + comptags.lines - 1); + } + } + + /* store the comptag info */ + bfr.ctag_offset = comptags.offset; + + /* update gmmu ptes */ + map_offset = __locked_gmmu_map(vm, map_offset, + bfr.sgt, + bfr.size, + bfr.pgsz_idx, + bfr.kind_v, + bfr.ctag_offset, + flags, rw_flag); + if (!map_offset) + goto clean_up; + + gk20a_dbg(gpu_dbg_map, + "as=%d pgsz=%d " + "kind=0x%x kind_uc=0x%x flags=0x%x " + "ctags=%d start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x", + vm_aspace_id(vm), gmmu_page_size, + bfr.kind_v, bfr.uc_kind_v, flags, + bfr.ctag_lines, bfr.ctag_offset, + hi32(map_offset), lo32(map_offset), + hi32((u64)sg_dma_address(bfr.sgt->sgl)), + lo32((u64)sg_dma_address(bfr.sgt->sgl)), + hi32((u64)sg_phys(bfr.sgt->sgl)), + lo32((u64)sg_phys(bfr.sgt->sgl))); + +#if defined(NVHOST_DEBUG) + { + int i; + struct scatterlist *sg = NULL; + gk20a_dbg(gpu_dbg_pte, "for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i)"); + for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i ) { + u64 da = sg_dma_address(sg); + u64 pa = sg_phys(sg); + u64 len = sg->length; + gk20a_dbg(gpu_dbg_pte, "i=%d pa=0x%x,%08x da=0x%x,%08x len=0x%x,%08x", + i, hi32(pa), lo32(pa), hi32(da), lo32(da), + hi32(len), lo32(len)); + } + } +#endif + + /* keep track of the buffer for unmapping */ + /* TBD: check for multiple mapping of same buffer */ + mapped_buffer = kzalloc(sizeof(*mapped_buffer), GFP_KERNEL); + if (!mapped_buffer) { + gk20a_warn(d, "oom allocating tracking buffer"); + goto clean_up; + } + mapped_buffer->dmabuf = dmabuf; + mapped_buffer->sgt = bfr.sgt; + mapped_buffer->addr = map_offset; + mapped_buffer->size = bfr.size; + mapped_buffer->pgsz_idx = bfr.pgsz_idx; + mapped_buffer->ctag_offset = bfr.ctag_offset; + mapped_buffer->ctag_lines = bfr.ctag_lines; + mapped_buffer->vm = vm; + mapped_buffer->flags = flags; + mapped_buffer->kind = kind; + mapped_buffer->va_allocated = va_allocated; + mapped_buffer->user_mapped = user_mapped ? 
1 : 0; + mapped_buffer->own_mem_ref = user_mapped; + INIT_LIST_HEAD(&mapped_buffer->unmap_list); + INIT_LIST_HEAD(&mapped_buffer->va_buffers_list); + kref_init(&mapped_buffer->ref); + + err = insert_mapped_buffer(&vm->mapped_buffers, mapped_buffer); + if (err) { + gk20a_err(d, "failed to insert into mapped buffer tree"); + goto clean_up; + } + inserted = true; + if (user_mapped) + vm->num_user_mapped_buffers++; + + gk20a_dbg_info("allocated va @ 0x%llx", map_offset); + + if (!va_allocated) { + struct vm_reserved_va_node *va_node; + + /* find the space reservation */ + va_node = addr_to_reservation(vm, map_offset); + list_add_tail(&mapped_buffer->va_buffers_list, + &va_node->va_buffers_list); + mapped_buffer->va_node = va_node; + } + + mutex_unlock(&vm->update_gmmu_lock); + + /* Invalidate kernel mappings immediately */ + if (vm_aspace_id(vm) == -1) + gk20a_mm_tlb_invalidate(vm); + + return map_offset; + +clean_up: + if (inserted) { + rb_erase(&mapped_buffer->node, &vm->mapped_buffers); + if (user_mapped) + vm->num_user_mapped_buffers--; + } + kfree(mapped_buffer); + if (va_allocated) + gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx); + if (!IS_ERR(bfr.sgt)) + gk20a_mm_unpin(d, dmabuf, bfr.sgt); + + mutex_unlock(&vm->update_gmmu_lock); + gk20a_dbg_info("err=%d\n", err); + return 0; +} + +u64 gk20a_gmmu_map(struct vm_gk20a *vm, + struct sg_table **sgt, + u64 size, + u32 flags, + int rw_flag) +{ + u64 vaddr; + + mutex_lock(&vm->update_gmmu_lock); + vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */ + *sgt, /* sg table */ + size, + 0, /* page size index = 0 i.e. SZ_4K */ + 0, /* kind */ + 0, /* ctag_offset */ + flags, rw_flag); + mutex_unlock(&vm->update_gmmu_lock); + if (!vaddr) { + gk20a_err(dev_from_vm(vm), "failed to allocate va space"); + return 0; + } + + /* Invalidate kernel mappings immediately */ + gk20a_mm_tlb_invalidate(vm); + + return vaddr; +} + +void gk20a_gmmu_unmap(struct vm_gk20a *vm, + u64 vaddr, + u64 size, + int rw_flag) +{ + mutex_lock(&vm->update_gmmu_lock); + __locked_gmmu_unmap(vm, + vaddr, + size, + 0, /* page size 4K */ + true, /*va_allocated */ + rw_flag); + mutex_unlock(&vm->update_gmmu_lock); +} + +phys_addr_t gk20a_get_phys_from_iova(struct device *d, + u64 dma_addr) +{ + phys_addr_t phys; + u64 iova; + + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); + if (!mapping) + return dma_addr; + + iova = dma_addr & PAGE_MASK; + phys = iommu_iova_to_phys(mapping->domain, iova); + return phys; +} + +/* get sg_table from already allocated buffer */ +int gk20a_get_sgtable(struct device *d, struct sg_table **sgt, + void *cpuva, u64 iova, + size_t size) +{ + int err = 0; + *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL); + if (!(*sgt)) { + dev_err(d, "failed to allocate memory\n"); + err = -ENOMEM; + goto fail; + } + err = dma_get_sgtable(d, *sgt, + cpuva, iova, + size); + if (err) { + dev_err(d, "failed to create sg table\n"); + goto fail; + } + sg_dma_address((*sgt)->sgl) = iova; + + return 0; + fail: + if (*sgt) { + kfree(*sgt); + *sgt = NULL; + } + return err; +} + +int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt, + struct page **pages, u64 iova, + size_t size) +{ + int err = 0; + *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL); + if (!(*sgt)) { + dev_err(d, "failed to allocate memory\n"); + err = -ENOMEM; + goto fail; + } + err = sg_alloc_table(*sgt, 1, GFP_KERNEL); + if (err) { + dev_err(d, "failed to allocate sg_table\n"); + goto fail; + } + sg_set_page((*sgt)->sgl, *pages, size, 0); + 
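+ /* Record the IOMMU address in the scatterlist so gk20a_mm_iova_addr() can
+ * later hand the SMMU-translated address to the GMMU PTEs. */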
sg_dma_address((*sgt)->sgl) = iova; + + return 0; + fail: + if (*sgt) { + kfree(*sgt); + *sgt = NULL; + } + return err; +} + +void gk20a_free_sgtable(struct sg_table **sgt) +{ + sg_free_table(*sgt); + kfree(*sgt); + *sgt = NULL; +} + +u64 gk20a_mm_iova_addr(struct scatterlist *sgl) +{ + u64 result = sg_phys(sgl); +#ifdef CONFIG_TEGRA_IOMMU_SMMU + if (sg_dma_address(sgl) == DMA_ERROR_CODE) + result = 0; + else if (sg_dma_address(sgl)) { + result = sg_dma_address(sgl) | + 1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT; + } +#endif + return result; +} + +static int update_gmmu_ptes_locked(struct vm_gk20a *vm, + enum gmmu_pgsz_gk20a pgsz_idx, + struct sg_table *sgt, + u64 first_vaddr, u64 last_vaddr, + u8 kind_v, u32 ctag_offset, + bool cacheable, + int rw_flag) +{ + int err; + u32 pde_lo, pde_hi, pde_i; + struct scatterlist *cur_chunk; + unsigned int cur_offset; + u32 pte_w[2] = {0, 0}; /* invalid pte */ + u32 ctag = ctag_offset; + u32 ctag_incr; + u32 page_size = gmmu_page_sizes[pgsz_idx]; + u64 addr = 0; + + pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr, + &pde_lo, &pde_hi); + + gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d", + pgsz_idx, pde_lo, pde_hi); + + /* If ctag_offset !=0 add 1 else add 0. The idea is to avoid a branch + * below (per-pte). Note: this doesn't work unless page size (when + * comptags are active) is 128KB. We have checks elsewhere for that. */ + ctag_incr = !!ctag_offset; + + if (sgt) + cur_chunk = sgt->sgl; + else + cur_chunk = NULL; + + cur_offset = 0; + + for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) { + u32 pte_lo, pte_hi; + u32 pte_cur; + void *pte_kv_cur; + + struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i; + + if (pde_i == pde_lo) + pte_lo = pte_index_from_vaddr(vm, first_vaddr, + pgsz_idx); + else + pte_lo = 0; + + if ((pde_i != pde_hi) && (pde_hi != pde_lo)) + pte_hi = vm->mm->page_table_sizing[pgsz_idx].num_ptes-1; + else + pte_hi = pte_index_from_vaddr(vm, last_vaddr, + pgsz_idx); + + /* get cpu access to the ptes */ + err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur, + pte->size); + if (err) { + gk20a_err(dev_from_vm(vm), + "couldn't map ptes for update as=%d pte_ref_cnt=%d", + vm_aspace_id(vm), pte->ref_cnt); + goto clean_up; + } + + gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); + for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { + + if (likely(sgt)) { + u64 new_addr = gk20a_mm_iova_addr(cur_chunk); + if (new_addr) { + addr = new_addr; + addr += cur_offset; + } + + pte_w[0] = gmmu_pte_valid_true_f() | + gmmu_pte_address_sys_f(addr + >> gmmu_pte_address_shift_v()); + pte_w[1] = gmmu_pte_aperture_video_memory_f() | + gmmu_pte_kind_f(kind_v) | + gmmu_pte_comptagline_f(ctag); + + if (rw_flag == gk20a_mem_flag_read_only) { + pte_w[0] |= gmmu_pte_read_only_true_f(); + pte_w[1] |= + gmmu_pte_write_disable_true_f(); + } else if (rw_flag == + gk20a_mem_flag_write_only) { + pte_w[1] |= + gmmu_pte_read_disable_true_f(); + } + + if (!cacheable) + pte_w[1] |= gmmu_pte_vol_true_f(); + + pte->ref_cnt++; + + gk20a_dbg(gpu_dbg_pte, + "pte_cur=%d addr=0x%x,%08x kind=%d" + " ctag=%d vol=%d refs=%d" + " [0x%08x,0x%08x]", + pte_cur, hi32(addr), lo32(addr), + kind_v, ctag, !cacheable, + pte->ref_cnt, pte_w[1], pte_w[0]); + + ctag += ctag_incr; + cur_offset += page_size; + addr += page_size; + while (cur_chunk && + cur_offset >= cur_chunk->length) { + cur_offset -= cur_chunk->length; + cur_chunk = sg_next(cur_chunk); + } + + } else { + pte->ref_cnt--; + gk20a_dbg(gpu_dbg_pte, + "pte_cur=%d ref=%d [0x0,0x0]", + pte_cur, 
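Two small idioms in gk20a_mm_iova_addr() and update_gmmu_ptes_locked() above are easy to miss: DMA addresses that went through the SMMU are tagged by setting bit NV_MC_SMMU_VADDR_TRANSLATION_BIT, and "ctag_incr = !!ctag_offset" turns the per-PTE comptag-line advance into an unconditional add. A standalone sketch of both follows; the sample values are made up, and only the bit position and the !! trick come from the driver code.

#include <stdio.h>
#include <stdint.h>

#define NV_MC_SMMU_VADDR_TRANSLATION_BIT 34	/* as defined in mm_gk20a.h below */

/* Tag an SMMU-translated DMA address so the GMMU knows the address should be
 * translated by the SMMU rather than used as a raw physical address. */
static uint64_t tag_smmu_iova(uint64_t dma_addr)
{
	return dma_addr | (1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT);
}

int main(void)
{
	uint32_t ctag_offset = 5;		/* made-up comptag line */
	uint32_t ctag_incr = !!ctag_offset;	/* 1 if comptags in use, else 0 */

	/* the PTE loop can now do "ctag += ctag_incr" without a per-PTE branch */
	printf("ctag_incr=%u\n", ctag_incr);
	printf("tagged iova=0x%llx\n",
	       (unsigned long long)tag_smmu_iova(0x80000000ULL));
	return 0;
}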
pte->ref_cnt); + } + + gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]); + gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]); + } + + unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur); + + if (pte->ref_cnt == 0) { + /* It can make sense to keep around one page table for + * each flavor (empty)... in case a new map is coming + * right back to alloc (and fill it in) again. + * But: deferring unmapping should help with pathologic + * unmap/map/unmap/map cases where we'd trigger pte + * free/alloc/free/alloc. + */ + free_gmmu_pages(vm, pte->ref, pte->sgt, + vm->mm->page_table_sizing[pgsz_idx].order, + pte->size); + pte->ref = NULL; + + /* rewrite pde */ + update_gmmu_pde_locked(vm, pde_i); + } + + } + + smp_mb(); + vm->tlb_dirty = true; + gk20a_dbg_fn("set tlb dirty"); + + return 0; + +clean_up: + /*TBD: potentially rewrite above to pre-map everything it needs to + * as that's the only way it can fail */ + return err; + +} + + +/* for gk20a the "video memory" apertures here are misnomers. */ +static inline u32 big_valid_pde0_bits(u64 pte_addr) +{ + u32 pde0_bits = + gmmu_pde_aperture_big_video_memory_f() | + gmmu_pde_address_big_sys_f( + (u32)(pte_addr >> gmmu_pde_address_shift_v())); + return pde0_bits; +} +static inline u32 small_valid_pde1_bits(u64 pte_addr) +{ + u32 pde1_bits = + gmmu_pde_aperture_small_video_memory_f() | + gmmu_pde_vol_small_true_f() | /* tbd: why? */ + gmmu_pde_address_small_sys_f( + (u32)(pte_addr >> gmmu_pde_address_shift_v())); + return pde1_bits; +} + +/* Given the current state of the ptes associated with a pde, + determine value and write it out. There's no checking + here to determine whether or not a change was actually + made. So, superfluous updates will cause unnecessary + pde invalidations. +*/ +static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i) +{ + bool small_valid, big_valid; + u64 pte_addr[2] = {0, 0}; + struct page_table_gk20a *small_pte = + vm->pdes.ptes[gmmu_page_size_small] + i; + struct page_table_gk20a *big_pte = + vm->pdes.ptes[gmmu_page_size_big] + i; + u32 pde_v[2] = {0, 0}; + u32 *pde; + + small_valid = small_pte && small_pte->ref; + big_valid = big_pte && big_pte->ref; + + if (small_valid) + pte_addr[gmmu_page_size_small] = + gk20a_mm_iova_addr(small_pte->sgt->sgl); + if (big_valid) + pte_addr[gmmu_page_size_big] = + gk20a_mm_iova_addr(big_pte->sgt->sgl); + + pde_v[0] = gmmu_pde_size_full_f(); + pde_v[0] |= big_valid ? + big_valid_pde0_bits(pte_addr[gmmu_page_size_big]) + : + (gmmu_pde_aperture_big_invalid_f()); + + pde_v[1] |= (small_valid ? + small_valid_pde1_bits(pte_addr[gmmu_page_size_small]) + : + (gmmu_pde_aperture_small_invalid_f() | + gmmu_pde_vol_small_false_f()) + ) + | + (big_valid ? 
(gmmu_pde_vol_big_true_f()) : + gmmu_pde_vol_big_false_f()); + + pde = pde_from_index(vm, i); + + gk20a_mem_wr32(pde, 0, pde_v[0]); + gk20a_mem_wr32(pde, 1, pde_v[1]); + + smp_mb(); + + FLUSH_CPU_DCACHE(pde, + sg_phys(vm->pdes.sgt->sgl) + (i*gmmu_pde__size_v()), + sizeof(u32)*2); + + gk20a_mm_l2_invalidate(vm->mm->g); + + gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]); + + vm->tlb_dirty = true; +} + + +static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr, + u32 num_pages, u32 pgsz_idx) +{ + struct mm_gk20a *mm = vm->mm; + struct gk20a *g = mm->g; + u32 pgsz = gmmu_page_sizes[pgsz_idx]; + u32 i; + dma_addr_t iova; + + /* allocate the zero page if the va does not already have one */ + if (!vm->zero_page_cpuva) { + int err = 0; + vm->zero_page_cpuva = dma_alloc_coherent(&g->dev->dev, + mm->big_page_size, + &iova, + GFP_KERNEL); + if (!vm->zero_page_cpuva) { + dev_err(&g->dev->dev, "failed to allocate zero page\n"); + return -ENOMEM; + } + + vm->zero_page_iova = iova; + err = gk20a_get_sgtable(&g->dev->dev, &vm->zero_page_sgt, + vm->zero_page_cpuva, vm->zero_page_iova, + mm->big_page_size); + if (err) { + dma_free_coherent(&g->dev->dev, mm->big_page_size, + vm->zero_page_cpuva, + vm->zero_page_iova); + vm->zero_page_iova = 0; + vm->zero_page_cpuva = NULL; + + dev_err(&g->dev->dev, "failed to create sg table for zero page\n"); + return -ENOMEM; + } + } + + for (i = 0; i < num_pages; i++) { + u64 page_vaddr = __locked_gmmu_map(vm, vaddr, + vm->zero_page_sgt, pgsz, pgsz_idx, 0, 0, + NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET, + gk20a_mem_flag_none); + + if (!page_vaddr) { + gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!"); + goto err_unmap; + } + vaddr += pgsz; + } + + gk20a_mm_l2_flush(mm->g, true); + + return 0; + +err_unmap: + + WARN_ON(1); + /* something went wrong. unmap pages */ + while (i--) { + vaddr -= pgsz; + __locked_gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0, + gk20a_mem_flag_none); + } + + return -EINVAL; +} + +/* NOTE! mapped_buffers lock must be held */ +static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) +{ + struct vm_gk20a *vm = mapped_buffer->vm; + + if (mapped_buffer->va_node && + mapped_buffer->va_node->sparse) { + u64 vaddr = mapped_buffer->addr; + u32 pgsz_idx = mapped_buffer->pgsz_idx; + u32 num_pages = mapped_buffer->size >> + gmmu_page_shifts[pgsz_idx]; + + /* there is little we can do if this fails... 
*/ + gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx); + + } else + __locked_gmmu_unmap(vm, + mapped_buffer->addr, + mapped_buffer->size, + mapped_buffer->pgsz_idx, + mapped_buffer->va_allocated, + gk20a_mem_flag_none); + + gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d", + vm_aspace_id(vm), gmmu_page_sizes[mapped_buffer->pgsz_idx], + hi32(mapped_buffer->addr), lo32(mapped_buffer->addr), + mapped_buffer->own_mem_ref); + + gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf, + mapped_buffer->sgt); + + /* remove from mapped buffer tree and remove list, free */ + rb_erase(&mapped_buffer->node, &vm->mapped_buffers); + if (!list_empty(&mapped_buffer->va_buffers_list)) + list_del(&mapped_buffer->va_buffers_list); + + /* keep track of mapped buffers */ + if (mapped_buffer->user_mapped) + vm->num_user_mapped_buffers--; + + if (mapped_buffer->own_mem_ref) + dma_buf_put(mapped_buffer->dmabuf); + + kfree(mapped_buffer); + + return; +} + +void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset) +{ + struct device *d = dev_from_vm(vm); + struct mapped_buffer_node *mapped_buffer; + + mutex_lock(&vm->update_gmmu_lock); + mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset); + if (!mapped_buffer) { + mutex_unlock(&vm->update_gmmu_lock); + gk20a_err(d, "invalid addr to unmap 0x%llx", offset); + return; + } + kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref); + mutex_unlock(&vm->update_gmmu_lock); +} + +static void gk20a_vm_remove_support(struct vm_gk20a *vm) +{ + struct gk20a *g = vm->mm->g; + struct mapped_buffer_node *mapped_buffer; + struct vm_reserved_va_node *va_node, *va_node_tmp; + struct rb_node *node; + + gk20a_dbg_fn(""); + mutex_lock(&vm->update_gmmu_lock); + + /* TBD: add a flag here for the unmap code to recognize teardown + * and short-circuit any otherwise expensive operations. */ + + node = rb_first(&vm->mapped_buffers); + while (node) { + mapped_buffer = + container_of(node, struct mapped_buffer_node, node); + gk20a_vm_unmap_locked(mapped_buffer); + node = rb_first(&vm->mapped_buffers); + } + + /* destroy remaining reserved memory areas */ + list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list, + reserved_va_list) { + list_del(&va_node->reserved_va_list); + kfree(va_node); + } + + /* TBD: unmapping all buffers above may not actually free + * all vm ptes. jettison them here for certain... */ + + unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv); + free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, vm->pdes.size); + + kfree(vm->pdes.ptes[gmmu_page_size_small]); + kfree(vm->pdes.ptes[gmmu_page_size_big]); + gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); + gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); + + mutex_unlock(&vm->update_gmmu_lock); + + /* release zero page if used */ + if (vm->zero_page_cpuva) + dma_free_coherent(&g->dev->dev, vm->mm->big_page_size, + vm->zero_page_cpuva, vm->zero_page_iova); + + /* vm is not used anymore. release it. 
*/ + kfree(vm); +} + +static void gk20a_vm_remove_support_kref(struct kref *ref) +{ + struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref); + gk20a_vm_remove_support(vm); +} + +void gk20a_vm_get(struct vm_gk20a *vm) +{ + kref_get(&vm->ref); +} + +void gk20a_vm_put(struct vm_gk20a *vm) +{ + kref_put(&vm->ref, gk20a_vm_remove_support_kref); +} + +/* address space interfaces for the gk20a module */ +int gk20a_vm_alloc_share(struct gk20a_as_share *as_share) +{ + struct gk20a_as *as = as_share->as; + struct gk20a *g = gk20a_from_as(as); + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm; + u64 vma_size; + u32 num_pages, low_hole_pages; + char name[32]; + int err; + + gk20a_dbg_fn(""); + + vm = kzalloc(sizeof(*vm), GFP_KERNEL); + if (!vm) + return -ENOMEM; + + as_share->vm = vm; + + vm->mm = mm; + vm->as_share = as_share; + + vm->big_pages = true; + + vm->va_start = mm->pde_stride; /* create a one pde hole */ + vm->va_limit = mm->channel.size; /* note this means channel.size is + really just the max */ + { + u32 pde_lo, pde_hi; + pde_range_from_vaddr_range(vm, + 0, vm->va_limit-1, + &pde_lo, &pde_hi); + vm->pdes.num_pdes = pde_hi + 1; + } + + vm->pdes.ptes[gmmu_page_size_small] = + kzalloc(sizeof(struct page_table_gk20a) * + vm->pdes.num_pdes, GFP_KERNEL); + + vm->pdes.ptes[gmmu_page_size_big] = + kzalloc(sizeof(struct page_table_gk20a) * + vm->pdes.num_pdes, GFP_KERNEL); + + if (!(vm->pdes.ptes[gmmu_page_size_small] && + vm->pdes.ptes[gmmu_page_size_big])) + return -ENOMEM; + + gk20a_dbg_info("init space for va_limit=0x%llx num_pdes=%d", + vm->va_limit, vm->pdes.num_pdes); + + /* allocate the page table directory */ + err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref, + &vm->pdes.sgt, &vm->pdes.size); + if (err) + return -ENOMEM; + + err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv, + vm->pdes.size); + if (err) { + free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, + vm->pdes.size); + return -ENOMEM; + } + gk20a_dbg(gpu_dbg_pte, "pdes.kv = 0x%p, pdes.phys = 0x%llx", + vm->pdes.kv, + gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); + /* we could release vm->pdes.kv but it's only one page... */ + + + /* low-half: alloc small pages */ + /* high-half: alloc big pages */ + vma_size = mm->channel.size >> 1; + + snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, + gmmu_page_sizes[gmmu_page_size_small]>>10); + num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]); + + /* num_pages above is without regard to the low-side hole. 
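struct vm_gk20a is reference-counted with a kref: kref_init() sets the count to one when the address space is created, gk20a_vm_get()/gk20a_vm_put() take and drop references, and gk20a_vm_remove_support() tears the vm down once the last reference goes away. A rough userspace analogue of that pattern, with made-up names and C11 atomics standing in for struct kref:

#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>

struct toy_vm {
	atomic_int ref;
};

static struct toy_vm *toy_vm_alloc(void)
{
	struct toy_vm *vm = calloc(1, sizeof(*vm));
	if (vm)
		atomic_store(&vm->ref, 1);	/* kref_init(): creator holds a ref */
	return vm;
}

static void toy_vm_get(struct toy_vm *vm)
{
	atomic_fetch_add(&vm->ref, 1);		/* kref_get() */
}

static void toy_vm_put(struct toy_vm *vm)
{
	/* kref_put(): run the release callback when the count hits zero */
	if (atomic_fetch_sub(&vm->ref, 1) == 1) {
		printf("last reference dropped, freeing vm\n");
		free(vm);
	}
}

int main(void)
{
	struct toy_vm *vm = toy_vm_alloc();
	if (!vm)
		return 1;

	toy_vm_get(vm);		/* e.g. a channel binds to the vm */
	toy_vm_put(vm);		/* the channel goes away */
	toy_vm_put(vm);		/* the as_share is released: vm freed here */
	return 0;
}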
*/ + low_hole_pages = (vm->va_start >> + gmmu_page_shifts[gmmu_page_size_small]); + + gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name, + low_hole_pages, /* start */ + num_pages - low_hole_pages, /* length */ + 1); /* align */ + + snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, + gmmu_page_sizes[gmmu_page_size_big]>>10); + + num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]); + gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name, + num_pages, /* start */ + num_pages, /* length */ + 1); /* align */ + + vm->mapped_buffers = RB_ROOT; + + mutex_init(&vm->update_gmmu_lock); + kref_init(&vm->ref); + INIT_LIST_HEAD(&vm->reserved_va_list); + + vm->enable_ctag = true; + + return 0; +} + + +int gk20a_vm_release_share(struct gk20a_as_share *as_share) +{ + struct vm_gk20a *vm = as_share->vm; + + gk20a_dbg_fn(""); + + vm->as_share = NULL; + + /* put as reference to vm */ + gk20a_vm_put(vm); + + as_share->vm = NULL; + + return 0; +} + + +int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, + struct nvhost_as_alloc_space_args *args) + +{ int err = -ENOMEM; + int pgsz_idx; + u32 start_page_nr; + struct gk20a_allocator *vma; + struct vm_gk20a *vm = as_share->vm; + struct vm_reserved_va_node *va_node; + u64 vaddr_start = 0; + + gk20a_dbg_fn("flags=0x%x pgsz=0x%x nr_pages=0x%x o/a=0x%llx", + args->flags, args->page_size, args->pages, + args->o_a.offset); + + /* determine pagesz idx */ + for (pgsz_idx = gmmu_page_size_small; + pgsz_idx < gmmu_nr_page_sizes; + pgsz_idx++) { + if (gmmu_page_sizes[pgsz_idx] == args->page_size) + break; + } + + if (pgsz_idx >= gmmu_nr_page_sizes) { + err = -EINVAL; + goto clean_up; + } + + va_node = kzalloc(sizeof(*va_node), GFP_KERNEL); + if (!va_node) { + err = -ENOMEM; + goto clean_up; + } + + if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE && + pgsz_idx != gmmu_page_size_big) { + err = -ENOSYS; + kfree(va_node); + goto clean_up; + } + + start_page_nr = 0; + if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) + start_page_nr = (u32)(args->o_a.offset >> + gmmu_page_shifts[pgsz_idx]); + + vma = &vm->vma[pgsz_idx]; + err = vma->alloc(vma, &start_page_nr, args->pages); + if (err) { + kfree(va_node); + goto clean_up; + } + + vaddr_start = (u64)start_page_nr << gmmu_page_shifts[pgsz_idx]; + + va_node->vaddr_start = vaddr_start; + va_node->size = (u64)args->page_size * (u64)args->pages; + va_node->pgsz_idx = args->page_size; + INIT_LIST_HEAD(&va_node->va_buffers_list); + INIT_LIST_HEAD(&va_node->reserved_va_list); + + mutex_lock(&vm->update_gmmu_lock); + + /* mark that we need to use sparse mappings here */ + if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) { + err = gk20a_vm_put_empty(vm, vaddr_start, args->pages, + pgsz_idx); + if (err) { + mutex_unlock(&vm->update_gmmu_lock); + vma->free(vma, start_page_nr, args->pages); + kfree(va_node); + goto clean_up; + } + + va_node->sparse = true; + } + + list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list); + + mutex_unlock(&vm->update_gmmu_lock); + + args->o_a.offset = vaddr_start; + +clean_up: + return err; +} + +int gk20a_vm_free_space(struct gk20a_as_share *as_share, + struct nvhost_as_free_space_args *args) +{ + int err = -ENOMEM; + int pgsz_idx; + u32 start_page_nr; + struct gk20a_allocator *vma; + struct vm_gk20a *vm = as_share->vm; + struct vm_reserved_va_node *va_node; + + gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size, + args->pages, args->offset); + + /* determine pagesz idx */ + for (pgsz_idx = gmmu_page_size_small; + pgsz_idx < 
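gk20a_vm_alloc_space() above works in whole pages: a fixed offset is turned into a start page number with a right shift, and the page number is widened back to a 64-bit virtual address with an explicit (u64) cast so the left shift cannot wrap in 32 bits. A standalone sketch of that round trip; the 128 KB big-page shift of 17 matches the driver's big page size, the offset value is arbitrary.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const unsigned int big_page_shift = 17;	/* 128 KB big pages */
	uint64_t offset = 0x240000000ULL;	/* arbitrary fixed offset (9 GB) */

	/* the page number fits comfortably in 32 bits... */
	uint32_t start_page_nr = (uint32_t)(offset >> big_page_shift);

	/* ...but converting back must widen first, exactly as the driver's
	 * "(u64)start_page_nr << gmmu_page_shifts[pgsz_idx]" does */
	uint64_t vaddr_start = (uint64_t)start_page_nr << big_page_shift;

	/* keeping the result in 32 bits would silently truncate the address */
	uint32_t truncated = (uint32_t)vaddr_start;

	printf("page_nr=0x%x vaddr=0x%llx (32-bit result would be 0x%x)\n",
	       start_page_nr, (unsigned long long)vaddr_start, truncated);
	return 0;
}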
gmmu_nr_page_sizes; + pgsz_idx++) { + if (gmmu_page_sizes[pgsz_idx] == args->page_size) + break; + } + + if (pgsz_idx >= gmmu_nr_page_sizes) { + err = -EINVAL; + goto clean_up; + } + + start_page_nr = (u32)(args->offset >> + gmmu_page_shifts[pgsz_idx]); + + vma = &vm->vma[pgsz_idx]; + err = vma->free(vma, start_page_nr, args->pages); + + if (err) + goto clean_up; + + mutex_lock(&vm->update_gmmu_lock); + va_node = addr_to_reservation(vm, args->offset); + if (va_node) { + struct mapped_buffer_node *buffer; + + /* there is no need to unallocate the buffers in va. Just + * convert them into normal buffers */ + + list_for_each_entry(buffer, + &va_node->va_buffers_list, va_buffers_list) + list_del_init(&buffer->va_buffers_list); + + list_del(&va_node->reserved_va_list); + + /* if this was a sparse mapping, free the va */ + if (va_node->sparse) + __locked_gmmu_unmap(vm, + va_node->vaddr_start, + va_node->size, + va_node->pgsz_idx, + false, + gk20a_mem_flag_none); + kfree(va_node); + } + mutex_unlock(&vm->update_gmmu_lock); + +clean_up: + return err; +} + +int gk20a_vm_bind_channel(struct gk20a_as_share *as_share, + struct channel_gk20a *ch) +{ + int err = 0; + struct vm_gk20a *vm = as_share->vm; + + gk20a_dbg_fn(""); + + ch->vm = vm; + err = channel_gk20a_commit_va(ch); + if (err) + ch->vm = 0; + + return err; +} + +int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev) +{ + struct gk20a_dmabuf_priv *priv; + static DEFINE_MUTEX(priv_lock); + + priv = dma_buf_get_drvdata(dmabuf, dev); + if (likely(priv)) + return 0; + + mutex_lock(&priv_lock); + priv = dma_buf_get_drvdata(dmabuf, dev); + if (priv) + goto priv_exist_or_err; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + priv = ERR_PTR(-ENOMEM); + goto priv_exist_or_err; + } + mutex_init(&priv->lock); + dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv); +priv_exist_or_err: + mutex_unlock(&priv_lock); + if (IS_ERR(priv)) + return -ENOMEM; + + return 0; +} + + +static int gk20a_dmabuf_get_kind(struct dma_buf *dmabuf) +{ + int kind = 0; +#ifdef CONFIG_TEGRA_NVMAP + int err; + u64 nvmap_param; + + err = nvmap_get_dmabuf_param(dmabuf, NVMAP_HANDLE_PARAM_KIND, + &nvmap_param); + kind = err ? 
kind : nvmap_param; +#endif + return kind; +} + +int gk20a_vm_map_buffer(struct gk20a_as_share *as_share, + int dmabuf_fd, + u64 *offset_align, + u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/ + int kind) +{ + int err = 0; + struct vm_gk20a *vm = as_share->vm; + struct dma_buf *dmabuf; + u64 ret_va; + + gk20a_dbg_fn(""); + + /* get ref to the mem handle (released on unmap_locked) */ + dmabuf = dma_buf_get(dmabuf_fd); + if (!dmabuf) + return 0; + + err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm)); + if (err) { + dma_buf_put(dmabuf); + return err; + } + + if (kind == -1) + kind = gk20a_dmabuf_get_kind(dmabuf); + + ret_va = gk20a_vm_map(vm, dmabuf, *offset_align, + flags, kind, NULL, true, + gk20a_mem_flag_none); + *offset_align = ret_va; + if (!ret_va) { + dma_buf_put(dmabuf); + err = -EINVAL; + } + + return err; +} + +int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset) +{ + struct vm_gk20a *vm = as_share->vm; + + gk20a_dbg_fn(""); + + gk20a_vm_unmap_user(vm, offset); + return 0; +} + +int gk20a_init_bar1_vm(struct mm_gk20a *mm) +{ + int err; + phys_addr_t inst_pa; + void *inst_ptr; + struct vm_gk20a *vm = &mm->bar1.vm; + struct gk20a *g = gk20a_from_mm(mm); + struct device *d = dev_from_gk20a(g); + struct inst_desc *inst_block = &mm->bar1.inst_block; + u64 pde_addr; + u32 pde_addr_lo; + u32 pde_addr_hi; + dma_addr_t iova; + + vm->mm = mm; + + mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; + + gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); + + vm->va_start = mm->pde_stride * 1; + vm->va_limit = mm->bar1.aperture_size; + + { + u32 pde_lo, pde_hi; + pde_range_from_vaddr_range(vm, + 0, vm->va_limit-1, + &pde_lo, &pde_hi); + vm->pdes.num_pdes = pde_hi + 1; + } + + /* bar1 is likely only to ever use/need small page sizes. */ + /* But just in case, for now... arrange for both.*/ + vm->pdes.ptes[gmmu_page_size_small] = + kzalloc(sizeof(struct page_table_gk20a) * + vm->pdes.num_pdes, GFP_KERNEL); + + vm->pdes.ptes[gmmu_page_size_big] = + kzalloc(sizeof(struct page_table_gk20a) * + vm->pdes.num_pdes, GFP_KERNEL); + + if (!(vm->pdes.ptes[gmmu_page_size_small] && + vm->pdes.ptes[gmmu_page_size_big])) + return -ENOMEM; + + gk20a_dbg_info("init space for bar1 va_limit=0x%llx num_pdes=%d", + vm->va_limit, vm->pdes.num_pdes); + + + /* allocate the page table directory */ + err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref, + &vm->pdes.sgt, &vm->pdes.size); + if (err) + goto clean_up; + + err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv, + vm->pdes.size); + if (err) { + free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, + vm->pdes.size); + goto clean_up; + } + gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx", + vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); + /* we could release vm->pdes.kv but it's only one page... 
*/ + + pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl); + pde_addr_lo = u64_lo32(pde_addr >> 12); + pde_addr_hi = u64_hi32(pde_addr); + + gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x", + (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl), + pde_addr_lo, pde_addr_hi); + + /* allocate instance mem for bar1 */ + inst_block->size = ram_in_alloc_size_v(); + inst_block->cpuva = dma_alloc_coherent(d, inst_block->size, + &iova, GFP_KERNEL); + if (!inst_block->cpuva) { + gk20a_err(d, "%s: memory allocation failed\n", __func__); + err = -ENOMEM; + goto clean_up; + } + + inst_block->iova = iova; + inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova); + if (!inst_block->cpu_pa) { + gk20a_err(d, "%s: failed to get phys address\n", __func__); + err = -ENOMEM; + goto clean_up; + } + + inst_pa = inst_block->cpu_pa; + inst_ptr = inst_block->cpuva; + + gk20a_dbg_info("bar1 inst block physical phys = 0x%llx, kv = 0x%p", + (u64)inst_pa, inst_ptr); + + memset(inst_ptr, 0, ram_fc_size_val_v()); + + gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), + ram_in_page_dir_base_target_vid_mem_f() | + ram_in_page_dir_base_vol_true_f() | + ram_in_page_dir_base_lo_f(pde_addr_lo)); + + gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), + ram_in_page_dir_base_hi_f(pde_addr_hi)); + + gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), + u64_lo32(vm->va_limit) | 0xFFF); + + gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), + ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); + + gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa); + gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_bar1", + 1,/*start*/ + (vm->va_limit >> 12) - 1 /* length*/, + 1); /* align */ + /* initialize just in case we try to use it anyway */ + gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_bar1-unused", + 0x0badc0de, /* start */ + 1, /* length */ + 1); /* align */ + + vm->mapped_buffers = RB_ROOT; + + mutex_init(&vm->update_gmmu_lock); + kref_init(&vm->ref); + INIT_LIST_HEAD(&vm->reserved_va_list); + + return 0; + +clean_up: + /* free, etc */ + if (inst_block->cpuva) + dma_free_coherent(d, inst_block->size, + inst_block->cpuva, inst_block->iova); + inst_block->cpuva = NULL; + inst_block->iova = 0; + return err; +} + +/* pmu vm, share channel_vm interfaces */ +int gk20a_init_pmu_vm(struct mm_gk20a *mm) +{ + int err; + phys_addr_t inst_pa; + void *inst_ptr; + struct vm_gk20a *vm = &mm->pmu.vm; + struct gk20a *g = gk20a_from_mm(mm); + struct device *d = dev_from_gk20a(g); + struct inst_desc *inst_block = &mm->pmu.inst_block; + u64 pde_addr; + u32 pde_addr_lo; + u32 pde_addr_hi; + dma_addr_t iova; + + vm->mm = mm; + + mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; + + gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); + + vm->va_start = GK20A_PMU_VA_START; + vm->va_limit = vm->va_start + mm->pmu.aperture_size; + + { + u32 pde_lo, pde_hi; + pde_range_from_vaddr_range(vm, + 0, vm->va_limit-1, + &pde_lo, &pde_hi); + vm->pdes.num_pdes = pde_hi + 1; + } + + /* The pmu is likely only to ever use/need small page sizes. */ + /* But just in case, for now... 
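The instance-block programming above hands the 64-bit page directory base to hardware as two 32-bit words: the low word is the base shifted down by 12 bits (4 KB units) and the high word is the upper half of the raw address, matching the ram_in_page_dir_base_lo/hi writes. A small standalone illustration of that split; the address is an arbitrary example, and the helpers mirror u64_lo32()/u64_hi32().

#include <stdio.h>
#include <stdint.h>

static uint32_t u64_lo32(uint64_t v) { return (uint32_t)v; }
static uint32_t u64_hi32(uint64_t v) { return (uint32_t)(v >> 32); }

int main(void)
{
	uint64_t pde_addr = 0x1234567000ULL;	/* hypothetical PD base above 4 GB */

	uint32_t pde_addr_lo = u64_lo32(pde_addr >> 12);	/* 4 KB units */
	uint32_t pde_addr_hi = u64_hi32(pde_addr);		/* raw upper half */

	printf("pde pa=0x%llx lo=0x%x hi=0x%x\n",
	       (unsigned long long)pde_addr, pde_addr_lo, pde_addr_hi);
	return 0;
}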
arrange for both.*/ + vm->pdes.ptes[gmmu_page_size_small] = + kzalloc(sizeof(struct page_table_gk20a) * + vm->pdes.num_pdes, GFP_KERNEL); + + vm->pdes.ptes[gmmu_page_size_big] = + kzalloc(sizeof(struct page_table_gk20a) * + vm->pdes.num_pdes, GFP_KERNEL); + + if (!(vm->pdes.ptes[gmmu_page_size_small] && + vm->pdes.ptes[gmmu_page_size_big])) + return -ENOMEM; + + gk20a_dbg_info("init space for pmu va_limit=0x%llx num_pdes=%d", + vm->va_limit, vm->pdes.num_pdes); + + /* allocate the page table directory */ + err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref, + &vm->pdes.sgt, &vm->pdes.size); + if (err) + goto clean_up; + + err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv, + vm->pdes.size); + if (err) { + free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, + vm->pdes.size); + goto clean_up; + } + gk20a_dbg_info("pmu pdes phys @ 0x%llx", + (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); + /* we could release vm->pdes.kv but it's only one page... */ + + pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl); + pde_addr_lo = u64_lo32(pde_addr >> 12); + pde_addr_hi = u64_hi32(pde_addr); + + gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x", + (u64)pde_addr, pde_addr_lo, pde_addr_hi); + + /* allocate instance mem for pmu */ + inst_block->size = GK20A_PMU_INST_SIZE; + inst_block->cpuva = dma_alloc_coherent(d, inst_block->size, + &iova, GFP_KERNEL); + if (!inst_block->cpuva) { + gk20a_err(d, "%s: memory allocation failed\n", __func__); + err = -ENOMEM; + goto clean_up; + } + + inst_block->iova = iova; + inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova); + if (!inst_block->cpu_pa) { + gk20a_err(d, "%s: failed to get phys address\n", __func__); + err = -ENOMEM; + goto clean_up; + } + + inst_pa = inst_block->cpu_pa; + inst_ptr = inst_block->cpuva; + + gk20a_dbg_info("pmu inst block physical addr: 0x%llx", (u64)inst_pa); + + memset(inst_ptr, 0, GK20A_PMU_INST_SIZE); + + gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), + ram_in_page_dir_base_target_vid_mem_f() | + ram_in_page_dir_base_vol_true_f() | + ram_in_page_dir_base_lo_f(pde_addr_lo)); + + gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), + ram_in_page_dir_base_hi_f(pde_addr_hi)); + + gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), + u64_lo32(vm->va_limit) | 0xFFF); + + gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), + ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); + + gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_pmu", + (vm->va_start >> 12), /* start */ + (vm->va_limit - vm->va_start) >> 12, /*length*/ + 1); /* align */ + /* initialize just in case we try to use it anyway */ + gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_pmu-unused", + 0x0badc0de, /* start */ + 1, /* length */ + 1); /* align */ + + + vm->mapped_buffers = RB_ROOT; + + mutex_init(&vm->update_gmmu_lock); + kref_init(&vm->ref); + INIT_LIST_HEAD(&vm->reserved_va_list); + + return 0; + +clean_up: + /* free, etc */ + if (inst_block->cpuva) + dma_free_coherent(d, inst_block->size, + inst_block->cpuva, inst_block->iova); + inst_block->cpuva = NULL; + inst_block->iova = 0; + return err; +} + +void gk20a_mm_fb_flush(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + u32 data; + s32 retry = 100; + + gk20a_dbg_fn(""); + + mutex_lock(&mm->l2_op_lock); + + g->ops.ltc.elpg_flush(g); + + /* Make sure all previous writes are committed to the L2. There's no + guarantee that writes are to DRAM. This will be a sysmembar internal + to the L2. 
*/ + gk20a_writel(g, flush_fb_flush_r(), + flush_fb_flush_pending_busy_f()); + + do { + data = gk20a_readl(g, flush_fb_flush_r()); + + if (flush_fb_flush_outstanding_v(data) == + flush_fb_flush_outstanding_true_v() || + flush_fb_flush_pending_v(data) == + flush_fb_flush_pending_busy_v()) { + gk20a_dbg_info("fb_flush 0x%x", data); + retry--; + usleep_range(20, 40); + } else + break; + } while (retry >= 0 || !tegra_platform_is_silicon()); + + if (retry < 0) + gk20a_warn(dev_from_gk20a(g), + "fb_flush too many retries"); + + mutex_unlock(&mm->l2_op_lock); +} + +static void gk20a_mm_l2_invalidate_locked(struct gk20a *g) +{ + u32 data; + s32 retry = 200; + + /* Invalidate any clean lines from the L2 so subsequent reads go to + DRAM. Dirty lines are not affected by this operation. */ + gk20a_writel(g, flush_l2_system_invalidate_r(), + flush_l2_system_invalidate_pending_busy_f()); + + do { + data = gk20a_readl(g, flush_l2_system_invalidate_r()); + + if (flush_l2_system_invalidate_outstanding_v(data) == + flush_l2_system_invalidate_outstanding_true_v() || + flush_l2_system_invalidate_pending_v(data) == + flush_l2_system_invalidate_pending_busy_v()) { + gk20a_dbg_info("l2_system_invalidate 0x%x", + data); + retry--; + usleep_range(20, 40); + } else + break; + } while (retry >= 0 || !tegra_platform_is_silicon()); + + if (retry < 0) + gk20a_warn(dev_from_gk20a(g), + "l2_system_invalidate too many retries"); +} + +void gk20a_mm_l2_invalidate(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + mutex_lock(&mm->l2_op_lock); + gk20a_mm_l2_invalidate_locked(g); + mutex_unlock(&mm->l2_op_lock); +} + +void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate) +{ + struct mm_gk20a *mm = &g->mm; + u32 data; + s32 retry = 200; + + gk20a_dbg_fn(""); + + mutex_lock(&mm->l2_op_lock); + + /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2 + as clean, so subsequent reads might hit in the L2. */ + gk20a_writel(g, flush_l2_flush_dirty_r(), + flush_l2_flush_dirty_pending_busy_f()); + + do { + data = gk20a_readl(g, flush_l2_flush_dirty_r()); + + if (flush_l2_flush_dirty_outstanding_v(data) == + flush_l2_flush_dirty_outstanding_true_v() || + flush_l2_flush_dirty_pending_v(data) == + flush_l2_flush_dirty_pending_busy_v()) { + gk20a_dbg_info("l2_flush_dirty 0x%x", data); + retry--; + usleep_range(20, 40); + } else + break; + } while (retry >= 0 || !tegra_platform_is_silicon()); + + if (retry < 0) + gk20a_warn(dev_from_gk20a(g), + "l2_flush_dirty too many retries"); + + if (invalidate) + gk20a_mm_l2_invalidate_locked(g); + + mutex_unlock(&mm->l2_op_lock); +} + + +int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, + struct dma_buf **dmabuf, + u64 *offset) +{ + struct mapped_buffer_node *mapped_buffer; + + gk20a_dbg_fn("gpu_va=0x%llx", gpu_va); + + mutex_lock(&vm->update_gmmu_lock); + + mapped_buffer = find_mapped_buffer_range_locked(&vm->mapped_buffers, + gpu_va); + if (!mapped_buffer) { + mutex_unlock(&vm->update_gmmu_lock); + return -EINVAL; + } + + *dmabuf = mapped_buffer->dmabuf; + *offset = gpu_va - mapped_buffer->addr; + + mutex_unlock(&vm->update_gmmu_lock); + + return 0; +} + +void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) +{ + struct mm_gk20a *mm = vm->mm; + struct gk20a *g = gk20a_from_vm(vm); + u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12); + u32 data; + s32 retry = 200; + + gk20a_dbg_fn(""); + + /* pagetables are considered sw states which are preserved after + prepare_poweroff. 
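gk20a_mm_fb_flush() and the L2 maintenance routines above all share one polling pattern: trigger the operation, then re-read the status register until neither "outstanding" nor "pending busy" is set, sleeping briefly between reads and warning when the retry budget runs out (on pre-silicon platforms the loops never give up). Stripped of the register accessors, the pattern looks roughly like the sketch below; read_status(), op_still_busy() and sleep_a_bit() are placeholders, not driver functions.

#include <stdio.h>
#include <stdbool.h>

static unsigned int read_status(void)     { return 0; }	/* pretend HW is idle */
static bool op_still_busy(unsigned int s) { (void)s; return false; }
static void sleep_a_bit(void)             { /* usleep_range(20, 40) in the driver */ }

static int poll_until_idle(int retries)
{
	do {
		unsigned int status = read_status();

		if (!op_still_busy(status))
			return 0;		/* operation completed */

		retries--;
		sleep_a_bit();
	} while (retries >= 0);

	printf("too many retries\n");		/* gk20a_warn() in the driver */
	return -1;
}

int main(void)
{
	return poll_until_idle(100) ? 1 : 0;
}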
When gk20a deinit releases those pagetables, + common code in vm unmap path calls tlb invalidate that touches + hw. Use the power_on flag to skip tlb invalidation when gpu + power is turned off */ + + if (!g->power_on) + return; + + /* No need to invalidate if tlb is clean */ + mutex_lock(&vm->update_gmmu_lock); + if (!vm->tlb_dirty) { + mutex_unlock(&vm->update_gmmu_lock); + return; + } + vm->tlb_dirty = false; + mutex_unlock(&vm->update_gmmu_lock); + + mutex_lock(&mm->tlb_lock); + do { + data = gk20a_readl(g, fb_mmu_ctrl_r()); + if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0) + break; + usleep_range(20, 40); + retry--; + } while (retry >= 0 || !tegra_platform_is_silicon()); + + if (retry < 0) + gk20a_warn(dev_from_gk20a(g), + "wait mmu fifo space too many retries"); + + gk20a_writel(g, fb_mmu_invalidate_pdb_r(), + fb_mmu_invalidate_pdb_addr_f(addr_lo) | + fb_mmu_invalidate_pdb_aperture_vid_mem_f()); + + /* this is a sledgehammer, it would seem */ + gk20a_writel(g, fb_mmu_invalidate_r(), + fb_mmu_invalidate_all_pdb_true_f() | + fb_mmu_invalidate_all_va_true_f() | + fb_mmu_invalidate_trigger_true_f()); + + do { + data = gk20a_readl(g, fb_mmu_ctrl_r()); + if (fb_mmu_ctrl_pri_fifo_empty_v(data) != + fb_mmu_ctrl_pri_fifo_empty_false_f()) + break; + retry--; + usleep_range(20, 40); + } while (retry >= 0 || !tegra_platform_is_silicon()); + + if (retry < 0) + gk20a_warn(dev_from_gk20a(g), + "mmu invalidate too many retries"); + + mutex_unlock(&mm->tlb_lock); +} + +int gk20a_mm_suspend(struct gk20a *g) +{ + gk20a_dbg_fn(""); + + gk20a_mm_fb_flush(g); + gk20a_mm_l2_flush(g, true); + + gk20a_dbg_fn("done"); + return 0; +} + +void gk20a_mm_ltc_isr(struct gk20a *g) +{ + u32 intr; + + intr = gk20a_readl(g, ltc_ltc0_ltss_intr_r()); + gk20a_err(dev_from_gk20a(g), "ltc: %08x\n", intr); + gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr); +} + +bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g) +{ + u32 debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r()); + return fb_mmu_debug_ctrl_debug_v(debug_ctrl) == + fb_mmu_debug_ctrl_debug_enabled_v(); +} + +static int gk20a_mm_mmu_vpr_info_fetch_wait(struct gk20a *g, + const unsigned int msec) +{ + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(msec); + while (1) { + u32 val; + + val = gk20a_readl(g, fb_mmu_vpr_info_r()); + if (fb_mmu_vpr_info_fetch_v(val) == + fb_mmu_vpr_info_fetch_false_v()) + break; + + if (tegra_platform_is_silicon() && + WARN_ON(time_after(jiffies, timeout))) + return -ETIME; + } + + return 0; +} + +int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g) +{ + int ret = 0; + + gk20a_busy_noresume(g->dev); + if (!pm_runtime_active(&g->dev->dev)) + goto fail; + + if (gk20a_mm_mmu_vpr_info_fetch_wait(g, 5)) { + ret = -ETIME; + goto fail; + } + + gk20a_writel(g, fb_mmu_vpr_info_r(), + fb_mmu_vpr_info_fetch_true_v()); + + ret = gk20a_mm_mmu_vpr_info_fetch_wait(g, 5); + + fail: + gk20a_idle(g->dev); + return ret; +} diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h new file mode 100644 index 00000000..23d15c23 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -0,0 +1,464 @@ +/* + * drivers/video/tegra/host/gk20a/mm_gk20a.h + * + * GK20A memory management + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef __MM_GK20A_H__ +#define __MM_GK20A_H__ + +#include +#include +#include +#include +#include "gk20a_allocator.h" + +/* This "address bit" in the gmmu ptes (and other gk20a accesses) + * signals the address as presented should be translated by the SMMU. + * Without this bit present gk20a accesses are *not* translated. + */ +/* Hack, get this from manuals somehow... */ +#define NV_MC_SMMU_VADDR_TRANSLATION_BIT 34 +#define NV_MC_SMMU_VADDR_TRANSLATE(x) (x | \ + (1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT)) + +/* For now keep the size relatively small-ish compared to the full + * 40b va. 32GB for now. It consists of two 16GB spaces. */ +#define NV_GMMU_VA_RANGE 35ULL +#define NV_GMMU_VA_IS_UPPER(x) ((x) >= ((u64)0x1 << (NV_GMMU_VA_RANGE-1))) + +struct mem_desc { + struct dma_buf *ref; + struct sg_table *sgt; + u32 size; +}; + +struct mem_desc_sub { + u32 offset; + u32 size; +}; + +struct gpfifo_desc { + size_t size; + u32 entry_num; + + u32 get; + u32 put; + + bool wrap; + + u64 iova; + struct gpfifo *cpu_va; + u64 gpu_va; +}; + +struct mmu_desc { + void *cpuva; + u64 iova; + size_t size; +}; + +struct inst_desc { + u64 iova; + void *cpuva; + phys_addr_t cpu_pa; + size_t size; +}; + +struct surface_mem_desc { + u64 iova; + void *cpuva; + struct sg_table *sgt; + size_t size; +}; + +struct userd_desc { + struct sg_table *sgt; + u64 iova; + void *cpuva; + size_t size; + u64 gpu_va; +}; + +struct runlist_mem_desc { + u64 iova; + void *cpuva; + size_t size; +}; + +struct patch_desc { + struct page **pages; + u64 iova; + size_t size; + void *cpu_va; + u64 gpu_va; + u32 data_count; +}; + +struct pmu_mem_desc { + void *cpuva; + u64 iova; + u64 pmu_va; + size_t size; +}; + +struct priv_cmd_queue_mem_desc { + u64 base_iova; + u32 *base_cpuva; + size_t size; +}; + +struct zcull_ctx_desc { + struct mem_desc mem; + u64 gpu_va; + u32 ctx_attr; + u32 ctx_sw_mode; +}; + +struct pm_ctx_desc { + struct mem_desc mem; + u64 gpu_va; + u32 ctx_attr; + u32 ctx_sw_mode; +}; + +struct gr_ctx_buffer_desc; +struct platform_device; +struct gr_ctx_buffer_desc { + void (*destroy)(struct platform_device *, struct gr_ctx_buffer_desc *); + struct sg_table *sgt; + struct page **pages; + size_t size; + u64 iova; + struct dma_attrs attrs; + void *priv; +}; + +struct gr_ctx_desc { + struct page **pages; + u64 iova; + size_t size; + u64 gpu_va; +}; + +struct compbit_store_desc { + struct pages **pages; + size_t size; + u64 base_iova; +}; + +struct page_table_gk20a { + /* backing for */ + /* Either a *page or a *mem_handle */ + void *ref; + /* track mapping cnt on this page table */ + u32 ref_cnt; + struct sg_table *sgt; + size_t size; +}; + +#ifndef _NVHOST_MEM_MGR_H +enum gk20a_mem_rw_flag { + gk20a_mem_flag_none = 0, + gk20a_mem_flag_read_only = 1, + gk20a_mem_flag_write_only = 2, +}; +#endif + +enum gmmu_pgsz_gk20a { + gmmu_page_size_small = 0, + gmmu_page_size_big = 1, + gmmu_nr_page_sizes = 2 +}; + + +struct page_directory_gk20a { + /* backing for */ + u32 num_pdes; + void *kv; + /* Either a *page or a *mem_handle */ + void *ref; + struct sg_table *sgt; + 
size_t size; + struct page_table_gk20a *ptes[gmmu_nr_page_sizes]; +}; + +struct priv_cmd_queue { + struct priv_cmd_queue_mem_desc mem; + u64 base_gpuva; /* gpu_va base */ + u16 size; /* num of entries in words */ + u16 put; /* put for priv cmd queue */ + u16 get; /* get for priv cmd queue */ + struct list_head free; /* list of pre-allocated free entries */ + struct list_head head; /* list of used entries */ +}; + +struct priv_cmd_entry { + u32 *ptr; + u64 gva; + u16 get; /* start of entry in queue */ + u16 size; /* in words */ + u32 gp_get; /* gp_get when submitting last priv cmd */ + u32 gp_put; /* gp_put when submitting last priv cmd */ + u32 gp_wrap; /* wrap when submitting last priv cmd */ + bool pre_alloc; /* prealloc entry, free to free list */ + struct list_head list; /* node for lists */ +}; + +struct mapped_buffer_node { + struct vm_gk20a *vm; + struct rb_node node; + struct list_head unmap_list; + struct list_head va_buffers_list; + struct vm_reserved_va_node *va_node; + u64 addr; + u64 size; + struct dma_buf *dmabuf; + struct sg_table *sgt; + struct kref ref; + u32 user_mapped; + bool own_mem_ref; + u32 pgsz_idx; + u32 ctag_offset; + u32 ctag_lines; + u32 flags; + u32 kind; + bool va_allocated; +}; + +struct vm_reserved_va_node { + struct list_head reserved_va_list; + struct list_head va_buffers_list; + u32 pgsz_idx; + u64 vaddr_start; + u64 size; + bool sparse; +}; + +struct vm_gk20a { + struct mm_gk20a *mm; + struct gk20a_as_share *as_share; /* as_share this represents */ + + u64 va_start; + u64 va_limit; + + int num_user_mapped_buffers; + + bool big_pages; /* enable large page support */ + bool enable_ctag; + bool tlb_dirty; + bool mapped; + + struct kref ref; + + struct mutex update_gmmu_lock; + + struct page_directory_gk20a pdes; + + struct gk20a_allocator vma[gmmu_nr_page_sizes]; + struct rb_root mapped_buffers; + + struct list_head reserved_va_list; + + dma_addr_t zero_page_iova; + void *zero_page_cpuva; + struct sg_table *zero_page_sgt; +}; + +struct gk20a; +struct channel_gk20a; + +int gk20a_init_mm_support(struct gk20a *g); +int gk20a_init_mm_setup_sw(struct gk20a *g); +int gk20a_init_bar1_vm(struct mm_gk20a *mm); +int gk20a_init_pmu_vm(struct mm_gk20a *mm); + +void gk20a_mm_fb_flush(struct gk20a *g); +void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); +void gk20a_mm_l2_invalidate(struct gk20a *g); + +struct mm_gk20a { + struct gk20a *g; + + u32 compression_page_size; + u32 big_page_size; + u32 pde_stride; + u32 pde_stride_shift; + + struct { + u32 order; + u32 num_ptes; + } page_table_sizing[gmmu_nr_page_sizes]; + + + struct { + u64 size; + } channel; + + struct { + u32 aperture_size; + struct vm_gk20a vm; + struct inst_desc inst_block; + } bar1; + + struct { + u32 aperture_size; + struct vm_gk20a vm; + struct inst_desc inst_block; + } pmu; + + struct mutex tlb_lock; + struct mutex l2_op_lock; + + void (*remove_support)(struct mm_gk20a *mm); + bool sw_ready; +#ifdef CONFIG_DEBUG_FS + u32 ltc_enabled; + u32 ltc_enabled_debug; +#endif +}; + +int gk20a_mm_init(struct mm_gk20a *mm); + +#define gk20a_from_mm(mm) ((mm)->g) +#define gk20a_from_vm(vm) ((vm)->mm->g) + +#define dev_from_vm(vm) dev_from_gk20a(vm->mm->g) + +#define DEFAULT_ALLOC_ALIGNMENT (4*1024) + +static inline int bar1_aperture_size_mb_gk20a(void) +{ + return 128; /*TBD read this from fuses?*/ +} +/* max address bits */ +static inline int max_physaddr_bits_gk20a(void) +{ + return 40;/*"old" sys physaddr, meaningful? 
*/ +} +static inline int max_vid_physaddr_bits_gk20a(void) +{ + /* "vid phys" is asid/smmu phys?, + * i.e. is this the real sys physaddr? */ + return 37; +} +static inline int max_vaddr_bits_gk20a(void) +{ + return 40; /* chopped for area? */ +} + +#if 0 /*related to addr bits above, concern below TBD on which is accurate */ +#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ + bus_bar1_block_ptr_s()) +#else +#define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v() +#endif + +void gk20a_mm_dump_vm(struct vm_gk20a *vm, + u64 va_begin, u64 va_end, char *label); + +int gk20a_mm_suspend(struct gk20a *g); + +phys_addr_t gk20a_get_phys_from_iova(struct device *d, + u64 dma_addr); + +int gk20a_get_sgtable(struct device *d, struct sg_table **sgt, + void *cpuva, u64 iova, + size_t size); + +int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt, + struct page **pages, u64 iova, + size_t size); + +void gk20a_free_sgtable(struct sg_table **sgt); + +u64 gk20a_mm_iova_addr(struct scatterlist *sgl); + +void gk20a_mm_ltc_isr(struct gk20a *g); + +bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g); + +int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g); + +u64 gk20a_gmmu_map(struct vm_gk20a *vm, + struct sg_table **sgt, + u64 size, + u32 flags, + int rw_flag); + +void gk20a_gmmu_unmap(struct vm_gk20a *vm, + u64 vaddr, + u64 size, + int rw_flag); + +struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf); +void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, + struct sg_table *sgt); + +u64 gk20a_vm_map(struct vm_gk20a *vm, + struct dma_buf *dmabuf, + u64 offset_align, + u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/, + int kind, + struct sg_table **sgt, + bool user_mapped, + int rw_flag); + +/* unmap handle from kernel */ +void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset); + +/* get reference to all currently mapped buffers */ +int gk20a_vm_get_buffers(struct vm_gk20a *vm, + struct mapped_buffer_node ***mapped_buffers, + int *num_buffers); + +/* put references on the given buffers */ +void gk20a_vm_put_buffers(struct vm_gk20a *vm, + struct mapped_buffer_node **mapped_buffers, + int num_buffers); + +/* invalidate tlbs for the vm area */ +void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm); + +/* find buffer corresponding to va */ +int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, + struct dma_buf **dmabuf, + u64 *offset); + +void gk20a_vm_get(struct vm_gk20a *vm); +void gk20a_vm_put(struct vm_gk20a *vm); + +/* vm-as interface */ +struct nvhost_as_alloc_space_args; +struct nvhost_as_free_space_args; +int gk20a_vm_alloc_share(struct gk20a_as_share *as_share); +int gk20a_vm_release_share(struct gk20a_as_share *as_share); +int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, + struct nvhost_as_alloc_space_args *args); +int gk20a_vm_free_space(struct gk20a_as_share *as_share, + struct nvhost_as_free_space_args *args); +int gk20a_vm_bind_channel(struct gk20a_as_share *as_share, + struct channel_gk20a *ch); +int gk20a_vm_map_buffer(struct gk20a_as_share *as_share, + int dmabuf_fd, + u64 *offset_align, + u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/ + int kind); +int gk20a_vm_unmap_buffer(struct gk20a_as_share *, u64 offset); + +int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); +#endif /*_MM_GK20A_H_ */ diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h new file mode 100644 index 00000000..09f348cb --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h 
@@ -0,0 +1,160 @@ +/* + * drivers/video/tegra/host/gk20a/soc/platform_gk20a.h + * + * GK20A Platform (SoC) Interface + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _GK20A_PLATFORM_H_ +#define _GK20A_PLATFORM_H_ + +#include +#include + +struct gk20a; +struct channel_gk20a; +struct gr_ctx_buffer_desc; +struct gk20a_scale_profile; + +struct gk20a_platform { +#ifdef CONFIG_TEGRA_GK20A + u32 syncpt_base; +#endif + /* Populated by the gk20a driver before probing the platform. */ + struct gk20a *g; + + /* Should be populated at probe. */ + bool can_railgate; + + /* Should be populated at probe. */ + bool has_syncpoints; + + /* Should be populated by probe. */ + struct dentry *debugfs; + + /* Clock configuration is stored here. Platform probe is responsible + * for filling this data. */ + struct clk *clk[3]; + int num_clks; + + /* Delay before rail gated */ + int railgate_delay; + + /* Delay before clock gated */ + int clockgate_delay; + + /* Initialize the platform interface of the gk20a driver. + * + * The platform implementation of this function must + * - set the power and clocks of the gk20a device to a known + * state, and + * - populate the gk20a_platform structure (a pointer to the + * structure can be obtained by calling gk20a_get_platform). + * + * After this function is finished, the driver will initialise + * pm runtime and genpd based on the platform configuration. + */ + int (*probe)(struct platform_device *dev); + + /* Second stage initialisation - called once all power management + * initialisations are done. + */ + int (*late_probe)(struct platform_device *dev); + + /* Called before submitting work to the gpu. The platform may use this + * hook to ensure that any other hw modules that the gpu depends on are + * powered. The platform implementation must count refs to this call. */ + int (*channel_busy)(struct platform_device *dev); + + /* Called after the work on the gpu is completed. The platform may use + * this hook to release power refs to any other hw modules that the gpu + * depends on. The platform implementation must count refs to this + * call. */ + void (*channel_idle)(struct platform_device *dev); + + /* This function is called to allocate secure memory (memory that the + * CPU cannot see). The function should fill the context buffer + * descriptor (especially fields destroy, sgt, size). + */ + int (*secure_alloc)(struct platform_device *dev, + struct gr_ctx_buffer_desc *desc, + size_t size); + + /* Device is going to be suspended */ + int (*suspend)(struct device *); + + /* Called to turn off the device */ + int (*railgate)(struct platform_device *dev); + + /* Called to turn on the device */ + int (*unrailgate)(struct platform_device *dev); + + /* Postscale callback is called after frequency change */ + void (*postscale)(struct platform_device *pdev, + unsigned long freq); + + /* Pre callback is called before frequency change */ + void (*prescale)(struct platform_device *pdev); + + /* Devfreq governor name. 
If scaling is enabled, we request + * this governor to be used in scaling */ + const char *devfreq_governor; + + /* Quality of service id. If this is set, the scaling routines + * will register a callback to id. Each time we receive a new value, + * the postscale callback gets called. */ + int qos_id; + + /* Called as part of debug dump. If the gpu gets hung, this function + * is responsible for delivering all necessary debug data of other + * hw units which may interact with the gpu without direct supervision + * of the CPU. + */ + void (*dump_platform_dependencies)(struct platform_device *dev); +}; + +static inline struct gk20a_platform *gk20a_get_platform( + struct platform_device *dev) +{ + return (struct gk20a_platform *)platform_get_drvdata(dev); +} + +extern struct gk20a_platform gk20a_generic_platform; +#ifdef CONFIG_TEGRA_GK20A +extern struct gk20a_platform gk20a_tegra_platform; +#endif + +static inline int gk20a_platform_channel_busy(struct platform_device *dev) +{ + struct gk20a_platform *p = gk20a_get_platform(dev); + int ret = 0; + if (p->channel_busy) + ret = p->channel_busy(dev); + + return ret; +} + +static inline void gk20a_platform_channel_idle(struct platform_device *dev) +{ + struct gk20a_platform *p = gk20a_get_platform(dev); + if (p->channel_idle) + p->channel_idle(dev); +} + +static inline bool gk20a_platform_has_syncpoints(struct platform_device *dev) +{ + struct gk20a_platform *p = gk20a_get_platform(dev); + return p->has_syncpoints; +} + +#endif diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c new file mode 100644 index 00000000..7b750df6 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c @@ -0,0 +1,35 @@ +/* + * drivers/video/tegra/host/gk20a/platform_gk20a_generic.c + * + * GK20A Generic Platform Interface + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "platform_gk20a.h" + +static int gk20a_generic_probe(struct platform_device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + + /* TODO: Initialize clocks and power */ + (void)platform; + + return 0; +} + +struct gk20a_platform gk20a_generic_platform = { + .probe = gk20a_generic_probe, +}; diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c new file mode 100644 index 00000000..35658f31 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c @@ -0,0 +1,561 @@ +/* + * drivers/video/tegra/host/gk20a/platform_gk20a_tegra.c + * + * GK20A Tegra Platform Interface + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../arch/arm/mach-tegra/iomap.h" + +#include "gk20a.h" +#include "hal_gk20a.h" +#include "platform_gk20a.h" +#include "gk20a_scale.h" + +#define TEGRA_GK20A_INTR INT_GPU +#define TEGRA_GK20A_INTR_NONSTALL INT_GPU_NONSTALL + +#define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */ +#define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */ + +extern struct device tegra_vpr_dev; +struct gk20a_platform t132_gk20a_tegra_platform; + +struct gk20a_emc_params { + long emc_slope; + long emc_offset; + long emc_dip_slope; + long emc_dip_offset; + long emc_xmid; + bool linear; +}; + +/* + * 20.12 fixed point arithmetic + */ + +static const int FXFRAC = 12; +static const int FX_HALF = (1 << 12) / 2; + +#define INT_TO_FX(x) ((x) << FXFRAC) +#define FX_TO_INT(x) ((x) >> FXFRAC) + +#define MHZ_TO_HZ(x) ((x) * 1000000) +#define HZ_TO_MHZ(x) ((x) / 1000000) + +int FXMUL(int x, int y) +{ + return ((long long) x * (long long) y) >> FXFRAC; +} + +int FXDIV(int x, int y) +{ + /* long long div operation not supported, must shift manually. This + * would have been + * + * return (((long long) x) << FXFRAC) / (long long) y; + */ + int pos, t; + if (x == 0) + return 0; + + /* find largest allowable right shift to numerator, limit to FXFRAC */ + t = x < 0 ? -x : x; + pos = 31 - fls(t); /* fls can't be 32 if x != 0 */ + if (pos > FXFRAC) + pos = FXFRAC; + + y >>= FXFRAC - pos; + if (y == 0) + return 0x7FFFFFFF; /* overflow, return MAX_FIXED */ + + return (x << pos) / y; +} + +static int gk20a_tegra_channel_busy(struct platform_device *dev) +{ + int ret = 0; + + /* Explicitly turn on the host1x clocks + * - This is needed as host1x driver sets ignore_children = true + * to cater the use case of display clock ON but host1x clock OFF + * in OS-Idle-Display-ON case + * - This was easily done in ACM as it only checked the ref count + * of host1x (or any device for that matter) to be zero before + * turning off its clock + * - However, runtime PM checks to see if *ANY* child of device is + * in ACTIVE state and if yes, it doesn't suspend the parent. 
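The emc scaling code in this file works in 20.12 fixed point: INT_TO_FX()/FX_TO_INT() move between integer MHz and the fixed-point representation, FXMUL() widens to 64 bits before shifting the fraction back out, and adding FX_HALF before truncating rounds to the nearest integer. A minimal standalone demonstration; the 852 MHz figure is arbitrary.

#include <stdio.h>

#define FXFRAC  12
#define FX_HALF ((1 << FXFRAC) / 2)

#define INT_TO_FX(x) ((x) << FXFRAC)
#define FX_TO_INT(x) ((x) >> FXFRAC)

/* same idea as FXMUL(): multiply in 64 bits, then drop the extra fraction */
static int fxmul(int x, int y)
{
	return (int)(((long long)x * (long long)y) >> FXFRAC);
}

int main(void)
{
	int mhz  = INT_TO_FX(852);	/* 852 MHz as 20.12 */
	int half = FX_HALF;		/* 0.5 as 20.12 */

	/* 852 * 0.5, rounded back to an integer: prints 426 */
	int result = FX_TO_INT(fxmul(mhz, half) + FX_HALF);

	printf("852 MHz * 0.5 = %d MHz\n", result);
	return 0;
}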
As a + * result of this, display && host1x clocks remains ON during + * OS-Idle-Display-ON case + * - The code below fixes this use-case + */ + if (to_platform_device(dev->dev.parent)) + ret = nvhost_module_busy_ext( + to_platform_device(dev->dev.parent)); + + return ret; +} + +static void gk20a_tegra_channel_idle(struct platform_device *dev) +{ + /* Explicitly turn off the host1x clocks */ + if (to_platform_device(dev->dev.parent)) + nvhost_module_idle_ext(to_platform_device(dev->dev.parent)); +} + +static void gk20a_tegra_secure_destroy(struct platform_device *pdev, + struct gr_ctx_buffer_desc *desc) +{ + gk20a_free_sgtable(&desc->sgt); + dma_free_attrs(&tegra_vpr_dev, desc->size, + (void *)(uintptr_t)&desc->iova, + desc->iova, &desc->attrs); +} + +static int gk20a_tegra_secure_alloc(struct platform_device *pdev, + struct gr_ctx_buffer_desc *desc, + size_t size) +{ + struct device *dev = &pdev->dev; + DEFINE_DMA_ATTRS(attrs); + dma_addr_t iova; + struct sg_table *sgt; + struct page *page; + int err = 0; + + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); + + (void)dma_alloc_attrs(&tegra_vpr_dev, size, &iova, + GFP_KERNEL, &attrs); + if (dma_mapping_error(&tegra_vpr_dev, iova)) + return -ENOMEM; + + desc->iova = iova; + desc->size = size; + desc->attrs = attrs; + desc->destroy = gk20a_tegra_secure_destroy; + + sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { + gk20a_err(dev, "failed to allocate memory\n"); + goto fail; + } + err = sg_alloc_table(sgt, 1, GFP_KERNEL); + if (err) { + gk20a_err(dev, "failed to allocate sg_table\n"); + goto fail_sgt; + } + page = phys_to_page(iova); + sg_set_page(sgt->sgl, page, size, 0); + sg_dma_address(sgt->sgl) = iova; + + desc->sgt = sgt; + + return err; + +fail_sgt: + kfree(sgt); +fail: + dma_free_attrs(&tegra_vpr_dev, desc->size, + (void *)(uintptr_t)&desc->iova, + desc->iova, &desc->attrs); + return err; +} + +/* + * gk20a_tegra_get_emc_rate() + * + * This function returns the minimum emc clock based on gpu frequency + */ + +long gk20a_tegra_get_emc_rate(struct gk20a_emc_params *emc_params, long freq) +{ + long hz; + + freq = INT_TO_FX(HZ_TO_MHZ(freq)); + hz = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset; + + hz -= FXMUL(emc_params->emc_dip_slope, + FXMUL(freq - emc_params->emc_xmid, + freq - emc_params->emc_xmid)) + + emc_params->emc_dip_offset; + + hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF)); /* round to nearest */ + hz = (hz < 0) ? 0 : hz; + + return hz; +} + +/* + * gk20a_tegra_postscale(profile, freq) + * + * This function sets emc frequency based on current gpu frequency + */ + +static void gk20a_tegra_postscale(struct platform_device *pdev, + unsigned long freq) +{ + struct gk20a_platform *platform = platform_get_drvdata(pdev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + struct gk20a_emc_params *emc_params = profile->private_data; + struct gk20a *g = get_gk20a(pdev); + + long after = gk20a_clk_get_rate(g); + long emc_target = gk20a_tegra_get_emc_rate(emc_params, after); + + clk_set_rate(platform->clk[2], emc_target); +} + +/* + * gk20a_tegra_prescale(profile, freq) + * + * This function informs EDP about changed constraints. 
+ */ + +static void gk20a_tegra_prescale(struct platform_device *pdev) +{ + struct gk20a *g = get_gk20a(pdev); + u32 avg = 0; + + gk20a_pmu_load_norm(g, &avg); + tegra_edp_notify_gpu_load(avg); +} + +/* + * gk20a_tegra_calibrate_emc() + * + * Compute emc scaling parameters + * + * Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od) + * + * Remc - 3d.emc rate + * R3d - 3d.cbus rate + * Rm - 3d.cbus 'middle' rate = (max + min)/2 + * S - emc_slope + * O - emc_offset + * Sd - emc_dip_slope + * Od - emc_dip_offset + * + * this superposes a quadratic dip centered around the middle 3d + * frequency over a linear correlation of 3d.emc to 3d clock + * rates. + * + * S, O are chosen so that the maximum 3d rate produces the + * maximum 3d.emc rate exactly, and the minimum 3d rate produces + * at least the minimum 3d.emc rate. + * + * Sd and Od are chosen to produce the largest dip that will + * keep 3d.emc frequencies monotonously decreasing with 3d + * frequencies. To achieve this, the first derivative of Remc + * with respect to R3d should be zero for the minimal 3d rate: + * + * R'emc = S - 2 * Sd * (R3d - Rm) + * R'emc(R3d-min) = 0 + * S = 2 * Sd * (R3d-min - Rm) + * = 2 * Sd * (R3d-min - R3d-max) / 2 + * + * +------------------------------+ + * | Sd = S / (R3d-min - R3d-max) | + * +------------------------------+ + * + * dip = Sd * (R3d - Rm)^2 + Od + * + * requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives + * + * Sd * (R3d-min - Rm)^2 + Od = 0 + * Od = -Sd * ((R3d-min - R3d-max) / 2)^2 + * = -Sd * ((R3d-min - R3d-max)^2) / 4 + * + * +------------------------------+ + * | Od = (emc-max - emc-min) / 4 | + * +------------------------------+ + * + */ + +void gk20a_tegra_calibrate_emc(struct gk20a_emc_params *emc_params, + struct clk *clk_3d, struct clk *clk_3d_emc) +{ + long correction; + unsigned long max_emc; + unsigned long min_emc; + unsigned long min_rate_3d; + unsigned long max_rate_3d; + + max_emc = clk_round_rate(clk_3d_emc, UINT_MAX); + max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc)); + + min_emc = clk_round_rate(clk_3d_emc, 0); + min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc)); + + max_rate_3d = clk_round_rate(clk_3d, UINT_MAX); + max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d)); + + min_rate_3d = clk_round_rate(clk_3d, 0); + min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d)); + + emc_params->emc_slope = + FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d)); + emc_params->emc_offset = max_emc - + FXMUL(emc_params->emc_slope, max_rate_3d); + /* Guarantee max 3d rate maps to max emc rate */ + emc_params->emc_offset += max_emc - + (FXMUL(emc_params->emc_slope, max_rate_3d) + + emc_params->emc_offset); + + emc_params->emc_dip_offset = (max_emc - min_emc) / 4; + emc_params->emc_dip_slope = + -FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d); + emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2; + correction = + emc_params->emc_dip_offset + + FXMUL(emc_params->emc_dip_slope, + FXMUL(max_rate_3d - emc_params->emc_xmid, + max_rate_3d - emc_params->emc_xmid)); + emc_params->emc_dip_offset -= correction; +} + +/* + * gk20a_tegra_railgate() + * + * Gate (disable) gk20a power rail + */ + +static int gk20a_tegra_railgate(struct platform_device *pdev) +{ + if (tegra_powergate_is_powered(TEGRA_POWERGATE_GPU)) + tegra_powergate_partition(TEGRA_POWERGATE_GPU); + return 0; +} + +/* + * gk20a_tegra_unrailgate() + * + * Ungate (enable) gk20a power rail + */ + +static int gk20a_tegra_unrailgate(struct platform_device *pdev) +{ + tegra_unpowergate_partition(TEGRA_POWERGATE_GPU); + return 0; +} + +struct { + char 
*name; + unsigned long default_rate; +} tegra_gk20a_clocks[] = { + {"PLLG_ref", UINT_MAX}, + {"pwr", 204000000}, + {"emc", UINT_MAX} }; + +/* + * gk20a_tegra_get_clocks() + * + * This function finds clocks in tegra platform and populates + * the clock information to gk20a platform data. + */ + +static int gk20a_tegra_get_clocks(struct platform_device *pdev) +{ + struct gk20a_platform *platform = platform_get_drvdata(pdev); + char devname[16]; + int i; + int ret = 0; + + snprintf(devname, sizeof(devname), + (pdev->id <= 0) ? "tegra_%s" : "tegra_%s.%d\n", + pdev->name, pdev->id); + + platform->num_clks = 0; + for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) { + long rate = tegra_gk20a_clocks[i].default_rate; + struct clk *c; + + c = clk_get_sys(devname, tegra_gk20a_clocks[i].name); + if (IS_ERR(c)) { + ret = PTR_ERR(c); + goto err_get_clock; + } + rate = clk_round_rate(c, rate); + clk_set_rate(c, rate); + platform->clk[i] = c; + } + platform->num_clks = i; + + return 0; + +err_get_clock: + + while (i--) + clk_put(platform->clk[i]); + return ret; +} + +static void gk20a_tegra_scale_init(struct platform_device *pdev) +{ + struct gk20a_platform *platform = gk20a_get_platform(pdev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + struct gk20a_emc_params *emc_params; + + if (!profile) + return; + + emc_params = kzalloc(sizeof(*emc_params), GFP_KERNEL); + if (!emc_params) + return; + + gk20a_tegra_calibrate_emc(emc_params, gk20a_clk_get(platform->g), + platform->clk[2]); + + profile->private_data = emc_params; +} + +static void gk20a_tegra_debug_dump(struct platform_device *pdev) +{ + struct gk20a_platform *platform = gk20a_get_platform(pdev); + struct gk20a *g = platform->g; + nvhost_debug_dump_device(g->dev); +} + +static int gk20a_tegra_probe(struct platform_device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + + if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA13) { + t132_gk20a_tegra_platform.g = platform->g; + *platform = t132_gk20a_tegra_platform; + } + + gk20a_tegra_get_clocks(dev); + + return 0; +} + +static int gk20a_tegra_late_probe(struct platform_device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + + /* Make gk20a power domain a subdomain of mc */ + tegra_pd_add_sd(&platform->g->pd); + + /* Initialise tegra specific scaling quirks */ + gk20a_tegra_scale_init(dev); + + return 0; +} + +static int gk20a_tegra_suspend(struct device *dev) +{ + tegra_edp_notify_gpu_load(0); + return 0; +} + +static struct resource gk20a_tegra_resources[] = { + { + .start = TEGRA_GK20A_BAR0_BASE, + .end = TEGRA_GK20A_BAR0_BASE + TEGRA_GK20A_BAR0_SIZE - 1, + .flags = IORESOURCE_MEM, + }, + { + .start = TEGRA_GK20A_BAR1_BASE, + .end = TEGRA_GK20A_BAR1_BASE + TEGRA_GK20A_BAR1_SIZE - 1, + .flags = IORESOURCE_MEM, + }, + { /* Used on ASIM only */ + .start = TEGRA_GK20A_SIM_BASE, + .end = TEGRA_GK20A_SIM_BASE + TEGRA_GK20A_SIM_SIZE - 1, + .flags = IORESOURCE_MEM, + }, + { + .start = TEGRA_GK20A_INTR, + .end = TEGRA_GK20A_INTR, + .flags = IORESOURCE_IRQ, + }, + { + .start = TEGRA_GK20A_INTR_NONSTALL, + .end = TEGRA_GK20A_INTR_NONSTALL, + .flags = IORESOURCE_IRQ, + }, +}; + +struct gk20a_platform t132_gk20a_tegra_platform = { + .has_syncpoints = true, + + /* power management configuration */ + .railgate_delay = 500, + .clockgate_delay = 50, + + .probe = gk20a_tegra_probe, + .late_probe = gk20a_tegra_late_probe, + + /* power management callbacks */ + .suspend = gk20a_tegra_suspend, + .railgate = gk20a_tegra_railgate, + .unrailgate = 
gk20a_tegra_unrailgate, + + /* frequency scaling configuration */ + .prescale = gk20a_tegra_prescale, + .postscale = gk20a_tegra_postscale, + .devfreq_governor = "nvhost_podgov", + .qos_id = PM_QOS_GPU_FREQ_MIN, + + .channel_busy = gk20a_tegra_channel_busy, + .channel_idle = gk20a_tegra_channel_idle, + .secure_alloc = gk20a_tegra_secure_alloc, + .dump_platform_dependencies = gk20a_tegra_debug_dump, +}; + +struct gk20a_platform gk20a_tegra_platform = { + .has_syncpoints = true, + + /* power management configuration */ + .railgate_delay = 500, + .clockgate_delay = 50, + .can_railgate = true, + + .probe = gk20a_tegra_probe, + .late_probe = gk20a_tegra_late_probe, + + /* power management callbacks */ + .suspend = gk20a_tegra_suspend, + .railgate = gk20a_tegra_railgate, + .unrailgate = gk20a_tegra_unrailgate, + + /* frequency scaling configuration */ + .prescale = gk20a_tegra_prescale, + .postscale = gk20a_tegra_postscale, + .devfreq_governor = "nvhost_podgov", + .qos_id = PM_QOS_GPU_FREQ_MIN, + + .channel_busy = gk20a_tegra_channel_busy, + .channel_idle = gk20a_tegra_channel_idle, + .secure_alloc = gk20a_tegra_secure_alloc, + .dump_platform_dependencies = gk20a_tegra_debug_dump, +}; + +struct platform_device tegra_gk20a_device = { + .name = "gk20a", + .resource = gk20a_tegra_resources, + .num_resources = ARRAY_SIZE(gk20a_tegra_resources), + .dev = { + .platform_data = &gk20a_tegra_platform, + }, +}; diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c new file mode 100644 index 00000000..a00499a9 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -0,0 +1,3796 @@ +/* + * drivers/video/tegra/host/gk20a/pmu_gk20a.c + * + * GK20A PMU (aka. gPMU outside gk20a context) + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include /* for mdelay */ +#include +#include +#include +#include +#include + +#include "gk20a.h" +#include "hw_mc_gk20a.h" +#include "hw_pwr_gk20a.h" +#include "hw_top_gk20a.h" + +#define GK20A_PMU_UCODE_IMAGE "gpmu_ucode.bin" + +#define gk20a_dbg_pmu(fmt, arg...) 
\ + gk20a_dbg(gpu_dbg_pmu, fmt, ##arg) + +static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu); +static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g, + u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt); +static void gk20a_init_pmu_setup_hw2_workqueue(struct work_struct *work); +static void pmu_save_zbc(struct gk20a *g, u32 entries); +static void ap_callback_init_and_enable_ctrl( + struct gk20a *g, struct pmu_msg *msg, + void *param, u32 seq_desc, u32 status); +static int gk20a_pmu_ap_send_command(struct gk20a *g, + union pmu_ap_cmd *p_ap_cmd, bool b_block); + +static u32 pmu_cmdline_size_v0(struct pmu_gk20a *pmu) +{ + return sizeof(struct pmu_cmdline_args_v0); +} + +static u32 pmu_cmdline_size_v1(struct pmu_gk20a *pmu) +{ + return sizeof(struct pmu_cmdline_args_v1); +} + +static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq) +{ + pmu->args_v1.cpu_freq_hz = freq; +} + +static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq) +{ + pmu->args_v0.cpu_freq_hz = freq; +} + +static void *get_pmu_cmdline_args_ptr_v1(struct pmu_gk20a *pmu) +{ + return (void *)(&pmu->args_v1); +} + +static void *get_pmu_cmdline_args_ptr_v0(struct pmu_gk20a *pmu) +{ + return (void *)(&pmu->args_v0); +} + +static u32 get_pmu_allocation_size_v1(struct pmu_gk20a *pmu) +{ + return sizeof(struct pmu_allocation_v1); +} + +static u32 get_pmu_allocation_size_v0(struct pmu_gk20a *pmu) +{ + return sizeof(struct pmu_allocation_v0); +} + +static void set_pmu_allocation_ptr_v1(struct pmu_gk20a *pmu, + void **pmu_alloc_ptr, void *assign_ptr) +{ + struct pmu_allocation_v1 **pmu_a_ptr = + (struct pmu_allocation_v1 **)pmu_alloc_ptr; + *pmu_a_ptr = (struct pmu_allocation_v1 *)assign_ptr; +} + +static void set_pmu_allocation_ptr_v0(struct pmu_gk20a *pmu, + void **pmu_alloc_ptr, void *assign_ptr) +{ + struct pmu_allocation_v0 **pmu_a_ptr = + (struct pmu_allocation_v0 **)pmu_alloc_ptr; + *pmu_a_ptr = (struct pmu_allocation_v0 *)assign_ptr; +} + +static void pmu_allocation_set_dmem_size_v1(struct pmu_gk20a *pmu, + void *pmu_alloc_ptr, u16 size) +{ + struct pmu_allocation_v1 *pmu_a_ptr = + (struct pmu_allocation_v1 *)pmu_alloc_ptr; + pmu_a_ptr->alloc.dmem.size = size; +} + +static void pmu_allocation_set_dmem_size_v0(struct pmu_gk20a *pmu, + void *pmu_alloc_ptr, u16 size) +{ + struct pmu_allocation_v0 *pmu_a_ptr = + (struct pmu_allocation_v0 *)pmu_alloc_ptr; + pmu_a_ptr->alloc.dmem.size = size; +} + +static u16 pmu_allocation_get_dmem_size_v1(struct pmu_gk20a *pmu, + void *pmu_alloc_ptr) +{ + struct pmu_allocation_v1 *pmu_a_ptr = + (struct pmu_allocation_v1 *)pmu_alloc_ptr; + return pmu_a_ptr->alloc.dmem.size; +} + +static u16 pmu_allocation_get_dmem_size_v0(struct pmu_gk20a *pmu, + void *pmu_alloc_ptr) +{ + struct pmu_allocation_v0 *pmu_a_ptr = + (struct pmu_allocation_v0 *)pmu_alloc_ptr; + return pmu_a_ptr->alloc.dmem.size; +} + +static u32 pmu_allocation_get_dmem_offset_v1(struct pmu_gk20a *pmu, + void *pmu_alloc_ptr) +{ + struct pmu_allocation_v1 *pmu_a_ptr = + (struct pmu_allocation_v1 *)pmu_alloc_ptr; + return pmu_a_ptr->alloc.dmem.offset; +} + +static u32 pmu_allocation_get_dmem_offset_v0(struct pmu_gk20a *pmu, + void *pmu_alloc_ptr) +{ + struct pmu_allocation_v0 *pmu_a_ptr = + (struct pmu_allocation_v0 *)pmu_alloc_ptr; + return pmu_a_ptr->alloc.dmem.offset; +} + +static u32 *pmu_allocation_get_dmem_offset_addr_v1(struct pmu_gk20a *pmu, + void *pmu_alloc_ptr) +{ + struct pmu_allocation_v1 *pmu_a_ptr = + (struct pmu_allocation_v1 *)pmu_alloc_ptr; + return 
&pmu_a_ptr->alloc.dmem.offset; +} + +static u32 *pmu_allocation_get_dmem_offset_addr_v0(struct pmu_gk20a *pmu, + void *pmu_alloc_ptr) +{ + struct pmu_allocation_v0 *pmu_a_ptr = + (struct pmu_allocation_v0 *)pmu_alloc_ptr; + return &pmu_a_ptr->alloc.dmem.offset; +} + +static void pmu_allocation_set_dmem_offset_v1(struct pmu_gk20a *pmu, + void *pmu_alloc_ptr, u32 offset) +{ + struct pmu_allocation_v1 *pmu_a_ptr = + (struct pmu_allocation_v1 *)pmu_alloc_ptr; + pmu_a_ptr->alloc.dmem.offset = offset; +} + +static void pmu_allocation_set_dmem_offset_v0(struct pmu_gk20a *pmu, + void *pmu_alloc_ptr, u32 offset) +{ + struct pmu_allocation_v0 *pmu_a_ptr = + (struct pmu_allocation_v0 *)pmu_alloc_ptr; + pmu_a_ptr->alloc.dmem.offset = offset; +} + +static void *get_pmu_msg_pmu_init_msg_ptr_v1(struct pmu_init_msg *init) +{ + return (void *)(&(init->pmu_init_v1)); +} + +static u16 get_pmu_init_msg_pmu_sw_mg_off_v1(union pmu_init_msg_pmu *init_msg) +{ + struct pmu_init_msg_pmu_v1 *init = + (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1); + return init->sw_managed_area_offset; +} + +static u16 get_pmu_init_msg_pmu_sw_mg_size_v1(union pmu_init_msg_pmu *init_msg) +{ + struct pmu_init_msg_pmu_v1 *init = + (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1); + return init->sw_managed_area_size; +} + +static void *get_pmu_msg_pmu_init_msg_ptr_v0(struct pmu_init_msg *init) +{ + return (void *)(&(init->pmu_init_v0)); +} + +static u16 get_pmu_init_msg_pmu_sw_mg_off_v0(union pmu_init_msg_pmu *init_msg) +{ + struct pmu_init_msg_pmu_v0 *init = + (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0); + return init->sw_managed_area_offset; +} + +static u16 get_pmu_init_msg_pmu_sw_mg_size_v0(union pmu_init_msg_pmu *init_msg) +{ + struct pmu_init_msg_pmu_v0 *init = + (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0); + return init->sw_managed_area_size; +} + +static u32 get_pmu_perfmon_cmd_start_size_v1(void) +{ + return sizeof(struct pmu_perfmon_cmd_start_v1); +} + +static u32 get_pmu_perfmon_cmd_start_size_v0(void) +{ + return sizeof(struct pmu_perfmon_cmd_start_v0); +} + +static int get_perfmon_cmd_start_offsetofvar_v1( + enum pmu_perfmon_cmd_start_fields field) +{ + switch (field) { + case COUNTER_ALLOC: + return offsetof(struct pmu_perfmon_cmd_start_v1, + counter_alloc); + default: + return -EINVAL; + break; + } + return 0; +} + +static int get_perfmon_cmd_start_offsetofvar_v0( + enum pmu_perfmon_cmd_start_fields field) +{ + switch (field) { + case COUNTER_ALLOC: + return offsetof(struct pmu_perfmon_cmd_start_v0, + counter_alloc); + default: + return -EINVAL; + break; + } + return 0; +} + +static u32 get_pmu_perfmon_cmd_init_size_v1(void) +{ + return sizeof(struct pmu_perfmon_cmd_init_v1); +} + +static u32 get_pmu_perfmon_cmd_init_size_v0(void) +{ + return sizeof(struct pmu_perfmon_cmd_init_v0); +} + +static int get_perfmon_cmd_init_offsetofvar_v1( + enum pmu_perfmon_cmd_start_fields field) +{ + switch (field) { + case COUNTER_ALLOC: + return offsetof(struct pmu_perfmon_cmd_init_v1, + counter_alloc); + default: + return -EINVAL; + break; + } + return 0; +} + +static int get_perfmon_cmd_init_offsetofvar_v0( + enum pmu_perfmon_cmd_start_fields field) +{ + switch (field) { + case COUNTER_ALLOC: + return offsetof(struct pmu_perfmon_cmd_init_v0, + counter_alloc); + default: + return -EINVAL; + break; + } + return 0; +} + +static void perfmon_start_set_cmd_type_v1(struct pmu_perfmon_cmd *pc, u8 value) +{ + struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1; + start->cmd_type = value; +} + +static void perfmon_start_set_cmd_type_v0(struct 
pmu_perfmon_cmd *pc, u8 value) +{ + struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0; + start->cmd_type = value; +} + +static void perfmon_start_set_group_id_v1(struct pmu_perfmon_cmd *pc, u8 value) +{ + struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1; + start->group_id = value; +} + +static void perfmon_start_set_group_id_v0(struct pmu_perfmon_cmd *pc, u8 value) +{ + struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0; + start->group_id = value; +} + +static void perfmon_start_set_state_id_v1(struct pmu_perfmon_cmd *pc, u8 value) +{ + struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1; + start->state_id = value; +} + +static void perfmon_start_set_state_id_v0(struct pmu_perfmon_cmd *pc, u8 value) +{ + struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0; + start->state_id = value; +} + +static void perfmon_start_set_flags_v1(struct pmu_perfmon_cmd *pc, u8 value) +{ + struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1; + start->flags = value; +} + +static void perfmon_start_set_flags_v0(struct pmu_perfmon_cmd *pc, u8 value) +{ + struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0; + start->flags = value; +} + +static u8 perfmon_start_get_flags_v1(struct pmu_perfmon_cmd *pc) +{ + struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1; + return start->flags; +} + +static u8 perfmon_start_get_flags_v0(struct pmu_perfmon_cmd *pc) +{ + struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0; + return start->flags; +} + +static void perfmon_cmd_init_set_sample_buffer_v1(struct pmu_perfmon_cmd *pc, + u16 value) +{ + struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1; + init->sample_buffer = value; +} + +static void perfmon_cmd_init_set_sample_buffer_v0(struct pmu_perfmon_cmd *pc, + u16 value) +{ + struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0; + init->sample_buffer = value; +} + +static void perfmon_cmd_init_set_dec_cnt_v1(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1; + init->to_decrease_count = value; +} + +static void perfmon_cmd_init_set_dec_cnt_v0(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0; + init->to_decrease_count = value; +} + +static void perfmon_cmd_init_set_base_cnt_id_v1(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1; + init->base_counter_id = value; +} + +static void perfmon_cmd_init_set_base_cnt_id_v0(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0; + init->base_counter_id = value; +} + +static void perfmon_cmd_init_set_samp_period_us_v1(struct pmu_perfmon_cmd *pc, + u32 value) +{ + struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1; + init->sample_period_us = value; +} + +static void perfmon_cmd_init_set_samp_period_us_v0(struct pmu_perfmon_cmd *pc, + u32 value) +{ + struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0; + init->sample_period_us = value; +} + +static void perfmon_cmd_init_set_num_cnt_v1(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1; + init->num_counters = value; +} + +static void perfmon_cmd_init_set_num_cnt_v0(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0; + init->num_counters = value; +} + +static void perfmon_cmd_init_set_mov_avg_v1(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1; + init->samples_in_moving_avg = value; +} + +static void perfmon_cmd_init_set_mov_avg_v0(struct 
pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0; + init->samples_in_moving_avg = value; +} + +static void get_pmu_init_msg_pmu_queue_params_v0(struct pmu_queue *queue, + u32 id, void *pmu_init_msg) +{ + struct pmu_init_msg_pmu_v0 *init = + (struct pmu_init_msg_pmu_v0 *)pmu_init_msg; + queue->index = init->queue_info[id].index; + queue->offset = init->queue_info[id].offset; + queue->size = init->queue_info[id].size; +} + +static void get_pmu_init_msg_pmu_queue_params_v1(struct pmu_queue *queue, + u32 id, void *pmu_init_msg) +{ + struct pmu_init_msg_pmu_v1 *init = + (struct pmu_init_msg_pmu_v1 *)pmu_init_msg; + queue->index = init->queue_info[id].index; + queue->offset = init->queue_info[id].offset; + queue->size = init->queue_info[id].size; +} + +static void *get_pmu_sequence_in_alloc_ptr_v1(struct pmu_sequence *seq) +{ + return (void *)(&seq->in_v1); +} + +static void *get_pmu_sequence_in_alloc_ptr_v0(struct pmu_sequence *seq) +{ + return (void *)(&seq->in_v0); +} + +static void *get_pmu_sequence_out_alloc_ptr_v1(struct pmu_sequence *seq) +{ + return (void *)(&seq->out_v1); +} + +static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq) +{ + return (void *)(&seq->out_v0); +} + +static int gk20a_init_pmu(struct pmu_gk20a *pmu) +{ + struct gk20a *g = pmu->g; + switch (pmu->desc->app_version) { + case APP_VERSION_1: + g->ops.pmu_ver.cmd_id_zbc_table_update = 16; + g->ops.pmu_ver.get_pmu_cmdline_args_size = + pmu_cmdline_size_v1; + g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq = + set_pmu_cmdline_args_cpufreq_v1; + g->ops.pmu_ver.get_pmu_cmdline_args_ptr = + get_pmu_cmdline_args_ptr_v1; + g->ops.pmu_ver.get_pmu_allocation_struct_size = + get_pmu_allocation_size_v1; + g->ops.pmu_ver.set_pmu_allocation_ptr = + set_pmu_allocation_ptr_v1; + g->ops.pmu_ver.pmu_allocation_set_dmem_size = + pmu_allocation_set_dmem_size_v1; + g->ops.pmu_ver.pmu_allocation_get_dmem_size = + pmu_allocation_get_dmem_size_v1; + g->ops.pmu_ver.pmu_allocation_get_dmem_offset = + pmu_allocation_get_dmem_offset_v1; + g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr = + pmu_allocation_get_dmem_offset_addr_v1; + g->ops.pmu_ver.pmu_allocation_set_dmem_offset = + pmu_allocation_set_dmem_offset_v1; + g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params = + get_pmu_init_msg_pmu_queue_params_v1; + g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr = + get_pmu_msg_pmu_init_msg_ptr_v1; + g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off = + get_pmu_init_msg_pmu_sw_mg_off_v1; + g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size = + get_pmu_init_msg_pmu_sw_mg_size_v1; + g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size = + get_pmu_perfmon_cmd_start_size_v1; + g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar = + get_perfmon_cmd_start_offsetofvar_v1; + g->ops.pmu_ver.perfmon_start_set_cmd_type = + perfmon_start_set_cmd_type_v1; + g->ops.pmu_ver.perfmon_start_set_group_id = + perfmon_start_set_group_id_v1; + g->ops.pmu_ver.perfmon_start_set_state_id = + perfmon_start_set_state_id_v1; + g->ops.pmu_ver.perfmon_start_set_flags = + perfmon_start_set_flags_v1; + g->ops.pmu_ver.perfmon_start_get_flags = + perfmon_start_get_flags_v1; + g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size = + get_pmu_perfmon_cmd_init_size_v1; + g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar = + get_perfmon_cmd_init_offsetofvar_v1; + g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer = + perfmon_cmd_init_set_sample_buffer_v1; + g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt = + perfmon_cmd_init_set_dec_cnt_v1; + 
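gk20a_init_pmu() selects a complete set of accessor callbacks keyed on the ucode descriptor's app_version, so the rest of the driver never touches the v0/v1 command structures directly. The reduced, self-contained sketch below shows only that dispatch pattern; the type, field names and sizes are invented for illustration and are not the driver's real tables.

    /*
     * Illustrative-only sketch of the version-dispatch pattern -- not part
     * of the patch.  Names and sizes here are made up.
     */
    #include <stdio.h>

    struct pmu_ver_ops {
            unsigned int (*cmdline_size)(void);
    };

    static unsigned int cmdline_size_v0(void) { return 16; } /* made-up size */
    static unsigned int cmdline_size_v1(void) { return 24; } /* made-up size */

    static const struct pmu_ver_ops ops_v0 = { .cmdline_size = cmdline_size_v0 };
    static const struct pmu_ver_ops ops_v1 = { .cmdline_size = cmdline_size_v1 };

    int main(void)
    {
            unsigned int app_version = 1;   /* would come from the ucode descriptor */
            const struct pmu_ver_ops *ops =
                    (app_version == 1) ? &ops_v1 : &ops_v0;

            printf("cmdline args size: %u\n", ops->cmdline_size());
            return 0;
    }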
g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id = + perfmon_cmd_init_set_base_cnt_id_v1; + g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us = + perfmon_cmd_init_set_samp_period_us_v1; + g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt = + perfmon_cmd_init_set_num_cnt_v1; + g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg = + perfmon_cmd_init_set_mov_avg_v1; + g->ops.pmu_ver.get_pmu_seq_in_a_ptr = + get_pmu_sequence_in_alloc_ptr_v1; + g->ops.pmu_ver.get_pmu_seq_out_a_ptr = + get_pmu_sequence_out_alloc_ptr_v1; + break; + case APP_VERSION_0: + g->ops.pmu_ver.cmd_id_zbc_table_update = 14; + g->ops.pmu_ver.get_pmu_cmdline_args_size = + pmu_cmdline_size_v0; + g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq = + set_pmu_cmdline_args_cpufreq_v0; + g->ops.pmu_ver.get_pmu_cmdline_args_ptr = + get_pmu_cmdline_args_ptr_v0; + g->ops.pmu_ver.get_pmu_allocation_struct_size = + get_pmu_allocation_size_v0; + g->ops.pmu_ver.set_pmu_allocation_ptr = + set_pmu_allocation_ptr_v0; + g->ops.pmu_ver.pmu_allocation_set_dmem_size = + pmu_allocation_set_dmem_size_v0; + g->ops.pmu_ver.pmu_allocation_get_dmem_size = + pmu_allocation_get_dmem_size_v0; + g->ops.pmu_ver.pmu_allocation_get_dmem_offset = + pmu_allocation_get_dmem_offset_v0; + g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr = + pmu_allocation_get_dmem_offset_addr_v0; + g->ops.pmu_ver.pmu_allocation_set_dmem_offset = + pmu_allocation_set_dmem_offset_v0; + g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params = + get_pmu_init_msg_pmu_queue_params_v0; + g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr = + get_pmu_msg_pmu_init_msg_ptr_v0; + g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off = + get_pmu_init_msg_pmu_sw_mg_off_v0; + g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size = + get_pmu_init_msg_pmu_sw_mg_size_v0; + g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size = + get_pmu_perfmon_cmd_start_size_v0; + g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar = + get_perfmon_cmd_start_offsetofvar_v0; + g->ops.pmu_ver.perfmon_start_set_cmd_type = + perfmon_start_set_cmd_type_v0; + g->ops.pmu_ver.perfmon_start_set_group_id = + perfmon_start_set_group_id_v0; + g->ops.pmu_ver.perfmon_start_set_state_id = + perfmon_start_set_state_id_v0; + g->ops.pmu_ver.perfmon_start_set_flags = + perfmon_start_set_flags_v0; + g->ops.pmu_ver.perfmon_start_get_flags = + perfmon_start_get_flags_v0; + g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size = + get_pmu_perfmon_cmd_init_size_v0; + g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar = + get_perfmon_cmd_init_offsetofvar_v0; + g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer = + perfmon_cmd_init_set_sample_buffer_v0; + g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt = + perfmon_cmd_init_set_dec_cnt_v0; + g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id = + perfmon_cmd_init_set_base_cnt_id_v0; + g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us = + perfmon_cmd_init_set_samp_period_us_v0; + g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt = + perfmon_cmd_init_set_num_cnt_v0; + g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg = + perfmon_cmd_init_set_mov_avg_v0; + g->ops.pmu_ver.get_pmu_seq_in_a_ptr = + get_pmu_sequence_in_alloc_ptr_v0; + g->ops.pmu_ver.get_pmu_seq_out_a_ptr = + get_pmu_sequence_out_alloc_ptr_v0; + break; + default: + gk20a_err(dev_from_gk20a(pmu->g), + "PMU code version not supported\n"); + return -EINVAL; + break; + } + return 0; +} + +static void pmu_copy_from_dmem(struct pmu_gk20a *pmu, + u32 src, u8 *dst, u32 size, u8 port) +{ + struct gk20a *g = pmu->g; + u32 i, words, bytes; + u32 data, addr_mask; + u32 *dst_u32 = (u32*)dst; + + if (size == 0) { + 
gk20a_err(dev_from_gk20a(g), + "size is zero"); + return; + } + + if (src & 0x3) { + gk20a_err(dev_from_gk20a(g), + "src (0x%08x) not 4-byte aligned", src); + return; + } + + mutex_lock(&pmu->pmu_copy_lock); + + words = size >> 2; + bytes = size & 0x3; + + addr_mask = pwr_falcon_dmemc_offs_m() | + pwr_falcon_dmemc_blk_m(); + + src &= addr_mask; + + gk20a_writel(g, pwr_falcon_dmemc_r(port), + src | pwr_falcon_dmemc_aincr_f(1)); + + for (i = 0; i < words; i++) + dst_u32[i] = gk20a_readl(g, pwr_falcon_dmemd_r(port)); + + if (bytes > 0) { + data = gk20a_readl(g, pwr_falcon_dmemd_r(port)); + for (i = 0; i < bytes; i++) { + dst[(words << 2) + i] = ((u8 *)&data)[i]; + gk20a_dbg_pmu("read: dst_u8[%d]=0x%08x", + i, dst[(words << 2) + i]); + } + } + mutex_unlock(&pmu->pmu_copy_lock); + return; +} + +static void pmu_copy_to_dmem(struct pmu_gk20a *pmu, + u32 dst, u8 *src, u32 size, u8 port) +{ + struct gk20a *g = pmu->g; + u32 i, words, bytes; + u32 data, addr_mask; + u32 *src_u32 = (u32*)src; + + if (size == 0) { + gk20a_err(dev_from_gk20a(g), + "size is zero"); + return; + } + + if (dst & 0x3) { + gk20a_err(dev_from_gk20a(g), + "dst (0x%08x) not 4-byte aligned", dst); + return; + } + + mutex_lock(&pmu->pmu_copy_lock); + + words = size >> 2; + bytes = size & 0x3; + + addr_mask = pwr_falcon_dmemc_offs_m() | + pwr_falcon_dmemc_blk_m(); + + dst &= addr_mask; + + gk20a_writel(g, pwr_falcon_dmemc_r(port), + dst | pwr_falcon_dmemc_aincw_f(1)); + + for (i = 0; i < words; i++) + gk20a_writel(g, pwr_falcon_dmemd_r(port), src_u32[i]); + + if (bytes > 0) { + data = 0; + for (i = 0; i < bytes; i++) + ((u8 *)&data)[i] = src[(words << 2) + i]; + gk20a_writel(g, pwr_falcon_dmemd_r(port), data); + } + + data = gk20a_readl(g, pwr_falcon_dmemc_r(port)) & addr_mask; + size = ALIGN(size, 4); + if (data != dst + size) { + gk20a_err(dev_from_gk20a(g), + "copy failed. 
bytes written %d, expected %d", + data - dst, size); + } + mutex_unlock(&pmu->pmu_copy_lock); + return; +} + +static int pmu_idle(struct pmu_gk20a *pmu) +{ + struct gk20a *g = pmu->g; + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(2000); + u32 idle_stat; + + /* wait for pmu idle */ + do { + idle_stat = gk20a_readl(g, pwr_falcon_idlestate_r()); + + if (pwr_falcon_idlestate_falcon_busy_v(idle_stat) == 0 && + pwr_falcon_idlestate_ext_busy_v(idle_stat) == 0) { + break; + } + + if (time_after_eq(jiffies, end_jiffies)) { + gk20a_err(dev_from_gk20a(g), + "timeout waiting pmu idle : 0x%08x", + idle_stat); + return -EBUSY; + } + usleep_range(100, 200); + } while (1); + + gk20a_dbg_fn("done"); + return 0; +} + +static void pmu_enable_irq(struct pmu_gk20a *pmu, bool enable) +{ + struct gk20a *g = pmu->g; + + gk20a_dbg_fn(""); + + gk20a_writel(g, mc_intr_mask_0_r(), + gk20a_readl(g, mc_intr_mask_0_r()) & + ~mc_intr_mask_0_pmu_enabled_f()); + gk20a_writel(g, mc_intr_mask_1_r(), + gk20a_readl(g, mc_intr_mask_1_r()) & + ~mc_intr_mask_1_pmu_enabled_f()); + + gk20a_writel(g, pwr_falcon_irqmclr_r(), + pwr_falcon_irqmclr_gptmr_f(1) | + pwr_falcon_irqmclr_wdtmr_f(1) | + pwr_falcon_irqmclr_mthd_f(1) | + pwr_falcon_irqmclr_ctxsw_f(1) | + pwr_falcon_irqmclr_halt_f(1) | + pwr_falcon_irqmclr_exterr_f(1) | + pwr_falcon_irqmclr_swgen0_f(1) | + pwr_falcon_irqmclr_swgen1_f(1) | + pwr_falcon_irqmclr_ext_f(0xff)); + + if (enable) { + /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */ + gk20a_writel(g, pwr_falcon_irqdest_r(), + pwr_falcon_irqdest_host_gptmr_f(0) | + pwr_falcon_irqdest_host_wdtmr_f(1) | + pwr_falcon_irqdest_host_mthd_f(0) | + pwr_falcon_irqdest_host_ctxsw_f(0) | + pwr_falcon_irqdest_host_halt_f(1) | + pwr_falcon_irqdest_host_exterr_f(0) | + pwr_falcon_irqdest_host_swgen0_f(1) | + pwr_falcon_irqdest_host_swgen1_f(0) | + pwr_falcon_irqdest_host_ext_f(0xff) | + pwr_falcon_irqdest_target_gptmr_f(1) | + pwr_falcon_irqdest_target_wdtmr_f(0) | + pwr_falcon_irqdest_target_mthd_f(0) | + pwr_falcon_irqdest_target_ctxsw_f(0) | + pwr_falcon_irqdest_target_halt_f(0) | + pwr_falcon_irqdest_target_exterr_f(0) | + pwr_falcon_irqdest_target_swgen0_f(0) | + pwr_falcon_irqdest_target_swgen1_f(0) | + pwr_falcon_irqdest_target_ext_f(0xff)); + + /* 0=disable, 1=enable */ + gk20a_writel(g, pwr_falcon_irqmset_r(), + pwr_falcon_irqmset_gptmr_f(1) | + pwr_falcon_irqmset_wdtmr_f(1) | + pwr_falcon_irqmset_mthd_f(0) | + pwr_falcon_irqmset_ctxsw_f(0) | + pwr_falcon_irqmset_halt_f(1) | + pwr_falcon_irqmset_exterr_f(1) | + pwr_falcon_irqmset_swgen0_f(1) | + pwr_falcon_irqmset_swgen1_f(1)); + + gk20a_writel(g, mc_intr_mask_0_r(), + gk20a_readl(g, mc_intr_mask_0_r()) | + mc_intr_mask_0_pmu_enabled_f()); + } + + gk20a_dbg_fn("done"); +} + +static int pmu_enable_hw(struct pmu_gk20a *pmu, bool enable) +{ + struct gk20a *g = pmu->g; + + gk20a_dbg_fn(""); + + if (enable) { + int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT; + gk20a_enable(g, mc_enable_pwr_enabled_f()); + + do { + u32 w = gk20a_readl(g, pwr_falcon_dmactl_r()) & + (pwr_falcon_dmactl_dmem_scrubbing_m() | + pwr_falcon_dmactl_imem_scrubbing_m()); + + if (!w) { + gk20a_dbg_fn("done"); + return 0; + } + udelay(GR_IDLE_CHECK_DEFAULT); + } while (--retries || !tegra_platform_is_silicon()); + + gk20a_disable(g, mc_enable_pwr_enabled_f()); + gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout"); + + return -ETIMEDOUT; + } else { + gk20a_disable(g, mc_enable_pwr_enabled_f()); + return 0; + } +} + +static int pmu_enable(struct pmu_gk20a *pmu, bool enable) +{ + 
struct gk20a *g = pmu->g; + u32 pmc_enable; + int err; + + gk20a_dbg_fn(""); + + if (!enable) { + pmc_enable = gk20a_readl(g, mc_enable_r()); + if (mc_enable_pwr_v(pmc_enable) != + mc_enable_pwr_disabled_v()) { + + pmu_enable_irq(pmu, false); + pmu_enable_hw(pmu, false); + } + } else { + err = pmu_enable_hw(pmu, true); + if (err) + return err; + + /* TBD: post reset */ + + err = pmu_idle(pmu); + if (err) + return err; + + pmu_enable_irq(pmu, true); + } + + gk20a_dbg_fn("done"); + return 0; +} + +static int pmu_reset(struct pmu_gk20a *pmu) +{ + int err; + + err = pmu_idle(pmu); + if (err) + return err; + + /* TBD: release pmu hw mutex */ + + err = pmu_enable(pmu, false); + if (err) + return err; + + /* TBD: cancel all sequences */ + /* TBD: init all sequences and state tables */ + /* TBD: restore pre-init message handler */ + + err = pmu_enable(pmu, true); + if (err) + return err; + + return 0; +} + +static int pmu_bootstrap(struct pmu_gk20a *pmu) +{ + struct gk20a *g = pmu->g; + struct gk20a_platform *platform = platform_get_drvdata(g->dev); + struct mm_gk20a *mm = &g->mm; + struct pmu_ucode_desc *desc = pmu->desc; + u64 addr_code, addr_data, addr_load; + u32 i, blocks, addr_args; + + gk20a_dbg_fn(""); + + gk20a_writel(g, pwr_falcon_itfen_r(), + gk20a_readl(g, pwr_falcon_itfen_r()) | + pwr_falcon_itfen_ctxen_enable_f()); + gk20a_writel(g, pwr_pmu_new_instblk_r(), + pwr_pmu_new_instblk_ptr_f( + mm->pmu.inst_block.cpu_pa >> 12) | + pwr_pmu_new_instblk_valid_f(1) | + pwr_pmu_new_instblk_target_sys_coh_f()); + + /* TBD: load all other surfaces */ + + g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu, + clk_get_rate(platform->clk[1])); + + addr_args = (pwr_falcon_hwcfg_dmem_size_v( + gk20a_readl(g, pwr_falcon_hwcfg_r())) + << GK20A_PMU_DMEM_BLKSIZE2) - + g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu); + + pmu_copy_to_dmem(pmu, addr_args, + (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)), + g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0); + + gk20a_writel(g, pwr_falcon_dmemc_r(0), + pwr_falcon_dmemc_offs_f(0) | + pwr_falcon_dmemc_blk_f(0) | + pwr_falcon_dmemc_aincw_f(1)); + + addr_code = u64_lo32((pmu->ucode.pmu_va + + desc->app_start_offset + + desc->app_resident_code_offset) >> 8) ; + addr_data = u64_lo32((pmu->ucode.pmu_va + + desc->app_start_offset + + desc->app_resident_data_offset) >> 8); + addr_load = u64_lo32((pmu->ucode.pmu_va + + desc->bootloader_start_offset) >> 8); + + gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE); + gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code); + gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_size); + gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size); + gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry); + gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data); + gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size); + gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code); + gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1); + gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args); + + gk20a_writel(g, pwr_falcon_dmatrfbase_r(), + addr_load - (desc->bootloader_imem_offset >> 8)); + + blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8; + + for (i = 0; i < blocks; i++) { + gk20a_writel(g, pwr_falcon_dmatrfmoffs_r(), + desc->bootloader_imem_offset + (i << 8)); + gk20a_writel(g, pwr_falcon_dmatrffboffs_r(), + desc->bootloader_imem_offset + (i << 8)); + gk20a_writel(g, pwr_falcon_dmatrfcmd_r(), + pwr_falcon_dmatrfcmd_imem_f(1) | + pwr_falcon_dmatrfcmd_write_f(0) | + pwr_falcon_dmatrfcmd_size_f(6) | + 
pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE)); + } + + gk20a_writel(g, pwr_falcon_bootvec_r(), + pwr_falcon_bootvec_vec_f(desc->bootloader_entry_point)); + + gk20a_writel(g, pwr_falcon_cpuctl_r(), + pwr_falcon_cpuctl_startcpu_f(1)); + + gk20a_writel(g, pwr_falcon_os_r(), desc->app_version); + + return 0; +} + +static void pmu_seq_init(struct pmu_gk20a *pmu) +{ + u32 i; + + memset(pmu->seq, 0, + sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES); + memset(pmu->pmu_seq_tbl, 0, + sizeof(pmu->pmu_seq_tbl)); + + for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++) + pmu->seq[i].id = i; +} + +static int pmu_seq_acquire(struct pmu_gk20a *pmu, + struct pmu_sequence **pseq) +{ + struct gk20a *g = pmu->g; + struct pmu_sequence *seq; + u32 index; + + mutex_lock(&pmu->pmu_seq_lock); + index = find_first_zero_bit(pmu->pmu_seq_tbl, + sizeof(pmu->pmu_seq_tbl)); + if (index >= sizeof(pmu->pmu_seq_tbl)) { + gk20a_err(dev_from_gk20a(g), + "no free sequence available"); + mutex_unlock(&pmu->pmu_seq_lock); + return -EAGAIN; + } + set_bit(index, pmu->pmu_seq_tbl); + mutex_unlock(&pmu->pmu_seq_lock); + + seq = &pmu->seq[index]; + seq->state = PMU_SEQ_STATE_PENDING; + + *pseq = seq; + return 0; +} + +static void pmu_seq_release(struct pmu_gk20a *pmu, + struct pmu_sequence *seq) +{ + struct gk20a *g = pmu->g; + seq->state = PMU_SEQ_STATE_FREE; + seq->desc = PMU_INVALID_SEQ_DESC; + seq->callback = NULL; + seq->cb_params = NULL; + seq->msg = NULL; + seq->out_payload = NULL; + g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu, + g->ops.pmu_ver.get_pmu_seq_in_a_ptr(seq), 0); + g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu, + g->ops.pmu_ver.get_pmu_seq_out_a_ptr(seq), 0); + + clear_bit(seq->id, pmu->pmu_seq_tbl); +} + +static int pmu_queue_init(struct pmu_gk20a *pmu, + u32 id, union pmu_init_msg_pmu *init) +{ + struct gk20a *g = pmu->g; + struct pmu_queue *queue = &pmu->queue[id]; + queue->id = id; + g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init); + + queue->mutex_id = id; + mutex_init(&queue->mutex); + + gk20a_dbg_pmu("queue %d: index %d, offset 0x%08x, size 0x%08x", + id, queue->index, queue->offset, queue->size); + + return 0; +} + +static int pmu_queue_head(struct pmu_gk20a *pmu, struct pmu_queue *queue, + u32 *head, bool set) +{ + struct gk20a *g = pmu->g; + + BUG_ON(!head); + + if (PMU_IS_COMMAND_QUEUE(queue->id)) { + + if (queue->index >= pwr_pmu_queue_head__size_1_v()) + return -EINVAL; + + if (!set) + *head = pwr_pmu_queue_head_address_v( + gk20a_readl(g, + pwr_pmu_queue_head_r(queue->index))); + else + gk20a_writel(g, + pwr_pmu_queue_head_r(queue->index), + pwr_pmu_queue_head_address_f(*head)); + } else { + if (!set) + *head = pwr_pmu_msgq_head_val_v( + gk20a_readl(g, pwr_pmu_msgq_head_r())); + else + gk20a_writel(g, + pwr_pmu_msgq_head_r(), + pwr_pmu_msgq_head_val_f(*head)); + } + + return 0; +} + +static int pmu_queue_tail(struct pmu_gk20a *pmu, struct pmu_queue *queue, + u32 *tail, bool set) +{ + struct gk20a *g = pmu->g; + + BUG_ON(!tail); + + if (PMU_IS_COMMAND_QUEUE(queue->id)) { + + if (queue->index >= pwr_pmu_queue_tail__size_1_v()) + return -EINVAL; + + if (!set) + *tail = pwr_pmu_queue_tail_address_v( + gk20a_readl(g, + pwr_pmu_queue_tail_r(queue->index))); + else + gk20a_writel(g, + pwr_pmu_queue_tail_r(queue->index), + pwr_pmu_queue_tail_address_f(*tail)); + } else { + if (!set) + *tail = pwr_pmu_msgq_tail_val_v( + gk20a_readl(g, pwr_pmu_msgq_tail_r())); + else + gk20a_writel(g, + pwr_pmu_msgq_tail_r(), + pwr_pmu_msgq_tail_val_f(*tail)); + } + + return 0; +} + +static inline 
void pmu_queue_read(struct pmu_gk20a *pmu, + u32 offset, u8 *dst, u32 size) +{ + pmu_copy_from_dmem(pmu, offset, dst, size, 0); +} + +static inline void pmu_queue_write(struct pmu_gk20a *pmu, + u32 offset, u8 *src, u32 size) +{ + pmu_copy_to_dmem(pmu, offset, src, size, 0); +} + +int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token) +{ + struct gk20a *g = pmu->g; + struct pmu_mutex *mutex; + u32 data, owner, max_retry; + + if (!pmu->initialized) + return 0; + + BUG_ON(!token); + BUG_ON(!PMU_MUTEX_ID_IS_VALID(id)); + BUG_ON(id > pmu->mutex_cnt); + + mutex = &pmu->mutex[id]; + + owner = pwr_pmu_mutex_value_v( + gk20a_readl(g, pwr_pmu_mutex_r(mutex->index))); + + if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) { + BUG_ON(mutex->ref_cnt == 0); + gk20a_dbg_pmu("already acquired by owner : 0x%08x", *token); + mutex->ref_cnt++; + return 0; + } + + max_retry = 40; + do { + data = pwr_pmu_mutex_id_value_v( + gk20a_readl(g, pwr_pmu_mutex_id_r())); + if (data == pwr_pmu_mutex_id_value_init_v() || + data == pwr_pmu_mutex_id_value_not_avail_v()) { + gk20a_warn(dev_from_gk20a(g), + "fail to generate mutex token: val 0x%08x", + owner); + usleep_range(20, 40); + continue; + } + + owner = data; + gk20a_writel(g, pwr_pmu_mutex_r(mutex->index), + pwr_pmu_mutex_value_f(owner)); + + data = pwr_pmu_mutex_value_v( + gk20a_readl(g, pwr_pmu_mutex_r(mutex->index))); + + if (owner == data) { + mutex->ref_cnt = 1; + gk20a_dbg_pmu("mutex acquired: id=%d, token=0x%x", + mutex->index, *token); + *token = owner; + return 0; + } else { + gk20a_dbg_info("fail to acquire mutex idx=0x%08x", + mutex->index); + + data = gk20a_readl(g, pwr_pmu_mutex_id_release_r()); + data = set_field(data, + pwr_pmu_mutex_id_release_value_m(), + pwr_pmu_mutex_id_release_value_f(owner)); + gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data); + + usleep_range(20, 40); + continue; + } + } while (max_retry-- > 0); + + return -EBUSY; +} + +int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token) +{ + struct gk20a *g = pmu->g; + struct pmu_mutex *mutex; + u32 owner, data; + + if (!pmu->initialized) + return 0; + + BUG_ON(!token); + BUG_ON(!PMU_MUTEX_ID_IS_VALID(id)); + BUG_ON(id > pmu->mutex_cnt); + + mutex = &pmu->mutex[id]; + + owner = pwr_pmu_mutex_value_v( + gk20a_readl(g, pwr_pmu_mutex_r(mutex->index))); + + if (*token != owner) { + gk20a_err(dev_from_gk20a(g), + "requester 0x%08x NOT match owner 0x%08x", + *token, owner); + return -EINVAL; + } + + if (--mutex->ref_cnt == 0) { + gk20a_writel(g, pwr_pmu_mutex_r(mutex->index), + pwr_pmu_mutex_value_initial_lock_f()); + + data = gk20a_readl(g, pwr_pmu_mutex_id_release_r()); + data = set_field(data, pwr_pmu_mutex_id_release_value_m(), + pwr_pmu_mutex_id_release_value_f(owner)); + gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data); + + gk20a_dbg_pmu("mutex released: id=%d, token=0x%x", + mutex->index, *token); + } + + return 0; +} + +static int pmu_queue_lock(struct pmu_gk20a *pmu, + struct pmu_queue *queue) +{ + int err; + + if (PMU_IS_MESSAGE_QUEUE(queue->id)) + return 0; + + if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { + mutex_lock(&queue->mutex); + queue->locked = true; + return 0; + } + + err = pmu_mutex_acquire(pmu, queue->mutex_id, + &queue->mutex_lock); + if (err == 0) + queue->locked = true; + + return err; +} + +static int pmu_queue_unlock(struct pmu_gk20a *pmu, + struct pmu_queue *queue) +{ + int err; + + if (PMU_IS_MESSAGE_QUEUE(queue->id)) + return 0; + + if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { + mutex_unlock(&queue->mutex); + queue->locked = false; + 
return 0; + } + + if (queue->locked) { + err = pmu_mutex_release(pmu, queue->mutex_id, + &queue->mutex_lock); + if (err == 0) + queue->locked = false; + } + + return 0; +} + +/* called by pmu_read_message, no lock */ +static bool pmu_queue_is_empty(struct pmu_gk20a *pmu, + struct pmu_queue *queue) +{ + u32 head, tail; + + pmu_queue_head(pmu, queue, &head, QUEUE_GET); + if (queue->opened && queue->oflag == OFLAG_READ) + tail = queue->position; + else + pmu_queue_tail(pmu, queue, &tail, QUEUE_GET); + + return head == tail; +} + +static bool pmu_queue_has_room(struct pmu_gk20a *pmu, + struct pmu_queue *queue, u32 size, bool *need_rewind) +{ + u32 head, tail, free; + bool rewind = false; + + BUG_ON(!queue->locked); + + size = ALIGN(size, QUEUE_ALIGNMENT); + + pmu_queue_head(pmu, queue, &head, QUEUE_GET); + pmu_queue_tail(pmu, queue, &tail, QUEUE_GET); + + if (head >= tail) { + free = queue->offset + queue->size - head; + free -= PMU_CMD_HDR_SIZE; + + if (size > free) { + rewind = true; + head = queue->offset; + } + } + + if (head < tail) + free = tail - head - 1; + + if (need_rewind) + *need_rewind = rewind; + + return size <= free; +} + +static int pmu_queue_push(struct pmu_gk20a *pmu, + struct pmu_queue *queue, void *data, u32 size) +{ + gk20a_dbg_fn(""); + + if (!queue->opened && queue->oflag == OFLAG_WRITE){ + gk20a_err(dev_from_gk20a(pmu->g), + "queue not opened for write"); + return -EINVAL; + } + + pmu_queue_write(pmu, queue->position, data, size); + queue->position += ALIGN(size, QUEUE_ALIGNMENT); + return 0; +} + +static int pmu_queue_pop(struct pmu_gk20a *pmu, + struct pmu_queue *queue, void *data, u32 size, + u32 *bytes_read) +{ + u32 head, tail, used; + + *bytes_read = 0; + + if (!queue->opened && queue->oflag == OFLAG_READ){ + gk20a_err(dev_from_gk20a(pmu->g), + "queue not opened for read"); + return -EINVAL; + } + + pmu_queue_head(pmu, queue, &head, QUEUE_GET); + tail = queue->position; + + if (head == tail) + return 0; + + if (head > tail) + used = head - tail; + else + used = queue->offset + queue->size - tail; + + if (size > used) { + gk20a_warn(dev_from_gk20a(pmu->g), + "queue size smaller than request read"); + size = used; + } + + pmu_queue_read(pmu, tail, data, size); + queue->position += ALIGN(size, QUEUE_ALIGNMENT); + *bytes_read = size; + return 0; +} + +static void pmu_queue_rewind(struct pmu_gk20a *pmu, + struct pmu_queue *queue) +{ + struct pmu_cmd cmd; + + gk20a_dbg_fn(""); + + if (!queue->opened) { + gk20a_err(dev_from_gk20a(pmu->g), + "queue not opened"); + return; + } + + if (queue->oflag == OFLAG_WRITE) { + cmd.hdr.unit_id = PMU_UNIT_REWIND; + cmd.hdr.size = PMU_CMD_HDR_SIZE; + pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size); + gk20a_dbg_pmu("queue %d rewinded", queue->id); + } + + queue->position = queue->offset; + return; +} + +/* open for read and lock the queue */ +static int pmu_queue_open_read(struct pmu_gk20a *pmu, + struct pmu_queue *queue) +{ + int err; + + err = pmu_queue_lock(pmu, queue); + if (err) + return err; + + if (queue->opened) + BUG(); + + pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET); + queue->oflag = OFLAG_READ; + queue->opened = true; + + return 0; +} + +/* open for write and lock the queue + make sure there's enough free space for the write */ +static int pmu_queue_open_write(struct pmu_gk20a *pmu, + struct pmu_queue *queue, u32 size) +{ + bool rewind = false; + int err; + + err = pmu_queue_lock(pmu, queue); + if (err) + return err; + + if (queue->opened) + BUG(); + + if (!pmu_queue_has_room(pmu, queue, size, &rewind)) { + 
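pmu_queue_has_room() and pmu_queue_rewind() above treat each command queue as a ring in PMU DMEM: when a command would not fit between the write position and the end of the queue, a REWIND header is emitted and the write position wraps back to the queue start, provided the reader has already moved on. The user-space sketch below reproduces only that space check; the queue geometry and sizes are illustrative, not the hardware's.

    /*
     * Standalone sketch of the rewind-aware space check -- not part of the
     * patch.  Offsets and sizes are invented.
     */
    #include <stdbool.h>
    #include <stdio.h>

    #define QUEUE_OFFSET  0x100   /* illustrative queue base in DMEM */
    #define QUEUE_SIZE    0x80    /* illustrative queue size         */
    #define CMD_HDR_SIZE  4

    static bool queue_has_room(unsigned head, unsigned tail, unsigned size,
                               bool *need_rewind)
    {
            unsigned free = 0;
            bool rewind = false;

            if (head >= tail) {
                    /* space up to the end of the ring, keeping room for the
                     * REWIND header that may have to be written there */
                    free = QUEUE_OFFSET + QUEUE_SIZE - head - CMD_HDR_SIZE;
                    if (size > free) {
                            rewind = true;
                            head = QUEUE_OFFSET;
                    }
            }
            if (head < tail)
                    free = tail - head - 1;

            *need_rewind = rewind;
            return size <= free;
    }

    int main(void)
    {
            bool rewind;
            /* head near the end of the ring, tail already wrapped forward */
            bool ok = queue_has_room(QUEUE_OFFSET + 0x70, QUEUE_OFFSET + 0x40,
                                     0x20, &rewind);
            printf("fits=%d rewind=%d\n", ok, rewind);
            return 0;
    }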
gk20a_err(dev_from_gk20a(pmu->g), "queue full"); + return -EAGAIN; + } + + pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET); + queue->oflag = OFLAG_WRITE; + queue->opened = true; + + if (rewind) + pmu_queue_rewind(pmu, queue); + + return 0; +} + +/* close and unlock the queue */ +static int pmu_queue_close(struct pmu_gk20a *pmu, + struct pmu_queue *queue, bool commit) +{ + if (!queue->opened) + return 0; + + if (commit) { + if (queue->oflag == OFLAG_READ) { + pmu_queue_tail(pmu, queue, + &queue->position, QUEUE_SET); + } + else { + pmu_queue_head(pmu, queue, + &queue->position, QUEUE_SET); + } + } + + queue->opened = false; + + pmu_queue_unlock(pmu, queue); + + return 0; +} + +static void gk20a_save_pmu_sw_state(struct pmu_gk20a *pmu, + struct gk20a_pmu_save_state *save) +{ + save->seq = pmu->seq; + save->next_seq_desc = pmu->next_seq_desc; + save->mutex = pmu->mutex; + save->mutex_cnt = pmu->mutex_cnt; + save->desc = pmu->desc; + save->ucode = pmu->ucode; + save->elpg_enable = pmu->elpg_enable; + save->pg_wq = pmu->pg_wq; + save->seq_buf = pmu->seq_buf; + save->pg_buf = pmu->pg_buf; + save->sw_ready = pmu->sw_ready; + save->pg_init = pmu->pg_init; +} + +static void gk20a_restore_pmu_sw_state(struct pmu_gk20a *pmu, + struct gk20a_pmu_save_state *save) +{ + pmu->seq = save->seq; + pmu->next_seq_desc = save->next_seq_desc; + pmu->mutex = save->mutex; + pmu->mutex_cnt = save->mutex_cnt; + pmu->desc = save->desc; + pmu->ucode = save->ucode; + pmu->elpg_enable = save->elpg_enable; + pmu->pg_wq = save->pg_wq; + pmu->seq_buf = save->seq_buf; + pmu->pg_buf = save->pg_buf; + pmu->sw_ready = save->sw_ready; + pmu->pg_init = save->pg_init; +} + +void gk20a_remove_pmu_support(struct pmu_gk20a *pmu) +{ + struct gk20a_pmu_save_state save; + + gk20a_dbg_fn(""); + + gk20a_allocator_destroy(&pmu->dmem); + + /* Save the stuff you don't want to lose */ + gk20a_save_pmu_sw_state(pmu, &save); + + /* this function is also called by pmu_destory outside gk20a deinit that + releases gk20a struct so fill up with zeros here. */ + memset(pmu, 0, sizeof(struct pmu_gk20a)); + + /* Restore stuff you want to keep */ + gk20a_restore_pmu_sw_state(pmu, &save); +} + +int gk20a_init_pmu_reset_enable_hw(struct gk20a *g) +{ + struct pmu_gk20a *pmu = &g->pmu; + + gk20a_dbg_fn(""); + + pmu_enable_hw(pmu, true); + + return 0; +} + +static void pmu_elpg_enable_allow(struct work_struct *work); + +int gk20a_init_pmu_setup_sw(struct gk20a *g) +{ + struct pmu_gk20a *pmu = &g->pmu; + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = &mm->pmu.vm; + struct device *d = dev_from_gk20a(g); + int i, err = 0; + u8 *ptr; + void *ucode_ptr; + struct sg_table *sgt_pmu_ucode; + struct sg_table *sgt_seq_buf; + DEFINE_DMA_ATTRS(attrs); + dma_addr_t iova; + + gk20a_dbg_fn(""); + + if (pmu->sw_ready) { + for (i = 0; i < pmu->mutex_cnt; i++) { + pmu->mutex[i].id = i; + pmu->mutex[i].index = i; + } + pmu_seq_init(pmu); + + gk20a_dbg_fn("skip init"); + goto skip_init; + } + + /* no infoRom script from vbios? 
*/ + + /* TBD: sysmon subtask */ + + pmu->mutex_cnt = pwr_pmu_mutex__size_1_v(); + pmu->mutex = kzalloc(pmu->mutex_cnt * + sizeof(struct pmu_mutex), GFP_KERNEL); + if (!pmu->mutex) { + err = -ENOMEM; + goto err; + } + + for (i = 0; i < pmu->mutex_cnt; i++) { + pmu->mutex[i].id = i; + pmu->mutex[i].index = i; + } + + pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES * + sizeof(struct pmu_sequence), GFP_KERNEL); + if (!pmu->seq) { + err = -ENOMEM; + goto err_free_mutex; + } + + pmu_seq_init(pmu); + + if (!g->pmu_fw) { + g->pmu_fw = gk20a_request_firmware(g, GK20A_PMU_UCODE_IMAGE); + if (!g->pmu_fw) { + gk20a_err(d, "failed to load pmu ucode!!"); + err = -ENOENT; + goto err_free_seq; + } + } + + gk20a_dbg_fn("firmware loaded"); + + pmu->desc = (struct pmu_ucode_desc *)g->pmu_fw->data; + pmu->ucode_image = (u32 *)((u8 *)pmu->desc + + pmu->desc->descriptor_size); + + + INIT_DELAYED_WORK(&pmu->elpg_enable, pmu_elpg_enable_allow); + INIT_WORK(&pmu->pg_init, gk20a_init_pmu_setup_hw2_workqueue); + + gk20a_init_pmu_vm(mm); + + dma_set_attr(DMA_ATTR_READ_ONLY, &attrs); + pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX, + &iova, + GFP_KERNEL, + &attrs); + if (!pmu->ucode.cpuva) { + gk20a_err(d, "failed to allocate memory\n"); + err = -ENOMEM; + goto err_release_fw; + } + + pmu->ucode.iova = iova; + pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE, + &iova, + GFP_KERNEL); + if (!pmu->seq_buf.cpuva) { + gk20a_err(d, "failed to allocate memory\n"); + err = -ENOMEM; + goto err_free_pmu_ucode; + } + + pmu->seq_buf.iova = iova; + init_waitqueue_head(&pmu->pg_wq); + + err = gk20a_get_sgtable(d, &sgt_pmu_ucode, + pmu->ucode.cpuva, + pmu->ucode.iova, + GK20A_PMU_UCODE_SIZE_MAX); + if (err) { + gk20a_err(d, "failed to allocate sg table\n"); + goto err_free_seq_buf; + } + + pmu->ucode.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode, + GK20A_PMU_UCODE_SIZE_MAX, + 0, /* flags */ + gk20a_mem_flag_read_only); + if (!pmu->ucode.pmu_va) { + gk20a_err(d, "failed to map pmu ucode memory!!"); + goto err_free_ucode_sgt; + } + + err = gk20a_get_sgtable(d, &sgt_seq_buf, + pmu->seq_buf.cpuva, + pmu->seq_buf.iova, + GK20A_PMU_SEQ_BUF_SIZE); + if (err) { + gk20a_err(d, "failed to allocate sg table\n"); + goto err_unmap_ucode; + } + + pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf, + GK20A_PMU_SEQ_BUF_SIZE, + 0, /* flags */ + gk20a_mem_flag_none); + if (!pmu->seq_buf.pmu_va) { + gk20a_err(d, "failed to map pmu ucode memory!!"); + goto err_free_seq_buf_sgt; + } + + ptr = (u8 *)pmu->seq_buf.cpuva; + if (!ptr) { + gk20a_err(d, "failed to map cpu ptr for zbc buffer"); + goto err_unmap_seq_buf; + } + + /* TBD: remove this if ZBC save/restore is handled by PMU + * end an empty ZBC sequence for now */ + ptr[0] = 0x16; /* opcode EXIT */ + ptr[1] = 0; ptr[2] = 1; ptr[3] = 0; + ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0; + + pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE; + + ucode_ptr = pmu->ucode.cpuva; + + for (i = 0; i < (pmu->desc->app_start_offset + + pmu->desc->app_size) >> 2; i++) + gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]); + + gk20a_free_sgtable(&sgt_pmu_ucode); + gk20a_free_sgtable(&sgt_seq_buf); + +skip_init: + mutex_init(&pmu->elpg_mutex); + mutex_init(&pmu->isr_mutex); + mutex_init(&pmu->pmu_copy_lock); + mutex_init(&pmu->pmu_seq_lock); + + pmu->perfmon_counter.index = 3; /* GR & CE2 */ + pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE; + + pmu->remove_support = gk20a_remove_pmu_support; + err = gk20a_init_pmu(pmu); + if (err) { + gk20a_err(d, "failed to set function pointers\n"); + 
return err; + } + + gk20a_dbg_fn("done"); + return 0; + + err_unmap_seq_buf: + gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va, + GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none); + err_free_seq_buf_sgt: + gk20a_free_sgtable(&sgt_seq_buf); + err_unmap_ucode: + gk20a_gmmu_unmap(vm, pmu->ucode.pmu_va, + GK20A_PMU_UCODE_SIZE_MAX, gk20a_mem_flag_none); + err_free_ucode_sgt: + gk20a_free_sgtable(&sgt_pmu_ucode); + err_free_seq_buf: + dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE, + pmu->seq_buf.cpuva, pmu->seq_buf.iova); + pmu->seq_buf.cpuva = NULL; + pmu->seq_buf.iova = 0; + err_free_pmu_ucode: + dma_free_attrs(d, GK20A_PMU_UCODE_SIZE_MAX, + pmu->ucode.cpuva, pmu->ucode.iova, &attrs); + pmu->ucode.cpuva = NULL; + pmu->ucode.iova = 0; + err_release_fw: + release_firmware(g->pmu_fw); + err_free_seq: + kfree(pmu->seq); + err_free_mutex: + kfree(pmu->mutex); + err: + gk20a_dbg_fn("fail"); + return err; +} + +static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 handle, u32 status); + +static void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 handle, u32 status) +{ + struct pmu_gk20a *pmu = param; + struct pmu_pg_msg_eng_buf_stat *eng_buf_stat = &msg->msg.pg.eng_buf_stat; + + gk20a_dbg_fn(""); + + if (status != 0) { + gk20a_err(dev_from_gk20a(g), "PGENG cmd aborted"); + /* TBD: disable ELPG */ + return; + } + + if (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_FAILED) { + gk20a_err(dev_from_gk20a(g), "failed to load PGENG buffer"); + } + + pmu->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED); + wake_up(&pmu->pg_wq); +} + +int gk20a_init_pmu_setup_hw1(struct gk20a *g) +{ + struct pmu_gk20a *pmu = &g->pmu; + int err; + + gk20a_dbg_fn(""); + + pmu_reset(pmu); + + /* setup apertures - virtual */ + gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE), + pwr_fbif_transcfg_mem_type_virtual_f()); + gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT), + pwr_fbif_transcfg_mem_type_virtual_f()); + /* setup apertures - physical */ + gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID), + pwr_fbif_transcfg_mem_type_physical_f() | + pwr_fbif_transcfg_target_local_fb_f()); + gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH), + pwr_fbif_transcfg_mem_type_physical_f() | + pwr_fbif_transcfg_target_coherent_sysmem_f()); + gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH), + pwr_fbif_transcfg_mem_type_physical_f() | + pwr_fbif_transcfg_target_noncoherent_sysmem_f()); + + /* TBD: load pmu ucode */ + err = pmu_bootstrap(pmu); + if (err) + return err; + + return 0; + +} + +static int gk20a_aelpg_init(struct gk20a *g); +static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id); + + +static void gk20a_init_pmu_setup_hw2_workqueue(struct work_struct *work) +{ + struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init); + struct gk20a *g = pmu->g; + gk20a_init_pmu_setup_hw2(g); +} + +int gk20a_init_pmu_setup_hw2(struct gk20a *g) +{ + struct pmu_gk20a *pmu = &g->pmu; + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = &mm->pmu.vm; + struct device *d = dev_from_gk20a(g); + struct pmu_cmd cmd; + u32 desc; + long remain; + int err; + bool status; + u32 size; + struct sg_table *sgt_pg_buf; + dma_addr_t iova; + + gk20a_dbg_fn(""); + + if (!support_gk20a_pmu()) + return 0; + + size = 0; + err = gr_gk20a_fecs_get_reglist_img_size(g, &size); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to query fecs pg buffer size"); + return err; + } + + if (!pmu->sw_ready) { + 
pmu->pg_buf.cpuva = dma_alloc_coherent(d, size, + &iova, + GFP_KERNEL); + if (!pmu->pg_buf.cpuva) { + gk20a_err(d, "failed to allocate memory\n"); + err = -ENOMEM; + goto err; + } + + pmu->pg_buf.iova = iova; + pmu->pg_buf.size = size; + + err = gk20a_get_sgtable(d, &sgt_pg_buf, + pmu->pg_buf.cpuva, + pmu->pg_buf.iova, + size); + if (err) { + gk20a_err(d, "failed to create sg table\n"); + goto err_free_pg_buf; + } + + pmu->pg_buf.pmu_va = gk20a_gmmu_map(vm, + &sgt_pg_buf, + size, + 0, /* flags */ + gk20a_mem_flag_none); + if (!pmu->pg_buf.pmu_va) { + gk20a_err(d, "failed to map fecs pg buffer"); + err = -ENOMEM; + goto err_free_sgtable; + } + + gk20a_free_sgtable(&sgt_pg_buf); + } + + /* + * This is the actual point at which sw setup is complete, so set the + * sw_ready flag here. + */ + pmu->sw_ready = true; + + /* TBD: acquire pmu hw mutex */ + + /* TBD: post reset again? */ + + /* PMU_INIT message handler will send PG_INIT */ + remain = wait_event_timeout( + pmu->pg_wq, + (status = (pmu->elpg_ready && + pmu->stat_dmem_offset != 0 && + pmu->elpg_stat == PMU_ELPG_STAT_OFF)), + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g))); + if (status == 0) { + gk20a_err(dev_from_gk20a(g), + "PG_INIT_ACK failed, remaining timeout : 0x%lx", remain); + pmu_dump_falcon_stats(pmu); + return -EBUSY; + } + + err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to bind pmu inst to gr"); + return err; + } + + err = gr_gk20a_fecs_set_reglist_virual_addr(g, pmu->pg_buf.pmu_va); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to set pg buffer pmu va"); + return err; + } + + memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load); + cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD; + cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS; + cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size; + cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.pmu_va >> 8); + cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.pmu_va & 0xFF); + cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT; + + pmu->buf_loaded = false; + gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ, + pmu_handle_pg_buf_config_msg, pmu, &desc, ~0); + + remain = wait_event_timeout( + pmu->pg_wq, + pmu->buf_loaded, + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g))); + if (!pmu->buf_loaded) { + gk20a_err(dev_from_gk20a(g), + "PGENG FECS buffer load failed, remaining timeout : 0x%lx", + remain); + return -EBUSY; + } + + memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load); + cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD; + cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC; + cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size; + cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.pmu_va >> 8); + cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.pmu_va & 0xFF); + cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT; + + pmu->buf_loaded = false; + gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ, + pmu_handle_pg_buf_config_msg, pmu, &desc, ~0); + + remain = wait_event_timeout( + pmu->pg_wq, + pmu->buf_loaded, + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g))); + if (!pmu->buf_loaded) { + 
gk20a_err(dev_from_gk20a(g), + "PGENG ZBC buffer load failed, remaining timeout 0x%lx", + remain); + return -EBUSY; + } + + /* + * FIXME: To enable ELPG, we increase the PMU ext2priv timeout unit to + * 7. This prevents PMU stalling on Host register accesses. Once the + * cause for this hang is discovered and fixed, this WAR should be + * removed. + */ + gk20a_writel(g, 0x10a164, 0x109ff); + + pmu->initialized = true; + pmu->zbc_ready = true; + + /* Save zbc table after PMU is initialized. */ + pmu_save_zbc(g, 0xf); + + /* + * We can't guarantee that gr code to enable ELPG will be + * invoked, so we explicitly call disable-enable here + * to enable elpg. + */ + gk20a_pmu_disable_elpg(g); + + if (g->elpg_enabled) + gk20a_pmu_enable_elpg(g); + + udelay(50); + + /* Enable AELPG */ + if (g->aelpg_enabled) { + gk20a_aelpg_init(g); + gk20a_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS); + } + + return 0; + + err_free_sgtable: + gk20a_free_sgtable(&sgt_pg_buf); + err_free_pg_buf: + dma_free_coherent(d, size, + pmu->pg_buf.cpuva, pmu->pg_buf.iova); + pmu->pg_buf.cpuva = NULL; + pmu->pg_buf.iova = 0; + err: + return err; +} + +int gk20a_init_pmu_support(struct gk20a *g) +{ + struct pmu_gk20a *pmu = &g->pmu; + u32 err; + + gk20a_dbg_fn(""); + + if (pmu->initialized) + return 0; + + pmu->g = g; + + err = gk20a_init_pmu_reset_enable_hw(g); + if (err) + return err; + + if (support_gk20a_pmu()) { + err = gk20a_init_pmu_setup_sw(g); + if (err) + return err; + + err = gk20a_init_pmu_setup_hw1(g); + if (err) + return err; + } + + return err; +} + +static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 handle, u32 status) +{ + struct pmu_gk20a *pmu = param; + struct pmu_pg_msg_elpg_msg *elpg_msg = &msg->msg.pg.elpg_msg; + + gk20a_dbg_fn(""); + + if (status != 0) { + gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted"); + /* TBD: disable ELPG */ + return; + } + + switch (elpg_msg->msg) { + case PMU_PG_ELPG_MSG_INIT_ACK: + gk20a_dbg_pmu("INIT_PG is acknowledged from PMU"); + pmu->elpg_ready = true; + wake_up(&pmu->pg_wq); + break; + case PMU_PG_ELPG_MSG_ALLOW_ACK: + gk20a_dbg_pmu("ALLOW is acknowledged from PMU"); + pmu->elpg_stat = PMU_ELPG_STAT_ON; + wake_up(&pmu->pg_wq); + break; + case PMU_PG_ELPG_MSG_DISALLOW_ACK: + gk20a_dbg_pmu("DISALLOW is acknowledged from PMU"); + pmu->elpg_stat = PMU_ELPG_STAT_OFF; + wake_up(&pmu->pg_wq); + break; + default: + gk20a_err(dev_from_gk20a(g), + "unsupported ELPG message : 0x%04x", elpg_msg->msg); + } + + return; +} + +static void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 handle, u32 status) +{ + struct pmu_gk20a *pmu = param; + + gk20a_dbg_fn(""); + + if (status != 0) { + gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted"); + /* TBD: disable ELPG */ + return; + } + + switch (msg->msg.pg.stat.sub_msg_id) { + case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET: + gk20a_dbg_pmu("ALLOC_DMEM_OFFSET is acknowledged from PMU"); + pmu->stat_dmem_offset = msg->msg.pg.stat.data; + wake_up(&pmu->pg_wq); + break; + default: + break; + } +} + +static int pmu_init_powergating(struct pmu_gk20a *pmu) +{ + struct gk20a *g = pmu->g; + struct pmu_cmd cmd; + u32 seq; + + gk20a_dbg_fn(""); + + if (tegra_cpu_is_asim()) { + /* TBD: calculate threshold for silicon */ + gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A), + PMU_PG_IDLE_THRESHOLD_SIM); + gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A), + PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM); + } else { + /* TBD: calculate threshold for silicon */ + gk20a_writel(g, 
pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A), + PMU_PG_IDLE_THRESHOLD); + gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A), + PMU_PG_POST_POWERUP_IDLE_THRESHOLD); + } + + /* init ELPG */ + memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); + cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; + cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT; + + gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, + pmu_handle_pg_elpg_msg, pmu, &seq, ~0); + + /* alloc dmem for powergating state log */ + pmu->stat_dmem_offset = 0; + memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat); + cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT; + cmd.cmd.pg.stat.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM; + cmd.cmd.pg.stat.data = 0; + + gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ, + pmu_handle_pg_stat_msg, pmu, &seq, ~0); + + /* disallow ELPG initially + PMU ucode requires a disallow cmd before allow cmd */ + pmu->elpg_stat = PMU_ELPG_STAT_ON; /* set for wait_event PMU_ELPG_STAT_OFF */ + memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); + cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; + cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW; + + gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, + pmu_handle_pg_elpg_msg, pmu, &seq, ~0); + + /* start with elpg disabled until first enable call */ + pmu->elpg_refcnt = 1; + + return 0; +} + +static int pmu_init_perfmon(struct pmu_gk20a *pmu) +{ + struct gk20a *g = pmu->g; + struct pmu_v *pv = &g->ops.pmu_ver; + struct pmu_cmd cmd; + struct pmu_payload payload; + u32 seq; + u32 data; + int err; + + gk20a_dbg_fn(""); + + pmu->perfmon_ready = 0; + + /* use counter #3 for GR && CE2 busy cycles */ + gk20a_writel(g, pwr_pmu_idle_mask_r(3), + pwr_pmu_idle_mask_gr_enabled_f() | + pwr_pmu_idle_mask_ce_2_enabled_f()); + + /* disable idle filtering for counters 3 and 6 */ + data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3)); + data = set_field(data, pwr_pmu_idle_ctrl_value_m() | + pwr_pmu_idle_ctrl_filter_m(), + pwr_pmu_idle_ctrl_value_busy_f() | + pwr_pmu_idle_ctrl_filter_disabled_f()); + gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data); + + /* use counter #6 for total cycles */ + data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6)); + data = set_field(data, pwr_pmu_idle_ctrl_value_m() | + pwr_pmu_idle_ctrl_filter_m(), + pwr_pmu_idle_ctrl_value_always_f() | + pwr_pmu_idle_ctrl_filter_disabled_f()); + gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data); + + /* + * We don't want to disturb counters #3 and #6, which are used by + * perfmon, so we add wiring also to counters #1 and #2 for + * exposing raw counter readings. 
+ */ + gk20a_writel(g, pwr_pmu_idle_mask_r(1), + pwr_pmu_idle_mask_gr_enabled_f() | + pwr_pmu_idle_mask_ce_2_enabled_f()); + + data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1)); + data = set_field(data, pwr_pmu_idle_ctrl_value_m() | + pwr_pmu_idle_ctrl_filter_m(), + pwr_pmu_idle_ctrl_value_busy_f() | + pwr_pmu_idle_ctrl_filter_disabled_f()); + gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data); + + data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2)); + data = set_field(data, pwr_pmu_idle_ctrl_value_m() | + pwr_pmu_idle_ctrl_filter_m(), + pwr_pmu_idle_ctrl_value_always_f() | + pwr_pmu_idle_ctrl_filter_disabled_f()); + gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); + + pmu->sample_buffer = 0; + err = pmu->dmem.alloc(&pmu->dmem, &pmu->sample_buffer, 2 * sizeof(u16)); + if (err) { + gk20a_err(dev_from_gk20a(g), + "failed to allocate perfmon sample buffer"); + return -ENOMEM; + } + + /* init PERFMON */ + memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PERFMON; + cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size(); + cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT; + /* buffer to save counter values for pmu perfmon */ + pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon, + (u16)pmu->sample_buffer); + /* number of sample periods below lower threshold + before pmu triggers perfmon decrease event + TBD: = 15 */ + pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15); + /* index of base counter, aka. always ticking counter */ + pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6); + /* microseconds interval between pmu polls perf counters */ + pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700); + /* number of perfmon counters + counter #3 (GR and CE2) for gk20a */ + pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1); + /* moving average window for sample periods + TBD: = 3000000 / sample_period_us = 17 */ + pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17); + + memset(&payload, 0, sizeof(struct pmu_payload)); + payload.in.buf = &pmu->perfmon_counter; + payload.in.size = sizeof(struct pmu_perfmon_counter); + payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC); + + gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ, + NULL, NULL, &seq, ~0); + + return 0; +} + +static int pmu_process_init_msg(struct pmu_gk20a *pmu, + struct pmu_msg *msg) +{ + struct gk20a *g = pmu->g; + struct pmu_v *pv = &g->ops.pmu_ver; + union pmu_init_msg_pmu *init; + struct pmu_sha1_gid_data gid_data; + u32 i, tail = 0; + + tail = pwr_pmu_msgq_tail_val_v( + gk20a_readl(g, pwr_pmu_msgq_tail_r())); + + pmu_copy_from_dmem(pmu, tail, + (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0); + + if (msg->hdr.unit_id != PMU_UNIT_INIT) { + gk20a_err(dev_from_gk20a(g), + "expecting init msg"); + return -EINVAL; + } + + pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE, + (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0); + + if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) { + gk20a_err(dev_from_gk20a(g), + "expecting init msg"); + return -EINVAL; + } + + tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT); + gk20a_writel(g, pwr_pmu_msgq_tail_r(), + pwr_pmu_msgq_tail_val_f(tail)); + + init = pv->get_pmu_msg_pmu_init_msg_ptr(&(msg->msg.init)); + if (!pmu->gid_info.valid) { + + pmu_copy_from_dmem(pmu, + pv->get_pmu_init_msg_pmu_sw_mg_off(init), + (u8 *)&gid_data, + sizeof(struct pmu_sha1_gid_data), 0); + + pmu->gid_info.valid = + (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE); + + if (pmu->gid_info.valid) { + + BUG_ON(sizeof(pmu->gid_info.gid) != 
+ sizeof(gid_data.gid)); + + memcpy(pmu->gid_info.gid, gid_data.gid, + sizeof(pmu->gid_info.gid)); + } + } + + for (i = 0; i < PMU_QUEUE_COUNT; i++) + pmu_queue_init(pmu, i, init); + + gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem", + pv->get_pmu_init_msg_pmu_sw_mg_off(init), + pv->get_pmu_init_msg_pmu_sw_mg_size(init), + PMU_DMEM_ALLOC_ALIGNMENT); + + pmu->pmu_ready = true; + + return 0; +} + +static bool pmu_read_message(struct pmu_gk20a *pmu, struct pmu_queue *queue, + struct pmu_msg *msg, int *status) +{ + struct gk20a *g = pmu->g; + u32 read_size, bytes_read; + int err; + + *status = 0; + + if (pmu_queue_is_empty(pmu, queue)) + return false; + + err = pmu_queue_open_read(pmu, queue); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to open queue %d for read", queue->id); + *status = err; + return false; + } + + err = pmu_queue_pop(pmu, queue, &msg->hdr, + PMU_MSG_HDR_SIZE, &bytes_read); + if (err || bytes_read != PMU_MSG_HDR_SIZE) { + gk20a_err(dev_from_gk20a(g), + "fail to read msg from queue %d", queue->id); + *status = err | -EINVAL; + goto clean_up; + } + + if (msg->hdr.unit_id == PMU_UNIT_REWIND) { + pmu_queue_rewind(pmu, queue); + /* read again after rewind */ + err = pmu_queue_pop(pmu, queue, &msg->hdr, + PMU_MSG_HDR_SIZE, &bytes_read); + if (err || bytes_read != PMU_MSG_HDR_SIZE) { + gk20a_err(dev_from_gk20a(g), + "fail to read msg from queue %d", queue->id); + *status = err | -EINVAL; + goto clean_up; + } + } + + if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) { + gk20a_err(dev_from_gk20a(g), + "read invalid unit_id %d from queue %d", + msg->hdr.unit_id, queue->id); + *status = -EINVAL; + goto clean_up; + } + + if (msg->hdr.size > PMU_MSG_HDR_SIZE) { + read_size = msg->hdr.size - PMU_MSG_HDR_SIZE; + err = pmu_queue_pop(pmu, queue, &msg->msg, + read_size, &bytes_read); + if (err || bytes_read != read_size) { + gk20a_err(dev_from_gk20a(g), + "fail to read msg from queue %d", queue->id); + *status = err; + goto clean_up; + } + } + + err = pmu_queue_close(pmu, queue, true); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to close queue %d", queue->id); + *status = err; + return false; + } + + return true; + +clean_up: + err = pmu_queue_close(pmu, queue, false); + if (err) + gk20a_err(dev_from_gk20a(g), + "fail to close queue %d", queue->id); + return false; +} + +static int pmu_response_handle(struct pmu_gk20a *pmu, + struct pmu_msg *msg) +{ + struct gk20a *g = pmu->g; + struct pmu_sequence *seq; + struct pmu_v *pv = &g->ops.pmu_ver; + int ret = 0; + + gk20a_dbg_fn(""); + + seq = &pmu->seq[msg->hdr.seq_id]; + if (seq->state != PMU_SEQ_STATE_USED && + seq->state != PMU_SEQ_STATE_CANCELLED) { + gk20a_err(dev_from_gk20a(g), + "msg for an unknown sequence %d", seq->id); + return -EINVAL; + } + + if (msg->hdr.unit_id == PMU_UNIT_RC && + msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) { + gk20a_err(dev_from_gk20a(g), + "unhandled cmd: seq %d", seq->id); + } + else if (seq->state != PMU_SEQ_STATE_CANCELLED) { + if (seq->msg) { + if (seq->msg->hdr.size >= msg->hdr.size) { + memcpy(seq->msg, msg, msg->hdr.size); + if (pv->pmu_allocation_get_dmem_size(pmu, + pv->get_pmu_seq_out_a_ptr(seq)) != 0) { + pmu_copy_from_dmem(pmu, + pv->pmu_allocation_get_dmem_offset(pmu, + pv->get_pmu_seq_out_a_ptr(seq)), + seq->out_payload, + pv->pmu_allocation_get_dmem_size(pmu, + pv->get_pmu_seq_out_a_ptr(seq)), 0); + } + } else { + gk20a_err(dev_from_gk20a(g), + "sequence %d msg buffer too small", + seq->id); + } + } + } else + seq->callback = NULL; + if (pv->pmu_allocation_get_dmem_size(pmu, + 
pv->get_pmu_seq_in_a_ptr(seq)) != 0) + pmu->dmem.free(&pmu->dmem, + pv->pmu_allocation_get_dmem_offset(pmu, + pv->get_pmu_seq_in_a_ptr(seq)), + pv->pmu_allocation_get_dmem_size(pmu, + pv->get_pmu_seq_in_a_ptr(seq))); + if (pv->pmu_allocation_get_dmem_size(pmu, + pv->get_pmu_seq_out_a_ptr(seq)) != 0) + pmu->dmem.free(&pmu->dmem, + pv->pmu_allocation_get_dmem_offset(pmu, + pv->get_pmu_seq_out_a_ptr(seq)), + pv->pmu_allocation_get_dmem_size(pmu, + pv->get_pmu_seq_out_a_ptr(seq))); + + if (seq->callback) + seq->callback(g, msg, seq->cb_params, seq->desc, ret); + + pmu_seq_release(pmu, seq); + + /* TBD: notify client waiting for available dmem */ + + gk20a_dbg_fn("done"); + + return 0; +} + +static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout, + u32 *var, u32 val); + +static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 handle, u32 status) +{ + struct pmu_gk20a *pmu = param; + pmu->zbc_save_done = 1; +} + +static void pmu_save_zbc(struct gk20a *g, u32 entries) +{ + struct pmu_gk20a *pmu = &g->pmu; + struct pmu_cmd cmd; + u32 seq; + + if (!pmu->pmu_ready || !entries || !pmu->zbc_ready) + return; + + memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd); + cmd.cmd.zbc.cmd_type = g->ops.pmu_ver.cmd_id_zbc_table_update; + cmd.cmd.zbc.entry_mask = ZBC_MASK(entries); + + pmu->zbc_save_done = 0; + + gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, + pmu_handle_zbc_msg, pmu, &seq, ~0); + pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g), + &pmu->zbc_save_done, 1); + if (!pmu->zbc_save_done) + gk20a_err(dev_from_gk20a(g), "ZBC save timeout"); +} + +void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries) +{ + if (g->pmu.zbc_ready) + pmu_save_zbc(g, entries); +} + +static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu) +{ + struct gk20a *g = pmu->g; + struct pmu_v *pv = &g->ops.pmu_ver; + struct pmu_cmd cmd; + struct pmu_payload payload; + u32 current_rate = 0; + u32 seq; + + /* PERFMON Start */ + memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PERFMON; + cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size(); + pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon, + PMU_PERFMON_CMD_ID_START); + pv->perfmon_start_set_group_id(&cmd.cmd.perfmon, + PMU_DOMAIN_GROUP_PSTATE); + pv->perfmon_start_set_state_id(&cmd.cmd.perfmon, + pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]); + + current_rate = rate_gpu_to_gpc2clk(gk20a_clk_get_rate(g)); + if (current_rate >= gpc_pll_params.max_freq) + pv->perfmon_start_set_flags(&cmd.cmd.perfmon, + PMU_PERFMON_FLAG_ENABLE_DECREASE); + else if (current_rate <= gpc_pll_params.min_freq) + pv->perfmon_start_set_flags(&cmd.cmd.perfmon, + PMU_PERFMON_FLAG_ENABLE_INCREASE); + else + pv->perfmon_start_set_flags(&cmd.cmd.perfmon, + PMU_PERFMON_FLAG_ENABLE_INCREASE | + PMU_PERFMON_FLAG_ENABLE_DECREASE); + + pv->perfmon_start_set_flags(&cmd.cmd.perfmon, + pv->perfmon_start_get_flags(&cmd.cmd.perfmon) | + PMU_PERFMON_FLAG_CLEAR_PREV); + + memset(&payload, 0, sizeof(struct pmu_payload)); + + /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */ + pmu->perfmon_counter.upper_threshold = 3000; /* 30% */ + /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */ + pmu->perfmon_counter.lower_threshold = 1000; /* 10% */ + pmu->perfmon_counter.valid = true; + + payload.in.buf = &pmu->perfmon_counter; + payload.in.size = sizeof(pmu->perfmon_counter); + payload.in.offset = + pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC); + + 
gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ, + NULL, NULL, &seq, ~0); + + return 0; +} + +static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu) +{ + struct gk20a *g = pmu->g; + struct pmu_cmd cmd; + u32 seq; + + /* PERFMON Stop */ + memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PERFMON; + cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop); + cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP; + + gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ, + NULL, NULL, &seq, ~0); + return 0; +} + +static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu, + struct pmu_perfmon_msg *msg) +{ + struct gk20a *g = pmu->g; + u32 rate; + + gk20a_dbg_fn(""); + + switch (msg->msg_type) { + case PMU_PERFMON_MSG_ID_INCREASE_EVENT: + gk20a_dbg_pmu("perfmon increase event: " + "state_id %d, ground_id %d, pct %d", + msg->gen.state_id, msg->gen.group_id, msg->gen.data); + /* increase gk20a clock freq by 20% */ + rate = gk20a_clk_get_rate(g); + gk20a_clk_set_rate(g, rate * 6 / 5); + break; + case PMU_PERFMON_MSG_ID_DECREASE_EVENT: + gk20a_dbg_pmu("perfmon decrease event: " + "state_id %d, ground_id %d, pct %d", + msg->gen.state_id, msg->gen.group_id, msg->gen.data); + /* decrease gk20a clock freq by 10% */ + rate = gk20a_clk_get_rate(g); + gk20a_clk_set_rate(g, (rate / 10) * 7); + break; + case PMU_PERFMON_MSG_ID_INIT_EVENT: + pmu->perfmon_ready = 1; + gk20a_dbg_pmu("perfmon init event"); + break; + default: + break; + } + + /* restart sampling */ + if (IS_ENABLED(CONFIG_GK20A_PERFMON)) + return pmu_perfmon_start_sampling(pmu); + return 0; +} + + +static int pmu_handle_event(struct pmu_gk20a *pmu, struct pmu_msg *msg) +{ + int err; + + gk20a_dbg_fn(""); + + switch (msg->hdr.unit_id) { + case PMU_UNIT_PERFMON: + err = pmu_handle_perfmon_event(pmu, &msg->msg.perfmon); + break; + default: + break; + } + + return err; +} + +static int pmu_process_message(struct pmu_gk20a *pmu) +{ + struct pmu_msg msg; + int status; + + if (unlikely(!pmu->pmu_ready)) { + pmu_process_init_msg(pmu, &msg); + pmu_init_powergating(pmu); + pmu_init_perfmon(pmu); + return 0; + } + + while (pmu_read_message(pmu, + &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) { + + gk20a_dbg_pmu("read msg hdr: " + "unit_id = 0x%08x, size = 0x%08x, " + "ctrl_flags = 0x%08x, seq_id = 0x%08x", + msg.hdr.unit_id, msg.hdr.size, + msg.hdr.ctrl_flags, msg.hdr.seq_id); + + msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK; + + if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) { + pmu_handle_event(pmu, &msg); + } else { + pmu_response_handle(pmu, &msg); + } + } + + return 0; +} + +static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout, + u32 *var, u32 val) +{ + struct gk20a *g = pmu->g; + unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout); + unsigned long delay = GR_IDLE_CHECK_DEFAULT; + + do { + if (*var == val) + return 0; + + if (gk20a_readl(g, pwr_falcon_irqstat_r())) + gk20a_pmu_isr(g); + + usleep_range(delay, delay * 2); + delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); + } while (time_before(jiffies, end_jiffies) || + !tegra_platform_is_silicon()); + + return -ETIMEDOUT; +} + +static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu) +{ + struct gk20a *g = pmu->g; + struct pmu_pg_stats stats; + + pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset, + (u8 *)&stats, sizeof(struct pmu_pg_stats), 0); + + gk20a_dbg_pmu("pg_entry_start_timestamp : 0x%016llx", + stats.pg_entry_start_timestamp); + gk20a_dbg_pmu("pg_exit_start_timestamp : 0x%016llx", + 
stats.pg_exit_start_timestamp); + gk20a_dbg_pmu("pg_ingating_start_timestamp : 0x%016llx", + stats.pg_ingating_start_timestamp); + gk20a_dbg_pmu("pg_ungating_start_timestamp : 0x%016llx", + stats.pg_ungating_start_timestamp); + gk20a_dbg_pmu("pg_avg_entry_time_us : 0x%08x", + stats.pg_avg_entry_time_us); + gk20a_dbg_pmu("pg_avg_exit_time_us : 0x%08x", + stats.pg_avg_exit_time_us); + gk20a_dbg_pmu("pg_ingating_cnt : 0x%08x", + stats.pg_ingating_cnt); + gk20a_dbg_pmu("pg_ingating_time_us : 0x%08x", + stats.pg_ingating_time_us); + gk20a_dbg_pmu("pg_ungating_count : 0x%08x", + stats.pg_ungating_count); + gk20a_dbg_pmu("pg_ungating_time_us 0x%08x: ", + stats.pg_ungating_time_us); + gk20a_dbg_pmu("pg_gating_cnt : 0x%08x", + stats.pg_gating_cnt); + gk20a_dbg_pmu("pg_gating_deny_cnt : 0x%08x", + stats.pg_gating_deny_cnt); + + /* + Turn on PG_DEBUG in ucode and locate symbol "ElpgLog" offset + in .nm file, e.g. 0x1000066c. use 0x66c. + u32 i, val[20]; + pmu_copy_from_dmem(pmu, 0x66c, + (u8 *)val, sizeof(val), 0); + gk20a_dbg_pmu("elpg log begin"); + for (i = 0; i < 20; i++) + gk20a_dbg_pmu("0x%08x", val[i]); + gk20a_dbg_pmu("elpg log end"); + */ + + gk20a_dbg_pmu("pwr_pmu_idle_mask_supp_r(3): 0x%08x", + gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3))); + gk20a_dbg_pmu("pwr_pmu_idle_mask_1_supp_r(3): 0x%08x", + gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3))); + gk20a_dbg_pmu("pwr_pmu_idle_ctrl_supp_r(3): 0x%08x", + gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3))); + gk20a_dbg_pmu("pwr_pmu_pg_idle_cnt_r(0): 0x%08x", + gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0))); + gk20a_dbg_pmu("pwr_pmu_pg_intren_r(0): 0x%08x", + gk20a_readl(g, pwr_pmu_pg_intren_r(0))); + + gk20a_dbg_pmu("pwr_pmu_idle_count_r(3): 0x%08x", + gk20a_readl(g, pwr_pmu_idle_count_r(3))); + gk20a_dbg_pmu("pwr_pmu_idle_count_r(4): 0x%08x", + gk20a_readl(g, pwr_pmu_idle_count_r(4))); + gk20a_dbg_pmu("pwr_pmu_idle_count_r(7): 0x%08x", + gk20a_readl(g, pwr_pmu_idle_count_r(7))); + + /* + TBD: script can't generate those registers correctly + gk20a_dbg_pmu("pwr_pmu_idle_status_r(): 0x%08x", + gk20a_readl(g, pwr_pmu_idle_status_r())); + gk20a_dbg_pmu("pwr_pmu_pg_ctrl_r(): 0x%08x", + gk20a_readl(g, pwr_pmu_pg_ctrl_r())); + */ +} + +static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu) +{ + struct gk20a *g = pmu->g; + int i; + + gk20a_err(dev_from_gk20a(g), "pwr_falcon_os_r : %d", + gk20a_readl(g, pwr_falcon_os_r())); + gk20a_err(dev_from_gk20a(g), "pwr_falcon_cpuctl_r : 0x%x", + gk20a_readl(g, pwr_falcon_cpuctl_r())); + gk20a_err(dev_from_gk20a(g), "pwr_falcon_idlestate_r : 0x%x", + gk20a_readl(g, pwr_falcon_idlestate_r())); + gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox0_r : 0x%x", + gk20a_readl(g, pwr_falcon_mailbox0_r())); + gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox1_r : 0x%x", + gk20a_readl(g, pwr_falcon_mailbox1_r())); + gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqstat_r : 0x%x", + gk20a_readl(g, pwr_falcon_irqstat_r())); + gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmode_r : 0x%x", + gk20a_readl(g, pwr_falcon_irqmode_r())); + gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmask_r : 0x%x", + gk20a_readl(g, pwr_falcon_irqmask_r())); + gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqdest_r : 0x%x", + gk20a_readl(g, pwr_falcon_irqdest_r())); + + for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++) + gk20a_err(dev_from_gk20a(g), "pwr_pmu_mailbox_r(%d) : 0x%x", + i, gk20a_readl(g, pwr_pmu_mailbox_r(i))); + + for (i = 0; i < pwr_pmu_debug__size_1_v(); i++) + gk20a_err(dev_from_gk20a(g), "pwr_pmu_debug_r(%d) : 0x%x", + i, gk20a_readl(g, 
pwr_pmu_debug_r(i))); + + for (i = 0; i < 6/*NV_PPWR_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) { + gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), + pwr_pmu_falcon_icd_cmd_opc_rstat_f() | + pwr_pmu_falcon_icd_cmd_idx_f(i)); + gk20a_err(dev_from_gk20a(g), "pmu_rstat (%d) : 0x%x", + i, gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); + } + + i = gk20a_readl(g, pwr_pmu_bar0_error_status_r()); + gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_error_status_r : 0x%x", i); + if (i != 0) { + gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_addr_r : 0x%x", + gk20a_readl(g, pwr_pmu_bar0_addr_r())); + gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_data_r : 0x%x", + gk20a_readl(g, pwr_pmu_bar0_data_r())); + gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_timeout_r : 0x%x", + gk20a_readl(g, pwr_pmu_bar0_timeout_r())); + gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_ctl_r : 0x%x", + gk20a_readl(g, pwr_pmu_bar0_ctl_r())); + } + + i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r()); + gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_fecs_error_r : 0x%x", i); + + i = gk20a_readl(g, pwr_falcon_exterrstat_r()); + gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterrstat_r : 0x%x", i); + if (pwr_falcon_exterrstat_valid_v(i) == + pwr_falcon_exterrstat_valid_true_v()) { + gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterraddr_r : 0x%x", + gk20a_readl(g, pwr_falcon_exterraddr_r())); + gk20a_err(dev_from_gk20a(g), "top_fs_status_r : 0x%x", + gk20a_readl(g, top_fs_status_r())); + gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x", + gk20a_readl(g, mc_enable_r())); + } + + gk20a_err(dev_from_gk20a(g), "pwr_falcon_engctl_r : 0x%x", + gk20a_readl(g, pwr_falcon_engctl_r())); + gk20a_err(dev_from_gk20a(g), "pwr_falcon_curctx_r : 0x%x", + gk20a_readl(g, pwr_falcon_curctx_r())); + gk20a_err(dev_from_gk20a(g), "pwr_falcon_nxtctx_r : 0x%x", + gk20a_readl(g, pwr_falcon_nxtctx_r())); + + gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), + pwr_pmu_falcon_icd_cmd_opc_rreg_f() | + pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_IMB)); + gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_IMB : 0x%x", + gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); + + gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), + pwr_pmu_falcon_icd_cmd_opc_rreg_f() | + pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_DMB)); + gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_DMB : 0x%x", + gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); + + gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), + pwr_pmu_falcon_icd_cmd_opc_rreg_f() | + pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CSW)); + gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CSW : 0x%x", + gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); + + gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), + pwr_pmu_falcon_icd_cmd_opc_rreg_f() | + pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CTX)); + gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CTX : 0x%x", + gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); + + gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), + pwr_pmu_falcon_icd_cmd_opc_rreg_f() | + pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_EXCI)); + gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_EXCI : 0x%x", + gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); + + for (i = 0; i < 4; i++) { + gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), + pwr_pmu_falcon_icd_cmd_opc_rreg_f() | + pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_PC)); + gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_PC : 0x%x", + gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); + + gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(), + pwr_pmu_falcon_icd_cmd_opc_rreg_f() | + pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_SP)); + gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_SP : 0x%x", + 
gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r())); + } + + /* PMU may crash due to FECS crash. Dump FECS status */ + gk20a_fecs_dump_falcon_stats(g); +} + +void gk20a_pmu_isr(struct gk20a *g) +{ + struct pmu_gk20a *pmu = &g->pmu; + struct pmu_queue *queue; + u32 intr, mask; + bool recheck = false; + + gk20a_dbg_fn(""); + + mutex_lock(&pmu->isr_mutex); + + mask = gk20a_readl(g, pwr_falcon_irqmask_r()) & + gk20a_readl(g, pwr_falcon_irqdest_r()); + + intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask; + + gk20a_dbg_pmu("received falcon interrupt: 0x%08x", intr); + + if (!intr) { + mutex_unlock(&pmu->isr_mutex); + return; + } + + if (intr & pwr_falcon_irqstat_halt_true_f()) { + gk20a_err(dev_from_gk20a(g), + "pmu halt intr not implemented"); + pmu_dump_falcon_stats(pmu); + } + if (intr & pwr_falcon_irqstat_exterr_true_f()) { + gk20a_err(dev_from_gk20a(g), + "pmu exterr intr not implemented. Clearing interrupt."); + pmu_dump_falcon_stats(pmu); + + gk20a_writel(g, pwr_falcon_exterrstat_r(), + gk20a_readl(g, pwr_falcon_exterrstat_r()) & + ~pwr_falcon_exterrstat_valid_m()); + } + if (intr & pwr_falcon_irqstat_swgen0_true_f()) { + pmu_process_message(pmu); + recheck = true; + } + + gk20a_writel(g, pwr_falcon_irqsclr_r(), intr); + + if (recheck) { + queue = &pmu->queue[PMU_MESSAGE_QUEUE]; + if (!pmu_queue_is_empty(pmu, queue)) + gk20a_writel(g, pwr_falcon_irqsset_r(), + pwr_falcon_irqsset_swgen0_set_f()); + } + + mutex_unlock(&pmu->isr_mutex); +} + +static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd, + struct pmu_msg *msg, struct pmu_payload *payload, + u32 queue_id) +{ + struct gk20a *g = pmu->g; + struct pmu_queue *queue; + u32 in_size, out_size; + + if (!PMU_IS_SW_COMMAND_QUEUE(queue_id)) + goto invalid_cmd; + + queue = &pmu->queue[queue_id]; + if (cmd->hdr.size < PMU_CMD_HDR_SIZE) + goto invalid_cmd; + + if (cmd->hdr.size > (queue->size >> 1)) + goto invalid_cmd; + + if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE) + goto invalid_cmd; + + if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id)) + goto invalid_cmd; + + if (payload == NULL) + return true; + + if (payload->in.buf == NULL && payload->out.buf == NULL) + goto invalid_cmd; + + if ((payload->in.buf != NULL && payload->in.size == 0) || + (payload->out.buf != NULL && payload->out.size == 0)) + goto invalid_cmd; + + in_size = PMU_CMD_HDR_SIZE; + if (payload->in.buf) { + in_size += payload->in.offset; + in_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu); + } + + out_size = PMU_CMD_HDR_SIZE; + if (payload->out.buf) { + out_size += payload->out.offset; + out_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu); + } + + if (in_size > cmd->hdr.size || out_size > cmd->hdr.size) + goto invalid_cmd; + + + if ((payload->in.offset != 0 && payload->in.buf == NULL) || + (payload->out.offset != 0 && payload->out.buf == NULL)) + goto invalid_cmd; + + return true; + +invalid_cmd: + gk20a_err(dev_from_gk20a(g), "invalid pmu cmd :\n" + "queue_id=%d,\n" + "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n" + "payload in=%p, in_size=%d, in_offset=%d,\n" + "payload out=%p, out_size=%d, out_offset=%d", + queue_id, cmd->hdr.size, cmd->hdr.unit_id, + msg, msg?msg->hdr.unit_id:~0, + &payload->in, payload->in.size, payload->in.offset, + &payload->out, payload->out.size, payload->out.offset); + + return false; +} + +static int pmu_write_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd, + u32 queue_id, unsigned long timeout) +{ + struct gk20a *g = pmu->g; + struct pmu_queue *queue; + unsigned long end_jiffies = jiffies + + 
msecs_to_jiffies(timeout); + int err; + + gk20a_dbg_fn(""); + + queue = &pmu->queue[queue_id]; + + do { + err = pmu_queue_open_write(pmu, queue, cmd->hdr.size); + if (err == -EAGAIN && time_before(jiffies, end_jiffies)) + usleep_range(1000, 2000); + else + break; + } while (1); + + if (err) + goto clean_up; + + pmu_queue_push(pmu, queue, cmd, cmd->hdr.size); + + err = pmu_queue_close(pmu, queue, true); + +clean_up: + if (err) + gk20a_err(dev_from_gk20a(g), + "fail to write cmd to queue %d", queue_id); + else + gk20a_dbg_fn("done"); + + return err; +} + +int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, + struct pmu_msg *msg, struct pmu_payload *payload, + u32 queue_id, pmu_callback callback, void* cb_param, + u32 *seq_desc, unsigned long timeout) +{ + struct pmu_gk20a *pmu = &g->pmu; + struct pmu_v *pv = &g->ops.pmu_ver; + struct pmu_sequence *seq; + void *in = NULL, *out = NULL; + int err; + + gk20a_dbg_fn(""); + + BUG_ON(!cmd); + BUG_ON(!seq_desc); + BUG_ON(!pmu->pmu_ready); + + if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id)) + return -EINVAL; + + err = pmu_seq_acquire(pmu, &seq); + if (err) + return err; + + cmd->hdr.seq_id = seq->id; + + cmd->hdr.ctrl_flags = 0; + cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS; + cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR; + + seq->callback = callback; + seq->cb_params = cb_param; + seq->msg = msg; + seq->out_payload = NULL; + seq->desc = pmu->next_seq_desc++; + + if (payload) + seq->out_payload = payload->out.buf; + + *seq_desc = seq->desc; + + if (payload && payload->in.offset != 0) { + pv->set_pmu_allocation_ptr(pmu, &in, + ((u8 *)&cmd->cmd + payload->in.offset)); + + if (payload->in.buf != payload->out.buf) + pv->pmu_allocation_set_dmem_size(pmu, in, + (u16)payload->in.size); + else + pv->pmu_allocation_set_dmem_size(pmu, in, + (u16)max(payload->in.size, payload->out.size)); + + err = pmu->dmem.alloc(&pmu->dmem, + pv->pmu_allocation_get_dmem_offset_addr(pmu, in), + pv->pmu_allocation_get_dmem_size(pmu, in)); + if (err) + goto clean_up; + + pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu, + in)), + payload->in.buf, payload->in.size, 0); + pv->pmu_allocation_set_dmem_size(pmu, + pv->get_pmu_seq_in_a_ptr(seq), + pv->pmu_allocation_get_dmem_size(pmu, in)); + pv->pmu_allocation_set_dmem_offset(pmu, + pv->get_pmu_seq_in_a_ptr(seq), + pv->pmu_allocation_get_dmem_offset(pmu, in)); + } + + if (payload && payload->out.offset != 0) { + pv->set_pmu_allocation_ptr(pmu, &out, + ((u8 *)&cmd->cmd + payload->out.offset)); + pv->pmu_allocation_set_dmem_size(pmu, out, + (u16)payload->out.size); + + if (payload->out.buf != payload->in.buf) { + err = pmu->dmem.alloc(&pmu->dmem, + pv->pmu_allocation_get_dmem_offset_addr(pmu, out), + pv->pmu_allocation_get_dmem_size(pmu, out)); + if (err) + goto clean_up; + } else { + BUG_ON(in == NULL); + pv->pmu_allocation_set_dmem_offset(pmu, out, + pv->pmu_allocation_get_dmem_offset(pmu, in)); + } + + pv->pmu_allocation_set_dmem_size(pmu, + pv->get_pmu_seq_out_a_ptr(seq), + pv->pmu_allocation_get_dmem_size(pmu, out)); + pv->pmu_allocation_set_dmem_offset(pmu, + pv->get_pmu_seq_out_a_ptr(seq), + pv->pmu_allocation_get_dmem_offset(pmu, out)); + } + + seq->state = PMU_SEQ_STATE_USED; + err = pmu_write_cmd(pmu, cmd, queue_id, timeout); + if (err) + seq->state = PMU_SEQ_STATE_PENDING; + + gk20a_dbg_fn("done"); + + return 0; + +clean_up: + gk20a_dbg_fn("fail"); + if (in) + pmu->dmem.free(&pmu->dmem, + pv->pmu_allocation_get_dmem_offset(pmu, in), + pv->pmu_allocation_get_dmem_size(pmu, in)); + if (out) + 
pmu->dmem.free(&pmu->dmem, + pv->pmu_allocation_get_dmem_offset(pmu, out), + pv->pmu_allocation_get_dmem_size(pmu, out)); + + pmu_seq_release(pmu, seq); + return err; +} + +static int gk20a_pmu_enable_elpg_locked(struct gk20a *g) +{ + struct pmu_gk20a *pmu = &g->pmu; + struct pmu_cmd cmd; + u32 seq, status; + + gk20a_dbg_fn(""); + + memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); + cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; + cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW; + + /* no need to wait ack for ELPG enable but set pending to sync + with follow up ELPG disable */ + pmu->elpg_stat = PMU_ELPG_STAT_ON_PENDING; + + status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, + pmu_handle_pg_elpg_msg, pmu, &seq, ~0); + + BUG_ON(status != 0); + + gk20a_dbg_fn("done"); + return 0; +} + +int gk20a_pmu_enable_elpg(struct gk20a *g) +{ + struct pmu_gk20a *pmu = &g->pmu; + struct gr_gk20a *gr = &g->gr; + + int ret = 0; + + gk20a_dbg_fn(""); + + if (!pmu->elpg_ready || !pmu->initialized) + goto exit; + + mutex_lock(&pmu->elpg_mutex); + + pmu->elpg_refcnt++; + if (pmu->elpg_refcnt <= 0) + goto exit_unlock; + + /* something is not right if we end up in following code path */ + if (unlikely(pmu->elpg_refcnt > 1)) { + gk20a_warn(dev_from_gk20a(g), + "%s(): possible elpg refcnt mismatch. elpg refcnt=%d", + __func__, pmu->elpg_refcnt); + WARN_ON(1); + } + + /* do NOT enable elpg until golden ctx is created, + which is related with the ctx that ELPG save and restore. */ + if (unlikely(!gr->ctx_vars.golden_image_initialized)) + goto exit_unlock; + + /* return if ELPG is already on or on_pending or off_on_pending */ + if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) + goto exit_unlock; + + /* if ELPG is not allowed right now, mark that it should be enabled + * immediately after it is allowed */ + if (!pmu->elpg_enable_allow) { + pmu->elpg_stat = PMU_ELPG_STAT_OFF_ON_PENDING; + goto exit_unlock; + } + + ret = gk20a_pmu_enable_elpg_locked(g); + +exit_unlock: + mutex_unlock(&pmu->elpg_mutex); +exit: + gk20a_dbg_fn("done"); + return ret; +} + +static void pmu_elpg_enable_allow(struct work_struct *work) +{ + struct pmu_gk20a *pmu = container_of(to_delayed_work(work), + struct pmu_gk20a, elpg_enable); + + gk20a_dbg_fn(""); + + mutex_lock(&pmu->elpg_mutex); + + /* It is ok to enabled powergating now */ + pmu->elpg_enable_allow = true; + + /* do we have pending requests? */ + if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) { + pmu->elpg_stat = PMU_ELPG_STAT_OFF; + gk20a_pmu_enable_elpg_locked(pmu->g); + } + + mutex_unlock(&pmu->elpg_mutex); + + gk20a_dbg_fn("done"); +} + +static int gk20a_pmu_disable_elpg_defer_enable(struct gk20a *g, bool enable) +{ + struct pmu_gk20a *pmu = &g->pmu; + struct pmu_cmd cmd; + u32 seq; + int ret = 0; + + gk20a_dbg_fn(""); + + if (!pmu->elpg_ready || !pmu->initialized) + return 0; + + /* remove the work from queue */ + cancel_delayed_work_sync(&pmu->elpg_enable); + + mutex_lock(&pmu->elpg_mutex); + + pmu->elpg_refcnt--; + if (pmu->elpg_refcnt > 0) { + gk20a_warn(dev_from_gk20a(g), + "%s(): possible elpg refcnt mismatch. 
elpg refcnt=%d", + __func__, pmu->elpg_refcnt); + WARN_ON(1); + ret = 0; + goto exit_unlock; + } + + /* cancel off_on_pending and return */ + if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) { + pmu->elpg_stat = PMU_ELPG_STAT_OFF; + ret = 0; + goto exit_reschedule; + } + /* wait if on_pending */ + else if (pmu->elpg_stat == PMU_ELPG_STAT_ON_PENDING) { + + pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g), + &pmu->elpg_stat, PMU_ELPG_STAT_ON); + + if (pmu->elpg_stat != PMU_ELPG_STAT_ON) { + gk20a_err(dev_from_gk20a(g), + "ELPG_ALLOW_ACK failed, elpg_stat=%d", + pmu->elpg_stat); + pmu_dump_elpg_stats(pmu); + pmu_dump_falcon_stats(pmu); + ret = -EBUSY; + goto exit_unlock; + } + } + /* return if ELPG is already off */ + else if (pmu->elpg_stat != PMU_ELPG_STAT_ON) { + ret = 0; + goto exit_reschedule; + } + + memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); + cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; + cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW; + + pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING; + + gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, + pmu_handle_pg_elpg_msg, pmu, &seq, ~0); + + pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g), + &pmu->elpg_stat, PMU_ELPG_STAT_OFF); + if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) { + gk20a_err(dev_from_gk20a(g), + "ELPG_DISALLOW_ACK failed"); + pmu_dump_elpg_stats(pmu); + pmu_dump_falcon_stats(pmu); + ret = -EBUSY; + goto exit_unlock; + } + +exit_reschedule: + if (enable) { + pmu->elpg_enable_allow = false; + schedule_delayed_work(&pmu->elpg_enable, + msecs_to_jiffies(PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC)); + } else + pmu->elpg_enable_allow = true; + + +exit_unlock: + mutex_unlock(&pmu->elpg_mutex); + gk20a_dbg_fn("done"); + return ret; +} + +int gk20a_pmu_disable_elpg(struct gk20a *g) +{ + return gk20a_pmu_disable_elpg_defer_enable(g, true); +} + +int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable) +{ + struct pmu_gk20a *pmu = &g->pmu; + int err; + + gk20a_dbg_fn(""); + + if (enable) + err = pmu_perfmon_start_sampling(pmu); + else + err = pmu_perfmon_stop_sampling(pmu); + + return err; +} + +int gk20a_pmu_destroy(struct gk20a *g) +{ + struct pmu_gk20a *pmu = &g->pmu; + u32 elpg_ingating_time, elpg_ungating_time, gating_cnt; + + gk20a_dbg_fn(""); + + if (!support_gk20a_pmu()) + return 0; + + /* make sure the pending operations are finished before we continue */ + cancel_delayed_work_sync(&pmu->elpg_enable); + cancel_work_sync(&pmu->pg_init); + + gk20a_pmu_get_elpg_residency_gating(g, &elpg_ingating_time, + &elpg_ungating_time, &gating_cnt); + + gk20a_pmu_disable_elpg_defer_enable(g, false); + pmu->initialized = false; + + /* update the s/w ELPG residency counters */ + g->pg_ingating_time_us += (u64)elpg_ingating_time; + g->pg_ungating_time_us += (u64)elpg_ungating_time; + g->pg_gating_cnt += gating_cnt; + + pmu_enable(pmu, false); + + if (pmu->remove_support) { + pmu->remove_support(pmu); + pmu->remove_support = NULL; + } + + gk20a_dbg_fn("done"); + return 0; +} + +int gk20a_pmu_load_norm(struct gk20a *g, u32 *load) +{ + struct pmu_gk20a *pmu = &g->pmu; + u16 _load = 0; + + if (!pmu->perfmon_ready) { + *load = 0; + return 0; + } + + pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0); + *load = _load / 10; + + return 0; +} + +void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, + u32 *total_cycles) +{ + if (!g->power_on) { + 
*busy_cycles = 0; + *total_cycles = 0; + return; + } + + gk20a_busy(g->dev); + *busy_cycles = pwr_pmu_idle_count_value_v( + gk20a_readl(g, pwr_pmu_idle_count_r(1))); + rmb(); + *total_cycles = pwr_pmu_idle_count_value_v( + gk20a_readl(g, pwr_pmu_idle_count_r(2))); + gk20a_idle(g->dev); +} + +void gk20a_pmu_reset_load_counters(struct gk20a *g) +{ + u32 reg_val = pwr_pmu_idle_count_reset_f(1); + + if (!g->power_on) + return; + + gk20a_busy(g->dev); + gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val); + wmb(); + gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val); + gk20a_idle(g->dev); +} + +static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g, + u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt) +{ + struct pmu_gk20a *pmu = &g->pmu; + struct pmu_pg_stats stats; + + if (!pmu->initialized) { + *ingating_time = 0; + *ungating_time = 0; + *gating_cnt = 0; + return 0; + } + + pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset, + (u8 *)&stats, sizeof(struct pmu_pg_stats), 0); + + *ingating_time = stats.pg_ingating_time_us; + *ungating_time = stats.pg_ungating_time_us; + *gating_cnt = stats.pg_gating_cnt; + + return 0; +} + +/* Send an Adaptive Power (AP) related command to PMU */ +static int gk20a_pmu_ap_send_command(struct gk20a *g, + union pmu_ap_cmd *p_ap_cmd, bool b_block) +{ + struct pmu_gk20a *pmu = &g->pmu; + /* FIXME: where is the PG structure defined?? */ + u32 status = 0; + struct pmu_cmd cmd; + u32 seq; + pmu_callback p_callback = NULL; + + memset(&cmd, 0, sizeof(struct pmu_cmd)); + + /* Copy common members */ + cmd.hdr.unit_id = PMU_UNIT_PG; + cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(union pmu_ap_cmd); + + cmd.cmd.pg.ap_cmd.cmn.cmd_type = PMU_PG_CMD_ID_AP; + cmd.cmd.pg.ap_cmd.cmn.cmd_id = p_ap_cmd->cmn.cmd_id; + + /* Copy other members of command */ + switch (p_ap_cmd->cmn.cmd_id) { + case PMU_AP_CMD_ID_INIT: + cmd.cmd.pg.ap_cmd.init.pg_sampling_period_us = + p_ap_cmd->init.pg_sampling_period_us; + p_callback = ap_callback_init_and_enable_ctrl; + break; + + case PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL: + cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.ctrl_id = + p_ap_cmd->init_and_enable_ctrl.ctrl_id; + memcpy( + (void *)&(cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.params), + (void *)&(p_ap_cmd->init_and_enable_ctrl.params), + sizeof(struct pmu_ap_ctrl_init_params)); + + p_callback = ap_callback_init_and_enable_ctrl; + break; + + case PMU_AP_CMD_ID_ENABLE_CTRL: + cmd.cmd.pg.ap_cmd.enable_ctrl.ctrl_id = + p_ap_cmd->enable_ctrl.ctrl_id; + break; + + case PMU_AP_CMD_ID_DISABLE_CTRL: + cmd.cmd.pg.ap_cmd.disable_ctrl.ctrl_id = + p_ap_cmd->disable_ctrl.ctrl_id; + break; + + case PMU_AP_CMD_ID_KICK_CTRL: + cmd.cmd.pg.ap_cmd.kick_ctrl.ctrl_id = + p_ap_cmd->kick_ctrl.ctrl_id; + cmd.cmd.pg.ap_cmd.kick_ctrl.skip_count = + p_ap_cmd->kick_ctrl.skip_count; + break; + + default: + gk20a_dbg_pmu("%s: Invalid Adaptive Power command %d\n", + __func__, p_ap_cmd->cmn.cmd_id); + return 0x2f; + } + + status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, + p_callback, pmu, &seq, ~0); + + if (!status) { + gk20a_dbg_pmu( + "%s: Unable to submit Adaptive Power Command %d\n", + __func__, p_ap_cmd->cmn.cmd_id); + goto err_return; + } + + /* TODO: Implement blocking calls (b_block) */ + +err_return: + return status; +} + +static void ap_callback_init_and_enable_ctrl( + struct gk20a *g, struct pmu_msg *msg, + void *param, u32 seq_desc, u32 status) +{ + /* Define p_ap (i.e pointer to pmu_ap structure) */ + WARN_ON(!msg); + + if (!status) { + switch (msg->msg.pg.ap_msg.cmn.msg_id) { + case 
PMU_AP_MSG_ID_INIT_ACK: + break; + + default: + gk20a_dbg_pmu( + "%s: Invalid Adaptive Power Message: %x\n", + __func__, msg->msg.pg.ap_msg.cmn.msg_id); + break; + } + } +} + +static int gk20a_aelpg_init(struct gk20a *g) +{ + int status = 0; + + /* Remove reliance on app_ctrl field. */ + union pmu_ap_cmd ap_cmd; + + /* TODO: Check for elpg being ready? */ + ap_cmd.init.cmd_id = PMU_AP_CMD_ID_INIT; + ap_cmd.init.pg_sampling_period_us = + APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US; + + status = gk20a_pmu_ap_send_command(g, &ap_cmd, false); + return status; +} + +static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id) +{ + int status = 0; + union pmu_ap_cmd ap_cmd; + + /* TODO: Probably check if ELPG is ready? */ + + ap_cmd.init_and_enable_ctrl.cmd_id = PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL; + ap_cmd.init_and_enable_ctrl.ctrl_id = ctrl_id; + ap_cmd.init_and_enable_ctrl.params.min_idle_filter_us = + APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US; + ap_cmd.init_and_enable_ctrl.params.min_target_saving_us = + APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US; + ap_cmd.init_and_enable_ctrl.params.power_break_even_us = + APCTRL_POWER_BREAKEVEN_DEFAULT_US; + ap_cmd.init_and_enable_ctrl.params.cycles_per_sample_max = + APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT; + + switch (ctrl_id) { + case PMU_AP_CTRL_ID_GRAPHICS: + break; + default: + break; + } + + status = gk20a_pmu_ap_send_command(g, &ap_cmd, true); + return status; +} + +#if CONFIG_DEBUG_FS +static int elpg_residency_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + u32 ingating_time = 0; + u32 ungating_time = 0; + u32 gating_cnt; + u64 total_ingating, total_ungating, residency, divisor, dividend; + + /* Don't unnecessarily power on the device */ + if (g->power_on) { + gk20a_busy(g->dev); + gk20a_pmu_get_elpg_residency_gating(g, &ingating_time, + &ungating_time, &gating_cnt); + gk20a_idle(g->dev); + } + total_ingating = g->pg_ingating_time_us + (u64)ingating_time; + total_ungating = g->pg_ungating_time_us + (u64)ungating_time; + divisor = total_ingating + total_ungating; + + /* We compute the residency on a scale of 1000 */ + dividend = total_ingating * 1000; + + if (divisor) + residency = div64_u64(dividend, divisor); + else + residency = 0; + + seq_printf(s, "Time in ELPG: %llu us\n" + "Time out of ELPG: %llu us\n" + "ELPG residency ratio: %llu\n", + total_ingating, total_ungating, residency); + return 0; + +} + +static int elpg_residency_open(struct inode *inode, struct file *file) +{ + return single_open(file, elpg_residency_show, inode->i_private); +} + +static const struct file_operations elpg_residency_fops = { + .open = elpg_residency_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int elpg_transitions_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + u32 ingating_time, ungating_time, total_gating_cnt; + u32 gating_cnt = 0; + + if (g->power_on) { + gk20a_busy(g->dev); + gk20a_pmu_get_elpg_residency_gating(g, &ingating_time, + &ungating_time, &gating_cnt); + gk20a_idle(g->dev); + } + total_gating_cnt = g->pg_gating_cnt + gating_cnt; + + seq_printf(s, "%u\n", total_gating_cnt); + return 0; + +} + +static int elpg_transitions_open(struct inode *inode, struct file *file) +{ + return single_open(file, elpg_transitions_show, inode->i_private); +} + +static const struct file_operations elpg_transitions_fops = { + .open = elpg_transitions_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int gk20a_pmu_debugfs_init(struct platform_device 
*dev) +{ + struct dentry *d; + struct gk20a_platform *platform = platform_get_drvdata(dev); + struct gk20a *g = get_gk20a(dev); + + d = debugfs_create_file( + "elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g, + &elpg_residency_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "elpg_transitions", S_IRUGO, platform->debugfs, g, + &elpg_transitions_fops); + if (!d) + goto err_out; + + return 0; + +err_out: + pr_err("%s: Failed to make debugfs node\n", __func__); + debugfs_remove_recursive(platform->debugfs); + return -ENOMEM; +} +#endif diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h new file mode 100644 index 00000000..c1b8ff1f --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h @@ -0,0 +1,1097 @@ +/* + * drivers/video/tegra/host/gk20a/pmu_gk20a.h + * + * GK20A PMU (aka. gPMU outside gk20a context) + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef __PMU_GK20A_H__ +#define __PMU_GK20A_H__ + +/* defined by pmu hw spec */ +#define GK20A_PMU_VA_START ((128 * 1024) << 10) +#define GK20A_PMU_VA_SIZE (512 * 1024 * 1024) +#define GK20A_PMU_INST_SIZE (4 * 1024) +#define GK20A_PMU_UCODE_SIZE_MAX (256 * 1024) +#define GK20A_PMU_SEQ_BUF_SIZE 4096 + +#define ZBC_MASK(i) (~(~(0) << ((i)+1)) & 0xfffe) + +/* PMU Command/Message Interfaces for Adaptive Power */ +/* Macro to get Histogram index */ +#define PMU_AP_HISTOGRAM(idx) (idx) +#define PMU_AP_HISTOGRAM_CONT (4) + +/* Total number of histogram bins */ +#define PMU_AP_CFG_HISTOGRAM_BIN_N (16) + +/* Mapping between Idle counters and histograms */ +#define PMU_AP_IDLE_MASK_HIST_IDX_0 (2) +#define PMU_AP_IDLE_MASK_HIST_IDX_1 (3) +#define PMU_AP_IDLE_MASK_HIST_IDX_2 (5) +#define PMU_AP_IDLE_MASK_HIST_IDX_3 (6) + + +/* Mapping between AP_CTRLs and Histograms */ +#define PMU_AP_HISTOGRAM_IDX_GRAPHICS (PMU_AP_HISTOGRAM(1)) + +/* Mapping between AP_CTRLs and Idle counters */ +#define PMU_AP_IDLE_MASK_GRAPHICS (PMU_AP_IDLE_MASK_HIST_IDX_1) + +#define APP_VERSION_1 17997577 +#define APP_VERSION_0 16856675 + + +enum pmu_perfmon_cmd_start_fields { + COUNTER_ALLOC +}; + +/* Adaptive Power Controls (AP_CTRL) */ +enum { + PMU_AP_CTRL_ID_GRAPHICS = 0x0, + /* PMU_AP_CTRL_ID_MS ,*/ + PMU_AP_CTRL_ID_MAX , +}; + +/* AP_CTRL Statistics */ +struct pmu_ap_ctrl_stat { + /* + * Represents whether AP is active or not + * TODO: This is NvBool in RM; is that 1 byte of 4 bytes? + */ + u8 b_active; + + /* Idle filter represented by histogram bin index */ + u8 idle_filter_x; + u8 rsvd[2]; + + /* Total predicted power saving cycles. */ + s32 power_saving_h_cycles; + + /* Counts how many times AP gave us -ve power benefits. */ + u32 bad_decision_count; + + /* + * Number of times ap structure needs to skip AP iterations + * KICK_CTRL from kernel updates this parameter. 
+ */ + u32 skip_count; + u8 bin[PMU_AP_CFG_HISTOGRAM_BIN_N]; +}; + +/* Parameters initialized by INITn APCTRL command */ +struct pmu_ap_ctrl_init_params { + /* Minimum idle filter value in Us */ + u32 min_idle_filter_us; + + /* + * Minimum Targeted Saving in Us. AP will update idle thresholds only + * if power saving achieved by updating idle thresholds is greater than + * Minimum targeted saving. + */ + u32 min_target_saving_us; + + /* Minimum targeted residency of power feature in Us */ + u32 power_break_even_us; + + /* + * Maximum number of allowed power feature cycles per sample. + * + * We are allowing at max "pgPerSampleMax" cycles in one iteration of AP + * AKA pgPerSampleMax in original algorithm. + */ + u32 cycles_per_sample_max; +}; + +/* AP Commands/Message structures */ + +/* + * Structure for Generic AP Commands + */ +struct pmu_ap_cmd_common { + u8 cmd_type; + u16 cmd_id; +}; + +/* + * Structure for INIT AP command + */ +struct pmu_ap_cmd_init { + u8 cmd_type; + u16 cmd_id; + u8 rsvd; + u32 pg_sampling_period_us; +}; + +/* + * Structure for Enable/Disable ApCtrl Commands + */ +struct pmu_ap_cmd_enable_ctrl { + u8 cmd_type; + u16 cmd_id; + + u8 ctrl_id; +}; + +struct pmu_ap_cmd_disable_ctrl { + u8 cmd_type; + u16 cmd_id; + + u8 ctrl_id; +}; + +/* + * Structure for INIT command + */ +struct pmu_ap_cmd_init_ctrl { + u8 cmd_type; + u16 cmd_id; + u8 ctrl_id; + struct pmu_ap_ctrl_init_params params; +}; + +struct pmu_ap_cmd_init_and_enable_ctrl { + u8 cmd_type; + u16 cmd_id; + u8 ctrl_id; + struct pmu_ap_ctrl_init_params params; +}; + +/* + * Structure for KICK_CTRL command + */ +struct pmu_ap_cmd_kick_ctrl { + u8 cmd_type; + u16 cmd_id; + u8 ctrl_id; + + u32 skip_count; +}; + +/* + * Structure for PARAM command + */ +struct pmu_ap_cmd_param { + u8 cmd_type; + u16 cmd_id; + u8 ctrl_id; + + u32 data; +}; + +/* + * Defines for AP commands + */ +enum { + PMU_AP_CMD_ID_INIT = 0x0 , + PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL, + PMU_AP_CMD_ID_ENABLE_CTRL , + PMU_AP_CMD_ID_DISABLE_CTRL , + PMU_AP_CMD_ID_KICK_CTRL , +}; + +/* + * AP Command + */ +union pmu_ap_cmd { + u8 cmd_type; + struct pmu_ap_cmd_common cmn; + struct pmu_ap_cmd_init init; + struct pmu_ap_cmd_init_and_enable_ctrl init_and_enable_ctrl; + struct pmu_ap_cmd_enable_ctrl enable_ctrl; + struct pmu_ap_cmd_disable_ctrl disable_ctrl; + struct pmu_ap_cmd_kick_ctrl kick_ctrl; +}; + +/* + * Structure for generic AP Message + */ +struct pmu_ap_msg_common { + u8 msg_type; + u16 msg_id; +}; + +/* + * Structure for INIT_ACK Message + */ +struct pmu_ap_msg_init_ack { + u8 msg_type; + u16 msg_id; + u8 ctrl_id; + u32 stats_dmem_offset; +}; + +/* + * Defines for AP messages + */ +enum { + PMU_AP_MSG_ID_INIT_ACK = 0x0, +}; + +/* + * AP Message + */ +union pmu_ap_msg { + u8 msg_type; + struct pmu_ap_msg_common cmn; + struct pmu_ap_msg_init_ack init_ack; +}; + +/* Default Sampling Period of AELPG */ +#define APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US (1000000) + +/* Default values of APCTRL parameters */ +#define APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US (100) +#define APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US (10000) +#define APCTRL_POWER_BREAKEVEN_DEFAULT_US (2000) +#define APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT (100) + +/* + * Disable reason for Adaptive Power Controller + */ +enum { + APCTRL_DISABLE_REASON_RM_UNLOAD, + APCTRL_DISABLE_REASON_RMCTRL, +}; + +/* + * Adaptive Power Controller + */ +struct ap_ctrl { + u32 stats_dmem_offset; + u32 disable_reason_mask; + struct pmu_ap_ctrl_stat stat_cache; + u8 b_ready; +}; + +/* + * Adaptive Power structure + * + * 
ap structure provides generic infrastructure to make any power feature + * adaptive. + */ +struct pmu_ap { + u32 supported_mask; + struct ap_ctrl ap_ctrl[PMU_AP_CTRL_ID_MAX]; +}; + + +enum { + GK20A_PMU_DMAIDX_UCODE = 0, + GK20A_PMU_DMAIDX_VIRT = 1, + GK20A_PMU_DMAIDX_PHYS_VID = 2, + GK20A_PMU_DMAIDX_PHYS_SYS_COH = 3, + GK20A_PMU_DMAIDX_PHYS_SYS_NCOH = 4, + GK20A_PMU_DMAIDX_RSVD = 5, + GK20A_PMU_DMAIDX_PELPG = 6, + GK20A_PMU_DMAIDX_END = 7 +}; + +struct pmu_mem_v0 { + u32 dma_base; + u8 dma_offset; + u8 dma_idx; +}; + +struct pmu_mem_v1 { + u32 dma_base; + u8 dma_offset; + u8 dma_idx; + u16 fb_size; +}; + +struct pmu_dmem { + u16 size; + u32 offset; +}; + +/* Make sure size of this structure is a multiple of 4 bytes */ +struct pmu_cmdline_args_v0 { + u32 cpu_freq_hz; /* Frequency of the clock driving PMU */ + u32 falc_trace_size; /* falctrace buffer size (bytes) */ + u32 falc_trace_dma_base; /* 256-byte block address */ + u32 falc_trace_dma_idx; /* dmaIdx for DMA operations */ + struct pmu_mem_v0 gc6_ctx; /* dmem offset of gc6 context */ +}; + +struct pmu_cmdline_args_v1 { + u32 cpu_freq_hz; /* Frequency of the clock driving PMU */ + u32 falc_trace_size; /* falctrace buffer size (bytes) */ + u32 falc_trace_dma_base; /* 256-byte block address */ + u32 falc_trace_dma_idx; /* dmaIdx for DMA operations */ + u8 secure_mode; + struct pmu_mem_v1 gc6_ctx; /* dmem offset of gc6 context */ +}; + +#define GK20A_PMU_DMEM_BLKSIZE2 8 + +#define GK20A_PMU_UCODE_NB_MAX_OVERLAY 32 +#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH 64 + +struct pmu_ucode_desc { + u32 descriptor_size; + u32 image_size; + u32 tools_version; + u32 app_version; + char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH]; + u32 bootloader_start_offset; + u32 bootloader_size; + u32 bootloader_imem_offset; + u32 bootloader_entry_point; + u32 app_start_offset; + u32 app_size; + u32 app_imem_offset; + u32 app_imem_entry; + u32 app_dmem_offset; + u32 app_resident_code_offset; /* Offset from appStartOffset */ + u32 app_resident_code_size; /* Exact size of the resident code ( potentially contains CRC inside at the end ) */ + u32 app_resident_data_offset; /* Offset from appStartOffset */ + u32 app_resident_data_size; /* Exact size of the resident code ( potentially contains CRC inside at the end ) */ + u32 nb_overlays; + struct {u32 start; u32 size;} load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY]; + u32 compressed; +}; + +#define PMU_UNIT_REWIND (0x00) +#define PMU_UNIT_I2C (0x01) +#define PMU_UNIT_SEQ (0x02) +#define PMU_UNIT_PG (0x03) +#define PMU_UNIT_AVAILABLE1 (0x04) +#define PMU_UNIT_AVAILABLE2 (0x05) +#define PMU_UNIT_MEM (0x06) +#define PMU_UNIT_INIT (0x07) +#define PMU_UNIT_FBBA (0x08) +#define PMU_UNIT_DIDLE (0x09) +#define PMU_UNIT_AVAILABLE3 (0x0A) +#define PMU_UNIT_AVAILABLE4 (0x0B) +#define PMU_UNIT_HDCP_MAIN (0x0C) +#define PMU_UNIT_HDCP_V (0x0D) +#define PMU_UNIT_HDCP_SRM (0x0E) +#define PMU_UNIT_NVDPS (0x0F) +#define PMU_UNIT_DEINIT (0x10) +#define PMU_UNIT_AVAILABLE5 (0x11) +#define PMU_UNIT_PERFMON (0x12) +#define PMU_UNIT_FAN (0x13) +#define PMU_UNIT_PBI (0x14) +#define PMU_UNIT_ISOBLIT (0x15) +#define PMU_UNIT_DETACH (0x16) +#define PMU_UNIT_DISP (0x17) +#define PMU_UNIT_HDCP (0x18) +#define PMU_UNIT_REGCACHE (0x19) +#define PMU_UNIT_SYSMON (0x1A) +#define PMU_UNIT_THERM (0x1B) +#define PMU_UNIT_PMGR (0x1C) +#define PMU_UNIT_PERF (0x1D) +#define PMU_UNIT_PCM (0x1E) +#define PMU_UNIT_RC (0x1F) +#define PMU_UNIT_NULL (0x20) +#define PMU_UNIT_LOGGER (0x21) +#define PMU_UNIT_SMBPBI (0x22) +#define PMU_UNIT_END (0x23) + +#define 
PMU_UNIT_TEST_START (0xFE) +#define PMU_UNIT_END_SIM (0xFF) +#define PMU_UNIT_TEST_END (0xFF) + +#define PMU_UNIT_ID_IS_VALID(id) \ + (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START)) + +#define PMU_DMEM_ALLOC_ALIGNMENT (32) +#define PMU_DMEM_ALIGNMENT (4) + +#define PMU_CMD_FLAGS_PMU_MASK (0xF0) + +#define PMU_CMD_FLAGS_STATUS BIT(0) +#define PMU_CMD_FLAGS_INTR BIT(1) +#define PMU_CMD_FLAGS_EVENT BIT(2) +#define PMU_CMD_FLAGS_WATERMARK BIT(3) + +struct pmu_hdr { + u8 unit_id; + u8 size; + u8 ctrl_flags; + u8 seq_id; +}; +#define PMU_MSG_HDR_SIZE sizeof(struct pmu_hdr) +#define PMU_CMD_HDR_SIZE sizeof(struct pmu_hdr) + +#define PMU_QUEUE_COUNT 5 + +struct pmu_allocation_v0 { + u8 pad[3]; + u8 fb_mem_use; + struct { + struct pmu_dmem dmem; + struct pmu_mem_v0 fb; + } alloc; +}; + +struct pmu_allocation_v1 { + struct { + struct pmu_dmem dmem; + struct pmu_mem_v1 fb; + } alloc; +}; + +enum { + PMU_INIT_MSG_TYPE_PMU_INIT = 0, +}; + +struct pmu_init_msg_pmu_v0 { + u8 msg_type; + u8 pad; + + struct { + u16 size; + u16 offset; + u8 index; + u8 pad; + } queue_info[PMU_QUEUE_COUNT]; + + u16 sw_managed_area_offset; + u16 sw_managed_area_size; +}; + +struct pmu_init_msg_pmu_v1 { + u8 msg_type; + u8 pad; + u16 os_debug_entry_point; + + struct { + u16 size; + u16 offset; + u8 index; + u8 pad; + } queue_info[PMU_QUEUE_COUNT]; + + u16 sw_managed_area_offset; + u16 sw_managed_area_size; +}; + +union pmu_init_msg_pmu { + struct pmu_init_msg_pmu_v0 v0; + struct pmu_init_msg_pmu_v1 v1; +}; + +struct pmu_init_msg { + union { + u8 msg_type; + struct pmu_init_msg_pmu_v1 pmu_init_v1; + struct pmu_init_msg_pmu_v0 pmu_init_v0; + }; +}; + +enum { + PMU_PG_ELPG_MSG_INIT_ACK, + PMU_PG_ELPG_MSG_DISALLOW_ACK, + PMU_PG_ELPG_MSG_ALLOW_ACK, + PMU_PG_ELPG_MSG_FREEZE_ACK, + PMU_PG_ELPG_MSG_FREEZE_ABORT, + PMU_PG_ELPG_MSG_UNFREEZE_ACK, +}; + +struct pmu_pg_msg_elpg_msg { + u8 msg_type; + u8 engine_id; + u16 msg; +}; + +enum { + PMU_PG_STAT_MSG_RESP_DMEM_OFFSET = 0, +}; + +struct pmu_pg_msg_stat { + u8 msg_type; + u8 engine_id; + u16 sub_msg_id; + u32 data; +}; + +enum { + PMU_PG_MSG_ENG_BUF_LOADED, + PMU_PG_MSG_ENG_BUF_UNLOADED, + PMU_PG_MSG_ENG_BUF_FAILED, +}; + +struct pmu_pg_msg_eng_buf_stat { + u8 msg_type; + u8 engine_id; + u8 buf_idx; + u8 status; +}; + +struct pmu_pg_msg { + union { + u8 msg_type; + struct pmu_pg_msg_elpg_msg elpg_msg; + struct pmu_pg_msg_stat stat; + struct pmu_pg_msg_eng_buf_stat eng_buf_stat; + /* TBD: other pg messages */ + union pmu_ap_msg ap_msg; + }; +}; + +enum { + PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0, +}; + +struct pmu_rc_msg_unhandled_cmd { + u8 msg_type; + u8 unit_id; +}; + +struct pmu_rc_msg { + u8 msg_type; + struct pmu_rc_msg_unhandled_cmd unhandled_cmd; +}; + +enum { + PMU_PG_CMD_ID_ELPG_CMD = 0, + PMU_PG_CMD_ID_ENG_BUF_LOAD, + PMU_PG_CMD_ID_ENG_BUF_UNLOAD, + PMU_PG_CMD_ID_PG_STAT, + PMU_PG_CMD_ID_PG_LOG_INIT, + PMU_PG_CMD_ID_PG_LOG_FLUSH, + PMU_PG_CMD_ID_PG_PARAM, + PMU_PG_CMD_ID_ELPG_INIT, + PMU_PG_CMD_ID_ELPG_POLL_CTXSAVE, + PMU_PG_CMD_ID_ELPG_ABORT_POLL, + PMU_PG_CMD_ID_ELPG_PWR_UP, + PMU_PG_CMD_ID_ELPG_DISALLOW, + PMU_PG_CMD_ID_ELPG_ALLOW, + PMU_PG_CMD_ID_AP, + RM_PMU_PG_CMD_ID_PSI, + RM_PMU_PG_CMD_ID_CG, + PMU_PG_CMD_ID_ZBC_TABLE_UPDATE, + PMU_PG_CMD_ID_PWR_RAIL_GATE_DISABLE = 0x20, + PMU_PG_CMD_ID_PWR_RAIL_GATE_ENABLE, + PMU_PG_CMD_ID_PWR_RAIL_SMU_MSG_DISABLE +}; + +enum { + PMU_PG_ELPG_CMD_INIT, + PMU_PG_ELPG_CMD_DISALLOW, + PMU_PG_ELPG_CMD_ALLOW, + PMU_PG_ELPG_CMD_FREEZE, + PMU_PG_ELPG_CMD_UNFREEZE, +}; + +struct pmu_pg_cmd_elpg_cmd { + u8 cmd_type; + u8 engine_id; + u16 
cmd; +}; + +struct pmu_pg_cmd_eng_buf_load { + u8 cmd_type; + u8 engine_id; + u8 buf_idx; + u8 pad; + u16 buf_size; + u32 dma_base; + u8 dma_offset; + u8 dma_idx; +}; + +enum { + PMU_PG_STAT_CMD_ALLOC_DMEM = 0, +}; + +struct pmu_pg_cmd_stat { + u8 cmd_type; + u8 engine_id; + u16 sub_cmd_id; + u32 data; +}; + +struct pmu_pg_cmd { + union { + u8 cmd_type; + struct pmu_pg_cmd_elpg_cmd elpg_cmd; + struct pmu_pg_cmd_eng_buf_load eng_buf_load; + struct pmu_pg_cmd_stat stat; + /* TBD: other pg commands */ + union pmu_ap_cmd ap_cmd; + }; +}; + +/* PERFMON */ +#define PMU_DOMAIN_GROUP_PSTATE 0 +#define PMU_DOMAIN_GROUP_GPC2CLK 1 +#define PMU_DOMAIN_GROUP_NUM 2 + +/* TBD: smart strategy */ +#define PMU_PERFMON_PCT_TO_INC 58 +#define PMU_PERFMON_PCT_TO_DEC 23 + +struct pmu_perfmon_counter { + u8 index; + u8 flags; + u8 group_id; + u8 valid; + u16 upper_threshold; /* units of 0.01% */ + u16 lower_threshold; /* units of 0.01% */ +}; + +#define PMU_PERFMON_FLAG_ENABLE_INCREASE (0x00000001) +#define PMU_PERFMON_FLAG_ENABLE_DECREASE (0x00000002) +#define PMU_PERFMON_FLAG_CLEAR_PREV (0x00000004) + +/* PERFMON CMD */ +enum { + PMU_PERFMON_CMD_ID_START = 0, + PMU_PERFMON_CMD_ID_STOP = 1, + PMU_PERFMON_CMD_ID_INIT = 2 +}; + +struct pmu_perfmon_cmd_start_v1 { + u8 cmd_type; + u8 group_id; + u8 state_id; + u8 flags; + struct pmu_allocation_v1 counter_alloc; +}; + +struct pmu_perfmon_cmd_start_v0 { + u8 cmd_type; + u8 group_id; + u8 state_id; + u8 flags; + struct pmu_allocation_v0 counter_alloc; +}; + +struct pmu_perfmon_cmd_stop { + u8 cmd_type; +}; + +struct pmu_perfmon_cmd_init_v1 { + u8 cmd_type; + u8 to_decrease_count; + u8 base_counter_id; + u32 sample_period_us; + struct pmu_allocation_v1 counter_alloc; + u8 num_counters; + u8 samples_in_moving_avg; + u16 sample_buffer; +}; + +struct pmu_perfmon_cmd_init_v0 { + u8 cmd_type; + u8 to_decrease_count; + u8 base_counter_id; + u32 sample_period_us; + struct pmu_allocation_v0 counter_alloc; + u8 num_counters; + u8 samples_in_moving_avg; + u16 sample_buffer; +}; + +struct pmu_perfmon_cmd { + union { + u8 cmd_type; + struct pmu_perfmon_cmd_start_v0 start_v0; + struct pmu_perfmon_cmd_start_v1 start_v1; + struct pmu_perfmon_cmd_stop stop; + struct pmu_perfmon_cmd_init_v0 init_v0; + struct pmu_perfmon_cmd_init_v1 init_v1; + }; +}; + +struct pmu_zbc_cmd { + u8 cmd_type; + u8 pad; + u16 entry_mask; +}; + +/* PERFMON MSG */ +enum { + PMU_PERFMON_MSG_ID_INCREASE_EVENT = 0, + PMU_PERFMON_MSG_ID_DECREASE_EVENT = 1, + PMU_PERFMON_MSG_ID_INIT_EVENT = 2, + PMU_PERFMON_MSG_ID_ACK = 3 +}; + +struct pmu_perfmon_msg_generic { + u8 msg_type; + u8 state_id; + u8 group_id; + u8 data; +}; + +struct pmu_perfmon_msg { + union { + u8 msg_type; + struct pmu_perfmon_msg_generic gen; + }; +}; + + +struct pmu_cmd { + struct pmu_hdr hdr; + union { + struct pmu_perfmon_cmd perfmon; + struct pmu_pg_cmd pg; + struct pmu_zbc_cmd zbc; + } cmd; +}; + +struct pmu_msg { + struct pmu_hdr hdr; + union { + struct pmu_init_msg init; + struct pmu_perfmon_msg perfmon; + struct pmu_pg_msg pg; + struct pmu_rc_msg rc; + } msg; +}; + +#define PMU_SHA1_GID_SIGNATURE 0xA7C66AD2 +#define PMU_SHA1_GID_SIGNATURE_SIZE 4 + +#define PMU_SHA1_GID_SIZE 16 + +struct pmu_sha1_gid { + bool valid; + u8 gid[PMU_SHA1_GID_SIZE]; +}; + +struct pmu_sha1_gid_data { + u8 signature[PMU_SHA1_GID_SIGNATURE_SIZE]; + u8 gid[PMU_SHA1_GID_SIZE]; +}; + +#define PMU_COMMAND_QUEUE_HPQ 0 /* write by sw, read by pmu, protected by sw mutex lock */ +#define PMU_COMMAND_QUEUE_LPQ 1 /* write by sw, read by pmu, protected by sw mutex lock */ 
+#define PMU_COMMAND_QUEUE_BIOS 2 /* read/write by sw/hw, protected by hw pmu mutex, id = 2 */ +#define PMU_COMMAND_QUEUE_SMI 3 /* read/write by sw/hw, protected by hw pmu mutex, id = 3 */ +#define PMU_MESSAGE_QUEUE 4 /* write by pmu, read by sw, accessed by interrupt handler, no lock */ +#define PMU_QUEUE_COUNT 5 + +enum { + PMU_MUTEX_ID_RSVD1 = 0 , + PMU_MUTEX_ID_GPUSER , + PMU_MUTEX_ID_QUEUE_BIOS , + PMU_MUTEX_ID_QUEUE_SMI , + PMU_MUTEX_ID_GPMUTEX , + PMU_MUTEX_ID_I2C , + PMU_MUTEX_ID_RMLOCK , + PMU_MUTEX_ID_MSGBOX , + PMU_MUTEX_ID_FIFO , + PMU_MUTEX_ID_PG , + PMU_MUTEX_ID_GR , + PMU_MUTEX_ID_CLK , + PMU_MUTEX_ID_RSVD6 , + PMU_MUTEX_ID_RSVD7 , + PMU_MUTEX_ID_RSVD8 , + PMU_MUTEX_ID_RSVD9 , + PMU_MUTEX_ID_INVALID +}; + +#define PMU_IS_COMMAND_QUEUE(id) \ + ((id) < PMU_MESSAGE_QUEUE) + +#define PMU_IS_SW_COMMAND_QUEUE(id) \ + (((id) == PMU_COMMAND_QUEUE_HPQ) || \ + ((id) == PMU_COMMAND_QUEUE_LPQ)) + +#define PMU_IS_MESSAGE_QUEUE(id) \ + ((id) == PMU_MESSAGE_QUEUE) + +enum +{ + OFLAG_READ = 0, + OFLAG_WRITE +}; + +#define QUEUE_SET (true) +#define QUEUE_GET (false) + +#define QUEUE_ALIGNMENT (4) + +#define PMU_PGENG_GR_BUFFER_IDX_INIT (0) +#define PMU_PGENG_GR_BUFFER_IDX_ZBC (1) +#define PMU_PGENG_GR_BUFFER_IDX_FECS (2) + +enum +{ + PMU_DMAIDX_UCODE = 0, + PMU_DMAIDX_VIRT = 1, + PMU_DMAIDX_PHYS_VID = 2, + PMU_DMAIDX_PHYS_SYS_COH = 3, + PMU_DMAIDX_PHYS_SYS_NCOH = 4, + PMU_DMAIDX_RSVD = 5, + PMU_DMAIDX_PELPG = 6, + PMU_DMAIDX_END = 7 +}; + +struct pmu_gk20a; +struct pmu_queue; + +struct pmu_queue { + + /* used by hw, for BIOS/SMI queue */ + u32 mutex_id; + u32 mutex_lock; + /* used by sw, for LPQ/HPQ queue */ + struct mutex mutex; + + /* current write position */ + u32 position; + /* physical dmem offset where this queue begins */ + u32 offset; + /* logical queue identifier */ + u32 id; + /* physical queue index */ + u32 index; + /* in bytes */ + u32 size; + + /* open-flag */ + u32 oflag; + bool opened; /* opened implies locked */ + bool locked; /* check free space after setting locked but before setting opened */ +}; + + +#define PMU_MUTEX_ID_IS_VALID(id) \ + ((id) < PMU_MUTEX_ID_INVALID) + +#define PMU_INVALID_MUTEX_OWNER_ID (0) + +struct pmu_mutex { + u32 id; + u32 index; + u32 ref_cnt; +}; + +#define PMU_MAX_NUM_SEQUENCES (256) +#define PMU_SEQ_BIT_SHIFT (5) +#define PMU_SEQ_TBL_SIZE \ + (PMU_MAX_NUM_SEQUENCES >> PMU_SEQ_BIT_SHIFT) + +#define PMU_INVALID_SEQ_DESC (~0) + +enum +{ + PMU_SEQ_STATE_FREE = 0, + PMU_SEQ_STATE_PENDING, + PMU_SEQ_STATE_USED, + PMU_SEQ_STATE_CANCELLED +}; + +struct pmu_payload { + struct { + void *buf; + u32 offset; + u32 size; + } in, out; +}; + +typedef void (*pmu_callback)(struct gk20a *, struct pmu_msg *, void *, u32, + u32); + +struct pmu_sequence { + u8 id; + u32 state; + u32 desc; + struct pmu_msg *msg; + union { + struct pmu_allocation_v0 in_v0; + struct pmu_allocation_v1 in_v1; + }; + union { + struct pmu_allocation_v0 out_v0; + struct pmu_allocation_v1 out_v1; + }; + u8 *out_payload; + pmu_callback callback; + void* cb_params; +}; + +struct pmu_pg_stats { + u64 pg_entry_start_timestamp; + u64 pg_ingating_start_timestamp; + u64 pg_exit_start_timestamp; + u64 pg_ungating_start_timestamp; + u32 pg_avg_entry_time_us; + u32 pg_ingating_cnt; + u32 pg_ingating_time_us; + u32 pg_avg_exit_time_us; + u32 pg_ungating_count; + u32 pg_ungating_time_us; + u32 pg_gating_cnt; + u32 pg_gating_deny_cnt; +}; + +#define PMU_PG_IDLE_THRESHOLD_SIM 1000 +#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM 4000000 +/* TBD: QT or else ? 
*/ +#define PMU_PG_IDLE_THRESHOLD 15000 +#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD 1000000 + +/* state transition : + OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF + ON => OFF is always synchronized */ +#define PMU_ELPG_STAT_OFF 0 /* elpg is off */ +#define PMU_ELPG_STAT_ON 1 /* elpg is on */ +#define PMU_ELPG_STAT_ON_PENDING 2 /* elpg is off, ALLOW cmd has been sent, wait for ack */ +#define PMU_ELPG_STAT_OFF_PENDING 3 /* elpg is on, DISALLOW cmd has been sent, wait for ack */ +#define PMU_ELPG_STAT_OFF_ON_PENDING 4 /* elpg is off, caller has requested on, but ALLOW + cmd hasn't been sent due to ENABLE_ALLOW delay */ + +/* Falcon Register index */ +#define PMU_FALCON_REG_R0 (0) +#define PMU_FALCON_REG_R1 (1) +#define PMU_FALCON_REG_R2 (2) +#define PMU_FALCON_REG_R3 (3) +#define PMU_FALCON_REG_R4 (4) +#define PMU_FALCON_REG_R5 (5) +#define PMU_FALCON_REG_R6 (6) +#define PMU_FALCON_REG_R7 (7) +#define PMU_FALCON_REG_R8 (8) +#define PMU_FALCON_REG_R9 (9) +#define PMU_FALCON_REG_R10 (10) +#define PMU_FALCON_REG_R11 (11) +#define PMU_FALCON_REG_R12 (12) +#define PMU_FALCON_REG_R13 (13) +#define PMU_FALCON_REG_R14 (14) +#define PMU_FALCON_REG_R15 (15) +#define PMU_FALCON_REG_IV0 (16) +#define PMU_FALCON_REG_IV1 (17) +#define PMU_FALCON_REG_UNDEFINED (18) +#define PMU_FALCON_REG_EV (19) +#define PMU_FALCON_REG_SP (20) +#define PMU_FALCON_REG_PC (21) +#define PMU_FALCON_REG_IMB (22) +#define PMU_FALCON_REG_DMB (23) +#define PMU_FALCON_REG_CSW (24) +#define PMU_FALCON_REG_CCR (25) +#define PMU_FALCON_REG_SEC (26) +#define PMU_FALCON_REG_CTX (27) +#define PMU_FALCON_REG_EXCI (28) +#define PMU_FALCON_REG_RSVD0 (29) +#define PMU_FALCON_REG_RSVD1 (30) +#define PMU_FALCON_REG_RSVD2 (31) +#define PMU_FALCON_REG_SIZE (32) + +struct pmu_gk20a { + + struct gk20a *g; + + struct pmu_ucode_desc *desc; + struct pmu_mem_desc ucode; + + struct pmu_mem_desc pg_buf; + /* TBD: remove this if ZBC seq is fixed */ + struct pmu_mem_desc seq_buf; + bool buf_loaded; + + struct pmu_sha1_gid gid_info; + + struct pmu_queue queue[PMU_QUEUE_COUNT]; + + struct pmu_sequence *seq; + unsigned long pmu_seq_tbl[PMU_SEQ_TBL_SIZE]; + u32 next_seq_desc; + + struct pmu_mutex *mutex; + u32 mutex_cnt; + + struct mutex pmu_copy_lock; + struct mutex pmu_seq_lock; + + struct gk20a_allocator dmem; + + u32 *ucode_image; + bool pmu_ready; + + u32 zbc_save_done; + + u32 stat_dmem_offset; + + bool elpg_ready; + u32 elpg_stat; + wait_queue_head_t pg_wq; + +#define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC 1 /* msec */ + struct delayed_work elpg_enable; /* deferred elpg enable */ + struct work_struct pg_init; + bool elpg_enable_allow; /* true after init, false after disable, true after delay */ + struct mutex elpg_mutex; /* protect elpg enable/disable */ + int elpg_refcnt; /* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */ + + struct pmu_perfmon_counter perfmon_counter; + u32 perfmon_state_id[PMU_DOMAIN_GROUP_NUM]; + + bool initialized; + + void (*remove_support)(struct pmu_gk20a *pmu); + bool sw_ready; + bool perfmon_ready; + + u32 sample_buffer; + + struct mutex isr_mutex; + bool zbc_ready; + union { + struct pmu_cmdline_args_v0 args_v0; + struct pmu_cmdline_args_v1 args_v1; + }; +}; + +struct gk20a_pmu_save_state { + struct pmu_sequence *seq; + u32 next_seq_desc; + struct pmu_mutex *mutex; + u32 mutex_cnt; + struct pmu_ucode_desc *desc; + struct pmu_mem_desc ucode; + struct pmu_mem_desc seq_buf; + struct pmu_mem_desc pg_buf; + struct delayed_work elpg_enable; + wait_queue_head_t pg_wq; + bool sw_ready; + struct work_struct 
pg_init; +}; + +int gk20a_init_pmu_support(struct gk20a *g); +int gk20a_init_pmu_setup_hw2(struct gk20a *g); + +void gk20a_pmu_isr(struct gk20a *g); + +/* send a cmd to pmu */ +int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, struct pmu_msg *msg, + struct pmu_payload *payload, u32 queue_id, + pmu_callback callback, void* cb_param, + u32 *seq_desc, unsigned long timeout); + +int gk20a_pmu_enable_elpg(struct gk20a *g); +int gk20a_pmu_disable_elpg(struct gk20a *g); + +void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries); + +int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable); + +int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token); +int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token); +int gk20a_pmu_destroy(struct gk20a *g); +int gk20a_pmu_load_norm(struct gk20a *g, u32 *load); +int gk20a_pmu_debugfs_init(struct platform_device *dev); +void gk20a_pmu_reset_load_counters(struct gk20a *g); +void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, + u32 *total_cycles); + +#endif /*__PMU_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.c b/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.c new file mode 100644 index 00000000..aea1a80b --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.c @@ -0,0 +1,91 @@ +/* + * GK20A priv ring + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include /* for mdelay */ + +#include "gk20a.h" +#include "hw_mc_gk20a.h" +#include "hw_pri_ringmaster_gk20a.h" +#include "hw_pri_ringstation_sys_gk20a.h" +#include "hw_trim_gk20a.h" + +void gk20a_reset_priv_ring(struct gk20a *g) +{ + u32 data; + + if (tegra_platform_is_linsim()) + return; + + data = gk20a_readl(g, trim_sys_gpc2clk_out_r()); + data = set_field(data, + trim_sys_gpc2clk_out_bypdiv_m(), + trim_sys_gpc2clk_out_bypdiv_f(0)); + gk20a_writel(g, trim_sys_gpc2clk_out_r(), data); + + gk20a_reset(g, mc_enable_priv_ring_enabled_f()); + + gk20a_writel(g,pri_ringmaster_command_r(), + 0x4); + + gk20a_writel(g, pri_ringstation_sys_decode_config_r(), + 0x2); + + gk20a_readl(g, pri_ringstation_sys_decode_config_r()); +} + +void gk20a_priv_ring_isr(struct gk20a *g) +{ + u32 status0, status1; + u32 cmd; + s32 retry = 100; + + if (tegra_platform_is_linsim()) + return; + + status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r()); + status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r()); + + gk20a_dbg_info("ringmaster intr status0: 0x%08x," + "status1: 0x%08x", status0, status1); + + if (status0 & (0x1 | 0x2 | 0x4)) { + gk20a_reset_priv_ring(g); + } + + cmd = gk20a_readl(g, pri_ringmaster_command_r()); + cmd = set_field(cmd, pri_ringmaster_command_cmd_m(), + pri_ringmaster_command_cmd_ack_interrupt_f()); + gk20a_writel(g, pri_ringmaster_command_r(), cmd); + + do { + cmd = pri_ringmaster_command_cmd_v( + gk20a_readl(g, pri_ringmaster_command_r())); + usleep_range(20, 40); + } while (cmd != pri_ringmaster_command_cmd_no_cmd_v() && --retry); + + if (retry <= 0) + gk20a_warn(dev_from_gk20a(g), + "priv ringmaster cmd ack too many retries"); + + status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r()); + status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r()); + + gk20a_dbg_info("ringmaster intr status0: 0x%08x," + " status1: 0x%08x", status0, status1); +} + diff --git a/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.h b/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.h new file mode 100644 index 00000000..cb9d49c7 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.h @@ -0,0 +1,27 @@ +/* + * drivers/video/tegra/host/gk20a/priv_ring_gk20a.h + * + * GK20A PRIV ringmaster + * + * Copyright (c) 2011-2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef __PRIV_RING_GK20A_H__ +#define __PRIV_RING_GK20A_H__ + +void gk20a_reset_priv_ring(struct gk20a *g); +void gk20a_priv_ring_isr(struct gk20a *g); + +#endif /*__PRIV_RING_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/regops_gk20a.c b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c new file mode 100644 index 00000000..4a115fb1 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c @@ -0,0 +1,704 @@ +/* + * + * Tegra GK20A GPU Debugger Driver Register Ops + * + * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#include "gk20a.h" +#include "gr_gk20a.h" +#include "dbg_gpu_gk20a.h" +#include "regops_gk20a.h" + + + +struct regop_offset_range { + u32 base:24; + u32 count:8; +}; + +static int regop_bsearch_range_cmp(const void *pkey, const void *pelem) +{ + u32 key = *(u32 *)pkey; + struct regop_offset_range *prange = (struct regop_offset_range *)pelem; + if (key < prange->base) + return -1; + else if (prange->base <= key && key < (prange->base + + (prange->count * 4))) + return 0; + return 1; +} + +static inline bool linear_search(u32 offset, const u32 *list, int size) +{ + int i; + for (i = 0; i < size; i++) + if (list[i] == offset) + return true; + return false; +} + +static const struct regop_offset_range gk20a_global_whitelist_ranges[] = { + { 0x000004f0, 1 }, + { 0x00001a00, 3 }, + { 0x0000259c, 1 }, + { 0x0000280c, 1 }, + { 0x00009400, 1 }, + { 0x00009410, 1 }, + { 0x00020200, 1 }, + { 0x00022430, 7 }, + { 0x00022548, 1 }, + { 0x00100c18, 3 }, + { 0x00100c84, 1 }, + { 0x00100cc4, 1 }, + { 0x00106640, 1 }, + { 0x0010a0a8, 1 }, + { 0x0010a4f0, 1 }, + { 0x0010e064, 1 }, + { 0x0010e164, 1 }, + { 0x0010e490, 1 }, + { 0x00110100, 1 }, + { 0x00140028, 1 }, + { 0x001408dc, 1 }, + { 0x00140a5c, 1 }, + { 0x001410dc, 1 }, + { 0x0014125c, 1 }, + { 0x0017e028, 1 }, + { 0x0017e8dc, 1 }, + { 0x0017ea5c, 1 }, + { 0x0017f0dc, 1 }, + { 0x0017f25c, 1 }, + { 0x00180000, 68 }, + { 0x00180200, 68 }, + { 0x001a0000, 68 }, + { 0x001b0000, 68 }, + { 0x001b0200, 68 }, + { 0x001b0400, 68 }, + { 0x001b0600, 68 }, + { 0x001b4000, 3 }, + { 0x001b4010, 3 }, + { 0x001b4020, 3 }, + { 0x001b4040, 3 }, + { 0x001b4050, 3 }, + { 0x001b4060, 16 }, + { 0x001b40a4, 1 }, + { 0x001b4100, 6 }, + { 0x001b4124, 2 }, + { 0x001b8000, 7 }, + { 0x001bc000, 7 }, + { 0x001be000, 7 }, + { 0x00400500, 1 }, + { 0x00400700, 1 }, + { 0x0040415c, 1 }, + { 0x00405850, 1 }, + { 0x00405908, 1 }, + { 0x00405b40, 1 }, + { 0x00405b50, 1 }, + { 0x00406024, 1 }, + { 0x00407010, 1 }, + { 0x00407808, 1 }, + { 0x0040803c, 1 }, + { 0x0040880c, 1 }, + { 0x00408910, 1 }, + { 0x00408984, 1 }, + { 0x004090a8, 1 }, + { 0x004098a0, 1 }, + { 0x0041000c, 1 }, + { 0x00410110, 1 }, + { 0x00410184, 1 }, + { 0x00418384, 1 }, + { 0x004184a0, 1 }, + { 0x00418604, 1 }, + { 0x00418680, 1 }, + { 0x00418714, 1 }, + { 0x0041881c, 1 }, + { 0x004188c8, 2 }, + { 0x00418b04, 1 }, + { 0x00418c04, 1 }, + { 0x00418c64, 2 }, + { 0x00418c88, 1 }, + { 0x00418cb4, 2 }, + { 0x00418d00, 1 }, + { 0x00418d28, 2 }, + { 0x00418e08, 1 }, + { 0x00418e1c, 2 }, + { 0x00418f08, 1 }, + { 0x00418f20, 2 }, + { 0x00419000, 1 }, + { 0x0041900c, 1 }, + { 0x00419018, 1 }, + { 0x00419854, 1 }, + { 0x00419ab0, 1 }, + { 0x00419ab8, 3 }, + { 0x00419ac8, 1 }, + { 0x00419c0c, 1 }, + { 0x00419c8c, 3 }, + { 0x00419ca8, 1 }, + { 0x00419d08, 2 }, + { 0x00419e00, 1 }, + { 0x00419e0c, 1 }, + { 0x00419e14, 2 }, + { 0x00419e24, 2 }, + { 0x00419e34, 2 }, + { 0x00419e44, 4 }, + { 0x00419ea4, 1 }, + { 
0x00419eb0, 1 }, + { 0x0041a0a0, 1 }, + { 0x0041a0a8, 1 }, + { 0x0041a17c, 1 }, + { 0x0041a890, 2 }, + { 0x0041a8a0, 3 }, + { 0x0041a8b0, 2 }, + { 0x0041b014, 1 }, + { 0x0041b0a0, 1 }, + { 0x0041b0cc, 1 }, + { 0x0041b0e8, 2 }, + { 0x0041b1dc, 1 }, + { 0x0041b1f8, 2 }, + { 0x0041be14, 1 }, + { 0x0041bea0, 1 }, + { 0x0041becc, 1 }, + { 0x0041bee8, 2 }, + { 0x0041bfdc, 1 }, + { 0x0041bff8, 2 }, + { 0x0041c054, 1 }, + { 0x0041c2b0, 1 }, + { 0x0041c2b8, 3 }, + { 0x0041c2c8, 1 }, + { 0x0041c40c, 1 }, + { 0x0041c48c, 3 }, + { 0x0041c4a8, 1 }, + { 0x0041c508, 2 }, + { 0x0041c600, 1 }, + { 0x0041c60c, 1 }, + { 0x0041c614, 2 }, + { 0x0041c624, 2 }, + { 0x0041c634, 2 }, + { 0x0041c644, 4 }, + { 0x0041c6a4, 1 }, + { 0x0041c6b0, 1 }, + { 0x00500384, 1 }, + { 0x005004a0, 1 }, + { 0x00500604, 1 }, + { 0x00500680, 1 }, + { 0x00500714, 1 }, + { 0x0050081c, 1 }, + { 0x005008c8, 2 }, + { 0x00500b04, 1 }, + { 0x00500c04, 1 }, + { 0x00500c64, 2 }, + { 0x00500c88, 1 }, + { 0x00500cb4, 2 }, + { 0x00500d00, 1 }, + { 0x00500d28, 2 }, + { 0x00500e08, 1 }, + { 0x00500e1c, 2 }, + { 0x00500f08, 1 }, + { 0x00500f20, 2 }, + { 0x00501000, 1 }, + { 0x0050100c, 1 }, + { 0x00501018, 1 }, + { 0x00501854, 1 }, + { 0x00501ab0, 1 }, + { 0x00501ab8, 3 }, + { 0x00501ac8, 1 }, + { 0x00501c0c, 1 }, + { 0x00501c8c, 3 }, + { 0x00501ca8, 1 }, + { 0x00501d08, 2 }, + { 0x00501e00, 1 }, + { 0x00501e0c, 1 }, + { 0x00501e14, 2 }, + { 0x00501e24, 2 }, + { 0x00501e34, 2 }, + { 0x00501e44, 4 }, + { 0x00501ea4, 1 }, + { 0x00501eb0, 1 }, + { 0x005020a0, 1 }, + { 0x005020a8, 1 }, + { 0x0050217c, 1 }, + { 0x00502890, 2 }, + { 0x005028a0, 3 }, + { 0x005028b0, 2 }, + { 0x00503014, 1 }, + { 0x005030a0, 1 }, + { 0x005030cc, 1 }, + { 0x005030e8, 2 }, + { 0x005031dc, 1 }, + { 0x005031f8, 2 }, + { 0x00503e14, 1 }, + { 0x00503ea0, 1 }, + { 0x00503ecc, 1 }, + { 0x00503ee8, 2 }, + { 0x00503fdc, 1 }, + { 0x00503ff8, 2 }, + { 0x00504054, 1 }, + { 0x005042b0, 1 }, + { 0x005042b8, 3 }, + { 0x005042c8, 1 }, + { 0x0050440c, 1 }, + { 0x0050448c, 3 }, + { 0x005044a8, 1 }, + { 0x00504508, 2 }, + { 0x00504600, 1 }, + { 0x0050460c, 1 }, + { 0x00504614, 2 }, + { 0x00504624, 2 }, + { 0x00504634, 2 }, + { 0x00504644, 4 }, + { 0x005046a4, 1 }, + { 0x005046b0, 1 }, +}; +static const u32 gk20a_global_whitelist_ranges_count = + ARRAY_SIZE(gk20a_global_whitelist_ranges); + +/* context */ + +static const struct regop_offset_range gk20a_context_whitelist_ranges[] = { + { 0x0000280c, 1 }, + { 0x00100cc4, 1 }, + { 0x00400500, 1 }, + { 0x00405b40, 1 }, + { 0x00419000, 1 }, + { 0x00419c8c, 3 }, + { 0x00419d08, 2 }, + { 0x00419e04, 3 }, + { 0x00419e14, 2 }, + { 0x00419e24, 2 }, + { 0x00419e34, 2 }, + { 0x00419e44, 4 }, + { 0x00419e58, 6 }, + { 0x00419e84, 5 }, + { 0x00419ea4, 1 }, + { 0x00419eac, 2 }, + { 0x00419f30, 8 }, + { 0x0041c48c, 3 }, + { 0x0041c508, 2 }, + { 0x0041c604, 3 }, + { 0x0041c614, 2 }, + { 0x0041c624, 2 }, + { 0x0041c634, 2 }, + { 0x0041c644, 4 }, + { 0x0041c658, 6 }, + { 0x0041c684, 5 }, + { 0x0041c6a4, 1 }, + { 0x0041c6ac, 2 }, + { 0x0041c730, 8 }, + { 0x00501000, 1 }, + { 0x00501c8c, 3 }, + { 0x00501d08, 2 }, + { 0x00501e04, 3 }, + { 0x00501e14, 2 }, + { 0x00501e24, 2 }, + { 0x00501e34, 2 }, + { 0x00501e44, 4 }, + { 0x00501e58, 6 }, + { 0x00501e84, 5 }, + { 0x00501ea4, 1 }, + { 0x00501eac, 2 }, + { 0x00501f30, 8 }, + { 0x0050448c, 3 }, + { 0x00504508, 2 }, + { 0x00504604, 3 }, + { 0x00504614, 2 }, + { 0x00504624, 2 }, + { 0x00504634, 2 }, + { 0x00504644, 4 }, + { 0x00504658, 6 }, + { 0x00504684, 5 }, + { 0x005046a4, 1 }, + { 0x005046ac, 2 }, + { 0x00504730, 8 
}, +}; +static const u32 gk20a_context_whitelist_ranges_count = + ARRAY_SIZE(gk20a_context_whitelist_ranges); + +/* runcontrol */ +static const u32 gk20a_runcontrol_whitelist[] = { + 0x00419e10, + 0x0041c610, + 0x00501e10, + 0x00504610, +}; +static const u32 gk20a_runcontrol_whitelist_count = + ARRAY_SIZE(gk20a_runcontrol_whitelist); + +static const struct regop_offset_range gk20a_runcontrol_whitelist_ranges[] = { + { 0x00419e10, 1 }, + { 0x0041c610, 1 }, + { 0x00501e10, 1 }, + { 0x00504610, 1 }, +}; +static const u32 gk20a_runcontrol_whitelist_ranges_count = + ARRAY_SIZE(gk20a_runcontrol_whitelist_ranges); + + +/* quad ctl */ +static const u32 gk20a_qctl_whitelist[] = { + 0x00504670, + 0x00504674, + 0x00504678, + 0x0050467c, + 0x00504680, + 0x00504730, + 0x00504734, + 0x00504738, + 0x0050473c, +}; +static const u32 gk20a_qctl_whitelist_count = + ARRAY_SIZE(gk20a_qctl_whitelist); + +static const struct regop_offset_range gk20a_qctl_whitelist_ranges[] = { + { 0x00504670, 1 }, + { 0x00504730, 4 }, +}; +static const u32 gk20a_qctl_whitelist_ranges_count = + ARRAY_SIZE(gk20a_qctl_whitelist_ranges); + + + + +static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s, + u32 *ctx_rd_count, u32 *ctx_wr_count, + struct nvhost_dbg_gpu_reg_op *ops, + u32 op_count); + + +int exec_regops_gk20a(struct dbg_session_gk20a *dbg_s, + struct nvhost_dbg_gpu_reg_op *ops, + u64 num_ops) +{ + int err = 0, i; + struct channel_gk20a *ch = NULL; + struct gk20a *g = dbg_s->g; + /*struct gr_gk20a *gr = &g->gr;*/ + u32 data32_lo = 0, data32_hi = 0; + u32 ctx_rd_count = 0, ctx_wr_count = 0; + bool skip_read_lo, skip_read_hi; + bool ok; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); + + ch = dbg_s->ch; + + ok = validate_reg_ops(dbg_s, + &ctx_rd_count, &ctx_wr_count, + ops, num_ops); + if (!ok) { + dev_err(dbg_s->dev, "invalid op(s)"); + err = -EINVAL; + /* each op has its own err/status */ + goto clean_up; + } + + for (i = 0; i < num_ops; i++) { + /* if it isn't global then it is done in the ctx ops... */ + if (ops[i].type != REGOP(TYPE_GLOBAL)) + continue; + + switch (ops[i].op) { + + case REGOP(READ_32): + ops[i].value_hi = 0; + ops[i].value_lo = gk20a_readl(g, ops[i].offset); + gk20a_dbg(gpu_dbg_gpu_dbg, "read_32 0x%08x from 0x%08x", + ops[i].value_lo, ops[i].offset); + + break; + + case REGOP(READ_64): + ops[i].value_lo = gk20a_readl(g, ops[i].offset); + ops[i].value_hi = + gk20a_readl(g, ops[i].offset + 4); + + gk20a_dbg(gpu_dbg_gpu_dbg, "read_64 0x%08x:%08x from 0x%08x", + ops[i].value_hi, ops[i].value_lo, + ops[i].offset); + break; + + case REGOP(WRITE_32): + case REGOP(WRITE_64): + /* some of this appears wonky/unnecessary but + we've kept it for compat with existing + debugger code. just in case... 
*/ + skip_read_lo = skip_read_hi = false; + if (ops[i].and_n_mask_lo == ~(u32)0) { + data32_lo = ops[i].value_lo; + skip_read_lo = true; + } + + if ((ops[i].op == REGOP(WRITE_64)) && + (ops[i].and_n_mask_hi == ~(u32)0)) { + data32_hi = ops[i].value_hi; + skip_read_hi = true; + } + + /* read first 32bits */ + if (unlikely(skip_read_lo == false)) { + data32_lo = gk20a_readl(g, ops[i].offset); + data32_lo &= ~ops[i].and_n_mask_lo; + data32_lo |= ops[i].value_lo; + } + + /* if desired, read second 32bits */ + if ((ops[i].op == REGOP(WRITE_64)) && + !skip_read_hi) { + data32_hi = gk20a_readl(g, ops[i].offset + 4); + data32_hi &= ~ops[i].and_n_mask_hi; + data32_hi |= ops[i].value_hi; + } + + /* now update first 32bits */ + gk20a_writel(g, ops[i].offset, data32_lo); + gk20a_dbg(gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ", + data32_lo, ops[i].offset); + /* if desired, update second 32bits */ + if (ops[i].op == REGOP(WRITE_64)) { + gk20a_writel(g, ops[i].offset + 4, data32_hi); + gk20a_dbg(gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ", + data32_hi, ops[i].offset + 4); + + } + + + break; + + /* shouldn't happen as we've already screened */ + default: + BUG(); + err = -EINVAL; + goto clean_up; + break; + } + } + + if (ctx_wr_count | ctx_rd_count) { + err = gr_gk20a_exec_ctx_ops(ch, ops, num_ops, + ctx_wr_count, ctx_rd_count); + if (err) { + dev_warn(dbg_s->dev, + "failed to perform ctx ops\n"); + goto clean_up; + } + } + + clean_up: + gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err); + return err; + +} + + +static int validate_reg_op_info(struct dbg_session_gk20a *dbg_s, + struct nvhost_dbg_gpu_reg_op *op) +{ + int err = 0; + + op->status = REGOP(STATUS_SUCCESS); + + switch (op->op) { + case REGOP(READ_32): + case REGOP(READ_64): + case REGOP(WRITE_32): + case REGOP(WRITE_64): + break; + default: + op->status |= REGOP(STATUS_UNSUPPORTED_OP); + /*gk20a_err(dbg_s->dev, "Invalid regops op %d!", op->op);*/ + err = -EINVAL; + break; + } + + switch (op->type) { + case REGOP(TYPE_GLOBAL): + case REGOP(TYPE_GR_CTX): + case REGOP(TYPE_GR_CTX_TPC): + case REGOP(TYPE_GR_CTX_SM): + case REGOP(TYPE_GR_CTX_CROP): + case REGOP(TYPE_GR_CTX_ZROP): + case REGOP(TYPE_GR_CTX_QUAD): + break; + /* + case NVHOST_DBG_GPU_REG_OP_TYPE_FB: + */ + default: + op->status |= REGOP(STATUS_INVALID_TYPE); + /*gk20a_err(dbg_s->dev, "Invalid regops type %d!", op->type);*/ + err = -EINVAL; + break; + } + + return err; +} + +static bool check_whitelists(struct dbg_session_gk20a *dbg_s, + struct nvhost_dbg_gpu_reg_op *op, u32 offset) +{ + bool valid = false; + + if (op->type == REGOP(TYPE_GLOBAL)) { + /* search global list */ + valid = !!bsearch(&offset, + gk20a_global_whitelist_ranges, + gk20a_global_whitelist_ranges_count, + sizeof(*gk20a_global_whitelist_ranges), + regop_bsearch_range_cmp); + + /* if debug session and channel is bound search context list */ + if ((!valid) && (!dbg_s->is_profiler && dbg_s->ch)) { + /* binary search context list */ + valid = !!bsearch(&offset, + gk20a_context_whitelist_ranges, + gk20a_context_whitelist_ranges_count, + sizeof(*gk20a_context_whitelist_ranges), + regop_bsearch_range_cmp); + } + + /* if debug session and channel is bound search runcontrol list */ + if ((!valid) && (!dbg_s->is_profiler && dbg_s->ch)) { + valid = linear_search(offset, + gk20a_runcontrol_whitelist, + gk20a_runcontrol_whitelist_count); + } + } else if (op->type == REGOP(TYPE_GR_CTX)) { + /* it's a context-relative op */ + if (!dbg_s->ch) { + gk20a_err(dbg_s->dev, "can't perform ctx regop unless bound"); + op->status = 
REGOP(STATUS_UNSUPPORTED_OP); + return -ENODEV; + } + + /* binary search context list */ + valid = !!bsearch(&offset, + gk20a_context_whitelist_ranges, + gk20a_context_whitelist_ranges_count, + sizeof(*gk20a_context_whitelist_ranges), + regop_bsearch_range_cmp); + + /* if debug session and channel is bound search runcontrol list */ + if ((!valid) && (!dbg_s->is_profiler && dbg_s->ch)) { + valid = linear_search(offset, + gk20a_runcontrol_whitelist, + gk20a_runcontrol_whitelist_count); + } + + } else if (op->type == REGOP(TYPE_GR_CTX_QUAD)) { + valid = linear_search(offset, + gk20a_qctl_whitelist, + gk20a_qctl_whitelist_count); + } + + return valid; +} + +/* note: the op here has already been through validate_reg_op_info */ +static int validate_reg_op_offset(struct dbg_session_gk20a *dbg_s, + struct nvhost_dbg_gpu_reg_op *op) +{ + int err; + u32 buf_offset_lo, buf_offset_addr, num_offsets, offset; + bool valid = false; + + op->status = 0; + offset = op->offset; + + /* support only 24-bit 4-byte aligned offsets */ + if (offset & 0xFF000003) { + gk20a_err(dbg_s->dev, "invalid regop offset: 0x%x\n", offset); + op->status |= REGOP(STATUS_INVALID_OFFSET); + return -EINVAL; + } + + valid = check_whitelists(dbg_s, op, offset); + if ((op->op == REGOP(READ_64) || op->op == REGOP(WRITE_64)) && valid) + valid = check_whitelists(dbg_s, op, offset + 4); + + if (valid && (op->type != REGOP(TYPE_GLOBAL))) { + err = gr_gk20a_get_ctx_buffer_offsets(dbg_s->g, + op->offset, + 1, + &buf_offset_lo, + &buf_offset_addr, + &num_offsets, + op->type == REGOP(TYPE_GR_CTX_QUAD), + op->quad); + if (err) { + op->status |= REGOP(STATUS_INVALID_OFFSET); + return -EINVAL; + } + if (!buf_offset_lo) { + op->status |= REGOP(STATUS_INVALID_OFFSET); + return -EINVAL; + } + } + + if (!valid) { + gk20a_err(dbg_s->dev, "invalid regop offset: 0x%x\n", offset); + op->status |= REGOP(STATUS_INVALID_OFFSET); + return -EINVAL; + } + + return 0; +} + +static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s, + u32 *ctx_rd_count, u32 *ctx_wr_count, + struct nvhost_dbg_gpu_reg_op *ops, + u32 op_count) +{ + u32 i; + int err; + bool ok = true; + + /* keep going until the end so every op can get + * a separate error code if needed */ + for (i = 0; i < op_count; i++) { + + err = validate_reg_op_info(dbg_s, &ops[i]); + ok &= !err; + + if (reg_op_is_gr_ctx(ops[i].type)) { + if (reg_op_is_read(ops[i].op)) + (*ctx_rd_count)++; + else + (*ctx_wr_count)++; + } + + err = validate_reg_op_offset(dbg_s, &ops[i]); + ok &= !err; + } + + gk20a_dbg(gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d\n", + *ctx_wr_count, *ctx_rd_count); + + return ok; +} + +/* exported for tools like cyclestats, etc */ +bool is_bar0_global_offset_whitelisted_gk20a(u32 offset) +{ + + bool valid = !!bsearch(&offset, + gk20a_global_whitelist_ranges, + gk20a_global_whitelist_ranges_count, + sizeof(*gk20a_global_whitelist_ranges), + regop_bsearch_range_cmp); + return valid; +} diff --git a/drivers/gpu/nvgpu/gk20a/regops_gk20a.h b/drivers/gpu/nvgpu/gk20a/regops_gk20a.h new file mode 100644 index 00000000..23b4865b --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/regops_gk20a.h @@ -0,0 +1,47 @@ +/* + * + * Tegra GK20A GPU Debugger Driver Register Ops + * + * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __REGOPS_GK20A_H_ +#define __REGOPS_GK20A_H_ + +int exec_regops_gk20a(struct dbg_session_gk20a *dbg_s, + struct nvhost_dbg_gpu_reg_op *ops, + u64 num_ops); + +/* turn seriously unwieldy names -> something shorter */ +#define REGOP(x) NVHOST_DBG_GPU_REG_OP_##x + + +static inline bool reg_op_is_gr_ctx(u8 type) +{ + return type == REGOP(TYPE_GR_CTX) || + type == REGOP(TYPE_GR_CTX_TPC) || + type == REGOP(TYPE_GR_CTX_SM) || + type == REGOP(TYPE_GR_CTX_CROP) || + type == REGOP(TYPE_GR_CTX_ZROP) || + type == REGOP(TYPE_GR_CTX_QUAD); +} +static inline bool reg_op_is_read(u8 op) +{ + return op == REGOP(READ_32) || + op == REGOP(READ_64) ; +} + +bool is_bar0_global_offset_whitelisted_gk20a(u32 offset); + +#endif /* __REGOPS_GK20A_H_ */ diff --git a/drivers/gpu/nvgpu/gk20a/sim_gk20a.h b/drivers/gpu/nvgpu/gk20a/sim_gk20a.h new file mode 100644 index 00000000..5fc8006e --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/sim_gk20a.h @@ -0,0 +1,62 @@ +/* + * drivers/video/tegra/host/gk20a/sim_gk20a.h + * + * GK20A sim support + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef __SIM_GK20A_H__ +#define __SIM_GK20A_H__ + + +struct gk20a; +struct sim_gk20a { + struct gk20a *g; + struct resource *reg_mem; + void __iomem *regs; + struct { + struct page *page; + void *kvaddr; + phys_addr_t phys; + } send_bfr, recv_bfr, msg_bfr; + u32 send_ring_put; + u32 recv_ring_get; + u32 recv_ring_put; + u32 sequence_base; + void (*remove_support)(struct sim_gk20a *); +}; + + +int gk20a_sim_esc_read(struct gk20a *g, char *path, u32 index, + u32 count, u32 *data); + +static inline int gk20a_sim_esc_read_no_sim(struct gk20a *g, char *p, + u32 i, u32 c, u32 *d) +{ + *d = ~(u32)0; + return -1; +} + +static inline int gk20a_sim_esc_readl(struct gk20a *g, char * p, u32 i, u32 *d) +{ + if (tegra_cpu_is_asim()) + return gk20a_sim_esc_read(g, p, i, sizeof(u32), d); + + return gk20a_sim_esc_read_no_sim(g, p, i, sizeof(u32), d); +} + + +#endif /*__SIM_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/therm_gk20a.c b/drivers/gpu/nvgpu/gk20a/therm_gk20a.c new file mode 100644 index 00000000..da911979 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/therm_gk20a.c @@ -0,0 +1,142 @@ +/* + * drivers/video/tegra/host/gk20a/therm_gk20a.c + * + * GK20A Therm + * + * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "gk20a.h" +#include "hw_chiplet_pwr_gk20a.h" +#include "hw_gr_gk20a.h" +#include "hw_therm_gk20a.h" + +static int gk20a_init_therm_reset_enable_hw(struct gk20a *g) +{ + return 0; +} + +static int gk20a_init_therm_setup_sw(struct gk20a *g) +{ + return 0; +} + +static int gk20a_init_therm_setup_hw(struct gk20a *g) +{ + /* program NV_THERM registers */ + gk20a_writel(g, therm_use_a_r(), NV_THERM_USE_A_INIT); + gk20a_writel(g, therm_evt_ext_therm_0_r(), + NV_THERM_EVT_EXT_THERM_0_INIT); + gk20a_writel(g, therm_evt_ext_therm_1_r(), + NV_THERM_EVT_EXT_THERM_1_INIT); + gk20a_writel(g, therm_evt_ext_therm_2_r(), + NV_THERM_EVT_EXT_THERM_2_INIT); + +/* + u32 data; + + data = gk20a_readl(g, gr_gpcs_tpcs_l1c_cfg_r()); + data = set_field(data, gr_gpcs_tpcs_l1c_cfg_blkactivity_enable_m(), + gr_gpcs_tpcs_l1c_cfg_blkactivity_enable_enable_f()); + gk20a_writel(g, gr_gpcs_tpcs_l1c_cfg_r(), data); + + data = gk20a_readl(g, gr_gpcs_tpcs_l1c_pm_r()); + data = set_field(data, gr_gpcs_tpcs_l1c_pm_enable_m(), + gr_gpcs_tpcs_l1c_pm_enable_enable_f()); + gk20a_writel(g, gr_gpcs_tpcs_l1c_pm_r(), data); + + data = gk20a_readl(g, gr_gpcs_tpcs_sm_pm_ctrl_r()); + data = set_field(data, gr_gpcs_tpcs_sm_pm_ctrl_core_enable_m(), + gr_gpcs_tpcs_sm_pm_ctrl_core_enable_enable_f()); + data = set_field(data, gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_m(), + gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_enable_f()); + gk20a_writel(g, gr_gpcs_tpcs_sm_pm_ctrl_r(), data); + + data = gk20a_readl(g, gr_gpcs_tpcs_sm_halfctl_ctrl_r()); + data = set_field(data, gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_blkactivity_enable_m(), + gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_blkactivity_enable_enable_f()); + gk20a_writel(g, gr_gpcs_tpcs_sm_halfctl_ctrl_r(), data); + + data = gk20a_readl(g, gr_gpcs_tpcs_sm_debug_sfe_control_r()); + data = set_field(data, gr_gpcs_tpcs_sm_debug_sfe_control_blkactivity_enable_m(), + gr_gpcs_tpcs_sm_debug_sfe_control_blkactivity_enable_enable_f()); + gk20a_writel(g, gr_gpcs_tpcs_sm_debug_sfe_control_r(), data); + + gk20a_writel(g, therm_peakpower_config6_r(0), + therm_peakpower_config6_trigger_cfg_1h_intr_f() | + therm_peakpower_config6_trigger_cfg_1l_intr_f()); + + gk20a_writel(g, chiplet_pwr_gpcs_config_1_r(), + chiplet_pwr_gpcs_config_1_ba_enable_yes_f()); + gk20a_writel(g, chiplet_pwr_fbps_config_1_r(), + chiplet_pwr_fbps_config_1_ba_enable_yes_f()); + + data = gk20a_readl(g, therm_config1_r()); + data = set_field(data, therm_config1_ba_enable_m(), + therm_config1_ba_enable_yes_f()); + gk20a_writel(g, therm_config1_r(), data); + + gk20a_writel(g, gr_gpcs_tpcs_sm_power_throttle_r(), 0x441a); + + gk20a_writel(g, therm_weight_1_r(), 0xd3); + gk20a_writel(g, chiplet_pwr_gpcs_weight_6_r(), 0x7d); + gk20a_writel(g, chiplet_pwr_gpcs_weight_7_r(), 0xff); + gk20a_writel(g, chiplet_pwr_fbps_weight_0_r(), 0x13000000); + gk20a_writel(g, chiplet_pwr_fbps_weight_1_r(), 0x19); 
+ + gk20a_writel(g, therm_peakpower_config8_r(0), 0x8); + gk20a_writel(g, therm_peakpower_config9_r(0), 0x0); + + gk20a_writel(g, therm_evt_ba_w0_t1h_r(), 0x100); + + gk20a_writel(g, therm_use_a_r(), therm_use_a_ba_w0_t1h_yes_f()); + + gk20a_writel(g, therm_peakpower_config1_r(0), + therm_peakpower_config1_window_period_2m_f() | + therm_peakpower_config1_ba_sum_shift_20_f() | + therm_peakpower_config1_window_en_enabled_f()); + + gk20a_writel(g, therm_peakpower_config2_r(0), + therm_peakpower_config2_ba_threshold_1h_val_f(1) | + therm_peakpower_config2_ba_threshold_1h_en_enabled_f()); + + gk20a_writel(g, therm_peakpower_config4_r(0), + therm_peakpower_config4_ba_threshold_1l_val_f(1) | + therm_peakpower_config4_ba_threshold_1l_en_enabled_f()); +*/ + return 0; +} + +int gk20a_init_therm_support(struct gk20a *g) +{ + u32 err; + + gk20a_dbg_fn(""); + + err = gk20a_init_therm_reset_enable_hw(g); + if (err) + return err; + + err = gk20a_init_therm_setup_sw(g); + if (err) + return err; + + err = gk20a_init_therm_setup_hw(g); + if (err) + return err; + + return err; +} diff --git a/drivers/gpu/nvgpu/gk20a/therm_gk20a.h b/drivers/gpu/nvgpu/gk20a/therm_gk20a.h new file mode 100644 index 00000000..3f67ee12 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/therm_gk20a.h @@ -0,0 +1,33 @@ +/* + * drivers/video/tegra/host/gk20a/therm_gk20a.h + * + * GK20A Therm + * + * Copyright (c) 2011 - 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef _NVHOST_THERM_GK20A_H_ +#define _NVHOST_THERM_GK20A_H_ + +/* priority for EXT_THERM_0 event set to highest */ +#define NV_THERM_EVT_EXT_THERM_0_INIT 0x3000100 +#define NV_THERM_EVT_EXT_THERM_1_INIT 0x2000200 +#define NV_THERM_EVT_EXT_THERM_2_INIT 0x1000300 +/* configures the thermal events that may cause clock slowdown */ +#define NV_THERM_USE_A_INIT 0x7 + +int gk20a_init_therm_support(struct gk20a *g); + +#endif /* _NVHOST_THERM_GK20A_H_ */ -- cgit v1.2.2
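
For readers unfamiliar with the PMU command interface declared in pmu_gk20a.h above, here is a minimal, illustrative sketch (not part of the patch) of how a caller might post a command to the PMU and receive its acknowledgement via the callback. The structures, macros, and the gk20a_pmu_cmd_post() prototype are taken from the header as added by this patch; the example_* function names, the NULL payload/param, and the ~0 timeout are placeholders chosen for illustration only, not the driver's actual call sites.

/* Sketch only: assumes it lives in a file that already includes
 * "gk20a.h" and "pmu_gk20a.h" from this patch. */

/* Matches the pmu_callback typedef; invoked when the PMU answers. */
static void example_perfmon_stop_done(struct gk20a *g, struct pmu_msg *msg,
		void *param, u32 handle, u32 status)
{
	/* the PMU has acknowledged the PERFMON stop command */
}

static int example_stop_perfmon(struct gk20a *g)
{
	struct pmu_cmd cmd;
	u32 seq;

	/* Build a PERFMON unit command: header first, then the
	 * unit-specific body selected through the cmd union. */
	memset(&cmd, 0, sizeof(cmd));
	cmd.hdr.unit_id = PMU_UNIT_PERFMON;
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
	cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;

	/* No DMEM payload is needed for a stop command; post it on the
	 * low-priority software command queue and let the sequence
	 * machinery call example_perfmon_stop_done() on completion. */
	return gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
			example_perfmon_stop_done, NULL, &seq, ~0);
}

The same pattern applies to the PG/ELPG and ZBC commands: fill cmd.hdr with the owning PMU_UNIT_* id and the total header-plus-body size, populate the matching member of the cmd union, and pass a pmu_payload only when the command carries in/out buffers that must be staged in DMEM.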