From b42fb7ba26b565f93118fbdd9e17b42ee6144c5e Mon Sep 17 00:00:00 2001
From: Deepak Nibade
Date: Tue, 14 Nov 2017 06:43:28 -0800
Subject: gpu: nvgpu: move vgpu code to linux

Most of VGPU code is linux specific but lies in common code.
So until VGPU code is properly abstracted and made os-independent,
move all of VGPU code to the linux specific directory.

Handle corresponding Makefile changes.

Update all #includes to reflect new paths.

Add GPL license to newly added linux files.

Jira NVGPU-387

Change-Id: Ic133e4c80e570bcc273f0dacf45283fefd678923
Signed-off-by: Deepak Nibade
Reviewed-on: https://git-master.nvidia.com/r/1599472
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 .../common/linux/vgpu/gp10b/vgpu_fifo_gp10b.c      |  24 +
 .../nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c  | 332 +++++++++++
 .../nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h  |  39 ++
 .../nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c | 624 +++++++++++++++++++++
 .../nvgpu/common/linux/vgpu/gp10b/vgpu_mm_gp10b.c  | 197 +++++++
 .../nvgpu/common/linux/vgpu/gp10b/vgpu_mm_gp10b.h  |  39 ++
 6 files changed, 1255 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_fifo_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_mm_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_mm_gp10b.h

diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_fifo_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_fifo_gp10b.c
new file mode 100644
index 00000000..cc006f76
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_fifo_gp10b.c
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "vgpu_fifo_gp10b.h"
+
+void vgpu_gp10b_init_fifo_ops(struct gpu_ops *gops)
+{
+	/* syncpoint protection not supported yet */
+	gops->fifo.resetup_ramfc = NULL;
+	gops->fifo.reschedule_runlist = NULL;
+}
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c
new file mode 100644
index 00000000..efc9c595
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#include "common/linux/vgpu/vgpu.h" +#include "common/linux/vgpu/gm20b/vgpu_gr_gm20b.h" + +#include "vgpu_gr_gp10b.h" + +#include + +void vgpu_gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, + struct gr_ctx_desc *gr_ctx) +{ + struct tegra_vgpu_cmd_msg msg = {0}; + struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; + int err; + + gk20a_dbg_fn(""); + + if (!gr_ctx || !gr_ctx->mem.gpu_va) + return; + + msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE; + msg.handle = vgpu_get_handle(g); + p->gr_ctx_handle = gr_ctx->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + + __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, gmmu_page_size_kernel); + + nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer); + + nvgpu_kfree(g, gr_ctx); +} + +int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, + struct gr_ctx_desc **__gr_ctx, + struct vm_gk20a *vm, + u32 class, + u32 flags) +{ + struct gr_ctx_desc *gr_ctx; + u32 graphics_preempt_mode = 0; + u32 compute_preempt_mode = 0; + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + int err; + + gk20a_dbg_fn(""); + + err = vgpu_gr_alloc_gr_ctx(g, __gr_ctx, vm, class, flags); + if (err) + return err; + + gr_ctx = *__gr_ctx; + + if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) + graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; + if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) + compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP; + + if (priv->constants.force_preempt_mode && !graphics_preempt_mode && + !compute_preempt_mode) { + graphics_preempt_mode = g->ops.gr.is_valid_gfx_class(g, class) ? + NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP : 0; + compute_preempt_mode = + g->ops.gr.is_valid_compute_class(g, class) ? 
+ NVGPU_PREEMPTION_MODE_COMPUTE_CTA : 0; + } + + if (graphics_preempt_mode || compute_preempt_mode) { + if (g->ops.gr.set_ctxsw_preemption_mode) { + err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, + class, graphics_preempt_mode, compute_preempt_mode); + if (err) { + nvgpu_err(g, + "set_ctxsw_preemption_mode failed"); + goto fail; + } + } else { + err = -ENOSYS; + goto fail; + } + } + + gk20a_dbg_fn("done"); + return err; + +fail: + vgpu_gr_gp10b_free_gr_ctx(g, vm, gr_ctx); + return err; +} + +int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, + struct gr_ctx_desc *gr_ctx, + struct vm_gk20a *vm, u32 class, + u32 graphics_preempt_mode, + u32 compute_preempt_mode) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_gr_bind_ctxsw_buffers_params *p = + &msg.params.gr_bind_ctxsw_buffers; + int err = 0; + + if (g->ops.gr.is_valid_gfx_class(g, class) && + g->gr.t18x.ctx_vars.force_preemption_gfxp) + graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; + + if (g->ops.gr.is_valid_compute_class(g, class) && + g->gr.t18x.ctx_vars.force_preemption_cilp) + compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP; + + /* check for invalid combinations */ + if ((graphics_preempt_mode == 0) && (compute_preempt_mode == 0)) + return -EINVAL; + + if ((graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) && + (compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP)) + return -EINVAL; + + /* set preemption modes */ + switch (graphics_preempt_mode) { + case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP: + { + u32 spill_size = + gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() * + gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); + u32 pagepool_size = g->ops.gr.pagepool_default_size(g) * + gr_scc_pagepool_total_pages_byte_granularity_v(); + u32 betacb_size = g->gr.attrib_cb_default_size + + (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - + gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); + u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) * + gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * + g->gr.max_tpc_count; + struct nvgpu_mem *desc; + + attrib_cb_size = ALIGN(attrib_cb_size, 128); + + gk20a_dbg_info("gfxp context preempt size=%d", + g->gr.t18x.ctx_vars.preempt_image_size); + gk20a_dbg_info("gfxp context spill size=%d", spill_size); + gk20a_dbg_info("gfxp context pagepool size=%d", pagepool_size); + gk20a_dbg_info("gfxp context attrib cb size=%d", + attrib_cb_size); + + err = gr_gp10b_alloc_buffer(vm, + g->gr.t18x.ctx_vars.preempt_image_size, + &gr_ctx->t18x.preempt_ctxsw_buffer); + if (err) { + err = -ENOMEM; + goto fail; + } + desc = &gr_ctx->t18x.preempt_ctxsw_buffer; + p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_MAIN] = desc->gpu_va; + p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_MAIN] = desc->size; + + err = gr_gp10b_alloc_buffer(vm, + spill_size, + &gr_ctx->t18x.spill_ctxsw_buffer); + if (err) { + err = -ENOMEM; + goto fail; + } + desc = &gr_ctx->t18x.spill_ctxsw_buffer; + p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_SPILL] = desc->gpu_va; + p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_SPILL] = desc->size; + + err = gr_gp10b_alloc_buffer(vm, + pagepool_size, + &gr_ctx->t18x.pagepool_ctxsw_buffer); + if (err) { + err = -ENOMEM; + goto fail; + } + desc = &gr_ctx->t18x.pagepool_ctxsw_buffer; + p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_PAGEPOOL] = + desc->gpu_va; + p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_PAGEPOOL] = desc->size; + + err = gr_gp10b_alloc_buffer(vm, + attrib_cb_size, + &gr_ctx->t18x.betacb_ctxsw_buffer); + if (err) { + err = -ENOMEM; + goto fail; + } + 
desc = &gr_ctx->t18x.betacb_ctxsw_buffer; + p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_BETACB] = + desc->gpu_va; + p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_BETACB] = desc->size; + + gr_ctx->graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; + p->mode = TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_GFX_GFXP; + break; + } + case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: + gr_ctx->graphics_preempt_mode = graphics_preempt_mode; + break; + + default: + break; + } + + if (g->ops.gr.is_valid_compute_class(g, class)) { + switch (compute_preempt_mode) { + case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: + gr_ctx->compute_preempt_mode = + NVGPU_PREEMPTION_MODE_COMPUTE_WFI; + p->mode = TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_WFI; + break; + case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: + gr_ctx->compute_preempt_mode = + NVGPU_PREEMPTION_MODE_COMPUTE_CTA; + p->mode = + TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_COMPUTE_CTA; + break; + case NVGPU_PREEMPTION_MODE_COMPUTE_CILP: + gr_ctx->compute_preempt_mode = + NVGPU_PREEMPTION_MODE_COMPUTE_CILP; + p->mode = + TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_COMPUTE_CILP; + break; + default: + break; + } + } + + if (gr_ctx->graphics_preempt_mode || gr_ctx->compute_preempt_mode) { + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTXSW_BUFFERS; + msg.handle = vgpu_get_handle(g); + p->gr_ctx_handle = gr_ctx->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) { + err = -ENOMEM; + goto fail; + } + } + + return err; + +fail: + nvgpu_err(g, "%s failed %d", __func__, err); + return err; +} + +int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, + u32 graphics_preempt_mode, + u32 compute_preempt_mode) +{ + struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; + struct gk20a *g = ch->g; + struct tsg_gk20a *tsg; + struct vm_gk20a *vm; + u32 class; + int err; + + class = ch->obj_class; + if (!class) + return -EINVAL; + + /* skip setting anything if both modes are already set */ + if (graphics_preempt_mode && + (graphics_preempt_mode == gr_ctx->graphics_preempt_mode)) + graphics_preempt_mode = 0; + + if (compute_preempt_mode && + (compute_preempt_mode == gr_ctx->compute_preempt_mode)) + compute_preempt_mode = 0; + + if (graphics_preempt_mode == 0 && compute_preempt_mode == 0) + return 0; + + if (gk20a_is_channel_marked_as_tsg(ch)) { + tsg = &g->fifo.tsg[ch->tsgid]; + vm = tsg->vm; + } else { + vm = ch->vm; + } + + if (g->ops.gr.set_ctxsw_preemption_mode) { + err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class, + graphics_preempt_mode, + compute_preempt_mode); + if (err) { + nvgpu_err(g, "set_ctxsw_preemption_mode failed"); + return err; + } + } else { + err = -ENOSYS; + } + + return err; +} + +int vgpu_gr_gp10b_init_ctx_state(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + int err; + + gk20a_dbg_fn(""); + + err = vgpu_gr_init_ctx_state(g); + if (err) + return err; + + g->gr.t18x.ctx_vars.preempt_image_size = + priv->constants.preempt_ctx_size; + if (!g->gr.t18x.ctx_vars.preempt_image_size) + return -EINVAL; + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h new file mode 100644 index 00000000..a11dab7d --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __VGPU_GR_GP10B_H__
+#define __VGPU_GR_GP10B_H__
+
+#include "gk20a/gk20a.h"
+
+void vgpu_gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
+			struct gr_ctx_desc *gr_ctx);
+int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
+			struct gr_ctx_desc **__gr_ctx,
+			struct vm_gk20a *vm,
+			u32 class,
+			u32 flags);
+int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
+			struct gr_ctx_desc *gr_ctx,
+			struct vm_gk20a *vm, u32 class,
+			u32 graphics_preempt_mode,
+			u32 compute_preempt_mode);
+int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
+			u32 graphics_preempt_mode,
+			u32 compute_preempt_mode);
+int vgpu_gr_gp10b_init_ctx_state(struct gk20a *g);
+
+#endif
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c
new file mode 100644
index 00000000..da4ca10c
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -0,0 +1,624 @@
+/*
+ * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#include "common/linux/vgpu/vgpu.h" +#include "common/linux/vgpu/fifo_vgpu.h" +#include "common/linux/vgpu/gr_vgpu.h" +#include "common/linux/vgpu/ltc_vgpu.h" +#include "common/linux/vgpu/mm_vgpu.h" +#include "common/linux/vgpu/dbg_vgpu.h" +#include "common/linux/vgpu/fecs_trace_vgpu.h" +#include "common/linux/vgpu/css_vgpu.h" +#include "gp10b/gp10b.h" +#include "gp10b/hal_gp10b.h" +#include "common/linux/vgpu/gm20b/vgpu_gr_gm20b.h" +#include "vgpu_gr_gp10b.h" +#include "vgpu_mm_gp10b.h" + +#include "gk20a/bus_gk20a.h" +#include "gk20a/pramin_gk20a.h" +#include "gk20a/flcn_gk20a.h" +#include "gk20a/mc_gk20a.h" +#include "gk20a/fb_gk20a.h" + +#include "gp10b/mc_gp10b.h" +#include "gp10b/ltc_gp10b.h" +#include "gp10b/mm_gp10b.h" +#include "gp10b/ce_gp10b.h" +#include "gp10b/fb_gp10b.h" +#include "gp10b/pmu_gp10b.h" +#include "gp10b/gr_ctx_gp10b.h" +#include "gp10b/fifo_gp10b.h" +#include "gp10b/gp10b_gating_reglist.h" +#include "gp10b/regops_gp10b.h" +#include "gp10b/therm_gp10b.h" +#include "gp10b/priv_ring_gp10b.h" + +#include "gm20b/ltc_gm20b.h" +#include "gm20b/gr_gm20b.h" +#include "gm20b/fifo_gm20b.h" +#include "gm20b/acr_gm20b.h" +#include "gm20b/pmu_gm20b.h" +#include "gm20b/fb_gm20b.h" +#include "gm20b/mm_gm20b.h" + +#include + +#include +#include +#include +#include +#include +#include + +static const struct gpu_ops vgpu_gp10b_ops = { + .ltc = { + .determine_L2_size_bytes = vgpu_determine_L2_size_bytes, + .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry, + .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry, + .init_cbc = gm20b_ltc_init_cbc, + .init_fs_state = vgpu_ltc_init_fs_state, + .init_comptags = vgpu_ltc_init_comptags, + .cbc_ctrl = NULL, + .isr = gp10b_ltc_isr, + .cbc_fix_config = gm20b_ltc_cbc_fix_config, + .flush = gm20b_flush_ltc, + .set_enabled = gp10b_ltc_set_enabled, + }, + .ce2 = { + .isr_stall = gp10b_ce_isr, + .isr_nonstall = gp10b_ce_nonstall_isr, + .get_num_pce = vgpu_ce_get_num_pce, + }, + .gr = { + .get_patch_slots = gr_gk20a_get_patch_slots, + .init_gpc_mmu = gr_gm20b_init_gpc_mmu, + .bundle_cb_defaults = gr_gm20b_bundle_cb_defaults, + .cb_size_default = gr_gp10b_cb_size_default, + .calc_global_ctx_buffer_size = + gr_gp10b_calc_global_ctx_buffer_size, + .commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb, + .commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb, + .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, + .commit_global_pagepool = gr_gp10b_commit_global_pagepool, + .handle_sw_method = gr_gp10b_handle_sw_method, + .set_alpha_circular_buffer_size = + gr_gp10b_set_alpha_circular_buffer_size, + .set_circular_buffer_size = gr_gp10b_set_circular_buffer_size, + .enable_hww_exceptions = gr_gk20a_enable_hww_exceptions, + .is_valid_class = gr_gp10b_is_valid_class, + .is_valid_gfx_class = gr_gp10b_is_valid_gfx_class, + .is_valid_compute_class = gr_gp10b_is_valid_compute_class, + .get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs, + .get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs, + .init_fs_state = vgpu_gm20b_init_fs_state, + .set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask, + .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, + .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, + .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, + .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, + .free_channel_ctx = vgpu_gr_free_channel_ctx, + .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, + .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, + .get_zcull_info = vgpu_gr_get_zcull_info, + .is_tpc_addr = gr_gm20b_is_tpc_addr, 
+ .get_tpc_num = gr_gm20b_get_tpc_num, + .detect_sm_arch = vgpu_gr_detect_sm_arch, + .add_zbc_color = gr_gp10b_add_zbc_color, + .add_zbc_depth = gr_gp10b_add_zbc_depth, + .zbc_set_table = vgpu_gr_add_zbc, + .zbc_query_table = vgpu_gr_query_zbc, + .pmu_save_zbc = gk20a_pmu_save_zbc, + .add_zbc = gr_gk20a_add_zbc, + .pagepool_default_size = gr_gp10b_pagepool_default_size, + .init_ctx_state = vgpu_gr_gp10b_init_ctx_state, + .alloc_gr_ctx = vgpu_gr_gp10b_alloc_gr_ctx, + .free_gr_ctx = vgpu_gr_gp10b_free_gr_ctx, + .update_ctxsw_preemption_mode = + gr_gp10b_update_ctxsw_preemption_mode, + .dump_gr_regs = NULL, + .update_pc_sampling = gr_gm20b_update_pc_sampling, + .get_fbp_en_mask = vgpu_gr_get_fbp_en_mask, + .get_max_ltc_per_fbp = vgpu_gr_get_max_ltc_per_fbp, + .get_max_lts_per_ltc = vgpu_gr_get_max_lts_per_ltc, + .get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask, + .get_max_fbps_count = vgpu_gr_get_max_fbps_count, + .init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info, + .wait_empty = gr_gp10b_wait_empty, + .init_cyclestats = vgpu_gr_gm20b_init_cyclestats, + .set_sm_debug_mode = vgpu_gr_set_sm_debug_mode, + .enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs, + .bpt_reg_info = gr_gm20b_bpt_reg_info, + .get_access_map = gr_gp10b_get_access_map, + .handle_fecs_error = gr_gp10b_handle_fecs_error, + .handle_sm_exception = gr_gp10b_handle_sm_exception, + .handle_tex_exception = gr_gp10b_handle_tex_exception, + .enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions, + .enable_exceptions = gk20a_gr_enable_exceptions, + .get_lrf_tex_ltc_dram_override = get_ecc_override_val, + .update_smpc_ctxsw_mode = vgpu_gr_update_smpc_ctxsw_mode, + .update_hwpm_ctxsw_mode = vgpu_gr_update_hwpm_ctxsw_mode, + .record_sm_error_state = gm20b_gr_record_sm_error_state, + .update_sm_error_state = gm20b_gr_update_sm_error_state, + .clear_sm_error_state = vgpu_gr_clear_sm_error_state, + .suspend_contexts = vgpu_gr_suspend_contexts, + .resume_contexts = vgpu_gr_resume_contexts, + .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, + .init_sm_id_table = gr_gk20a_init_sm_id_table, + .load_smid_config = gr_gp10b_load_smid_config, + .program_sm_id_numbering = gr_gm20b_program_sm_id_numbering, + .is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr, + .is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr, + .split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr, + .split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr, + .setup_rop_mapping = gr_gk20a_setup_rop_mapping, + .program_zcull_mapping = gr_gk20a_program_zcull_mapping, + .commit_global_timeslice = gr_gk20a_commit_global_timeslice, + .commit_inst = vgpu_gr_commit_inst, + .write_zcull_ptr = gr_gk20a_write_zcull_ptr, + .write_pm_ptr = gr_gk20a_write_pm_ptr, + .init_elcg_mode = gr_gk20a_init_elcg_mode, + .load_tpc_mask = gr_gm20b_load_tpc_mask, + .inval_icache = gr_gk20a_inval_icache, + .trigger_suspend = gr_gk20a_trigger_suspend, + .wait_for_pause = gr_gk20a_wait_for_pause, + .resume_from_pause = gr_gk20a_resume_from_pause, + .clear_sm_errors = gr_gk20a_clear_sm_errors, + .tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions, + .get_esr_sm_sel = gk20a_gr_get_esr_sm_sel, + .sm_debugger_attached = gk20a_gr_sm_debugger_attached, + .suspend_single_sm = gk20a_gr_suspend_single_sm, + .suspend_all_sms = gk20a_gr_suspend_all_sms, + .resume_single_sm = gk20a_gr_resume_single_sm, + .resume_all_sms = gk20a_gr_resume_all_sms, + .get_sm_hww_warp_esr = gp10b_gr_get_sm_hww_warp_esr, + .get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr, + 
.get_sm_no_lock_down_hww_global_esr_mask = + gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask, + .lock_down_sm = gk20a_gr_lock_down_sm, + .wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down, + .clear_sm_hww = gm20b_gr_clear_sm_hww, + .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf, + .get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs, + .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce, + .set_boosted_ctx = NULL, + .set_preemption_mode = vgpu_gr_gp10b_set_preemption_mode, + .set_czf_bypass = gr_gp10b_set_czf_bypass, + .init_czf_bypass = gr_gp10b_init_czf_bypass, + .pre_process_sm_exception = gr_gp10b_pre_process_sm_exception, + .set_preemption_buffer_va = gr_gp10b_set_preemption_buffer_va, + .init_preemption_state = gr_gp10b_init_preemption_state, + .update_boosted_ctx = NULL, + .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, + .create_gr_sysfs = gr_gp10b_create_sysfs, + .set_ctxsw_preemption_mode = + vgpu_gr_gp10b_set_ctxsw_preemption_mode, + .init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data, + }, + .fb = { + .reset = fb_gk20a_reset, + .init_hw = gk20a_fb_init_hw, + .init_fs_state = fb_gm20b_init_fs_state, + .set_mmu_page_size = gm20b_fb_set_mmu_page_size, + .set_use_full_comp_tag_line = + gm20b_fb_set_use_full_comp_tag_line, + .compression_page_size = gp10b_fb_compression_page_size, + .compressible_page_size = gp10b_fb_compressible_page_size, + .vpr_info_fetch = gm20b_fb_vpr_info_fetch, + .dump_vpr_wpr_info = gm20b_fb_dump_vpr_wpr_info, + .read_wpr_info = gm20b_fb_read_wpr_info, + .is_debug_mode_enabled = NULL, + .set_debug_mode = vgpu_mm_mmu_set_debug_mode, + .tlb_invalidate = vgpu_mm_tlb_invalidate, + }, + .clock_gating = { + .slcg_bus_load_gating_prod = + gp10b_slcg_bus_load_gating_prod, + .slcg_ce2_load_gating_prod = + gp10b_slcg_ce2_load_gating_prod, + .slcg_chiplet_load_gating_prod = + gp10b_slcg_chiplet_load_gating_prod, + .slcg_ctxsw_firmware_load_gating_prod = + gp10b_slcg_ctxsw_firmware_load_gating_prod, + .slcg_fb_load_gating_prod = + gp10b_slcg_fb_load_gating_prod, + .slcg_fifo_load_gating_prod = + gp10b_slcg_fifo_load_gating_prod, + .slcg_gr_load_gating_prod = + gr_gp10b_slcg_gr_load_gating_prod, + .slcg_ltc_load_gating_prod = + ltc_gp10b_slcg_ltc_load_gating_prod, + .slcg_perf_load_gating_prod = + gp10b_slcg_perf_load_gating_prod, + .slcg_priring_load_gating_prod = + gp10b_slcg_priring_load_gating_prod, + .slcg_pmu_load_gating_prod = + gp10b_slcg_pmu_load_gating_prod, + .slcg_therm_load_gating_prod = + gp10b_slcg_therm_load_gating_prod, + .slcg_xbar_load_gating_prod = + gp10b_slcg_xbar_load_gating_prod, + .blcg_bus_load_gating_prod = + gp10b_blcg_bus_load_gating_prod, + .blcg_ce_load_gating_prod = + gp10b_blcg_ce_load_gating_prod, + .blcg_ctxsw_firmware_load_gating_prod = + gp10b_blcg_ctxsw_firmware_load_gating_prod, + .blcg_fb_load_gating_prod = + gp10b_blcg_fb_load_gating_prod, + .blcg_fifo_load_gating_prod = + gp10b_blcg_fifo_load_gating_prod, + .blcg_gr_load_gating_prod = + gp10b_blcg_gr_load_gating_prod, + .blcg_ltc_load_gating_prod = + gp10b_blcg_ltc_load_gating_prod, + .blcg_pwr_csb_load_gating_prod = + gp10b_blcg_pwr_csb_load_gating_prod, + .blcg_pmu_load_gating_prod = + gp10b_blcg_pmu_load_gating_prod, + .blcg_xbar_load_gating_prod = + gp10b_blcg_xbar_load_gating_prod, + .pg_gr_load_gating_prod = + gr_gp10b_pg_gr_load_gating_prod, + }, + .fifo = { + .init_fifo_setup_hw = vgpu_init_fifo_setup_hw, + .bind_channel = vgpu_channel_bind, + .unbind_channel = vgpu_channel_unbind, + .disable_channel = vgpu_channel_disable, + .enable_channel = 
vgpu_channel_enable, + .alloc_inst = vgpu_channel_alloc_inst, + .free_inst = vgpu_channel_free_inst, + .setup_ramfc = vgpu_channel_setup_ramfc, + .channel_set_timeslice = vgpu_channel_set_timeslice, + .default_timeslice_us = vgpu_fifo_default_timeslice_us, + .setup_userd = gk20a_fifo_setup_userd, + .userd_gp_get = gk20a_fifo_userd_gp_get, + .userd_gp_put = gk20a_fifo_userd_gp_put, + .userd_pb_get = gk20a_fifo_userd_pb_get, + .pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val, + .preempt_channel = vgpu_fifo_preempt_channel, + .preempt_tsg = vgpu_fifo_preempt_tsg, + .enable_tsg = vgpu_enable_tsg, + .disable_tsg = gk20a_disable_tsg, + .tsg_verify_channel_status = NULL, + .tsg_verify_status_ctx_reload = NULL, + .reschedule_runlist = NULL, + .update_runlist = vgpu_fifo_update_runlist, + .trigger_mmu_fault = gm20b_fifo_trigger_mmu_fault, + .get_mmu_fault_info = gp10b_fifo_get_mmu_fault_info, + .wait_engine_idle = vgpu_fifo_wait_engine_idle, + .get_num_fifos = gm20b_fifo_get_num_fifos, + .get_pbdma_signature = gp10b_fifo_get_pbdma_signature, + .set_runlist_interleave = vgpu_fifo_set_runlist_interleave, + .tsg_set_timeslice = vgpu_tsg_set_timeslice, + .tsg_open = vgpu_tsg_open, + .force_reset_ch = vgpu_fifo_force_reset_ch, + .engine_enum_from_type = gp10b_fifo_engine_enum_from_type, + .device_info_data_parse = gp10b_device_info_data_parse, + .eng_runlist_base_size = fifo_eng_runlist_base__size_1_v, + .init_engine_info = vgpu_fifo_init_engine_info, + .runlist_entry_size = ram_rl_entry_size_v, + .get_tsg_runlist_entry = gk20a_get_tsg_runlist_entry, + .get_ch_runlist_entry = gk20a_get_ch_runlist_entry, + .is_fault_engine_subid_gpc = gk20a_is_fault_engine_subid_gpc, + .dump_pbdma_status = gk20a_dump_pbdma_status, + .dump_eng_status = gk20a_dump_eng_status, + .dump_channel_status_ramfc = gk20a_dump_channel_status_ramfc, + .intr_0_error_mask = gk20a_fifo_intr_0_error_mask, + .is_preempt_pending = gk20a_fifo_is_preempt_pending, + .init_pbdma_intr_descs = gp10b_fifo_init_pbdma_intr_descs, + .reset_enable_hw = gk20a_init_fifo_reset_enable_hw, + .teardown_ch_tsg = gk20a_fifo_teardown_ch_tsg, + .handle_sched_error = gk20a_fifo_handle_sched_error, + .handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0, + .handle_pbdma_intr_1 = gk20a_fifo_handle_pbdma_intr_1, + .tsg_bind_channel = vgpu_tsg_bind_channel, + .tsg_unbind_channel = vgpu_tsg_unbind_channel, +#ifdef CONFIG_TEGRA_GK20A_NVHOST + .alloc_syncpt_buf = gk20a_fifo_alloc_syncpt_buf, + .free_syncpt_buf = gk20a_fifo_free_syncpt_buf, + .add_syncpt_wait_cmd = gk20a_fifo_add_syncpt_wait_cmd, + .get_syncpt_wait_cmd_size = gk20a_fifo_get_syncpt_wait_cmd_size, + .add_syncpt_incr_cmd = gk20a_fifo_add_syncpt_incr_cmd, + .get_syncpt_incr_cmd_size = gk20a_fifo_get_syncpt_incr_cmd_size, +#endif + .resetup_ramfc = NULL, + .device_info_fault_id = top_device_info_data_fault_id_enum_v, + }, + .gr_ctx = { + .get_netlist_name = gr_gp10b_get_netlist_name, + .is_fw_defined = gr_gp10b_is_firmware_defined, + }, +#ifdef CONFIG_GK20A_CTXSW_TRACE + .fecs_trace = { + .alloc_user_buffer = vgpu_alloc_user_buffer, + .free_user_buffer = vgpu_free_user_buffer, + .mmap_user_buffer = vgpu_mmap_user_buffer, + .init = vgpu_fecs_trace_init, + .deinit = vgpu_fecs_trace_deinit, + .enable = vgpu_fecs_trace_enable, + .disable = vgpu_fecs_trace_disable, + .is_enabled = vgpu_fecs_trace_is_enabled, + .reset = NULL, + .flush = NULL, + .poll = vgpu_fecs_trace_poll, + .bind_channel = NULL, + .unbind_channel = NULL, + .max_entries = vgpu_fecs_trace_max_entries, + .set_filter = vgpu_fecs_trace_set_filter, 
+ }, +#endif /* CONFIG_GK20A_CTXSW_TRACE */ + .mm = { + /* FIXME: add support for sparse mappings */ + .support_sparse = NULL, + .gmmu_map = vgpu_gp10b_locked_gmmu_map, + .gmmu_unmap = vgpu_locked_gmmu_unmap, + .vm_bind_channel = vgpu_vm_bind_channel, + .fb_flush = vgpu_mm_fb_flush, + .l2_invalidate = vgpu_mm_l2_invalidate, + .l2_flush = vgpu_mm_l2_flush, + .cbc_clean = gk20a_mm_cbc_clean, + .set_big_page_size = gm20b_mm_set_big_page_size, + .get_big_page_sizes = gm20b_mm_get_big_page_sizes, + .get_default_big_page_size = gp10b_mm_get_default_big_page_size, + .gpu_phys_addr = gm20b_gpu_phys_addr, + .get_iommu_bit = gk20a_mm_get_iommu_bit, + .get_mmu_levels = gp10b_mm_get_mmu_levels, + .init_pdb = gp10b_mm_init_pdb, + .init_mm_setup_hw = vgpu_gp10b_init_mm_setup_hw, + .is_bar1_supported = gm20b_mm_is_bar1_supported, + .init_inst_block = gk20a_init_inst_block, + .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, + .init_bar2_vm = gb10b_init_bar2_vm, + .init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup, + .remove_bar2_vm = gp10b_remove_bar2_vm, + .get_kind_invalid = gm20b_get_kind_invalid, + .get_kind_pitch = gm20b_get_kind_pitch, + }, + .pramin = { + .enter = gk20a_pramin_enter, + .exit = gk20a_pramin_exit, + .data032_r = pram_data032_r, + }, + .therm = { + .init_therm_setup_hw = gp10b_init_therm_setup_hw, + .elcg_init_idle_filters = gp10b_elcg_init_idle_filters, + }, + .pmu = { + .pmu_setup_elpg = gp10b_pmu_setup_elpg, + .pmu_get_queue_head = pwr_pmu_queue_head_r, + .pmu_get_queue_head_size = pwr_pmu_queue_head__size_1_v, + .pmu_get_queue_tail = pwr_pmu_queue_tail_r, + .pmu_get_queue_tail_size = pwr_pmu_queue_tail__size_1_v, + .pmu_queue_head = gk20a_pmu_queue_head, + .pmu_queue_tail = gk20a_pmu_queue_tail, + .pmu_msgq_tail = gk20a_pmu_msgq_tail, + .pmu_mutex_size = pwr_pmu_mutex__size_1_v, + .pmu_mutex_acquire = gk20a_pmu_mutex_acquire, + .pmu_mutex_release = gk20a_pmu_mutex_release, + .write_dmatrfbase = gp10b_write_dmatrfbase, + .pmu_elpg_statistics = gp10b_pmu_elpg_statistics, + .pmu_pg_init_param = gp10b_pg_gr_init, + .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, + .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, + .dump_secure_fuses = pmu_dump_security_fuses_gp10b, + .reset_engine = gk20a_pmu_engine_reset, + .is_engine_in_reset = gk20a_pmu_is_engine_in_reset, + }, + .regops = { + .get_global_whitelist_ranges = + gp10b_get_global_whitelist_ranges, + .get_global_whitelist_ranges_count = + gp10b_get_global_whitelist_ranges_count, + .get_context_whitelist_ranges = + gp10b_get_context_whitelist_ranges, + .get_context_whitelist_ranges_count = + gp10b_get_context_whitelist_ranges_count, + .get_runcontrol_whitelist = gp10b_get_runcontrol_whitelist, + .get_runcontrol_whitelist_count = + gp10b_get_runcontrol_whitelist_count, + .get_runcontrol_whitelist_ranges = + gp10b_get_runcontrol_whitelist_ranges, + .get_runcontrol_whitelist_ranges_count = + gp10b_get_runcontrol_whitelist_ranges_count, + .get_qctl_whitelist = gp10b_get_qctl_whitelist, + .get_qctl_whitelist_count = gp10b_get_qctl_whitelist_count, + .get_qctl_whitelist_ranges = gp10b_get_qctl_whitelist_ranges, + .get_qctl_whitelist_ranges_count = + gp10b_get_qctl_whitelist_ranges_count, + .apply_smpc_war = gp10b_apply_smpc_war, + }, + .mc = { + .intr_enable = mc_gp10b_intr_enable, + .intr_unit_config = mc_gp10b_intr_unit_config, + .isr_stall = mc_gp10b_isr_stall, + .intr_stall = mc_gp10b_intr_stall, + .intr_stall_pause = mc_gp10b_intr_stall_pause, + .intr_stall_resume = mc_gp10b_intr_stall_resume, + .intr_nonstall = 
mc_gp10b_intr_nonstall, + .intr_nonstall_pause = mc_gp10b_intr_nonstall_pause, + .intr_nonstall_resume = mc_gp10b_intr_nonstall_resume, + .enable = gk20a_mc_enable, + .disable = gk20a_mc_disable, + .reset = gk20a_mc_reset, + .boot_0 = gk20a_mc_boot_0, + .is_intr1_pending = mc_gp10b_is_intr1_pending, + }, + .debug = { + .show_dump = NULL, + }, + .dbg_session_ops = { + .exec_reg_ops = vgpu_exec_regops, + .dbg_set_powergate = vgpu_dbg_set_powergate, + .check_and_set_global_reservation = + vgpu_check_and_set_global_reservation, + .check_and_set_context_reservation = + vgpu_check_and_set_context_reservation, + .release_profiler_reservation = + vgpu_release_profiler_reservation, + .perfbuffer_enable = vgpu_perfbuffer_enable, + .perfbuffer_disable = vgpu_perfbuffer_disable, + }, + .bus = { + .init_hw = gk20a_bus_init_hw, + .isr = gk20a_bus_isr, + .read_ptimer = vgpu_read_ptimer, + .get_timestamps_zipper = vgpu_get_timestamps_zipper, + .bar1_bind = gk20a_bus_bar1_bind, + }, +#if defined(CONFIG_GK20A_CYCLE_STATS) + .css = { + .enable_snapshot = vgpu_css_enable_snapshot_buffer, + .disable_snapshot = vgpu_css_release_snapshot_buffer, + .check_data_available = vgpu_css_flush_snapshots, + .detach_snapshot = vgpu_css_detach, + .set_handled_snapshots = NULL, + .allocate_perfmon_ids = NULL, + .release_perfmon_ids = NULL, + }, +#endif + .falcon = { + .falcon_hal_sw_init = gk20a_falcon_hal_sw_init, + }, + .priv_ring = { + .isr = gp10b_priv_ring_isr, + }, + .chip_init_gpu_characteristics = vgpu_init_gpu_characteristics, + .get_litter_value = gp10b_get_litter_value, +}; + +int vgpu_gp10b_init_hal(struct gk20a *g) +{ + struct gpu_ops *gops = &g->ops; + u32 val; + + gops->ltc = vgpu_gp10b_ops.ltc; + gops->ce2 = vgpu_gp10b_ops.ce2; + gops->gr = vgpu_gp10b_ops.gr; + gops->fb = vgpu_gp10b_ops.fb; + gops->clock_gating = vgpu_gp10b_ops.clock_gating; + gops->fifo = vgpu_gp10b_ops.fifo; + gops->gr_ctx = vgpu_gp10b_ops.gr_ctx; + gops->fecs_trace = vgpu_gp10b_ops.fecs_trace; + gops->mm = vgpu_gp10b_ops.mm; + gops->pramin = vgpu_gp10b_ops.pramin; + gops->therm = vgpu_gp10b_ops.therm; + gops->pmu = vgpu_gp10b_ops.pmu; + gops->regops = vgpu_gp10b_ops.regops; + gops->mc = vgpu_gp10b_ops.mc; + gops->debug = vgpu_gp10b_ops.debug; + gops->dbg_session_ops = vgpu_gp10b_ops.dbg_session_ops; + gops->bus = vgpu_gp10b_ops.bus; +#if defined(CONFIG_GK20A_CYCLE_STATS) + gops->css = vgpu_gp10b_ops.css; +#endif + gops->falcon = vgpu_gp10b_ops.falcon; + + gops->priv_ring = vgpu_gp10b_ops.priv_ring; + + /* Lone Functions */ + gops->chip_init_gpu_characteristics = + vgpu_gp10b_ops.chip_init_gpu_characteristics; + gops->get_litter_value = vgpu_gp10b_ops.get_litter_value; + + __nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true); + __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false); + +#ifdef CONFIG_TEGRA_ACR + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { + __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false); + __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, false); + } else if (g->is_virtual) { + __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true); + __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, true); + } else { + val = gk20a_readl(g, fuse_opt_priv_sec_en_r()); + if (val) { + __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true); + __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, true); + } else { + gk20a_dbg_info("priv security is disabled in HW"); + __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false); + __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, false); + } + } +#else + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { + 
gk20a_dbg_info("running simulator with PRIV security disabled"); + __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false); + __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, false); + } else { + val = gk20a_readl(g, fuse_opt_priv_sec_en_r()); + if (val) { + gk20a_dbg_info("priv security is not supported but enabled"); + __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true); + __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, true); + return -EPERM; + } else { + __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false); + __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, false); + } + } +#endif + + /* priv security dependent ops */ + if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { + /* Add in ops from gm20b acr */ + gops->pmu.is_pmu_supported = gm20b_is_pmu_supported, + gops->pmu.prepare_ucode = prepare_ucode_blob, + gops->pmu.pmu_setup_hw_and_bootstrap = gm20b_bootstrap_hs_flcn, + gops->pmu.is_lazy_bootstrap = gm20b_is_lazy_bootstrap, + gops->pmu.is_priv_load = gm20b_is_priv_load, + gops->pmu.get_wpr = gm20b_wpr_info, + gops->pmu.alloc_blob_space = gm20b_alloc_blob_space, + gops->pmu.pmu_populate_loader_cfg = + gm20b_pmu_populate_loader_cfg, + gops->pmu.flcn_populate_bl_dmem_desc = + gm20b_flcn_populate_bl_dmem_desc, + gops->pmu.falcon_wait_for_halt = pmu_wait_for_halt, + gops->pmu.falcon_clear_halt_interrupt_status = + clear_halt_interrupt_status, + gops->pmu.init_falcon_setup_hw = gm20b_init_pmu_setup_hw1, + + gops->pmu.init_wpr_region = gm20b_pmu_init_acr; + gops->pmu.load_lsfalcon_ucode = gp10b_load_falcon_ucode; + gops->pmu.is_lazy_bootstrap = gp10b_is_lazy_bootstrap; + gops->pmu.is_priv_load = gp10b_is_priv_load; + + gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode; + } else { + /* Inherit from gk20a */ + gops->pmu.is_pmu_supported = gk20a_is_pmu_supported, + gops->pmu.prepare_ucode = nvgpu_pmu_prepare_ns_ucode_blob, + gops->pmu.pmu_setup_hw_and_bootstrap = gk20a_init_pmu_setup_hw1, + gops->pmu.pmu_nsbootstrap = pmu_bootstrap, + + gops->pmu.load_lsfalcon_ucode = NULL; + gops->pmu.init_wpr_region = NULL; + gops->pmu.pmu_setup_hw_and_bootstrap = gp10b_init_pmu_setup_hw1; + + gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; + } + + __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); + g->pmu_lsf_pmu_wpr_init_done = 0; + g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; + + g->name = "gp10b"; + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_mm_gp10b.c new file mode 100644 index 00000000..9eb140a3 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_mm_gp10b.c @@ -0,0 +1,197 @@ +/* + * Virtualized GPU Memory Management + * + * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include + +#include "common/linux/vgpu/vgpu.h" +#include "vgpu_mm_gp10b.h" +#include "gk20a/mm_gk20a.h" + +#include + +int vgpu_gp10b_init_mm_setup_hw(struct gk20a *g) +{ + g->mm.bypass_smmu = true; + g->mm.disable_bigpage = true; + return 0; +} + +static inline int add_mem_desc(struct tegra_vgpu_mem_desc *mem_desc, + u64 addr, u64 size, size_t *oob_size) +{ + if (*oob_size < sizeof(*mem_desc)) + return -ENOMEM; + + mem_desc->addr = addr; + mem_desc->length = size; + *oob_size -= sizeof(*mem_desc); + return 0; +} + +u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, + u64 map_offset, + struct nvgpu_sgt *sgt, + u64 buffer_offset, + u64 size, + int pgsz_idx, + u8 kind_v, + u32 ctag_offset, + u32 flags, + int rw_flag, + bool clear_ctags, + bool sparse, + bool priv, + struct vm_gk20a_mapping_batch *batch, + enum nvgpu_aperture aperture) +{ + int err = 0; + struct gk20a *g = gk20a_from_vm(vm); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_as_map_ex_params *p = &msg.params.as_map_ex; + struct tegra_vgpu_mem_desc *mem_desc; + u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; + u64 buffer_size = PAGE_ALIGN(size); + u64 space_to_skip = buffer_offset; + u32 mem_desc_count = 0, i; + void *handle = NULL; + size_t oob_size; + u8 prot; + void *sgl; + + gk20a_dbg_fn(""); + + /* FIXME: add support for sparse mappings */ + + if (WARN_ON(!sgt) || WARN_ON(!g->mm.bypass_smmu)) + return 0; + + if (space_to_skip & (page_size - 1)) + return 0; + + memset(&msg, 0, sizeof(msg)); + + /* Allocate (or validate when map_offset != 0) the virtual address. */ + if (!map_offset) { + map_offset = __nvgpu_vm_alloc_va(vm, size, pgsz_idx); + if (!map_offset) { + nvgpu_err(g, "failed to allocate va space"); + err = -ENOMEM; + goto fail; + } + } + + handle = tegra_gr_comm_oob_get_ptr(TEGRA_GR_COMM_CTX_CLIENT, + tegra_gr_comm_get_server_vmid(), + TEGRA_VGPU_QUEUE_CMD, + (void **)&mem_desc, &oob_size); + if (!handle) { + err = -EINVAL; + goto fail; + } + sgl = sgt->sgl; + while (sgl) { + u64 phys_addr; + u64 chunk_length; + + /* + * Cut out sgl ents for space_to_skip. + */ + if (space_to_skip && + space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) { + space_to_skip -= nvgpu_sgt_get_length(sgt, sgl); + sgl = nvgpu_sgt_get_next(sgt, sgl); + continue; + } + + phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip; + chunk_length = min(size, + nvgpu_sgt_get_length(sgt, sgl) - space_to_skip); + + if (add_mem_desc(&mem_desc[mem_desc_count++], phys_addr, + chunk_length, &oob_size)) { + err = -ENOMEM; + goto fail; + } + + space_to_skip = 0; + size -= chunk_length; + sgl = nvgpu_sgt_get_next(sgt, sgl); + + if (size == 0) + break; + } + + if (rw_flag == gk20a_mem_flag_read_only) + prot = TEGRA_VGPU_MAP_PROT_READ_ONLY; + else if (rw_flag == gk20a_mem_flag_write_only) + prot = TEGRA_VGPU_MAP_PROT_WRITE_ONLY; + else + prot = TEGRA_VGPU_MAP_PROT_NONE; + + if (pgsz_idx == gmmu_page_size_kernel) { + if (page_size == vm->gmmu_page_sizes[gmmu_page_size_small]) { + pgsz_idx = gmmu_page_size_small; + } else if (page_size == + vm->gmmu_page_sizes[gmmu_page_size_big]) { + pgsz_idx = gmmu_page_size_big; + } else { + nvgpu_err(g, "invalid kernel page size %d", + page_size); + goto fail; + } + } + + msg.cmd = TEGRA_VGPU_CMD_AS_MAP_EX; + msg.handle = vgpu_get_handle(g); + p->handle = vm->handle; + p->gpu_va = map_offset; + p->size = buffer_size; + p->mem_desc_count = mem_desc_count; + p->pgsz_idx = pgsz_idx; + p->iova = 0; + p->kind = kind_v; + p->cacheable = (flags & NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE) ? 
1 : 0;
+	p->prot = prot;
+	p->ctag_offset = ctag_offset;
+	p->clear_ctags = clear_ctags;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	if (err || msg.ret)
+		goto fail;
+
+	/* TLB invalidate handled on server side */
+
+	tegra_gr_comm_oob_put_ptr(handle);
+	return map_offset;
+fail:
+	if (handle)
+		tegra_gr_comm_oob_put_ptr(handle);
+	nvgpu_err(g, "Failed: err=%d, msg.ret=%d", err, msg.ret);
+	nvgpu_err(g,
+		  " Map: %-5s GPU virt %#-12llx +%#-9llx "
+		  "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | "
+		  "kind=%#02x APT=%-6s",
+		  vm->name, map_offset, buffer_size, buffer_offset,
+		  vm->gmmu_page_sizes[pgsz_idx] >> 10,
+		  nvgpu_gmmu_perm_str(rw_flag),
+		  kind_v, "SYSMEM");
+	for (i = 0; i < mem_desc_count; i++)
+		nvgpu_err(g, "  > 0x%010llx + 0x%llx",
+			  mem_desc[i].addr, mem_desc[i].length);
+
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_mm_gp10b.h b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_mm_gp10b.h
new file mode 100644
index 00000000..0a477dd0
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_mm_gp10b.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __VGPU_MM_GP10B_H__
+#define __VGPU_MM_GP10B_H__
+
+#include "gk20a/gk20a.h"
+
+u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
+			u64 map_offset,
+			struct nvgpu_sgt *sgt,
+			u64 buffer_offset,
+			u64 size,
+			int pgsz_idx,
+			u8 kind_v,
+			u32 ctag_offset,
+			u32 flags,
+			int rw_flag,
+			bool clear_ctags,
+			bool sparse,
+			bool priv,
+			struct vm_gk20a_mapping_batch *batch,
+			enum nvgpu_aperture aperture);
+int vgpu_gp10b_init_mm_setup_hw(struct gk20a *g);
+
+#endif
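The commit message also covers Makefile updates and #include path updates in the files that consume this code; those changes fall outside this excerpt, which shows only the new gp10b files. As a rough, hypothetical sketch of what such an include update looks like (the old "vgpu/gp10b/..." location and the caller are assumptions, not shown in this patch):

/* Hypothetical consumer of the moved gp10b VGPU code; paths below are
 * illustrative only. */

/* Before the move (assumed old common location): */
/* #include "vgpu/gp10b/vgpu_gr_gp10b.h" */

/* After the move (Linux-specific location added by this patch): */
#include "common/linux/vgpu/gp10b/vgpu_gr_gp10b.h"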