From 2f6698b863c9cc1db6455637b7c72e812b470b93 Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Fri, 15 Dec 2017 09:04:15 -0800 Subject: gpu: nvgpu: Make graphics context property of TSG Move graphics context ownership to TSG instead of channel. Combine channel_ctx_gk20a and gr_ctx_desc to one structure, because the split between them was arbitrary. Move context header to be property of channel. Bug 1842197 Change-Id: I410e3262f80b318d8528bcbec270b63a2d8d2ff9 Signed-off-by: Terje Bergstrom Reviewed-on: https://git-master.nvidia.com/r/1639532 Reviewed-by: Seshendra Gadagottu Tested-by: Seshendra Gadagottu Reviewed-by: svc-mobile-coverity GVS: Gerrit_Virtual_Submit Reviewed-by: Konsta Holtta Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/linux/debug_fifo.c | 4 +- drivers/gpu/nvgpu/common/linux/ioctl_channel.c | 8 +- drivers/gpu/nvgpu/common/linux/sched.c | 13 +- .../nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c | 25 +- .../nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h | 4 +- .../nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c | 1 - drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c | 343 ++++++----- drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h | 1 + .../nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c | 1 - .../common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c | 4 +- drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h | 8 +- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 6 +- drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 23 +- drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 12 +- drivers/gpu/nvgpu/gk20a/gk20a.h | 20 +- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 641 ++++++++++----------- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 40 +- drivers/gpu/nvgpu/gk20a/tsg_gk20a.c | 5 +- drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | 6 +- drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 50 +- drivers/gpu/nvgpu/gm20b/gr_gm20b.h | 10 +- drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 1 - drivers/gpu/nvgpu/gp106/gr_gp106.c | 2 +- drivers/gpu/nvgpu/gp106/gr_gp106.h | 2 +- drivers/gpu/nvgpu/gp106/hal_gp106.c | 1 - drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 202 ++++--- drivers/gpu/nvgpu/gp10b/gr_gp10b.h | 19 +- drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 1 - drivers/gpu/nvgpu/gv100/hal_gv100.c | 1 - drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 66 ++- drivers/gpu/nvgpu/gv11b/gr_gv11b.h | 9 +- drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 1 - drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | 42 +- 33 files changed, 833 insertions(+), 739 deletions(-) diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c index ad157ee7..aeab0c92 100644 --- a/drivers/gpu/nvgpu/common/linux/debug_fifo.c +++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.c @@ -91,8 +91,8 @@ static int gk20a_fifo_sched_debugfs_seq_show( tsg->timeslice_us, ch->timeout_ms_max, tsg->interleave_level, - ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX, - ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX); + tsg->gr_ctx.graphics_preempt_mode, + tsg->gr_ctx.compute_preempt_mode); gk20a_channel_put(ch); } return 0; diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c index 94501a89..e8f4c14b 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c @@ -85,10 +85,10 @@ static void gk20a_channel_trace_sched_param( tsg_gk20a_from_ch(ch)->timeslice_us, ch->timeout_ms_max, gk20a_fifo_interleave_level_name(tsg->interleave_level), - gr_gk20a_graphics_preempt_mode_name(ch->ch_ctx.gr_ctx ? 
- ch->ch_ctx.gr_ctx->graphics_preempt_mode : 0), - gr_gk20a_compute_preempt_mode_name(ch->ch_ctx.gr_ctx ? - ch->ch_ctx.gr_ctx->compute_preempt_mode : 0)); + gr_gk20a_graphics_preempt_mode_name( + tsg->gr_ctx.graphics_preempt_mode), + gr_gk20a_compute_preempt_mode_name( + tsg->gr_ctx.compute_preempt_mode)); } /* diff --git a/drivers/gpu/nvgpu/common/linux/sched.c b/drivers/gpu/nvgpu/common/linux/sched.c index fc3f6ed8..e6211790 100644 --- a/drivers/gpu/nvgpu/common/linux/sched.c +++ b/drivers/gpu/nvgpu/common/linux/sched.c @@ -198,15 +198,10 @@ static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched, arg->runlist_interleave = tsg->interleave_level; arg->timeslice = tsg->timeslice_us; - if (tsg->tsg_gr_ctx) { - arg->graphics_preempt_mode = - tsg->tsg_gr_ctx->graphics_preempt_mode; - arg->compute_preempt_mode = - tsg->tsg_gr_ctx->compute_preempt_mode; - } else { - arg->graphics_preempt_mode = 0; - arg->compute_preempt_mode = 0; - } + arg->graphics_preempt_mode = + tsg->gr_ctx.graphics_preempt_mode; + arg->compute_preempt_mode = + tsg->gr_ctx.compute_preempt_mode; nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c index ed61f16b..9adf20d1 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c @@ -27,12 +27,11 @@ #include int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, - struct gr_ctx_desc **__gr_ctx, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 flags) { - struct gr_ctx_desc *gr_ctx; u32 graphics_preempt_mode = 0; u32 compute_preempt_mode = 0; struct vgpu_priv_data *priv = vgpu_get_priv_data(g); @@ -40,12 +39,10 @@ int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, gk20a_dbg_fn(""); - err = vgpu_gr_alloc_gr_ctx(g, __gr_ctx, vm, class, flags); + err = vgpu_gr_alloc_gr_ctx(g, gr_ctx, vm, class, flags); if (err) return err; - gr_ctx = *__gr_ctx; - if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) @@ -84,7 +81,7 @@ fail: } int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, - struct gr_ctx_desc *gr_ctx, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 graphics_preempt_mode, u32 compute_preempt_mode) @@ -240,7 +237,7 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, u32 graphics_preempt_mode, u32 compute_preempt_mode) { - struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; + struct nvgpu_gr_ctx *gr_ctx; struct gk20a *g = ch->g; struct tsg_gk20a *tsg; struct vm_gk20a *vm; @@ -251,6 +248,13 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, if (!class) return -EINVAL; + tsg = tsg_gk20a_from_ch(ch); + if (!tsg) + return -EINVAL; + + vm = tsg->vm; + gr_ctx = &tsg->gr_ctx; + /* skip setting anything if both modes are already set */ if (graphics_preempt_mode && (graphics_preempt_mode == gr_ctx->graphics_preempt_mode)) @@ -263,13 +267,6 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, if (graphics_preempt_mode == 0 && compute_preempt_mode == 0) return 0; - if (gk20a_is_channel_marked_as_tsg(ch)) { - tsg = &g->fifo.tsg[ch->tsgid]; - vm = tsg->vm; - } else { - vm = ch->vm; - } - if (g->ops.gr.set_ctxsw_preemption_mode) { err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class, graphics_preempt_mode, diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h 
b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h index 31b88d19..559bd227 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h @@ -20,12 +20,12 @@ #include "gk20a/gk20a.h" int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, - struct gr_ctx_desc **__gr_ctx, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 flags); int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, - struct gr_ctx_desc *gr_ctx, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 graphics_preempt_mode, u32 compute_preempt_mode); diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c index e8cb96b4..d5fd5102 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c @@ -112,7 +112,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, - .free_channel_ctx = vgpu_gr_free_channel_ctx, .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, .get_zcull_info = vgpu_gr_get_zcull_info, diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c index e8790587..8f1c5d78 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c @@ -20,14 +20,18 @@ #include #include +#include #include #include #include "vgpu.h" #include "gr_vgpu.h" #include "gk20a/dbg_gpu_gk20a.h" +#include "gk20a/channel_gk20a.h" +#include "gk20a/tsg_gk20a.h" #include +#include void vgpu_gr_detect_sm_arch(struct gk20a *g) { @@ -152,8 +156,9 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; struct vm_gk20a *ch_vm = c->vm; - u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; - u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; + struct tsg_gk20a *tsg; + u64 *g_bfr_va; + u64 *g_bfr_size; struct gr_gk20a *gr = &g->gr; u64 gpu_va; u32 i; @@ -161,7 +166,12 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, gk20a_dbg_fn(""); - /* FIXME: add VPR support */ + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va; + g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size; /* Circular Buffer */ gpu_va = __nvgpu_vm_alloc_va(ch_vm, @@ -213,7 +223,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, if (err || msg.ret) goto clean_up; - c->ch_ctx.global_ctx_buffer_mapped = true; + tsg->gr_ctx.global_ctx_buffer_mapped = true; return 0; clean_up: @@ -227,40 +237,33 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, return -ENOMEM; } -static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c) +static void vgpu_gr_unmap_global_ctx_buffers(struct tsg_gk20a *tsg) { - struct vm_gk20a *ch_vm = c->vm; - u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; - u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; + struct vm_gk20a *ch_vm = tsg->vm; + u64 *g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va; + u64 *g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size; u32 i; gk20a_dbg_fn(""); - if (c->ch_ctx.global_ctx_buffer_mapped) { - struct tegra_vgpu_cmd_msg msg; - struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; - int err; + if (tsg->gr_ctx.global_ctx_buffer_mapped) { + /* 
server will unmap on channel close */ - msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNMAP_GR_GLOBAL_CTX; - msg.handle = vgpu_get_handle(c->g); - p->handle = c->virt_ctx; - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - WARN_ON(err || msg.ret); - } - - for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { - if (g_bfr_va[i]) { - __nvgpu_vm_free_va(ch_vm, g_bfr_va[i], - gmmu_page_size_kernel); - g_bfr_va[i] = 0; - g_bfr_size[i] = 0; + for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { + if (g_bfr_va[i]) { + __nvgpu_vm_free_va(ch_vm, g_bfr_va[i], + gmmu_page_size_kernel); + g_bfr_va[i] = 0; + g_bfr_size[i] = 0; + } } + + tsg->gr_ctx.global_ctx_buffer_mapped = false; } - c->ch_ctx.global_ctx_buffer_mapped = false; } int vgpu_gr_alloc_gr_ctx(struct gk20a *g, - struct gr_ctx_desc **__gr_ctx, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 flags) @@ -268,7 +271,6 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g, struct tegra_vgpu_cmd_msg msg = {0}; struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; struct gr_gk20a *gr = &g->gr; - struct gr_ctx_desc *gr_ctx; int err; gk20a_dbg_fn(""); @@ -280,19 +282,14 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g, gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; - gr_ctx = nvgpu_kzalloc(g, sizeof(*gr_ctx)); - if (!gr_ctx) - return -ENOMEM; - - gr_ctx->mem.size = gr->ctx_vars.buffer_total_size; gr_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(vm, - gr_ctx->mem.size, + gr->ctx_vars.buffer_total_size, gmmu_page_size_kernel); - if (!gr_ctx->mem.gpu_va) { - nvgpu_kfree(g, gr_ctx); + if (!gr_ctx->mem.gpu_va) return -ENOMEM; - } + gr_ctx->mem.size = gr->ctx_vars.buffer_total_size; + gr_ctx->mem.aperture = APERTURE_SYSMEM; msg.cmd = TEGRA_VGPU_CMD_GR_CTX_ALLOC; msg.handle = vgpu_get_handle(g); @@ -306,57 +303,19 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g, nvgpu_err(g, "fail to alloc gr_ctx"); __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, gmmu_page_size_kernel); - nvgpu_kfree(g, gr_ctx); + gr_ctx->mem.aperture = APERTURE_INVALID; } else { gr_ctx->virt_ctx = p->gr_ctx_handle; - *__gr_ctx = gr_ctx; } return err; } -void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, - struct gr_ctx_desc *gr_ctx) -{ - struct tegra_vgpu_cmd_msg msg; - struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; - int err; - - gk20a_dbg_fn(""); - - if (!gr_ctx || !gr_ctx->mem.gpu_va) - return; - - - msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE; - msg.handle = vgpu_get_handle(g); - p->gr_ctx_handle = gr_ctx->virt_ctx; - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - WARN_ON(err || msg.ret); - - __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, - gmmu_page_size_kernel); - - nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); - nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); - nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); - nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); - - nvgpu_kfree(g, gr_ctx); -} - -static void vgpu_gr_free_channel_gr_ctx(struct channel_gk20a *c) -{ - gk20a_dbg_fn(""); - - c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx); - c->ch_ctx.gr_ctx = NULL; -} - static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, struct channel_gk20a *c) { - struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; + struct tsg_gk20a *tsg; + struct patch_desc *patch_ctx; struct vm_gk20a *ch_vm = c->vm; struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; @@ -364,6 +323,11 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, gk20a_dbg_fn(""); 
+ tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + patch_ctx = &tsg->gr_ctx.patch_ctx; patch_ctx->mem.size = 128 * sizeof(u32); patch_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch_vm, patch_ctx->mem.size, @@ -385,37 +349,25 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, return err; } -static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c) +static void vgpu_gr_free_channel_patch_ctx(struct tsg_gk20a *tsg) { - struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; - struct vm_gk20a *ch_vm = c->vm; + struct patch_desc *patch_ctx = &tsg->gr_ctx.patch_ctx; gk20a_dbg_fn(""); if (patch_ctx->mem.gpu_va) { - struct tegra_vgpu_cmd_msg msg; - struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; - int err; + /* server will free on channel close */ - msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_PATCH_CTX; - msg.handle = vgpu_get_handle(c->g); - p->handle = c->virt_ctx; - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - WARN_ON(err || msg.ret); - - __nvgpu_vm_free_va(ch_vm, patch_ctx->mem.gpu_va, + __nvgpu_vm_free_va(tsg->vm, patch_ctx->mem.gpu_va, gmmu_page_size_kernel); patch_ctx->mem.gpu_va = 0; } } -static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c) +static void vgpu_gr_free_channel_pm_ctx(struct tsg_gk20a *tsg) { - struct tegra_vgpu_cmd_msg msg; - struct tegra_vgpu_channel_free_hwpm_ctx *p = &msg.params.free_hwpm_ctx; - struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + struct nvgpu_gr_ctx *ch_ctx = &tsg->gr_ctx; struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; - int err; gk20a_dbg_fn(""); @@ -423,44 +375,63 @@ static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c) if (pm_ctx->mem.gpu_va == 0) return; - msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX; - msg.handle = vgpu_get_handle(c->g); - p->handle = c->virt_ctx; - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - WARN_ON(err || msg.ret); + /* server will free on channel close */ - __nvgpu_vm_free_va(c->vm, pm_ctx->mem.gpu_va, + __nvgpu_vm_free_va(tsg->vm, pm_ctx->mem.gpu_va, gmmu_page_size_kernel); pm_ctx->mem.gpu_va = 0; } -void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg) +void vgpu_gr_free_gr_ctx(struct gk20a *g, + struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx) { + struct tsg_gk20a *tsg; + gk20a_dbg_fn(""); - if (c->g->ops.fifo.free_channel_ctx_header) - c->g->ops.fifo.free_channel_ctx_header(c); - vgpu_gr_unmap_global_ctx_buffers(c); - vgpu_gr_free_channel_patch_ctx(c); - vgpu_gr_free_channel_pm_ctx(c); - if (!is_tsg) - vgpu_gr_free_channel_gr_ctx(c); + if (gr_ctx->mem.gpu_va) { + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; + int err; - /* zcull_ctx, pm_ctx */ + msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE; + msg.handle = vgpu_get_handle(g); + p->gr_ctx_handle = gr_ctx->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); - memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a)); + __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, + gmmu_page_size_kernel); + + tsg = &g->fifo.tsg[gr_ctx->tsgid]; + vgpu_gr_unmap_global_ctx_buffers(tsg); + vgpu_gr_free_channel_patch_ctx(tsg); + vgpu_gr_free_channel_pm_ctx(tsg); + + nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); - c->first_init = false; + memset(gr_ctx, 0, sizeof(*gr_ctx)); + } } static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c) { - 
struct gr_ctx_desc *gr_ctx = c->ch_ctx.gr_ctx; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx; struct tegra_vgpu_cmd_msg msg = {0}; struct tegra_vgpu_channel_bind_gr_ctx_params *p = &msg.params.ch_bind_gr_ctx; int err; + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + gr_ctx = &tsg->gr_ctx; + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTX; msg.handle = vgpu_get_handle(c->g); p->ch_handle = c->virt_ctx; @@ -474,7 +445,7 @@ static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c) static int vgpu_gr_tsg_bind_gr_ctx(struct tsg_gk20a *tsg) { - struct gr_ctx_desc *gr_ctx = tsg->tsg_gr_ctx; + struct nvgpu_gr_ctx *gr_ctx = &tsg->gr_ctx; struct tegra_vgpu_cmd_msg msg = {0}; struct tegra_vgpu_tsg_bind_gr_ctx_params *p = &msg.params.tsg_bind_gr_ctx; @@ -495,7 +466,7 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) { struct gk20a *g = c->g; struct fifo_gk20a *f = &g->fifo; - struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + struct nvgpu_gr_ctx *gr_ctx = NULL; struct tsg_gk20a *tsg = NULL; int err = 0; @@ -515,95 +486,87 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) } c->obj_class = class_num; - if (gk20a_is_channel_marked_as_tsg(c)) - tsg = &f->tsg[c->tsgid]; - - if (!tsg) { - /* allocate gr ctx buffer */ - if (!ch_ctx->gr_ctx) { - err = g->ops.gr.alloc_gr_ctx(g, &c->ch_ctx.gr_ctx, - c->vm, - class_num, - flags); - if (!err) - err = vgpu_gr_ch_bind_gr_ctx(c); - if (err) { - nvgpu_err(g, "fail to allocate gr ctx buffer"); - goto out; - } - } else { - /*TBD: needs to be more subtle about which is - * being allocated as some are allowed to be - * allocated along same channel */ + if (!gk20a_is_channel_marked_as_tsg(c)) + return -EINVAL; + + tsg = &f->tsg[c->tsgid]; + gr_ctx = &tsg->gr_ctx; + + if (!nvgpu_mem_is_valid(&gr_ctx->mem)) { + tsg->vm = c->vm; + nvgpu_vm_get(tsg->vm); + err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, + c->vm, + class_num, + flags); + if (!err) + err = vgpu_gr_tsg_bind_gr_ctx(tsg); + if (err) { nvgpu_err(g, - "too many classes alloc'd on same channel"); - err = -EINVAL; + "fail to allocate TSG gr ctx buffer, err=%d", err); + nvgpu_vm_put(tsg->vm); + tsg->vm = NULL; goto out; } - } else { - if (!tsg->tsg_gr_ctx) { - tsg->vm = c->vm; - nvgpu_vm_get(tsg->vm); - err = g->ops.gr.alloc_gr_ctx(g, &tsg->tsg_gr_ctx, - c->vm, - class_num, - flags); - if (!err) - err = vgpu_gr_tsg_bind_gr_ctx(tsg); - if (err) { - nvgpu_err(g, - "fail to allocate TSG gr ctx buffer, err=%d", err); - nvgpu_vm_put(tsg->vm); - tsg->vm = NULL; - goto out; - } - } - ch_ctx->gr_ctx = tsg->tsg_gr_ctx; err = vgpu_gr_ch_bind_gr_ctx(c); if (err) { nvgpu_err(g, "fail to bind gr ctx buffer"); goto out; } - } - /* commit gr ctx buffer */ - err = g->ops.gr.commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); - if (err) { - nvgpu_err(g, "fail to commit gr ctx buffer"); - goto out; - } + /* commit gr ctx buffer */ + err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va); + if (err) { + nvgpu_err(g, "fail to commit gr ctx buffer"); + goto out; + } - /* allocate patch buffer */ - if (ch_ctx->patch_ctx.mem.priv.pages == NULL) { + /* allocate patch buffer */ err = vgpu_gr_alloc_channel_patch_ctx(g, c); if (err) { nvgpu_err(g, "fail to allocate patch buffer"); goto out; } - } - /* map global buffer to channel gpu_va and commit */ - if (!ch_ctx->global_ctx_buffer_mapped) { + /* map global buffer to channel gpu_va and commit */ err = vgpu_gr_map_global_ctx_buffers(g, c); if (err) { nvgpu_err(g, "fail to map global ctx buffer"); goto out; } - 
vgpu_gr_commit_global_ctx_buffers(g, c, true); - } - /* load golden image */ - if (!c->first_init) { + err = vgpu_gr_commit_global_ctx_buffers(g, c, true); + if (err) { + nvgpu_err(g, "fail to commit global ctx buffers"); + goto out; + } + + /* load golden image */ err = gr_gk20a_elpg_protected_call(g, vgpu_gr_load_golden_ctx_image(g, c)); if (err) { nvgpu_err(g, "fail to load golden ctx image"); goto out; } - c->first_init = true; + } else { + err = vgpu_gr_ch_bind_gr_ctx(c); + if (err) { + nvgpu_err(g, "fail to bind gr ctx buffer"); + goto out; + } + + /* commit gr ctx buffer */ + err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va); + if (err) { + nvgpu_err(g, "fail to commit gr ctx buffer"); + goto out; + } } + /* PM ctxt switch is off by default */ + gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); + gk20a_dbg_fn("done"); return 0; out: @@ -1055,15 +1018,30 @@ int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g, int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, struct channel_gk20a *ch, bool enable) { - struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; - struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *ch_ctx; + struct pm_ctx_desc *pm_ctx; struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode; int err; gk20a_dbg_fn(""); + tsg = tsg_gk20a_from_ch(ch); + if (!tsg) + return -EINVAL; + + ch_ctx = &tsg->gr_ctx; + pm_ctx = &ch_ctx->pm_ctx; + if (enable) { + /* + * send command to enable HWPM only once - otherwise server + * will return an error due to using the same GPU VA twice. + */ + if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) + return 0; + p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; /* Allocate buffer if necessary */ @@ -1076,8 +1054,12 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, return -ENOMEM; pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size; } - } else + } else { + if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) + return 0; + p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW; + } msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE; msg.handle = vgpu_get_handle(g); @@ -1086,8 +1068,13 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); WARN_ON(err || msg.ret); + err = err ? err : msg.ret; + if (!err) + pm_ctx->pm_mode = enable ? + ctxsw_prog_main_image_pm_mode_ctxsw_f() : + ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); - return err ? 
err : msg.ret; + return err; } int vgpu_gr_clear_sm_error_state(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h index 16aa92a9..4b81da91 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h @@ -29,6 +29,7 @@ struct dbg_session_gk20a; void vgpu_gr_detect_sm_arch(struct gk20a *g); void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg); +void vgpu_gr_free_tsg_ctx(struct tsg_gk20a *tsg); int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, struct channel_gk20a *c, u64 zcull_va, diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c index 968eae10..132ce6e5 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c @@ -131,7 +131,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, - .free_channel_ctx = vgpu_gr_free_channel_ctx, .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, .get_zcull_info = vgpu_gr_get_zcull_info, diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c index d59f0381..a0099f03 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c @@ -21,7 +21,7 @@ int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c) { - struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; + struct ctx_header_desc *ctx = &c->ctx_header; struct tegra_vgpu_cmd_msg msg = {}; struct tegra_vgpu_alloc_ctx_header_params *p = &msg.params.alloc_ctx_header; @@ -52,7 +52,7 @@ int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c) void vgpu_gv11b_free_subctx_header(struct channel_gk20a *c) { - struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; + struct ctx_header_desc *ctx = &c->ctx_header; struct tegra_vgpu_cmd_msg msg = {}; struct tegra_vgpu_free_ctx_header_params *p = &msg.params.free_ctx_header; diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h index 8c306ea0..20624240 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h +++ b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h @@ -79,12 +79,12 @@ int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info); int vgpu_gr_nonstall_isr(struct gk20a *g, struct tegra_vgpu_gr_nonstall_intr_info *info); int vgpu_gr_alloc_gr_ctx(struct gk20a *g, - struct gr_ctx_desc **__gr_ctx, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 flags); void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, - struct gr_ctx_desc *gr_ctx); + struct nvgpu_gr_ctx *gr_ctx); void vgpu_gr_handle_sm_esr_event(struct gk20a *g, struct tegra_vgpu_sm_esr_info *info); int vgpu_gr_init_ctx_state(struct gk20a *g); @@ -141,7 +141,7 @@ static inline int vgpu_gr_isr(struct gk20a *g, return 0; } static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g, - struct gr_ctx_desc **__gr_ctx, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 flags) @@ -149,7 +149,7 @@ static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g, return -ENOSYS; } static inline void 
vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, - struct gr_ctx_desc *gr_ctx) + struct nvgpu_gr_ctx *gr_ctx) { } static inline int vgpu_gr_init_ctx_state(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 16d4711f..64266fe5 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -259,7 +259,7 @@ void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt) ch->g->ops.fifo.disable_channel(ch); - if (channel_preempt && ch->ch_ctx.gr_ctx) + if (channel_preempt && gk20a_is_channel_marked_as_tsg(ch)) ch->g->ops.fifo.preempt_channel(ch->g, ch->chid); gk20a_channel_abort_clean_up(ch); @@ -421,8 +421,8 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) g->ops.fecs_trace.unbind_channel(g, ch); #endif - /* release channel ctx */ - g->ops.gr.free_channel_ctx(ch, was_tsg); + if(g->ops.fifo.free_channel_ctx_header) + g->ops.fifo.free_channel_ctx_header(ch); gk20a_gr_flush_channel_tlb(gr); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index c13b1c58..29fa302f 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -31,7 +31,6 @@ #include struct gk20a; -struct gr_gk20a; struct dbg_session_gk20a; struct gk20a_fence; struct fifo_profile_gk20a; @@ -50,10 +49,6 @@ struct fifo_profile_gk20a; #define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC (1 << 1) #define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE (1 << 2) -/* Flags to be passed to g->ops.gr.alloc_obj_ctx() */ -#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP (1 << 1) -#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP (1 << 2) - struct notification { struct { u32 nanoseconds[2]; @@ -63,19 +58,6 @@ struct notification { u16 status; }; -/* contexts associated with a channel */ -struct channel_ctx_gk20a { - struct gr_ctx_desc *gr_ctx; - struct patch_desc patch_ctx; - struct zcull_ctx_desc zcull_ctx; - struct pm_ctx_desc pm_ctx; - u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA]; - u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA]; - int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA]; - bool global_ctx_buffer_mapped; - struct ctx_header_desc ctx_header; -}; - struct channel_gk20a_job { struct nvgpu_mapped_buf **mapped_buffers; int num_mapped_buffers; @@ -190,7 +172,6 @@ struct channel_gk20a { int chid; bool wdt_enabled; nvgpu_atomic_t bound; - bool first_init; bool vpr; bool deterministic; /* deterministic, but explicitly idle and submits disallowed */ @@ -210,8 +191,6 @@ struct channel_gk20a { struct gpfifo_desc gpfifo; - struct channel_ctx_gk20a ch_ctx; - struct nvgpu_mem inst_block; u64 userd_iova; @@ -262,6 +241,8 @@ struct channel_gk20a { struct channel_t19x t19x; #endif + struct ctx_header_desc ctx_header; + /* Any operating system specific data. 
*/ void *os_priv; }; diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index d283a82e..409661fc 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -625,9 +625,10 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g, u32 lo; u32 hi; u64 pa; - struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *ch_ctx; struct gk20a_fecs_trace *trace = g->fecs_trace; - struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; + struct nvgpu_mem *mem; u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch); pid_t pid; u32 aperture; @@ -637,6 +638,13 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g, ch->chid, context_ptr, nvgpu_inst_block_addr(g, &ch->inst_block)); + tsg = tsg_gk20a_from_ch(ch); + if (!tsg) + return -EINVAL; + + ch_ctx = &tsg->gr_ctx; + mem = &ch_ctx->mem; + if (!trace) return -ENOMEM; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 070b26b6..685976b1 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -187,16 +187,16 @@ struct gpu_ops { void (*cb_size_default)(struct gk20a *g); int (*calc_global_ctx_buffer_size)(struct gk20a *g); void (*commit_global_attrib_cb)(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *ch_ctx, u64 addr, bool patch); void (*commit_global_bundle_cb)(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *ch_ctx, u64 addr, u64 size, bool patch); int (*commit_global_cb_manager)(struct gk20a *g, struct channel_gk20a *ch, bool patch); void (*commit_global_pagepool)(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *ch_ctx, u64 addr, u32 size, bool patch); void (*init_gpc_mmu)(struct gk20a *g); int (*handle_sw_method)(struct gk20a *g, u32 addr, @@ -230,7 +230,6 @@ struct gpu_ops { int (*load_ctxsw_ucode)(struct gk20a *g); u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); - void (*free_channel_ctx)(struct channel_gk20a *c, bool is_tsg); int (*alloc_obj_ctx)(struct channel_gk20a *c, u32 class_num, u32 flags); int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr, @@ -285,13 +284,12 @@ struct gpu_ops { u32 (*pagepool_default_size)(struct gk20a *g); int (*init_ctx_state)(struct gk20a *g); int (*alloc_gr_ctx)(struct gk20a *g, - struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 padding); void (*free_gr_ctx)(struct gk20a *g, - struct vm_gk20a *vm, - struct gr_ctx_desc *gr_ctx); + struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx); void (*update_ctxsw_preemption_mode)(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct channel_gk20a *c, struct nvgpu_mem *mem); int (*update_smpc_ctxsw_mode)(struct gk20a *g, struct channel_gk20a *c, @@ -384,14 +382,14 @@ struct gpu_ops { int (*get_preemption_mode_flags)(struct gk20a *g, struct nvgpu_preemption_modes_rec *preemption_modes_rec); int (*set_ctxsw_preemption_mode)(struct gk20a *g, - struct gr_ctx_desc *gr_ctx, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 graphics_preempt_mode, u32 compute_preempt_mode); int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost); void (*update_boosted_ctx)(struct gk20a *g, struct nvgpu_mem *mem, - struct gr_ctx_desc *gr_ctx); + struct nvgpu_gr_ctx *gr_ctx); int (*init_sm_id_table)(struct gk20a *g); int (*load_smid_config)(struct gk20a *g); void 
(*program_sm_id_numbering)(struct gk20a *g, @@ -440,7 +438,7 @@ struct gpu_ops { u32 (*get_gpcs_swdx_dss_zbc_c_format_reg)(struct gk20a *g); u32 (*get_gpcs_swdx_dss_zbc_z_format_reg)(struct gk20a *g); void (*dump_ctxsw_stats)(struct gk20a *g, struct vm_gk20a *vm, - struct gr_ctx_desc *gr_ctx); + struct nvgpu_gr_ctx *gr_ctx); } gr; struct { void (*init_hw)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 263ae030..f8af091b 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -85,18 +85,19 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g); static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g); static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, struct channel_gk20a *c); -static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c); - -/* channel gr ctx buffer */ -static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, - struct channel_gk20a *c, - u32 class, u32 padding); -static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c); +static void gr_gk20a_unmap_global_ctx_buffers(struct gk20a *g, + struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx); +static void gr_gk20a_free_channel_pm_ctx(struct gk20a *g, + struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx); /* channel patch ctx buffer */ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, struct channel_gk20a *c); -static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c); +static void gr_gk20a_free_channel_patch_ctx(struct gk20a *g, + struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx); /* golden ctx image */ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, @@ -108,8 +109,16 @@ int gr_gk20a_get_ctx_id(struct gk20a *g, struct channel_gk20a *c, u32 *ctx_id) { - struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; - struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx = NULL; + struct nvgpu_mem *mem = NULL; + + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + gr_ctx = &tsg->gr_ctx; + mem = &gr_ctx->mem; /* Channel gr_ctx buffer is gpu cacheable. Flush and invalidate before cpu update. 
*/ @@ -671,62 +680,62 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va) */ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *gr_ctx, bool update_patch_count) { int err = 0; - err = nvgpu_mem_begin(g, &ch_ctx->patch_ctx.mem); + err = nvgpu_mem_begin(g, &gr_ctx->patch_ctx.mem); if (err) return err; if (update_patch_count) { /* reset patch count if ucode has already processed it */ - ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g, - &ch_ctx->gr_ctx->mem, + gr_ctx->patch_ctx.data_count = nvgpu_mem_rd(g, + &gr_ctx->mem, ctxsw_prog_main_image_patch_count_o()); nvgpu_log(g, gpu_dbg_info, "patch count reset to %d", - ch_ctx->patch_ctx.data_count); + gr_ctx->patch_ctx.data_count); } return 0; } void gr_gk20a_ctx_patch_write_end(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *gr_ctx, bool update_patch_count) { - nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem); + nvgpu_mem_end(g, &gr_ctx->patch_ctx.mem); /* Write context count to context image if it is mapped */ if (update_patch_count) { - nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem, + nvgpu_mem_wr(g, &gr_ctx->mem, ctxsw_prog_main_image_patch_count_o(), - ch_ctx->patch_ctx.data_count); + gr_ctx->patch_ctx.data_count); nvgpu_log(g, gpu_dbg_info, "write patch count %d", - ch_ctx->patch_ctx.data_count); + gr_ctx->patch_ctx.data_count); } } void gr_gk20a_ctx_patch_write(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *gr_ctx, u32 addr, u32 data, bool patch) { if (patch) { - u32 patch_slot = ch_ctx->patch_ctx.data_count * + u32 patch_slot = gr_ctx->patch_ctx.data_count * PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; if (patch_slot > (PATCH_CTX_ENTRIES_FROM_SIZE( - ch_ctx->patch_ctx.mem.size) - + gr_ctx->patch_ctx.mem.size) - PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) { nvgpu_err(g, "failed to access patch_slot %d", patch_slot); return; } - nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot, addr); - nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot + 1, data); - ch_ctx->patch_ctx.data_count++; + nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot, addr); + nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot + 1, data); + gr_ctx->patch_ctx.data_count++; nvgpu_log(g, gpu_dbg_info, "patch addr = 0x%x data = 0x%x data_count %d", - addr, data, ch_ctx->patch_ctx.data_count); + addr, data, gr_ctx->patch_ctx.data_count); } else { gk20a_writel(g, addr, data); } @@ -793,14 +802,22 @@ void gr_gk20a_write_pm_ptr(struct gk20a *g, static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) { - struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; - struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; - struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx = NULL; + struct nvgpu_mem *mem = NULL; + struct ctx_header_desc *ctx = &c->ctx_header; struct nvgpu_mem *ctxheader = &ctx->mem; int ret = 0; gk20a_dbg_fn(""); + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + gr_ctx = &tsg->gr_ctx; + mem = &gr_ctx->mem; + if (nvgpu_mem_begin(g, mem)) return -ENOMEM; @@ -809,8 +826,8 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) goto clean_up_mem; } - if (ch_ctx->zcull_ctx.gpu_va == 0 && - ch_ctx->zcull_ctx.ctx_sw_mode == + if (gr_ctx->zcull_ctx.gpu_va == 0 && + gr_ctx->zcull_ctx.ctx_sw_mode == ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) { ret = -EINVAL; goto clean_up; @@ -830,13 +847,13 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a 
*c) nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_zcull_o(), - ch_ctx->zcull_ctx.ctx_sw_mode); + gr_ctx->zcull_ctx.ctx_sw_mode); if (ctxheader->gpu_va) g->ops.gr.write_zcull_ptr(g, ctxheader, - ch_ctx->zcull_ctx.gpu_va); + gr_ctx->zcull_ctx.gpu_va); else - g->ops.gr.write_zcull_ptr(g, mem, ch_ctx->zcull_ctx.gpu_va); + g->ops.gr.write_zcull_ptr(g, mem, gr_ctx->zcull_ctx.gpu_va); gk20a_enable_channel_tsg(g, c); @@ -869,22 +886,29 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, struct channel_gk20a *c, bool patch) { struct gr_gk20a *gr = &g->gr; - struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx = NULL; u64 addr; u32 size; gk20a_dbg_fn(""); + + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + gr_ctx = &tsg->gr_ctx; if (patch) { int err; - err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false); + err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); if (err) return err; } /* global pagepool buffer */ - addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >> + addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >> gr_scc_pagepool_base_addr_39_8_align_bits_v()) | - (u64_hi32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) << + (u64_hi32(gr_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) << (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v())); size = gr->global_ctx_buffer[PAGEPOOL].mem.size / @@ -896,12 +920,12 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, gk20a_dbg_info("pagepool buffer addr : 0x%016llx, size : %d", addr, size); - g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, patch); + g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, patch); /* global bundle cb */ - addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >> + addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >> gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) | - (u64_hi32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) << + (u64_hi32(gr_ctx->global_ctx_buffer_va[CIRCULAR_VA]) << (32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v())); size = gr->bundle_cb_default_size; @@ -909,20 +933,20 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, gk20a_dbg_info("bundle cb addr : 0x%016llx, size : %d", addr, size); - g->ops.gr.commit_global_bundle_cb(g, ch_ctx, addr, size, patch); + g->ops.gr.commit_global_bundle_cb(g, gr_ctx, addr, size, patch); /* global attrib cb */ - addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >> + addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >> gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) | - (u64_hi32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) << + (u64_hi32(gr_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) << (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); gk20a_dbg_info("attrib cb addr : 0x%016llx", addr); - g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, patch); + g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, patch); g->ops.gr.commit_global_cb_manager(g, c, patch); if (patch) - gr_gk20a_ctx_patch_write_end(g, ch_ctx, false); + gr_gk20a_ctx_patch_write_end(g, gr_ctx, false); return 0; } @@ -930,7 +954,7 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) { struct gr_gk20a *gr = &g->gr; - struct channel_ctx_gk20a *ch_ctx = NULL; + struct nvgpu_gr_ctx *gr_ctx = NULL; u32 gpm_pd_cfg; u32 pd_ab_dist_cfg0; u32 ds_debug; @@ -956,22 +980,22 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) 
ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug; mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug; - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false); + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false); + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false); + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); } else { gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg; pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0; ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug; mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug; - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false); + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); } return 0; @@ -1360,13 +1384,14 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, struct channel_gk20a *c) { struct gr_gk20a *gr = &g->gr; - struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx = NULL; u32 ctx_header_bytes = ctxsw_prog_fecs_header_v(); u32 ctx_header_words; u32 i; u32 data; struct nvgpu_mem *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem; - struct nvgpu_mem *gr_mem = &ch_ctx->gr_ctx->mem; + struct nvgpu_mem *gr_mem; u32 err = 0; struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load; struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init; @@ -1374,6 +1399,13 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, gk20a_dbg_fn(""); + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + gr_ctx = &tsg->gr_ctx; + gr_mem = &gr_ctx->mem; + /* golden ctx is global to all channels. 
Although only the first channel initializes golden image, driver needs to prevent multiple channels from initializing golden ctx at the same time */ @@ -1565,7 +1597,7 @@ restore_fe_go_idle: g->ops.gr.write_zcull_ptr(g, gold_mem, 0); - err = g->ops.gr.commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); + err = g->ops.gr.commit_inst(c, gr_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); if (err) goto clean_up; @@ -1614,20 +1646,25 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, struct channel_gk20a *c, bool enable_smpc_ctxsw) { - struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; - struct nvgpu_mem *mem; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx = NULL; + struct nvgpu_mem *mem = NULL; u32 data; int ret; gk20a_dbg_fn(""); - if (!ch_ctx->gr_ctx) { + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + gr_ctx = &tsg->gr_ctx; + mem = &gr_ctx->mem; + if (!nvgpu_mem_is_valid(mem)) { nvgpu_err(g, "no graphics context allocated"); return -EFAULT; } - mem = &ch_ctx->gr_ctx->mem; - ret = gk20a_disable_channel_tsg(g, c); if (ret) { nvgpu_err(g, "failed to disable channel/TSG"); @@ -1670,24 +1707,30 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, struct channel_gk20a *c, bool enable_hwpm_ctxsw) { - struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; - struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; - struct nvgpu_mem *gr_mem; + struct tsg_gk20a *tsg; + struct nvgpu_mem *gr_mem = NULL; + struct nvgpu_gr_ctx *gr_ctx; + struct pm_ctx_desc *pm_ctx; u32 data; u64 virt_addr; - struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; + struct ctx_header_desc *ctx = &c->ctx_header; struct nvgpu_mem *ctxheader = &ctx->mem; int ret; gk20a_dbg_fn(""); - if (!ch_ctx->gr_ctx) { + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + gr_ctx = &tsg->gr_ctx; + pm_ctx = &gr_ctx->pm_ctx; + gr_mem = &gr_ctx->mem; + if (!nvgpu_mem_is_valid(gr_mem)) { nvgpu_err(g, "no graphics context allocated"); return -EFAULT; } - gr_mem = &ch_ctx->gr_ctx->mem; - if (enable_hwpm_ctxsw) { if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) return 0; @@ -1816,20 +1859,25 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, struct channel_gk20a *c) { struct gr_gk20a *gr = &g->gr; - struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx; u32 virt_addr_lo; u32 virt_addr_hi; u64 virt_addr = 0; u32 v, data; int ret = 0; - struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; - struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; - struct nvgpu_mem *ctxheader = &ctx->mem; + struct nvgpu_mem *mem; gk20a_dbg_fn(""); + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + gr_ctx = &tsg->gr_ctx; + mem = &gr_ctx->mem; if (gr->ctx_vars.local_golden_image == NULL) - return -1; + return -EINVAL; /* Channel gr_ctx buffer is gpu cacheable. Flush and invalidate before cpu update. 
*/ @@ -1838,11 +1886,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, if (nvgpu_mem_begin(g, mem)) return -ENOMEM; - if (nvgpu_mem_begin(g, ctxheader)) { - ret = -ENOMEM; - goto clean_up_mem; - } - nvgpu_mem_wr_n(g, mem, 0, gr->ctx_vars.local_golden_image, gr->ctx_vars.golden_image_size); @@ -1855,9 +1898,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, /* set priv access map */ virt_addr_lo = - u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); + u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); virt_addr_hi = - u64_hi32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); + u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); if (g->allow_all) data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f(); @@ -1867,21 +1910,13 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(), data); - if (ctxheader->gpu_va) { - nvgpu_mem_wr(g, ctxheader, - ctxsw_prog_main_image_priv_access_map_addr_lo_o(), - virt_addr_lo); - nvgpu_mem_wr(g, ctxheader, - ctxsw_prog_main_image_priv_access_map_addr_hi_o(), - virt_addr_hi); - } else { - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_priv_access_map_addr_lo_o(), - virt_addr_lo); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_priv_access_map_addr_hi_o(), - virt_addr_hi); - } + nvgpu_mem_wr(g, mem, + ctxsw_prog_main_image_priv_access_map_addr_lo_o(), + virt_addr_lo); + nvgpu_mem_wr(g, mem, + ctxsw_prog_main_image_priv_access_map_addr_hi_o(), + virt_addr_hi); + /* disable verif features */ v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o()); v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m()); @@ -1889,65 +1924,50 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v); if (g->ops.gr.update_ctxsw_preemption_mode) - g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, mem); + g->ops.gr.update_ctxsw_preemption_mode(g, c, mem); if (g->ops.gr.update_boosted_ctx) - g->ops.gr.update_boosted_ctx(g, mem, ch_ctx->gr_ctx); + g->ops.gr.update_boosted_ctx(g, mem, gr_ctx); - virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); - virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); + virt_addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va); + virt_addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va); nvgpu_log(g, gpu_dbg_info, "write patch count = %d", - ch_ctx->patch_ctx.data_count); + gr_ctx->patch_ctx.data_count); nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(), - ch_ctx->patch_ctx.data_count); - - if (ctxheader->gpu_va) { - nvgpu_mem_wr(g, ctxheader, - ctxsw_prog_main_image_patch_adr_lo_o(), - virt_addr_lo); - nvgpu_mem_wr(g, ctxheader, - ctxsw_prog_main_image_patch_adr_hi_o(), - virt_addr_hi); - } else { - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_patch_adr_lo_o(), - virt_addr_lo); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_patch_adr_hi_o(), - virt_addr_hi); - } + gr_ctx->patch_ctx.data_count); + + nvgpu_mem_wr(g, mem, + ctxsw_prog_main_image_patch_adr_lo_o(), + virt_addr_lo); + nvgpu_mem_wr(g, mem, + ctxsw_prog_main_image_patch_adr_hi_o(), + virt_addr_hi); /* Update main header region of the context buffer with the info needed * for PM context switching, including mode and possibly a pointer to * the PM backing store. 
*/ - if (ch_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) { - if (ch_ctx->pm_ctx.mem.gpu_va == 0) { + if (gr_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) { + if (gr_ctx->pm_ctx.mem.gpu_va == 0) { nvgpu_err(g, "context switched pm with no pm buffer!"); nvgpu_mem_end(g, mem); return -EFAULT; } - virt_addr = ch_ctx->pm_ctx.mem.gpu_va; + virt_addr = gr_ctx->pm_ctx.mem.gpu_va; } else virt_addr = 0; data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o()); data = data & ~ctxsw_prog_main_image_pm_mode_m(); - data |= ch_ctx->pm_ctx.pm_mode; + data |= gr_ctx->pm_ctx.pm_mode; nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data); - if (ctxheader->gpu_va) - g->ops.gr.write_pm_ptr(g, ctxheader, virt_addr); - else - g->ops.gr.write_pm_ptr(g, mem, virt_addr); - + g->ops.gr.write_pm_ptr(g, mem, virt_addr); - nvgpu_mem_end(g, ctxheader); -clean_up_mem: nvgpu_mem_end(g, mem); return ret; @@ -2568,13 +2588,13 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g) return -ENOMEM; } -static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c) +static void gr_gk20a_unmap_global_ctx_buffers(struct gk20a *g, + struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx) { - struct vm_gk20a *ch_vm = c->vm; - struct gr_gk20a *gr = &c->g->gr; - u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; - u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; - int *g_bfr_index = c->ch_ctx.global_ctx_buffer_index; + u64 *g_bfr_va = gr_ctx->global_ctx_buffer_va; + u64 *g_bfr_size = gr_ctx->global_ctx_buffer_size; + int *g_bfr_index = gr_ctx->global_ctx_buffer_index; u32 i; gk20a_dbg_fn(""); @@ -2588,32 +2608,41 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c) * the correct struct nvgpu_mem to use. Handles the VPR * vs non-VPR difference in context images. 
*/ - mem = &gr->global_ctx_buffer[g_bfr_index[i]].mem; + mem = &g->gr.global_ctx_buffer[g_bfr_index[i]].mem; - nvgpu_gmmu_unmap(ch_vm, mem, g_bfr_va[i]); + nvgpu_gmmu_unmap(vm, mem, g_bfr_va[i]); } } - memset(g_bfr_va, 0, sizeof(c->ch_ctx.global_ctx_buffer_va)); - memset(g_bfr_size, 0, sizeof(c->ch_ctx.global_ctx_buffer_size)); - memset(g_bfr_index, 0, sizeof(c->ch_ctx.global_ctx_buffer_index)); + memset(g_bfr_va, 0, sizeof(gr_ctx->global_ctx_buffer_va)); + memset(g_bfr_size, 0, sizeof(gr_ctx->global_ctx_buffer_size)); + memset(g_bfr_index, 0, sizeof(gr_ctx->global_ctx_buffer_index)); - c->ch_ctx.global_ctx_buffer_mapped = false; + gr_ctx->global_ctx_buffer_mapped = false; } static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, struct channel_gk20a *c) { + struct tsg_gk20a *tsg; struct vm_gk20a *ch_vm = c->vm; - u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; - u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; - int *g_bfr_index = c->ch_ctx.global_ctx_buffer_index; + u64 *g_bfr_va; + u64 *g_bfr_size; + int *g_bfr_index; struct gr_gk20a *gr = &g->gr; struct nvgpu_mem *mem; u64 gpu_va; gk20a_dbg_fn(""); + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va; + g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size; + g_bfr_index = tsg->gr_ctx.global_ctx_buffer_index; + /* Circular Buffer */ if (c->vpr && nvgpu_mem_is_valid(&gr->global_ctx_buffer[CIRCULAR_VPR].mem)) { @@ -2688,21 +2717,20 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size; g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP; - c->ch_ctx.global_ctx_buffer_mapped = true; + tsg->gr_ctx.global_ctx_buffer_mapped = true; return 0; clean_up: - gr_gk20a_unmap_global_ctx_buffers(c); + gr_gk20a_unmap_global_ctx_buffers(g, ch_vm, &tsg->gr_ctx); return -ENOMEM; } int gr_gk20a_alloc_gr_ctx(struct gk20a *g, - struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 padding) { - struct gr_ctx_desc *gr_ctx = NULL; struct gr_gk20a *gr = &g->gr; int err = 0; @@ -2715,15 +2743,11 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g, gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; - gr_ctx = nvgpu_kzalloc(g, sizeof(*gr_ctx)); - if (!gr_ctx) - return -ENOMEM; - err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING, gr->ctx_vars.buffer_total_size, &gr_ctx->mem); if (err) - goto err_free_ctx; + return err; gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm, &gr_ctx->mem, @@ -2734,15 +2758,10 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g, if (!gr_ctx->mem.gpu_va) goto err_free_mem; - *__gr_ctx = gr_ctx; - return 0; err_free_mem: nvgpu_dma_free(g, &gr_ctx->mem); - err_free_ctx: - nvgpu_kfree(g, gr_ctx); - gr_ctx = NULL; return err; } @@ -2750,7 +2769,7 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g, static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, struct tsg_gk20a *tsg, u32 class, u32 padding) { - struct gr_ctx_desc **gr_ctx = &tsg->tsg_gr_ctx; + struct nvgpu_gr_ctx *gr_ctx = &tsg->gr_ctx; int err; if (!tsg->vm) { @@ -2762,57 +2781,44 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, if (err) return err; - return 0; -} - -static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, - struct channel_gk20a *c, - u32 class, - u32 padding) -{ - struct gr_ctx_desc **gr_ctx = &c->ch_ctx.gr_ctx; - int err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, c->vm, class, padding); - if (err) - return err; + gr_ctx->tsgid = tsg->tsgid; return 
0; } void gr_gk20a_free_gr_ctx(struct gk20a *g, - struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx) + struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx) { gk20a_dbg_fn(""); - if (!gr_ctx || !gr_ctx->mem.gpu_va) - return; + if (gr_ctx->mem.gpu_va) { + gr_gk20a_unmap_global_ctx_buffers(g, vm, gr_ctx); + gr_gk20a_free_channel_patch_ctx(g, vm, gr_ctx); + gr_gk20a_free_channel_pm_ctx(g, vm, gr_ctx); - if (g->ops.gr.dump_ctxsw_stats && - g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close) - g->ops.gr.dump_ctxsw_stats(g, vm, gr_ctx); + if (g->ops.gr.dump_ctxsw_stats && + g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close) + g->ops.gr.dump_ctxsw_stats(g, vm, gr_ctx); - nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); - nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); - nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); - nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); - nvgpu_gmmu_unmap(vm, &gr_ctx->mem, gr_ctx->mem.gpu_va); - nvgpu_dma_free(g, &gr_ctx->mem); - nvgpu_kfree(g, gr_ctx); + nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->mem); + + memset(gr_ctx, 0, sizeof(*gr_ctx)); + } } void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg) { + struct gk20a *g = tsg->g; + if (!tsg->vm) { - nvgpu_err(tsg->g, "No address space bound"); + nvgpu_err(g, "No address space bound"); return; } - tsg->g->ops.gr.free_gr_ctx(tsg->g, tsg->vm, tsg->tsg_gr_ctx); - tsg->tsg_gr_ctx = NULL; -} - -static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c) -{ - c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx); - c->ch_ctx.gr_ctx = NULL; + tsg->g->ops.gr.free_gr_ctx(g, tsg->vm, &tsg->gr_ctx); } u32 gr_gk20a_get_patch_slots(struct gk20a *g) @@ -2823,13 +2829,19 @@ u32 gr_gk20a_get_patch_slots(struct gk20a *g) static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, struct channel_gk20a *c) { - struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; + struct tsg_gk20a *tsg; + struct patch_desc *patch_ctx; struct vm_gk20a *ch_vm = c->vm; u32 alloc_size; int err = 0; gk20a_dbg_fn(""); + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + patch_ctx = &tsg->gr_ctx.patch_ctx; alloc_size = g->ops.gr.get_patch_slots(g) * PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; @@ -2845,57 +2857,42 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, return 0; } -static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c) +static void gr_gk20a_free_channel_patch_ctx(struct gk20a *g, + struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx) { - struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; - struct gk20a *g = c->g; + struct patch_desc *patch_ctx = &gr_ctx->patch_ctx; gk20a_dbg_fn(""); if (patch_ctx->mem.gpu_va) - nvgpu_gmmu_unmap(c->vm, &patch_ctx->mem, + nvgpu_gmmu_unmap(vm, &patch_ctx->mem, patch_ctx->mem.gpu_va); nvgpu_dma_free(g, &patch_ctx->mem); patch_ctx->data_count = 0; } -static void gr_gk20a_free_channel_pm_ctx(struct channel_gk20a *c) +static void gr_gk20a_free_channel_pm_ctx(struct gk20a *g, + struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx) { - struct pm_ctx_desc *pm_ctx = &c->ch_ctx.pm_ctx; - struct gk20a *g = c->g; + struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx; gk20a_dbg_fn(""); if (pm_ctx->mem.gpu_va) { - nvgpu_gmmu_unmap(c->vm, &pm_ctx->mem, pm_ctx->mem.gpu_va); + nvgpu_gmmu_unmap(vm, &pm_ctx->mem, pm_ctx->mem.gpu_va); nvgpu_dma_free(g, 
&pm_ctx->mem); } } -void gk20a_free_channel_ctx(struct channel_gk20a *c, bool is_tsg) -{ - if(c->g->ops.fifo.free_channel_ctx_header) - c->g->ops.fifo.free_channel_ctx_header(c); - gr_gk20a_unmap_global_ctx_buffers(c); - gr_gk20a_free_channel_patch_ctx(c); - gr_gk20a_free_channel_pm_ctx(c); - if (!is_tsg) - gr_gk20a_free_channel_gr_ctx(c); - - /* zcull_ctx */ - - memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a)); - - c->first_init = false; -} - int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) { struct gk20a *g = c->g; struct fifo_gk20a *f = &g->fifo; - struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + struct nvgpu_gr_ctx *gr_ctx; struct tsg_gk20a *tsg = NULL; int err = 0; @@ -2917,92 +2914,64 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) } c->obj_class = class_num; - if (gk20a_is_channel_marked_as_tsg(c)) - tsg = &f->tsg[c->tsgid]; + if (!gk20a_is_channel_marked_as_tsg(c)) + return -EINVAL; - /* allocate gr ctx buffer */ - if (!tsg) { - if (!ch_ctx->gr_ctx) { - err = gr_gk20a_alloc_channel_gr_ctx(g, c, - class_num, - flags); - if (err) { - nvgpu_err(g, - "fail to allocate gr ctx buffer"); - goto out; - } - } else { - /*TBD: needs to be more subtle about which is - * being allocated as some are allowed to be - * allocated along same channel */ + tsg = &f->tsg[c->tsgid]; + gr_ctx = &tsg->gr_ctx; + + if (!nvgpu_mem_is_valid(&gr_ctx->mem)) { + tsg->vm = c->vm; + nvgpu_vm_get(tsg->vm); + err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, + class_num, + flags); + if (err) { nvgpu_err(g, - "too many classes alloc'd on same channel"); - err = -EINVAL; + "fail to allocate TSG gr ctx buffer"); + nvgpu_vm_put(tsg->vm); + tsg->vm = NULL; goto out; } - } else { - if (!tsg->tsg_gr_ctx) { - tsg->vm = c->vm; - nvgpu_vm_get(tsg->vm); - err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, - class_num, - flags); + + /* allocate patch buffer */ + if (!nvgpu_mem_is_valid(&gr_ctx->patch_ctx.mem)) { + gr_ctx->patch_ctx.data_count = 0; + err = gr_gk20a_alloc_channel_patch_ctx(g, c); if (err) { nvgpu_err(g, - "fail to allocate TSG gr ctx buffer"); - nvgpu_vm_put(tsg->vm); - tsg->vm = NULL; + "fail to allocate patch buffer"); goto out; } } - ch_ctx->gr_ctx = tsg->tsg_gr_ctx; - } - - /* PM ctxt switch is off by default */ - ch_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); - /* commit gr ctx buffer */ - err = g->ops.gr.commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); - if (err) { - nvgpu_err(g, - "fail to commit gr ctx buffer"); - goto out; - } - - /* allocate patch buffer */ - if (!nvgpu_mem_is_valid(&ch_ctx->patch_ctx.mem)) { - ch_ctx->patch_ctx.data_count = 0; - err = gr_gk20a_alloc_channel_patch_ctx(g, c); + /* map global buffer to channel gpu_va and commit */ + err = gr_gk20a_map_global_ctx_buffers(g, c); if (err) { nvgpu_err(g, - "fail to allocate patch buffer"); + "fail to map global ctx buffer"); goto out; } - } + gr_gk20a_commit_global_ctx_buffers(g, c, true); - /* map global buffer to channel gpu_va and commit */ - if (!ch_ctx->global_ctx_buffer_mapped) { - err = gr_gk20a_map_global_ctx_buffers(g, c); + /* commit gr ctx buffer */ + err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va); if (err) { nvgpu_err(g, - "fail to map global ctx buffer"); + "fail to commit gr ctx buffer"); goto out; } - gr_gk20a_commit_global_ctx_buffers(g, c, true); - } - /* init golden image, ELPG enabled after this is done */ - err = gr_gk20a_init_golden_ctx_image(g, c); - if (err) { - nvgpu_err(g, - "fail to init golden ctx image"); - goto out; - } + /* init golden image, ELPG 
enabled after this is done */ + err = gr_gk20a_init_golden_ctx_image(g, c); + if (err) { + nvgpu_err(g, + "fail to init golden ctx image"); + goto out; + } - /* load golden image */ - if (!c->first_init) { - err = gr_gk20a_elpg_protected_call(g, - gr_gk20a_load_golden_ctx_image(g, c)); + /* load golden image */ + gr_gk20a_load_golden_ctx_image(g, c); if (err) { nvgpu_err(g, "fail to load golden ctx image"); @@ -3016,11 +2985,21 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) "fail to bind channel for ctxsw trace"); } #endif - c->first_init = true; - } - if (g->ops.gr.set_czf_bypass) - g->ops.gr.set_czf_bypass(g, c); + if (g->ops.gr.set_czf_bypass) + g->ops.gr.set_czf_bypass(g, c); + + /* PM ctxt switch is off by default */ + gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); + } else { + /* commit gr ctx buffer */ + err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va); + if (err) { + nvgpu_err(g, + "fail to commit gr ctx buffer"); + goto out; + } + } gk20a_dbg_fn("done"); return 0; @@ -3553,8 +3532,14 @@ u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr) int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, struct channel_gk20a *c, u64 zcull_va, u32 mode) { - struct zcull_ctx_desc *zcull_ctx = &c->ch_ctx.zcull_ctx; + struct tsg_gk20a *tsg; + struct zcull_ctx_desc *zcull_ctx; + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + zcull_ctx = &tsg->gr_ctx.zcull_ctx; zcull_ctx->ctx_sw_mode = mode; zcull_ctx->gpu_va = zcull_va; @@ -6516,7 +6501,7 @@ void gk20a_gr_init_ovr_sm_dsm_perf(void) * write will actually occur. so later we should put a lazy, * map-and-hold system in the patch write state */ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct channel_gk20a *ch, u32 addr, u32 data, struct nvgpu_mem *mem) { @@ -6531,9 +6516,16 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, u32 *ovr_perf_regs = NULL; u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); - struct ctx_header_desc *ctx = &ch_ctx->ctx_header; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx; + struct ctx_header_desc *ctx = &ch->ctx_header; struct nvgpu_mem *ctxheader = &ctx->mem; + tsg = tsg_gk20a_from_ch(ch); + if (!tsg) + return -EINVAL; + + gr_ctx = &tsg->gr_ctx; g->ops.gr.init_ovr_sm_dsm_perf(); g->ops.gr.init_sm_dsm_reg_info(); g->ops.gr.get_ovr_perf_regs(g, &num_ovr_perf_regs, &ovr_perf_regs); @@ -6556,17 +6548,17 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, ctxsw_prog_main_image_patch_count_o()); if (!tmp) - ch_ctx->patch_ctx.data_count = 0; + gr_ctx->patch_ctx.data_count = 0; - gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gk20a_ctx_patch_write(g, gr_ctx, addr, data, true); - vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); - vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); + vaddr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va); + vaddr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va); nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(), - ch_ctx->patch_ctx.data_count); + gr_ctx->patch_ctx.data_count); if (ctxheader->gpu_va) { /* * Main context can be gr_ctx or pm_ctx. @@ -6575,7 +6567,7 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, * __gr_gk20a_exec_ctx_ops. Need to take * care of cpu access to ctxheader here. 
*/ - if (nvgpu_mem_begin(g, ctxheader)) + if (nvgpu_mem_begin(g, ctxheader)) return -ENOMEM; nvgpu_mem_wr(g, ctxheader, ctxsw_prog_main_image_patch_adr_lo_o(), @@ -7690,7 +7682,8 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, bool ch_is_curr_ctx) { struct gk20a *g = ch->g; - struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx; bool gr_ctx_ready = false; bool pm_ctx_ready = false; struct nvgpu_mem *current_mem = NULL; @@ -7707,6 +7700,12 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d", num_ctx_wr_ops, num_ctx_rd_ops); + tsg = tsg_gk20a_from_ch(ch); + if (!tsg) + return -EINVAL; + + gr_ctx = &tsg->gr_ctx; + if (ch_is_curr_ctx) { for (pass = 0; pass < 2; pass++) { ctx_op_nr = 0; @@ -7778,7 +7777,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, } offset_addrs = offsets + max_offsets; - err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false); + err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); if (err) goto cleanup; @@ -7812,13 +7811,13 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, * gr_gk20a_apply_instmem_overrides, * recoded in-place instead. */ - if (nvgpu_mem_begin(g, &ch_ctx->gr_ctx->mem)) { + if (nvgpu_mem_begin(g, &gr_ctx->mem)) { err = -ENOMEM; goto cleanup; } gr_ctx_ready = true; } - current_mem = &ch_ctx->gr_ctx->mem; + current_mem = &gr_ctx->mem; } else { err = gr_gk20a_get_pm_ctx_buffer_offsets(g, ctx_ops[i].offset, @@ -7835,19 +7834,19 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, } if (!pm_ctx_ready) { /* Make sure ctx buffer was initialized */ - if (!nvgpu_mem_is_valid(&ch_ctx->pm_ctx.mem)) { + if (!nvgpu_mem_is_valid(&gr_ctx->pm_ctx.mem)) { nvgpu_err(g, "Invalid ctx buffer"); err = -EINVAL; goto cleanup; } - if (nvgpu_mem_begin(g, &ch_ctx->pm_ctx.mem)) { + if (nvgpu_mem_begin(g, &gr_ctx->pm_ctx.mem)) { err = -ENOMEM; goto cleanup; } pm_ctx_ready = true; } - current_mem = &ch_ctx->pm_ctx.mem; + current_mem = &gr_ctx->pm_ctx.mem; } /* if this is a quad access, setup for special access*/ @@ -7860,7 +7859,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, /* sanity check gr ctxt offsets, * don't write outside, worst case */ - if ((current_mem == &ch_ctx->gr_ctx->mem) && + if ((current_mem == &gr_ctx->mem) && (offsets[j] >= g->gr.ctx_vars.golden_image_size)) continue; if (pass == 0) { /* write pass */ @@ -7886,7 +7885,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, /* check to see if we need to add a special WAR for some of the SMPC perf regs */ - gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], + gr_gk20a_ctx_patch_smpc(g, ch, offset_addrs[j], v, current_mem); } else { /* read pass */ @@ -7915,12 +7914,12 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, if (offsets) nvgpu_kfree(g, offsets); - if (ch_ctx->patch_ctx.mem.cpu_va) - gr_gk20a_ctx_patch_write_end(g, ch_ctx, gr_ctx_ready); + if (gr_ctx->patch_ctx.mem.cpu_va) + gr_gk20a_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready); if (gr_ctx_ready) - nvgpu_mem_end(g, &ch_ctx->gr_ctx->mem); + nvgpu_mem_end(g, &gr_ctx->mem); if (pm_ctx_ready) - nvgpu_mem_end(g, &ch_ctx->pm_ctx.mem); + nvgpu_mem_end(g, &gr_ctx->pm_ctx.mem); return err; } @@ -7962,23 +7961,23 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, } void gr_gk20a_commit_global_pagepool(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *gr_ctx, u64 addr, u32 size, bool patch) { - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(), + 
gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(), gr_scc_pagepool_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(), gr_scc_pagepool_total_pages_f(size) | gr_scc_pagepool_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(), gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(), gr_gpcs_gcc_pagepool_total_pages_f(size), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_pagepool_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_pagepool_r(), gr_pd_pagepool_total_pages_f(size) | gr_pd_pagepool_valid_true_f(), patch); } diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 1c22923b..6cc15c94 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -28,7 +28,6 @@ #include "gr_t19x.h" #endif -#include "tsg_gk20a.h" #include "gr_ctx_gk20a.h" #include "mm_gk20a.h" @@ -48,6 +47,10 @@ #define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */ +/* Flags to be passed to g->ops.gr.alloc_obj_ctx() */ +#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP (1 << 1) +#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP (1 << 2) + /* * allocate a minimum of 1 page (4KB) worth of patch space, this is 512 entries * of address and data pairs @@ -64,6 +67,7 @@ #define NVGPU_PREEMPTION_MODE_COMPUTE_CTA (1 << 1) #define NVGPU_PREEMPTION_MODE_COMPUTE_CILP (1 << 2) +struct tsg_gk20a; struct channel_gk20a; struct nvgpu_warpstate; @@ -433,7 +437,12 @@ struct gr_gk20a { void gk20a_fecs_dump_falcon_stats(struct gk20a *g); -struct gr_ctx_desc { +struct ctx_header_desc { + struct nvgpu_mem mem; +}; + +/* contexts associated with a TSG */ +struct nvgpu_gr_ctx { struct nvgpu_mem mem; u32 graphics_preempt_mode; @@ -452,10 +461,16 @@ struct gr_ctx_desc { u64 virt_ctx; #endif bool golden_img_loaded; -}; -struct ctx_header_desc { - struct nvgpu_mem mem; + struct patch_desc patch_ctx; + struct zcull_ctx_desc zcull_ctx; + struct pm_ctx_desc pm_ctx; + u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA]; + u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA]; + int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA]; + bool global_ctx_buffer_mapped; + + u32 tsgid; }; struct gk20a_ctxsw_ucode_segment { @@ -552,7 +567,6 @@ int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a); int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr); int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); -void gk20a_free_channel_ctx(struct channel_gk20a *c, bool is_tsg); int gk20a_gr_isr(struct gk20a *g); int gk20a_gr_nonstall_isr(struct gk20a *g); @@ -633,17 +647,17 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, struct channel_gk20a *c, bool enable_hwpm_ctxsw); -struct channel_ctx_gk20a; -void gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, +struct nvgpu_gr_ctx; +void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx, u32 addr, u32 data, bool patch); int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *ch_ctx, bool update_patch_count); void gr_gk20a_ctx_patch_write_end(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *ch_ctx, bool update_patch_count); void gr_gk20a_commit_global_pagepool(struct gk20a *g, - struct 
channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *ch_ctx, u64 addr, u32 size, bool patch); void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); void gr_gk20a_enable_hww_exceptions(struct gk20a *g); @@ -694,10 +708,10 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g, int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g, struct fecs_method_op_gk20a op); int gr_gk20a_alloc_gr_ctx(struct gk20a *g, - struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 padding); void gr_gk20a_free_gr_ctx(struct gk20a *g, - struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx); + struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx); int gr_gk20a_halt_pipe(struct gk20a *g); #if defined(CONFIG_GK20A_CYCLE_STATS) diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c index d9ddc011..19d0ecce 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c @@ -280,7 +280,6 @@ struct tsg_gk20a *gk20a_tsg_open(struct gk20a *g) tsg->num_active_channels = 0; nvgpu_ref_init(&tsg->refcount); - tsg->tsg_gr_ctx = NULL; tsg->vm = NULL; tsg->interleave_level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; tsg->timeslice_us = 0; @@ -319,10 +318,8 @@ void gk20a_tsg_release(struct nvgpu_ref *ref) if (g->ops.fifo.tsg_release) g->ops.fifo.tsg_release(tsg); - if (tsg->tsg_gr_ctx) { + if (nvgpu_mem_is_valid(&tsg->gr_ctx.mem)) gr_gk20a_free_tsg_gr_ctx(tsg); - tsg->tsg_gr_ctx = NULL; - } if (g->ops.fifo.deinit_eng_method_buffers) g->ops.fifo.deinit_eng_method_buffers(g, tsg); diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h index 08fe0365..2168cb4f 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h @@ -26,6 +26,8 @@ #include #include +#include "gr_gk20a.h" + #ifdef CONFIG_TEGRA_19x_GPU #include "tsg_t19x.h" #endif @@ -56,8 +58,6 @@ struct tsg_gk20a { unsigned int timeslice_timeout; unsigned int timeslice_scale; - struct gr_ctx_desc *tsg_gr_ctx; - struct vm_gk20a *vm; u32 interleave_level; @@ -71,6 +71,8 @@ struct tsg_gk20a { #ifdef CONFIG_TEGRA_19x_GPU struct tsg_t19x t19x; #endif + + struct nvgpu_gr_ctx gr_ctx; }; int gk20a_enable_tsg(struct tsg_gk20a *tsg); diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 36fad8b3..a2434320 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -124,7 +124,7 @@ int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g) } void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *ch_ctx, u64 addr, bool patch) { gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(), @@ -141,7 +141,7 @@ void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, } void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *ch_ctx, u64 addr, u64 size, bool patch) { u32 data; @@ -180,7 +180,8 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, struct channel_gk20a *c, bool patch) { struct gr_gk20a *gr = &g->gr; - struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *ch_ctx; u32 attrib_offset_in_chunk = 0; u32 alpha_offset_in_chunk = 0; u32 pd_ab_max_output; @@ -193,6 +194,12 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, gk20a_dbg_fn(""); + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + ch_ctx = &tsg->gr_ctx; + gr_gk20a_ctx_patch_write(g, ch_ctx, 
gr_ds_tga_constraintlogic_r(), gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) | gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size), @@ -257,7 +264,7 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, } void gr_gm20b_commit_global_pagepool(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *ch_ctx, u64 addr, u32 size, bool patch) { gr_gk20a_commit_global_pagepool(g, ch_ctx, addr, size, patch); @@ -845,7 +852,7 @@ u32 gr_gm20b_pagepool_default_size(struct gk20a *g) } int gr_gm20b_alloc_gr_ctx(struct gk20a *g, - struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 flags) { @@ -858,7 +865,7 @@ int gr_gm20b_alloc_gr_ctx(struct gk20a *g, return err; if (class == MAXWELL_COMPUTE_B) - (*gr_ctx)->compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CTA; + gr_ctx->compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CTA; gk20a_dbg_fn("done"); @@ -866,15 +873,21 @@ int gr_gm20b_alloc_gr_ctx(struct gk20a *g, } void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct channel_gk20a *c, struct nvgpu_mem *mem) { - struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx; u32 cta_preempt_option = ctxsw_prog_main_image_preemption_options_control_cta_enabled_f(); gk20a_dbg_fn(""); + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return; + + gr_ctx = &tsg->gr_ctx; if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) { gk20a_dbg_info("CTA: %x", cta_preempt_option); nvgpu_mem_wr(g, mem, @@ -1026,16 +1039,22 @@ int gr_gm20b_dump_gr_status_regs(struct gk20a *g, int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, bool enable) { - struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx; struct nvgpu_mem *mem; u32 v; gk20a_dbg_fn(""); - if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr) + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + gr_ctx = &tsg->gr_ctx; + mem = &gr_ctx->mem; + if (!nvgpu_mem_is_valid(mem) || c->vpr) return -EINVAL; - mem = &ch_ctx->gr_ctx->mem; if (nvgpu_mem_begin(c->g, mem)) return -ENOMEM; @@ -1289,12 +1308,19 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g, { u32 gpc, tpc, offset; struct gr_gk20a *gr = &g->gr; - struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *ch_ctx; u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); int err = 0; + tsg = tsg_gk20a_from_ch(ch); + if (!tsg) + return -EINVAL; + + ch_ctx = &tsg->gr_ctx; + nvgpu_mutex_acquire(&g->dbg_sessions_lock); gr->sm_error_states[sm_id].hww_global_esr = diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h index 18e6b032..bddf6412 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h @@ -46,7 +46,7 @@ enum { #define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0 void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *ch_ctx, u64 addr, bool patch); int gr_gm20b_init_fs_state(struct gk20a *g); int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask); @@ -57,12 +57,12 @@ void gr_gm20b_bundle_cb_defaults(struct gk20a *g); void gr_gm20b_cb_size_default(struct gk20a *g); int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g); void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, - struct 
channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *ch_ctx, u64 addr, u64 size, bool patch); int gr_gm20b_commit_global_cb_manager(struct gk20a *g, struct channel_gk20a *c, bool patch); void gr_gm20b_commit_global_pagepool(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *ch_ctx, u64 addr, u32 size, bool patch); int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, u32 class_num, u32 offset, u32 data); @@ -96,11 +96,11 @@ int gr_gm20b_load_ctxsw_ucode(struct gk20a *g); void gr_gm20b_detect_sm_arch(struct gk20a *g); u32 gr_gm20b_pagepool_default_size(struct gk20a *g); int gr_gm20b_alloc_gr_ctx(struct gk20a *g, - struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 flags); void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct channel_gk20a *c, struct nvgpu_mem *mem); int gr_gm20b_dump_gr_status_regs(struct gk20a *g, struct gk20a_debug_output *o); diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index c29f7267..3ee22ed1 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -226,7 +226,6 @@ static const struct gpu_ops gm20b_ops = { .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask, .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, - .free_channel_ctx = gk20a_free_channel_ctx, .alloc_obj_ctx = gk20a_alloc_obj_ctx, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .get_zcull_info = gr_gk20a_get_zcull_info, diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c index bedc0b78..02cecf53 100644 --- a/drivers/gpu/nvgpu/gp106/gr_gp106.c +++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c @@ -135,7 +135,7 @@ void gr_gp106_cb_size_default(struct gk20a *g) } int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, - struct gr_ctx_desc *gr_ctx, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 graphics_preempt_mode, u32 compute_preempt_mode) diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.h b/drivers/gpu/nvgpu/gp106/gr_gp106.h index 9f76e4ac..491ced4e 100644 --- a/drivers/gpu/nvgpu/gp106/gr_gp106.h +++ b/drivers/gpu/nvgpu/gp106/gr_gp106.h @@ -38,7 +38,7 @@ int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr, u32 class_num, u32 offset, u32 data); void gr_gp106_cb_size_default(struct gk20a *g); int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, - struct gr_ctx_desc *gr_ctx, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 graphics_preempt_mode, u32 compute_preempt_mode); diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 1498d1c0..3073668e 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -272,7 +272,6 @@ static const struct gpu_ops gp106_ops = { .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, - .free_channel_ctx = gk20a_free_channel_ctx, .alloc_obj_ctx = gk20a_alloc_obj_ctx, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .get_zcull_info = gr_gk20a_get_zcull_info, diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 56acc732..549a4da4 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -389,9 +389,9 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, int gr_gp10b_commit_global_cb_manager(struct gk20a *g, struct 
channel_gk20a *c, bool patch) { + struct tsg_gk20a *tsg; struct gr_gk20a *gr = &g->gr; - struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; - struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; + struct nvgpu_gr_ctx *gr_ctx; u32 attrib_offset_in_chunk = 0; u32 alpha_offset_in_chunk = 0; u32 pd_ab_max_output; @@ -405,6 +405,12 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, gk20a_dbg_fn(""); + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + gr_ctx = &tsg->gr_ctx; + if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { attrib_size_in_chunk = gr->attrib_cb_gfxp_size; cb_attrib_cache_size_init = gr->attrib_cb_gfxp_default_size; @@ -413,9 +419,9 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, cb_attrib_cache_size_init = gr->attrib_cb_default_size; } - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_beta_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_beta_r(), gr->attrib_cb_default_size, patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_alpha_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_alpha_r(), gr->alpha_cb_default_size, patch); pd_ab_max_output = (gr->alpha_cb_default_size * @@ -423,11 +429,11 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, gr_pd_ab_dist_cfg1_max_output_granularity_v(); if (g->gr.pd_max_batches) { - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(), gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | gr_pd_ab_dist_cfg1_max_batches_f(g->gr.pd_max_batches), patch); } else { - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(), gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); } @@ -447,17 +453,17 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, cbm_cfg_size_steadystate = gr->attrib_cb_default_size * gr->pes_tpc_count[ppc_index][gpc_index]; - gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + ppc_in_gpc_stride * ppc_index, cbm_cfg_size_beta, patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + ppc_in_gpc_stride * ppc_index, attrib_offset_in_chunk, patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp + ppc_in_gpc_stride * ppc_index, cbm_cfg_size_steadystate, @@ -466,12 +472,12 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, attrib_offset_in_chunk += attrib_size_in_chunk * gr->pes_tpc_count[ppc_index][gpc_index]; - gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + ppc_in_gpc_stride * ppc_index, cbm_cfg_size_alpha, patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + ppc_in_gpc_stride * ppc_index, alpha_offset_in_chunk, patch); @@ -479,7 +485,7 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, alpha_offset_in_chunk += gr->alpha_cb_size * gr->pes_tpc_count[ppc_index][gpc_index]; - gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate), patch); @@ -490,20 +496,20 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, } void 
gr_gp10b_commit_global_pagepool(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *gr_ctx, u64 addr, u32 size, bool patch) { - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(), gr_scc_pagepool_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(), gr_scc_pagepool_total_pages_f(size) | gr_scc_pagepool_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(), gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(), gr_gpcs_gcc_pagepool_total_pages_f(size), patch); } @@ -947,7 +953,7 @@ fail_free: } int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, - struct gr_ctx_desc *gr_ctx, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 graphics_preempt_mode, u32 compute_preempt_mode) @@ -1071,7 +1077,7 @@ fail: } int gr_gp10b_alloc_gr_ctx(struct gk20a *g, - struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 flags) { @@ -1085,7 +1091,7 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g, if (err) return err; - (*gr_ctx)->ctx_id_valid = false; + gr_ctx->ctx_id_valid = false; if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; @@ -1094,7 +1100,7 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g, if (graphics_preempt_mode || compute_preempt_mode) { if (g->ops.gr.set_ctxsw_preemption_mode) { - err = g->ops.gr.set_ctxsw_preemption_mode(g, *gr_ctx, vm, + err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class, graphics_preempt_mode, compute_preempt_mode); if (err) { nvgpu_err(g, "set_ctxsw_preemption_mode failed"); @@ -1109,14 +1115,13 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g, return 0; fail_free_gk20a_ctx: - gr_gk20a_free_gr_ctx(g, vm, *gr_ctx); - *gr_ctx = NULL; + gr_gk20a_free_gr_ctx(g, vm, gr_ctx); return err; } void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm, - struct gr_ctx_desc *gr_ctx) + struct nvgpu_gr_ctx *gr_ctx) { struct nvgpu_mem *mem = &gr_ctx->mem; @@ -1168,13 +1173,13 @@ void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm, } void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct channel_gk20a *c, struct nvgpu_mem *mem) { - struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; - struct ctx_header_desc *ctx = &ch_ctx->ctx_header; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx; + struct ctx_header_desc *ctx = &c->ctx_header; struct nvgpu_mem *ctxheader = &ctx->mem; - u32 gfxp_preempt_option = ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); u32 cilp_preempt_option = @@ -1185,6 +1190,12 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, gk20a_dbg_fn(""); + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return; + + gr_ctx = &tsg->gr_ctx; + if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); nvgpu_mem_wr(g, mem, @@ -1220,7 +1231,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, gr_ctx->preempt_ctxsw_buffer.gpu_va); } - err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true); + err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true); if (err) { 
nvgpu_err(g, "can't map patch context"); goto out; @@ -1232,7 +1243,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); gk20a_dbg_info("attrib cb addr : 0x%016x", addr); - g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true); + g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, true); addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >> gr_scc_pagepool_base_addr_39_8_align_bits_v()) | @@ -1243,7 +1254,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, if (size == g->ops.gr.pagepool_default_size(g)) size = gr_scc_pagepool_total_pages_hwmax_v(); - g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true); + g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, true); addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >> gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) | @@ -1252,28 +1263,28 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, size = gr_ctx->spill_ctxsw_buffer.size / gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); - gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpc0_swdx_rm_spill_buffer_addr_r(), gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), true); - gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpc0_swdx_rm_spill_buffer_size_r(), gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), true); cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); - gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_beta_cb_ctrl_r(), gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( cbes_reserve), true); - gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( cbes_reserve), true); - gr_gk20a_ctx_patch_write_end(g, ch_ctx, true); + gr_gk20a_ctx_patch_write_end(g, gr_ctx, true); } out: @@ -1478,10 +1489,9 @@ int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms, } void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *gr_ctx, u64 addr, bool patch) { - struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; int attrBufferSize; if (gr_ctx->preempt_ctxsw_buffer.gpu_va) @@ -1491,37 +1501,37 @@ void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f(); - gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch); + gr_gm20b_commit_global_attrib_cb(g, gr_ctx, addr, patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) | gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); } void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *gr_ctx, u64 addr, u64 size, bool patch) { u32 data; - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, 
gr_scc_bundle_cb_base_r(), gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_size_r(), gr_scc_bundle_cb_size_div_256b_f(size) | gr_scc_bundle_cb_size_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_base_r(), gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_size_r(), gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) | gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); @@ -1535,7 +1545,7 @@ void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, gk20a_dbg_info("bundle cb token limit : %d, state limit : %d", g->gr.bundle_cb_token_limit, data); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg2_r(), gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | gr_pd_ab_dist_cfg2_state_limit_f(data), patch); } @@ -1706,14 +1716,17 @@ int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g, struct channel_gk20a *fault_ch) { int ret; - struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx; struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx; gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); - if (!gr_ctx) + tsg = tsg_gk20a_from_ch(fault_ch); + if (!tsg) return -EINVAL; + gr_ctx = &tsg->gr_ctx; + if (gr_ctx->cilp_preempt_pending) { gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP is already pending for chid %d", @@ -1783,13 +1796,17 @@ int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g, static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g, struct channel_gk20a *fault_ch) { - struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx; gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); - if (!gr_ctx) + tsg = tsg_gk20a_from_ch(fault_ch); + if (!tsg) return -EINVAL; + gr_ctx = &tsg->gr_ctx; + /* The ucode is self-clearing, so all we need to do here is to clear cilp_preempt_pending. 
*/ if (!gr_ctx->cilp_preempt_pending) { @@ -1820,13 +1837,19 @@ int gr_gp10b_pre_process_sm_exception(struct gk20a *g, u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; + struct tsg_gk20a *tsg; *early_exit = false; *ignore_debugger = false; - if (fault_ch) - cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode == + if (fault_ch) { + tsg = tsg_gk20a_from_ch(fault_ch); + if (!tsg) + return -EINVAL; + + cilp_enabled = (tsg->gr_ctx.compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP); + } gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n", gpc, tpc, global_esr); @@ -1911,8 +1934,9 @@ int gr_gp10b_pre_process_sm_exception(struct gk20a *g, static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid) { - struct gr_ctx_desc *gr_ctx; + struct nvgpu_gr_ctx *gr_ctx; struct channel_gk20a *ch; + struct tsg_gk20a *tsg; int chid; int ret = -EINVAL; @@ -1922,7 +1946,11 @@ static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid) if (!ch) return ret; - gr_ctx = ch->ch_ctx.gr_ctx; + tsg = tsg_gk20a_from_ch(ch); + if (!tsg) + return -EINVAL; + + gr_ctx = &tsg->gr_ctx; if (gr_ctx->cilp_preempt_pending) { *__chid = chid; @@ -2022,11 +2050,17 @@ static bool gr_gp10b_suspend_context(struct channel_gk20a *ch, bool *cilp_preempt_pending) { struct gk20a *g = ch->g; - struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; - struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx; bool ctx_resident = false; int err = 0; + tsg = tsg_gk20a_from_ch(ch); + if (!tsg) + return -EINVAL; + + gr_ctx = &tsg->gr_ctx; + *cilp_preempt_pending = false; if (gk20a_is_channel_ctx_resident(ch)) { @@ -2097,15 +2131,22 @@ int gr_gp10b_suspend_contexts(struct gk20a *g, nvgpu_mutex_release(&g->dbg_sessions_lock); if (cilp_preempt_pending_ch) { - struct channel_ctx_gk20a *ch_ctx = - &cilp_preempt_pending_ch->ch_ctx; - struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx; struct nvgpu_timeout timeout; gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP preempt pending, waiting %lu msecs for preemption", gk20a_get_gr_idle_timeout(g)); + tsg = tsg_gk20a_from_ch(cilp_preempt_pending_ch); + if (!tsg) { + err = -EINVAL; + goto clean_up; + } + + gr_ctx = &tsg->gr_ctx; + nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), NVGPU_TIMER_CPU_TIMER); do { @@ -2130,12 +2171,19 @@ clean_up: int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch, bool boost) { - struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx; struct gk20a *g = ch->g; - struct nvgpu_mem *mem = &gr_ctx->mem; + struct nvgpu_mem *mem; int err = 0; + tsg = tsg_gk20a_from_ch(ch); + if (!tsg) + return -EINVAL; + + gr_ctx = &tsg->gr_ctx; gr_ctx->boosted_ctx = boost; + mem = &gr_ctx->mem; if (nvgpu_mem_begin(g, mem)) return -ENOMEM; @@ -2162,7 +2210,7 @@ unmap_ctx: } void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, - struct gr_ctx_desc *gr_ctx) { + struct nvgpu_gr_ctx *gr_ctx) { u32 v; v = ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f( @@ -2174,13 +2222,12 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, u32 graphics_preempt_mode, u32 compute_preempt_mode) { - struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; - struct channel_ctx_gk20a *ch_ctx = 
&ch->ch_ctx; + struct nvgpu_gr_ctx *gr_ctx; struct gk20a *g = ch->g; struct tsg_gk20a *tsg; struct vm_gk20a *vm; - struct nvgpu_mem *mem = &gr_ctx->mem; - struct ctx_header_desc *ctx = &ch->ch_ctx.ctx_header; + struct nvgpu_mem *mem; + struct ctx_header_desc *ctx = &ch->ctx_header; struct nvgpu_mem *ctxheader = &ctx->mem; u32 class; int err = 0; @@ -2189,12 +2236,13 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, if (!class) return -EINVAL; - if (gk20a_is_channel_marked_as_tsg(ch)) { - tsg = &g->fifo.tsg[ch->tsgid]; - vm = tsg->vm; - } else { - vm = ch->vm; - } + tsg = tsg_gk20a_from_ch(ch); + if (!tsg) + return -EINVAL; + + vm = tsg->vm; + gr_ctx = &tsg->gr_ctx; + mem = &gr_ctx->mem; /* skip setting anything if both modes are already set */ if (graphics_preempt_mode && @@ -2241,15 +2289,15 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, if (g->ops.gr.update_ctxsw_preemption_mode) { g->ops.gr.update_ctxsw_preemption_mode(ch->g, - ch_ctx, mem); + ch, mem); - err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true); + err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true); if (err) { nvgpu_err(g, "can't map patch context"); goto enable_ch; } g->ops.gr.commit_global_cb_manager(g, ch, true); - gr_gk20a_ctx_patch_write_end(g, ch_ctx, true); + gr_gk20a_ctx_patch_write_end(g, gr_ctx, true); } enable_ch: diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h index e3ef6304..8d553d37 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h @@ -29,9 +29,8 @@ struct gk20a; struct gr_gk20a_isr_data; -struct channel_ctx_gk20a; +struct nvgpu_gr_ctx; struct zbc_entry; -struct gr_ctx_desc; struct nvgpu_preemption_modes_rec; struct gk20a_debug_output; @@ -75,7 +74,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, int gr_gp10b_commit_global_cb_manager(struct gk20a *g, struct channel_gk20a *c, bool patch); void gr_gp10b_commit_global_pagepool(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *ch_ctx, u64 addr, u32 size, bool patch); u32 gr_gp10b_get_gpcs_swdx_dss_zbc_c_format_reg(struct gk20a *g); u32 gr_gp10b_get_gpcs_swdx_dss_zbc_z_format_reg(struct gk20a *g); @@ -93,28 +92,28 @@ void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data); void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data); int gr_gp10b_init_ctx_state(struct gk20a *g); int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, - struct gr_ctx_desc *gr_ctx, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 graphics_preempt_mode, u32 compute_preempt_mode); int gr_gp10b_alloc_gr_ctx(struct gk20a *g, - struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 flags); void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct channel_gk20a *c, struct nvgpu_mem *mem); int gr_gp10b_dump_gr_status_regs(struct gk20a *g, struct gk20a_debug_output *o); void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm, - struct gr_ctx_desc *gr_ctx); + struct nvgpu_gr_ctx *gr_ctx); int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms, u32 expect_delay); void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *ch_ctx, u64 addr, bool patch); void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *ch_ctx, u64 addr, u64 size, bool patch); int 
gr_gp10b_load_smid_config(struct gk20a *g); void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); @@ -133,7 +132,7 @@ int gr_gp10b_suspend_contexts(struct gk20a *g, int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch, bool boost); void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, - struct gr_ctx_desc *gr_ctx); + struct nvgpu_gr_ctx *gr_ctx); int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, u32 graphics_preempt_mode, u32 compute_preempt_mode); diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index aaee595d..7041c5bd 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -236,7 +236,6 @@ static const struct gpu_ops gp10b_ops = { .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, - .free_channel_ctx = gk20a_free_channel_ctx, .alloc_obj_ctx = gk20a_alloc_obj_ctx, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .get_zcull_info = gr_gk20a_get_zcull_info, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index b29a73d4..95d1f076 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -305,7 +305,6 @@ static const struct gpu_ops gv100_ops = { .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask, .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, - .free_channel_ctx = gk20a_free_channel_ctx, .alloc_obj_ctx = gk20a_alloc_obj_ctx, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .get_zcull_info = gr_gk20a_get_zcull_info, diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index d5924169..3030def8 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1373,7 +1373,7 @@ fail_free: } int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g, - struct gr_ctx_desc *gr_ctx, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 graphics_preempt_mode, u32 compute_preempt_mode) @@ -1497,13 +1497,13 @@ fail: } void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct channel_gk20a *c, struct nvgpu_mem *mem) { - struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; - struct ctx_header_desc *ctx = &ch_ctx->ctx_header; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx; + struct ctx_header_desc *ctx = &c->ctx_header; struct nvgpu_mem *ctxheader = &ctx->mem; - u32 gfxp_preempt_option = ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); u32 cilp_preempt_option = @@ -1514,6 +1514,12 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, gk20a_dbg_fn(""); + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return; + + gr_ctx = &tsg->gr_ctx; + if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); @@ -1552,7 +1558,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, gr_ctx->preempt_ctxsw_buffer.gpu_va); } - err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true); + err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true); if (err) { nvgpu_err(g, "can't map patch context"); goto out; @@ -1564,7 +1570,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); gk20a_dbg_info("attrib cb addr : 0x%016x", addr); - g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true); + g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, 
true); addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >> gr_scc_pagepool_base_addr_39_8_align_bits_v()) | @@ -1575,7 +1581,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, if (size == g->ops.gr.pagepool_default_size(g)) size = gr_scc_pagepool_total_pages_hwmax_v(); - g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true); + g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, true); addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >> gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) | @@ -1584,28 +1590,28 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, size = gr_ctx->spill_ctxsw_buffer.size / gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); - gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpc0_swdx_rm_spill_buffer_addr_r(), gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), true); - gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpc0_swdx_rm_spill_buffer_size_r(), gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), true); cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); - gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_beta_cb_ctrl_r(), gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( cbes_reserve), true); - gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( cbes_reserve), true); - gr_gk20a_ctx_patch_write_end(g, ch_ctx, true); + gr_gk20a_ctx_patch_write_end(g, gr_ctx, true); } out: @@ -1902,10 +1908,9 @@ int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms, } void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *gr_ctx, u64 addr, bool patch) { - struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; int attrBufferSize; if (gr_ctx->preempt_ctxsw_buffer.gpu_va) @@ -1915,16 +1920,16 @@ void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f(); - gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch); + gr_gm20b_commit_global_attrib_cb(g, gr_ctx, addr, patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch); - gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), + gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) | gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); } @@ -2042,6 +2047,7 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g, u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc) + gv11b_gr_sm_offset(g, sm); + struct tsg_gk20a *tsg; *early_exit = false; *ignore_debugger = false; @@ -2054,9 +2060,14 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g, return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm, warp_esr, fault_ch); - if (fault_ch) - cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode == + if (fault_ch) { + tsg = tsg_gk20a_from_ch(fault_ch); + if (!tsg) + return -EINVAL; + + cilp_enabled = 
(tsg->gr_ctx.compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP); + } gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d sm %d = 0x%08x", @@ -2509,7 +2520,7 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va) if (err) return err; - ctx = &c->ch_ctx.ctx_header; + ctx = &c->ctx_header; addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v(); addr_hi = u64_hi32(ctx->mem.gpu_va); @@ -2529,7 +2540,7 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va) int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) { - struct channel_ctx_gk20a *ch_ctx = NULL; + struct nvgpu_gr_ctx *ch_ctx = NULL; u32 pd_ab_dist_cfg0; u32 ds_debug; u32 mpc_vtg_debug; @@ -2836,11 +2847,18 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g, struct channel_gk20a *ch, u32 sm_id, struct nvgpu_gr_sm_error_state *sm_error_state) { + struct tsg_gk20a *tsg; u32 gpc, tpc, sm, offset; struct gr_gk20a *gr = &g->gr; - struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; + struct nvgpu_gr_ctx *ch_ctx; int err = 0; + tsg = tsg_gk20a_from_ch(ch); + if (!tsg) + return -EINVAL; + + ch_ctx = &tsg->gr_ctx; + nvgpu_mutex_acquire(&g->dbg_sessions_lock); gr->sm_error_states[sm_id].hww_global_esr = diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index b69e69bd..022a7698 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h @@ -41,9 +41,10 @@ struct zbc_s_table { }; struct gk20a; +struct gr_gk20a; struct zbc_entry; struct zbc_query_params; -struct channel_ctx_gk20a; +struct nvgpu_gr_ctx; struct nvgpu_warpstate; struct nvgpu_gr_sm_error_state; struct gr_ctx_desc; @@ -128,7 +129,7 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g, int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms, u32 expect_delay); void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_gr_ctx *ch_ctx, u64 addr, bool patch); void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); void gr_gv11b_get_access_map(struct gk20a *g, @@ -222,13 +223,13 @@ unsigned long gr_gv11b_get_max_gfxp_wfi_timeout_count(struct gk20a *g); void gr_gv11b_ecc_init_scrub_reg(struct gk20a *g); int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g, - struct gr_ctx_desc *gr_ctx, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, u32 graphics_preempt_mode, u32 compute_preempt_mode); void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, - struct channel_ctx_gk20a *ch_ctx, + struct channel_gk20a *ch_ctx, struct nvgpu_mem *mem); #endif diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index aa3d52af..0a552f5b 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -272,7 +272,6 @@ static const struct gpu_ops gv11b_ops = { .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, - .free_channel_ctx = gk20a_free_channel_ctx, .alloc_obj_ctx = gk20a_alloc_obj_ctx, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .get_zcull_info = gr_gk20a_get_zcull_info, diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c index fe1aa8a5..607fff91 100644 --- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c @@ -43,7 +43,7 @@ static void gv11b_subctx_commit_pdb(struct channel_gk20a *c, void gv11b_free_subctx_header(struct channel_gk20a *c) { - struct 
ctx_header_desc *ctx = &c->ch_ctx.ctx_header; + struct ctx_header_desc *ctx = &c->ctx_header; struct gk20a *g = c->g; nvgpu_log(g, gpu_dbg_fn, "gv11b_free_subctx_header"); @@ -57,13 +57,13 @@ void gv11b_free_subctx_header(struct channel_gk20a *c) int gv11b_alloc_subctx_header(struct channel_gk20a *c) { - struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; + struct ctx_header_desc *ctx = &c->ctx_header; struct gk20a *g = c->g; int ret = 0; nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header"); - if (ctx->mem.gpu_va == 0) { + if (!nvgpu_mem_is_valid(&ctx->mem)) { ret = nvgpu_dma_alloc_flags_sys(g, 0, /* No Special flags */ ctxsw_prog_fecs_header_v(), @@ -111,20 +111,50 @@ static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va) { - struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; + struct ctx_header_desc *ctx = &c->ctx_header; struct nvgpu_mem *gr_mem; struct gk20a *g = c->g; int ret = 0; u32 addr_lo, addr_hi; + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx; - addr_lo = u64_lo32(gpu_va); - addr_hi = u64_hi32(gpu_va); + tsg = tsg_gk20a_from_ch(c); + if (!tsg) + return -EINVAL; + + gr_ctx = &tsg->gr_ctx; gr_mem = &ctx->mem; g->ops.mm.l2_flush(g, true); if (nvgpu_mem_begin(g, gr_mem)) return -ENOMEM; + /* set priv access map */ + addr_lo = u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); + addr_hi = u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); + nvgpu_mem_wr(g, gr_mem, + ctxsw_prog_main_image_priv_access_map_addr_lo_o(), + addr_lo); + nvgpu_mem_wr(g, gr_mem, + ctxsw_prog_main_image_priv_access_map_addr_hi_o(), + addr_hi); + + addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va); + addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va); + nvgpu_mem_wr(g, gr_mem, + ctxsw_prog_main_image_patch_adr_lo_o(), + addr_lo); + nvgpu_mem_wr(g, gr_mem, + ctxsw_prog_main_image_patch_adr_hi_o(), + addr_hi); + + g->ops.gr.write_pm_ptr(g, gr_mem, gr_ctx->pm_ctx.mem.gpu_va); + g->ops.gr.write_zcull_ptr(g, gr_mem, gr_ctx->zcull_ctx.gpu_va); + + addr_lo = u64_lo32(gpu_va); + addr_hi = u64_hi32(gpu_va); + nvgpu_mem_wr(g, gr_mem, ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi); nvgpu_mem_wr(g, gr_mem, -- cgit v1.2.2
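
For reference, below is a minimal sketch (not part of the commit) of the access pattern this change establishes, assuming the nvgpu driver headers that declare struct channel_gk20a, struct tsg_gk20a, struct nvgpu_gr_ctx and tsg_gk20a_from_ch(). The helper name is hypothetical; its body mirrors the callers converted in the hunks above: resolve the owning TSG from the channel, then use the embedded tsg->gr_ctx rather than the removed ch->ch_ctx.gr_ctx pointer.

/*
 * Illustrative sketch only -- not taken from the commit above.
 * Assumes the nvgpu headers providing channel_gk20a, tsg_gk20a,
 * nvgpu_gr_ctx and tsg_gk20a_from_ch().
 */
#include <linux/errno.h>

static int example_query_preempt_modes(struct channel_gk20a *ch,
				       u32 *graphics_mode, u32 *compute_mode)
{
	struct tsg_gk20a *tsg;
	struct nvgpu_gr_ctx *gr_ctx;

	/* Channels without a TSG no longer carry a graphics context. */
	tsg = tsg_gk20a_from_ch(ch);
	if (!tsg)
		return -EINVAL;

	/* The context is now embedded in the TSG, not hung off the channel. */
	gr_ctx = &tsg->gr_ctx;

	*graphics_mode = gr_ctx->graphics_preempt_mode;
	*compute_mode = gr_ctx->compute_preempt_mode;
	return 0;
}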