From 6eeabfbdd08e48f924885952c80ff41aa2b534b7 Mon Sep 17 00:00:00 2001
From: Peter Daifuku
Date: Wed, 23 Mar 2016 09:43:43 -0700
Subject: gpu: nvgpu: vgpu: virtualized SMPC/HWPM ctx switch

Add support for SMPC and HWPM context switching when virtualized

Bug 1648200
JIRASW EVLR-219
JIRASW EVLR-253

Change-Id: I80a1613eaad87d8510f00d9aef001400d642ecdf
Signed-off-by: Peter Daifuku
Reviewed-on: http://git-master/r/1122034
Reviewed-by: Terje Bergstrom
Tested-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c |  8 ++--
 drivers/gpu/nvgpu/gk20a/gk20a.h         |  6 +++
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c      |  2 +
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h      |  1 +
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c      |  2 +
 drivers/gpu/nvgpu/vgpu/gr_vgpu.c        | 83 +++++++++++++++++++++++++++++++++
 include/linux/tegra_vgpu.h              | 25 +++++++++-
 7 files changed, 122 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 321cebb2..309fe75a 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -891,8 +891,8 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 		goto clean_up;
 	}
 
-	err = gr_gk20a_update_smpc_ctxsw_mode(g, ch_gk20a,
-		args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
+	err = g->ops.gr.update_smpc_ctxsw_mode(g, ch_gk20a,
+		args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
 	if (err) {
 		gk20a_err(dev_from_gk20a(g),
 			"error (%d) during smpc ctxsw mode update\n", err);
@@ -927,8 +927,8 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 		goto clean_up;
 	}
 
-	err = gr_gk20a_update_hwpm_ctxsw_mode(g, ch_gk20a,
-		args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW);
+	err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a,
+		args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW);
 	if (err)
 		gk20a_err(dev_from_gk20a(g),
 			"error (%d) during pm ctxsw mode update\n", err);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 9d8dc5f7..d9cc3d4f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -172,6 +172,12 @@ struct gpu_ops {
 		void (*update_ctxsw_preemption_mode)(struct gk20a *g,
 				struct channel_ctx_gk20a *ch_ctx,
 				void *ctx_ptr);
+		int (*update_smpc_ctxsw_mode)(struct gk20a *g,
+				struct channel_gk20a *c,
+				bool enable);
+		int (*update_hwpm_ctxsw_mode)(struct gk20a *g,
+				struct channel_gk20a *c,
+				bool enable);
 		int (*dump_gr_regs)(struct gk20a *g,
 				struct gk20a_debug_output *o);
 		int (*update_pc_sampling)(struct channel_gk20a *ch,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index b0b5571f..ada67edd 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -8308,4 +8308,6 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.handle_sm_exception = gr_gk20a_handle_sm_exception;
 	gops->gr.handle_tex_exception = gr_gk20a_handle_tex_exception;
 	gops->gr.get_lrf_tex_ltc_dram_override = NULL;
+	gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
+	gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 52d6c4e5..5390536e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -76,6 +76,7 @@ struct zcull_ctx_desc {
 struct pm_ctx_desc {
 	struct mem_desc mem;
 	u32 pm_mode;
+	bool ctx_was_enabled; /* Used in the virtual case only */
 };
 
 struct gk20a;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 67654cba..6a79b1ac 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1233,4 +1233,6 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.handle_sm_exception = gr_gk20a_handle_sm_exception;
 	gops->gr.handle_tex_exception = gr_gk20a_handle_tex_exception;
 	gops->gr.get_lrf_tex_ltc_dram_override = NULL;
+	gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
+	gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
 }
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index 32e451ed..16d51ad3 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -402,12 +402,36 @@ static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c)
 	}
 }
 
+static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_channel_free_hwpm_ctx *p = &msg.params.free_hwpm_ctx;
+	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	/* check if hwpm was ever initialized. If not, nothing to do */
+	if (ch_ctx->pm_ctx.ctx_was_enabled == false)
+		return;
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX;
+	msg.handle = platform->virt_handle;
+	p->handle = c->virt_ctx;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	WARN_ON(err || msg.ret);
+
+	ch_ctx->pm_ctx.ctx_was_enabled = false;
+}
+
 static void vgpu_gr_free_channel_ctx(struct channel_gk20a *c)
 {
 	gk20a_dbg_fn("");
 
 	vgpu_gr_unmap_global_ctx_buffers(c);
 	vgpu_gr_free_channel_patch_ctx(c);
+	vgpu_gr_free_channel_pm_ctx(c);
 	if (!gk20a_is_channel_marked_as_tsg(c))
 		vgpu_gr_free_channel_gr_ctx(c);
 
@@ -950,6 +974,63 @@ static int vgpu_gr_set_sm_debug_mode(struct gk20a *g,
 	return err ? err : msg.ret;
 }
 
+static int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
+	struct channel_gk20a *ch, bool enable)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_SMPC_CTXSW_MODE;
+	msg.handle = platform->virt_handle;
+	p->handle = ch->virt_ctx;
+
+	if (enable)
+		p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
+	else
+		p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
+
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	WARN_ON(err || msg.ret);
+
+	return err ? err : msg.ret;
+}
+
+static int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
+	struct channel_gk20a *ch, bool enable)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE;
+	msg.handle = platform->virt_handle;
+	p->handle = ch->virt_ctx;
+
+	/* If we just enabled HWPM context switching, flag this
+	 * so we know we need to free the buffer when channel contexts
+	 * are cleaned up.
+	 */
+	if (enable) {
+		struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
+		ch_ctx->pm_ctx.ctx_was_enabled = true;
+
+		p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
+	} else
+		p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
+
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	WARN_ON(err || msg.ret);
+
+	return err ? err : msg.ret;
+}
+
 void vgpu_init_gr_ops(struct gpu_ops *gops)
 {
 	gops->gr.free_channel_ctx = vgpu_gr_free_channel_ctx;
@@ -969,4 +1050,6 @@ void vgpu_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.zbc_query_table = vgpu_gr_query_zbc;
 	gops->gr.init_ctx_state = vgpu_gr_init_ctx_state;
 	gops->gr.set_sm_debug_mode = vgpu_gr_set_sm_debug_mode;
+	gops->gr.update_smpc_ctxsw_mode = vgpu_gr_update_smpc_ctxsw_mode;
+	gops->gr.update_hwpm_ctxsw_mode = vgpu_gr_update_hwpm_ctxsw_mode;
 }
diff --git a/include/linux/tegra_vgpu.h b/include/linux/tegra_vgpu.h
index c4dd81dd..979d454e 100644
--- a/include/linux/tegra_vgpu.h
+++ b/include/linux/tegra_vgpu.h
@@ -76,7 +76,14 @@ enum {
 	TEGRA_VGPU_CMD_REG_OPS,
 	TEGRA_VGPU_CMD_CHANNEL_SET_PRIORITY,
 	TEGRA_VGPU_CMD_CHANNEL_SET_RUNLIST_INTERLEAVE,
-	TEGRA_VGPU_CMD_CHANNEL_SET_TIMESLICE
+	TEGRA_VGPU_CMD_CHANNEL_SET_TIMESLICE,
+	RESVD1,
+	RESVD2,
+	RESVD3,
+	RESVD4,
+	TEGRA_VGPU_CMD_CHANNEL_SET_SMPC_CTXSW_MODE,
+	TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE,
+	TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX,
 };
 
 struct tegra_vgpu_connect_params {
@@ -312,6 +319,20 @@ struct tegra_vgpu_channel_timeslice_params {
 	u32 timeslice_us;
 };
 
+enum {
+	TEGRA_VGPU_CTXSW_MODE_NO_CTXSW = 0,
+	TEGRA_VGPU_CTXSW_MODE_CTXSW,
+};
+
+struct tegra_vgpu_channel_set_ctxsw_mode {
+	u64 handle;
+	u32 mode;
+};
+
+struct tegra_vgpu_channel_free_hwpm_ctx {
+	u64 handle;
+};
+
 struct tegra_vgpu_cmd_msg {
 	u32 cmd;
 	int ret;
@@ -342,6 +363,8 @@ struct tegra_vgpu_cmd_msg {
 		struct tegra_vgpu_channel_priority_params channel_priority;
 		struct tegra_vgpu_channel_runlist_interleave_params channel_interleave;
 		struct tegra_vgpu_channel_timeslice_params channel_timeslice;
+		struct tegra_vgpu_channel_set_ctxsw_mode set_ctxsw_mode;
+		struct tegra_vgpu_channel_free_hwpm_ctx free_hwpm_ctx;
 		char padding[192];
 	} params;
 };
-- 
cgit v1.2.2