diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2017-12-15 12:04:15 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-01-17 15:29:09 -0500 |
commit | 2f6698b863c9cc1db6455637b7c72e812b470b93 (patch) | |
tree | d0c8abf32d6994b9f54bf5eddafd8316e038c829 /drivers/gpu/nvgpu/common/linux | |
parent | 6a73114788ffafe4c53771c707ecbd9c9ea0a117 (diff) |
gpu: nvgpu: Make graphics context property of TSG
Move graphics context ownership to the TSG instead of the channel. Combine
channel_ctx_gk20a and gr_ctx_desc into one structure, because the split
between them was arbitrary. Move the context header to be a property of the
channel.
Bug 1842197
Change-Id: I410e3262f80b318d8528bcbec270b63a2d8d2ff9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1639532
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux')
11 files changed, 195 insertions, 217 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c index ad157ee7..aeab0c92 100644 --- a/drivers/gpu/nvgpu/common/linux/debug_fifo.c +++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.c | |||
@@ -91,8 +91,8 @@ static int gk20a_fifo_sched_debugfs_seq_show( | |||
91 | tsg->timeslice_us, | 91 | tsg->timeslice_us, |
92 | ch->timeout_ms_max, | 92 | ch->timeout_ms_max, |
93 | tsg->interleave_level, | 93 | tsg->interleave_level, |
94 | ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX, | 94 | tsg->gr_ctx.graphics_preempt_mode, |
95 | ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX); | 95 | tsg->gr_ctx.compute_preempt_mode); |
96 | gk20a_channel_put(ch); | 96 | gk20a_channel_put(ch); |
97 | } | 97 | } |
98 | return 0; | 98 | return 0; |
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c index 94501a89..e8f4c14b 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c | |||
@@ -85,10 +85,10 @@ static void gk20a_channel_trace_sched_param( | |||
85 | tsg_gk20a_from_ch(ch)->timeslice_us, | 85 | tsg_gk20a_from_ch(ch)->timeslice_us, |
86 | ch->timeout_ms_max, | 86 | ch->timeout_ms_max, |
87 | gk20a_fifo_interleave_level_name(tsg->interleave_level), | 87 | gk20a_fifo_interleave_level_name(tsg->interleave_level), |
88 | gr_gk20a_graphics_preempt_mode_name(ch->ch_ctx.gr_ctx ? | 88 | gr_gk20a_graphics_preempt_mode_name( |
89 | ch->ch_ctx.gr_ctx->graphics_preempt_mode : 0), | 89 | tsg->gr_ctx.graphics_preempt_mode), |
90 | gr_gk20a_compute_preempt_mode_name(ch->ch_ctx.gr_ctx ? | 90 | gr_gk20a_compute_preempt_mode_name( |
91 | ch->ch_ctx.gr_ctx->compute_preempt_mode : 0)); | 91 | tsg->gr_ctx.compute_preempt_mode)); |
92 | } | 92 | } |
93 | 93 | ||
94 | /* | 94 | /* |
diff --git a/drivers/gpu/nvgpu/common/linux/sched.c b/drivers/gpu/nvgpu/common/linux/sched.c index fc3f6ed8..e6211790 100644 --- a/drivers/gpu/nvgpu/common/linux/sched.c +++ b/drivers/gpu/nvgpu/common/linux/sched.c | |||
@@ -198,15 +198,10 @@ static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched, | |||
198 | arg->runlist_interleave = tsg->interleave_level; | 198 | arg->runlist_interleave = tsg->interleave_level; |
199 | arg->timeslice = tsg->timeslice_us; | 199 | arg->timeslice = tsg->timeslice_us; |
200 | 200 | ||
201 | if (tsg->tsg_gr_ctx) { | 201 | arg->graphics_preempt_mode = |
202 | arg->graphics_preempt_mode = | 202 | tsg->gr_ctx.graphics_preempt_mode; |
203 | tsg->tsg_gr_ctx->graphics_preempt_mode; | 203 | arg->compute_preempt_mode = |
204 | arg->compute_preempt_mode = | 204 | tsg->gr_ctx.compute_preempt_mode; |
205 | tsg->tsg_gr_ctx->compute_preempt_mode; | ||
206 | } else { | ||
207 | arg->graphics_preempt_mode = 0; | ||
208 | arg->compute_preempt_mode = 0; | ||
209 | } | ||
210 | 205 | ||
211 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | 206 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); |
212 | 207 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c index ed61f16b..9adf20d1 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c | |||
@@ -27,12 +27,11 @@ | |||
27 | #include <nvgpu/hw/gp10b/hw_gr_gp10b.h> | 27 | #include <nvgpu/hw/gp10b/hw_gr_gp10b.h> |
28 | 28 | ||
29 | int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, | 29 | int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, |
30 | struct gr_ctx_desc **__gr_ctx, | 30 | struct nvgpu_gr_ctx *gr_ctx, |
31 | struct vm_gk20a *vm, | 31 | struct vm_gk20a *vm, |
32 | u32 class, | 32 | u32 class, |
33 | u32 flags) | 33 | u32 flags) |
34 | { | 34 | { |
35 | struct gr_ctx_desc *gr_ctx; | ||
36 | u32 graphics_preempt_mode = 0; | 35 | u32 graphics_preempt_mode = 0; |
37 | u32 compute_preempt_mode = 0; | 36 | u32 compute_preempt_mode = 0; |
38 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | 37 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); |
@@ -40,12 +39,10 @@ int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, | |||
40 | 39 | ||
41 | gk20a_dbg_fn(""); | 40 | gk20a_dbg_fn(""); |
42 | 41 | ||
43 | err = vgpu_gr_alloc_gr_ctx(g, __gr_ctx, vm, class, flags); | 42 | err = vgpu_gr_alloc_gr_ctx(g, gr_ctx, vm, class, flags); |
44 | if (err) | 43 | if (err) |
45 | return err; | 44 | return err; |
46 | 45 | ||
47 | gr_ctx = *__gr_ctx; | ||
48 | |||
49 | if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) | 46 | if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) |
50 | graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; | 47 | graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; |
51 | if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) | 48 | if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) |
@@ -84,7 +81,7 @@ fail: | |||
84 | } | 81 | } |
85 | 82 | ||
86 | int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, | 83 | int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, |
87 | struct gr_ctx_desc *gr_ctx, | 84 | struct nvgpu_gr_ctx *gr_ctx, |
88 | struct vm_gk20a *vm, u32 class, | 85 | struct vm_gk20a *vm, u32 class, |
89 | u32 graphics_preempt_mode, | 86 | u32 graphics_preempt_mode, |
90 | u32 compute_preempt_mode) | 87 | u32 compute_preempt_mode) |
@@ -240,7 +237,7 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
240 | u32 graphics_preempt_mode, | 237 | u32 graphics_preempt_mode, |
241 | u32 compute_preempt_mode) | 238 | u32 compute_preempt_mode) |
242 | { | 239 | { |
243 | struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; | 240 | struct nvgpu_gr_ctx *gr_ctx; |
244 | struct gk20a *g = ch->g; | 241 | struct gk20a *g = ch->g; |
245 | struct tsg_gk20a *tsg; | 242 | struct tsg_gk20a *tsg; |
246 | struct vm_gk20a *vm; | 243 | struct vm_gk20a *vm; |
@@ -251,6 +248,13 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
251 | if (!class) | 248 | if (!class) |
252 | return -EINVAL; | 249 | return -EINVAL; |
253 | 250 | ||
251 | tsg = tsg_gk20a_from_ch(ch); | ||
252 | if (!tsg) | ||
253 | return -EINVAL; | ||
254 | |||
255 | vm = tsg->vm; | ||
256 | gr_ctx = &tsg->gr_ctx; | ||
257 | |||
254 | /* skip setting anything if both modes are already set */ | 258 | /* skip setting anything if both modes are already set */ |
255 | if (graphics_preempt_mode && | 259 | if (graphics_preempt_mode && |
256 | (graphics_preempt_mode == gr_ctx->graphics_preempt_mode)) | 260 | (graphics_preempt_mode == gr_ctx->graphics_preempt_mode)) |
@@ -263,13 +267,6 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
263 | if (graphics_preempt_mode == 0 && compute_preempt_mode == 0) | 267 | if (graphics_preempt_mode == 0 && compute_preempt_mode == 0) |
264 | return 0; | 268 | return 0; |
265 | 269 | ||
266 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
267 | tsg = &g->fifo.tsg[ch->tsgid]; | ||
268 | vm = tsg->vm; | ||
269 | } else { | ||
270 | vm = ch->vm; | ||
271 | } | ||
272 | |||
273 | if (g->ops.gr.set_ctxsw_preemption_mode) { | 270 | if (g->ops.gr.set_ctxsw_preemption_mode) { |
274 | err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class, | 271 | err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class, |
275 | graphics_preempt_mode, | 272 | graphics_preempt_mode, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h index 31b88d19..559bd227 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h | |||
@@ -20,12 +20,12 @@ | |||
20 | #include "gk20a/gk20a.h" | 20 | #include "gk20a/gk20a.h" |
21 | 21 | ||
22 | int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, | 22 | int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, |
23 | struct gr_ctx_desc **__gr_ctx, | 23 | struct nvgpu_gr_ctx *gr_ctx, |
24 | struct vm_gk20a *vm, | 24 | struct vm_gk20a *vm, |
25 | u32 class, | 25 | u32 class, |
26 | u32 flags); | 26 | u32 flags); |
27 | int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, | 27 | int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, |
28 | struct gr_ctx_desc *gr_ctx, | 28 | struct nvgpu_gr_ctx *gr_ctx, |
29 | struct vm_gk20a *vm, u32 class, | 29 | struct vm_gk20a *vm, u32 class, |
30 | u32 graphics_preempt_mode, | 30 | u32 graphics_preempt_mode, |
31 | u32 compute_preempt_mode); | 31 | u32 compute_preempt_mode); |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c index e8cb96b4..d5fd5102 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c | |||
@@ -112,7 +112,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { | |||
112 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, | 112 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, |
113 | .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, | 113 | .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, |
114 | .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, | 114 | .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, |
115 | .free_channel_ctx = vgpu_gr_free_channel_ctx, | ||
116 | .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, | 115 | .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, |
117 | .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, | 116 | .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, |
118 | .get_zcull_info = vgpu_gr_get_zcull_info, | 117 | .get_zcull_info = vgpu_gr_get_zcull_info, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c index e8790587..8f1c5d78 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c | |||
@@ -20,14 +20,18 @@ | |||
20 | 20 | ||
21 | #include <nvgpu/kmem.h> | 21 | #include <nvgpu/kmem.h> |
22 | #include <nvgpu/bug.h> | 22 | #include <nvgpu/bug.h> |
23 | #include <nvgpu/dma.h> | ||
23 | #include <nvgpu/error_notifier.h> | 24 | #include <nvgpu/error_notifier.h> |
24 | #include <nvgpu/dma.h> | 25 | #include <nvgpu/dma.h> |
25 | 26 | ||
26 | #include "vgpu.h" | 27 | #include "vgpu.h" |
27 | #include "gr_vgpu.h" | 28 | #include "gr_vgpu.h" |
28 | #include "gk20a/dbg_gpu_gk20a.h" | 29 | #include "gk20a/dbg_gpu_gk20a.h" |
30 | #include "gk20a/channel_gk20a.h" | ||
31 | #include "gk20a/tsg_gk20a.h" | ||
29 | 32 | ||
30 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | 33 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> |
34 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> | ||
31 | 35 | ||
32 | void vgpu_gr_detect_sm_arch(struct gk20a *g) | 36 | void vgpu_gr_detect_sm_arch(struct gk20a *g) |
33 | { | 37 | { |
@@ -152,8 +156,9 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, | |||
152 | struct tegra_vgpu_cmd_msg msg; | 156 | struct tegra_vgpu_cmd_msg msg; |
153 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | 157 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; |
154 | struct vm_gk20a *ch_vm = c->vm; | 158 | struct vm_gk20a *ch_vm = c->vm; |
155 | u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; | 159 | struct tsg_gk20a *tsg; |
156 | u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; | 160 | u64 *g_bfr_va; |
161 | u64 *g_bfr_size; | ||
157 | struct gr_gk20a *gr = &g->gr; | 162 | struct gr_gk20a *gr = &g->gr; |
158 | u64 gpu_va; | 163 | u64 gpu_va; |
159 | u32 i; | 164 | u32 i; |
@@ -161,7 +166,12 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, | |||
161 | 166 | ||
162 | gk20a_dbg_fn(""); | 167 | gk20a_dbg_fn(""); |
163 | 168 | ||
164 | /* FIXME: add VPR support */ | 169 | tsg = tsg_gk20a_from_ch(c); |
170 | if (!tsg) | ||
171 | return -EINVAL; | ||
172 | |||
173 | g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va; | ||
174 | g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size; | ||
165 | 175 | ||
166 | /* Circular Buffer */ | 176 | /* Circular Buffer */ |
167 | gpu_va = __nvgpu_vm_alloc_va(ch_vm, | 177 | gpu_va = __nvgpu_vm_alloc_va(ch_vm, |
@@ -213,7 +223,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, | |||
213 | if (err || msg.ret) | 223 | if (err || msg.ret) |
214 | goto clean_up; | 224 | goto clean_up; |
215 | 225 | ||
216 | c->ch_ctx.global_ctx_buffer_mapped = true; | 226 | tsg->gr_ctx.global_ctx_buffer_mapped = true; |
217 | return 0; | 227 | return 0; |
218 | 228 | ||
219 | clean_up: | 229 | clean_up: |
@@ -227,40 +237,33 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, | |||
227 | return -ENOMEM; | 237 | return -ENOMEM; |
228 | } | 238 | } |
229 | 239 | ||
230 | static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c) | 240 | static void vgpu_gr_unmap_global_ctx_buffers(struct tsg_gk20a *tsg) |
231 | { | 241 | { |
232 | struct vm_gk20a *ch_vm = c->vm; | 242 | struct vm_gk20a *ch_vm = tsg->vm; |
233 | u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; | 243 | u64 *g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va; |
234 | u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; | 244 | u64 *g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size; |
235 | u32 i; | 245 | u32 i; |
236 | 246 | ||
237 | gk20a_dbg_fn(""); | 247 | gk20a_dbg_fn(""); |
238 | 248 | ||
239 | if (c->ch_ctx.global_ctx_buffer_mapped) { | 249 | if (tsg->gr_ctx.global_ctx_buffer_mapped) { |
240 | struct tegra_vgpu_cmd_msg msg; | 250 | /* server will unmap on channel close */ |
241 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | ||
242 | int err; | ||
243 | 251 | ||
244 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNMAP_GR_GLOBAL_CTX; | 252 | for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { |
245 | msg.handle = vgpu_get_handle(c->g); | 253 | if (g_bfr_va[i]) { |
246 | p->handle = c->virt_ctx; | 254 | __nvgpu_vm_free_va(ch_vm, g_bfr_va[i], |
247 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | 255 | gmmu_page_size_kernel); |
248 | WARN_ON(err || msg.ret); | 256 | g_bfr_va[i] = 0; |
249 | } | 257 | g_bfr_size[i] = 0; |
250 | 258 | } | |
251 | for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { | ||
252 | if (g_bfr_va[i]) { | ||
253 | __nvgpu_vm_free_va(ch_vm, g_bfr_va[i], | ||
254 | gmmu_page_size_kernel); | ||
255 | g_bfr_va[i] = 0; | ||
256 | g_bfr_size[i] = 0; | ||
257 | } | 259 | } |
260 | |||
261 | tsg->gr_ctx.global_ctx_buffer_mapped = false; | ||
258 | } | 262 | } |
259 | c->ch_ctx.global_ctx_buffer_mapped = false; | ||
260 | } | 263 | } |
261 | 264 | ||
262 | int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | 265 | int vgpu_gr_alloc_gr_ctx(struct gk20a *g, |
263 | struct gr_ctx_desc **__gr_ctx, | 266 | struct nvgpu_gr_ctx *gr_ctx, |
264 | struct vm_gk20a *vm, | 267 | struct vm_gk20a *vm, |
265 | u32 class, | 268 | u32 class, |
266 | u32 flags) | 269 | u32 flags) |
@@ -268,7 +271,6 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | |||
268 | struct tegra_vgpu_cmd_msg msg = {0}; | 271 | struct tegra_vgpu_cmd_msg msg = {0}; |
269 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; | 272 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; |
270 | struct gr_gk20a *gr = &g->gr; | 273 | struct gr_gk20a *gr = &g->gr; |
271 | struct gr_ctx_desc *gr_ctx; | ||
272 | int err; | 274 | int err; |
273 | 275 | ||
274 | gk20a_dbg_fn(""); | 276 | gk20a_dbg_fn(""); |
@@ -280,19 +282,14 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | |||
280 | gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; | 282 | gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; |
281 | gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; | 283 | gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; |
282 | 284 | ||
283 | gr_ctx = nvgpu_kzalloc(g, sizeof(*gr_ctx)); | ||
284 | if (!gr_ctx) | ||
285 | return -ENOMEM; | ||
286 | |||
287 | gr_ctx->mem.size = gr->ctx_vars.buffer_total_size; | ||
288 | gr_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(vm, | 285 | gr_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(vm, |
289 | gr_ctx->mem.size, | 286 | gr->ctx_vars.buffer_total_size, |
290 | gmmu_page_size_kernel); | 287 | gmmu_page_size_kernel); |
291 | 288 | ||
292 | if (!gr_ctx->mem.gpu_va) { | 289 | if (!gr_ctx->mem.gpu_va) |
293 | nvgpu_kfree(g, gr_ctx); | ||
294 | return -ENOMEM; | 290 | return -ENOMEM; |
295 | } | 291 | gr_ctx->mem.size = gr->ctx_vars.buffer_total_size; |
292 | gr_ctx->mem.aperture = APERTURE_SYSMEM; | ||
296 | 293 | ||
297 | msg.cmd = TEGRA_VGPU_CMD_GR_CTX_ALLOC; | 294 | msg.cmd = TEGRA_VGPU_CMD_GR_CTX_ALLOC; |
298 | msg.handle = vgpu_get_handle(g); | 295 | msg.handle = vgpu_get_handle(g); |
@@ -306,57 +303,19 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | |||
306 | nvgpu_err(g, "fail to alloc gr_ctx"); | 303 | nvgpu_err(g, "fail to alloc gr_ctx"); |
307 | __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, | 304 | __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, |
308 | gmmu_page_size_kernel); | 305 | gmmu_page_size_kernel); |
309 | nvgpu_kfree(g, gr_ctx); | 306 | gr_ctx->mem.aperture = APERTURE_INVALID; |
310 | } else { | 307 | } else { |
311 | gr_ctx->virt_ctx = p->gr_ctx_handle; | 308 | gr_ctx->virt_ctx = p->gr_ctx_handle; |
312 | *__gr_ctx = gr_ctx; | ||
313 | } | 309 | } |
314 | 310 | ||
315 | return err; | 311 | return err; |
316 | } | 312 | } |
317 | 313 | ||
318 | void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | ||
319 | struct gr_ctx_desc *gr_ctx) | ||
320 | { | ||
321 | struct tegra_vgpu_cmd_msg msg; | ||
322 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; | ||
323 | int err; | ||
324 | |||
325 | gk20a_dbg_fn(""); | ||
326 | |||
327 | if (!gr_ctx || !gr_ctx->mem.gpu_va) | ||
328 | return; | ||
329 | |||
330 | |||
331 | msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE; | ||
332 | msg.handle = vgpu_get_handle(g); | ||
333 | p->gr_ctx_handle = gr_ctx->virt_ctx; | ||
334 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
335 | WARN_ON(err || msg.ret); | ||
336 | |||
337 | __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, | ||
338 | gmmu_page_size_kernel); | ||
339 | |||
340 | nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); | ||
341 | nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); | ||
342 | nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); | ||
343 | nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); | ||
344 | |||
345 | nvgpu_kfree(g, gr_ctx); | ||
346 | } | ||
347 | |||
348 | static void vgpu_gr_free_channel_gr_ctx(struct channel_gk20a *c) | ||
349 | { | ||
350 | gk20a_dbg_fn(""); | ||
351 | |||
352 | c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx); | ||
353 | c->ch_ctx.gr_ctx = NULL; | ||
354 | } | ||
355 | |||
356 | static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, | 314 | static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, |
357 | struct channel_gk20a *c) | 315 | struct channel_gk20a *c) |
358 | { | 316 | { |
359 | struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; | 317 | struct tsg_gk20a *tsg; |
318 | struct patch_desc *patch_ctx; | ||
360 | struct vm_gk20a *ch_vm = c->vm; | 319 | struct vm_gk20a *ch_vm = c->vm; |
361 | struct tegra_vgpu_cmd_msg msg; | 320 | struct tegra_vgpu_cmd_msg msg; |
362 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | 321 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; |
@@ -364,6 +323,11 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, | |||
364 | 323 | ||
365 | gk20a_dbg_fn(""); | 324 | gk20a_dbg_fn(""); |
366 | 325 | ||
326 | tsg = tsg_gk20a_from_ch(c); | ||
327 | if (!tsg) | ||
328 | return -EINVAL; | ||
329 | |||
330 | patch_ctx = &tsg->gr_ctx.patch_ctx; | ||
367 | patch_ctx->mem.size = 128 * sizeof(u32); | 331 | patch_ctx->mem.size = 128 * sizeof(u32); |
368 | patch_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch_vm, | 332 | patch_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch_vm, |
369 | patch_ctx->mem.size, | 333 | patch_ctx->mem.size, |
@@ -385,37 +349,25 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, | |||
385 | return err; | 349 | return err; |
386 | } | 350 | } |
387 | 351 | ||
388 | static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c) | 352 | static void vgpu_gr_free_channel_patch_ctx(struct tsg_gk20a *tsg) |
389 | { | 353 | { |
390 | struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; | 354 | struct patch_desc *patch_ctx = &tsg->gr_ctx.patch_ctx; |
391 | struct vm_gk20a *ch_vm = c->vm; | ||
392 | 355 | ||
393 | gk20a_dbg_fn(""); | 356 | gk20a_dbg_fn(""); |
394 | 357 | ||
395 | if (patch_ctx->mem.gpu_va) { | 358 | if (patch_ctx->mem.gpu_va) { |
396 | struct tegra_vgpu_cmd_msg msg; | 359 | /* server will free on channel close */ |
397 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | ||
398 | int err; | ||
399 | 360 | ||
400 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_PATCH_CTX; | 361 | __nvgpu_vm_free_va(tsg->vm, patch_ctx->mem.gpu_va, |
401 | msg.handle = vgpu_get_handle(c->g); | ||
402 | p->handle = c->virt_ctx; | ||
403 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
404 | WARN_ON(err || msg.ret); | ||
405 | |||
406 | __nvgpu_vm_free_va(ch_vm, patch_ctx->mem.gpu_va, | ||
407 | gmmu_page_size_kernel); | 362 | gmmu_page_size_kernel); |
408 | patch_ctx->mem.gpu_va = 0; | 363 | patch_ctx->mem.gpu_va = 0; |
409 | } | 364 | } |
410 | } | 365 | } |
411 | 366 | ||
412 | static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c) | 367 | static void vgpu_gr_free_channel_pm_ctx(struct tsg_gk20a *tsg) |
413 | { | 368 | { |
414 | struct tegra_vgpu_cmd_msg msg; | 369 | struct nvgpu_gr_ctx *ch_ctx = &tsg->gr_ctx; |
415 | struct tegra_vgpu_channel_free_hwpm_ctx *p = &msg.params.free_hwpm_ctx; | ||
416 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | ||
417 | struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; | 370 | struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; |
418 | int err; | ||
419 | 371 | ||
420 | gk20a_dbg_fn(""); | 372 | gk20a_dbg_fn(""); |
421 | 373 | ||
@@ -423,44 +375,63 @@ static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c) | |||
423 | if (pm_ctx->mem.gpu_va == 0) | 375 | if (pm_ctx->mem.gpu_va == 0) |
424 | return; | 376 | return; |
425 | 377 | ||
426 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX; | 378 | /* server will free on channel close */ |
427 | msg.handle = vgpu_get_handle(c->g); | ||
428 | p->handle = c->virt_ctx; | ||
429 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
430 | WARN_ON(err || msg.ret); | ||
431 | 379 | ||
432 | __nvgpu_vm_free_va(c->vm, pm_ctx->mem.gpu_va, | 380 | __nvgpu_vm_free_va(tsg->vm, pm_ctx->mem.gpu_va, |
433 | gmmu_page_size_kernel); | 381 | gmmu_page_size_kernel); |
434 | pm_ctx->mem.gpu_va = 0; | 382 | pm_ctx->mem.gpu_va = 0; |
435 | } | 383 | } |
436 | 384 | ||
437 | void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg) | 385 | void vgpu_gr_free_gr_ctx(struct gk20a *g, |
386 | struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx) | ||
438 | { | 387 | { |
388 | struct tsg_gk20a *tsg; | ||
389 | |||
439 | gk20a_dbg_fn(""); | 390 | gk20a_dbg_fn(""); |
440 | 391 | ||
441 | if (c->g->ops.fifo.free_channel_ctx_header) | 392 | if (gr_ctx->mem.gpu_va) { |
442 | c->g->ops.fifo.free_channel_ctx_header(c); | 393 | struct tegra_vgpu_cmd_msg msg; |
443 | vgpu_gr_unmap_global_ctx_buffers(c); | 394 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; |
444 | vgpu_gr_free_channel_patch_ctx(c); | 395 | int err; |
445 | vgpu_gr_free_channel_pm_ctx(c); | ||
446 | if (!is_tsg) | ||
447 | vgpu_gr_free_channel_gr_ctx(c); | ||
448 | 396 | ||
449 | /* zcull_ctx, pm_ctx */ | 397 | msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE; |
398 | msg.handle = vgpu_get_handle(g); | ||
399 | p->gr_ctx_handle = gr_ctx->virt_ctx; | ||
400 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
401 | WARN_ON(err || msg.ret); | ||
450 | 402 | ||
451 | memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a)); | 403 | __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, |
404 | gmmu_page_size_kernel); | ||
405 | |||
406 | tsg = &g->fifo.tsg[gr_ctx->tsgid]; | ||
407 | vgpu_gr_unmap_global_ctx_buffers(tsg); | ||
408 | vgpu_gr_free_channel_patch_ctx(tsg); | ||
409 | vgpu_gr_free_channel_pm_ctx(tsg); | ||
410 | |||
411 | nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); | ||
412 | nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); | ||
413 | nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); | ||
414 | nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); | ||
452 | 415 | ||
453 | c->first_init = false; | 416 | memset(gr_ctx, 0, sizeof(*gr_ctx)); |
417 | } | ||
454 | } | 418 | } |
455 | 419 | ||
456 | static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c) | 420 | static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c) |
457 | { | 421 | { |
458 | struct gr_ctx_desc *gr_ctx = c->ch_ctx.gr_ctx; | 422 | struct tsg_gk20a *tsg; |
423 | struct nvgpu_gr_ctx *gr_ctx; | ||
459 | struct tegra_vgpu_cmd_msg msg = {0}; | 424 | struct tegra_vgpu_cmd_msg msg = {0}; |
460 | struct tegra_vgpu_channel_bind_gr_ctx_params *p = | 425 | struct tegra_vgpu_channel_bind_gr_ctx_params *p = |
461 | &msg.params.ch_bind_gr_ctx; | 426 | &msg.params.ch_bind_gr_ctx; |
462 | int err; | 427 | int err; |
463 | 428 | ||
429 | tsg = tsg_gk20a_from_ch(c); | ||
430 | if (!tsg) | ||
431 | return -EINVAL; | ||
432 | |||
433 | gr_ctx = &tsg->gr_ctx; | ||
434 | |||
464 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTX; | 435 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTX; |
465 | msg.handle = vgpu_get_handle(c->g); | 436 | msg.handle = vgpu_get_handle(c->g); |
466 | p->ch_handle = c->virt_ctx; | 437 | p->ch_handle = c->virt_ctx; |
@@ -474,7 +445,7 @@ static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c) | |||
474 | 445 | ||
475 | static int vgpu_gr_tsg_bind_gr_ctx(struct tsg_gk20a *tsg) | 446 | static int vgpu_gr_tsg_bind_gr_ctx(struct tsg_gk20a *tsg) |
476 | { | 447 | { |
477 | struct gr_ctx_desc *gr_ctx = tsg->tsg_gr_ctx; | 448 | struct nvgpu_gr_ctx *gr_ctx = &tsg->gr_ctx; |
478 | struct tegra_vgpu_cmd_msg msg = {0}; | 449 | struct tegra_vgpu_cmd_msg msg = {0}; |
479 | struct tegra_vgpu_tsg_bind_gr_ctx_params *p = | 450 | struct tegra_vgpu_tsg_bind_gr_ctx_params *p = |
480 | &msg.params.tsg_bind_gr_ctx; | 451 | &msg.params.tsg_bind_gr_ctx; |
@@ -495,7 +466,7 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) | |||
495 | { | 466 | { |
496 | struct gk20a *g = c->g; | 467 | struct gk20a *g = c->g; |
497 | struct fifo_gk20a *f = &g->fifo; | 468 | struct fifo_gk20a *f = &g->fifo; |
498 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 469 | struct nvgpu_gr_ctx *gr_ctx = NULL; |
499 | struct tsg_gk20a *tsg = NULL; | 470 | struct tsg_gk20a *tsg = NULL; |
500 | int err = 0; | 471 | int err = 0; |
501 | 472 | ||
@@ -515,95 +486,87 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) | |||
515 | } | 486 | } |
516 | c->obj_class = class_num; | 487 | c->obj_class = class_num; |
517 | 488 | ||
518 | if (gk20a_is_channel_marked_as_tsg(c)) | 489 | if (!gk20a_is_channel_marked_as_tsg(c)) |
519 | tsg = &f->tsg[c->tsgid]; | 490 | return -EINVAL; |
520 | 491 | ||
521 | if (!tsg) { | 492 | tsg = &f->tsg[c->tsgid]; |
522 | /* allocate gr ctx buffer */ | 493 | gr_ctx = &tsg->gr_ctx; |
523 | if (!ch_ctx->gr_ctx) { | 494 | |
524 | err = g->ops.gr.alloc_gr_ctx(g, &c->ch_ctx.gr_ctx, | 495 | if (!nvgpu_mem_is_valid(&gr_ctx->mem)) { |
525 | c->vm, | 496 | tsg->vm = c->vm; |
526 | class_num, | 497 | nvgpu_vm_get(tsg->vm); |
527 | flags); | 498 | err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, |
528 | if (!err) | 499 | c->vm, |
529 | err = vgpu_gr_ch_bind_gr_ctx(c); | 500 | class_num, |
530 | if (err) { | 501 | flags); |
531 | nvgpu_err(g, "fail to allocate gr ctx buffer"); | 502 | if (!err) |
532 | goto out; | 503 | err = vgpu_gr_tsg_bind_gr_ctx(tsg); |
533 | } | 504 | if (err) { |
534 | } else { | ||
535 | /*TBD: needs to be more subtle about which is | ||
536 | * being allocated as some are allowed to be | ||
537 | * allocated along same channel */ | ||
538 | nvgpu_err(g, | 505 | nvgpu_err(g, |
539 | "too many classes alloc'd on same channel"); | 506 | "fail to allocate TSG gr ctx buffer, err=%d", err); |
540 | err = -EINVAL; | 507 | nvgpu_vm_put(tsg->vm); |
508 | tsg->vm = NULL; | ||
541 | goto out; | 509 | goto out; |
542 | } | 510 | } |
543 | } else { | ||
544 | if (!tsg->tsg_gr_ctx) { | ||
545 | tsg->vm = c->vm; | ||
546 | nvgpu_vm_get(tsg->vm); | ||
547 | err = g->ops.gr.alloc_gr_ctx(g, &tsg->tsg_gr_ctx, | ||
548 | c->vm, | ||
549 | class_num, | ||
550 | flags); | ||
551 | if (!err) | ||
552 | err = vgpu_gr_tsg_bind_gr_ctx(tsg); | ||
553 | if (err) { | ||
554 | nvgpu_err(g, | ||
555 | "fail to allocate TSG gr ctx buffer, err=%d", err); | ||
556 | nvgpu_vm_put(tsg->vm); | ||
557 | tsg->vm = NULL; | ||
558 | goto out; | ||
559 | } | ||
560 | } | ||
561 | 511 | ||
562 | ch_ctx->gr_ctx = tsg->tsg_gr_ctx; | ||
563 | err = vgpu_gr_ch_bind_gr_ctx(c); | 512 | err = vgpu_gr_ch_bind_gr_ctx(c); |
564 | if (err) { | 513 | if (err) { |
565 | nvgpu_err(g, "fail to bind gr ctx buffer"); | 514 | nvgpu_err(g, "fail to bind gr ctx buffer"); |
566 | goto out; | 515 | goto out; |
567 | } | 516 | } |
568 | } | ||
569 | 517 | ||
570 | /* commit gr ctx buffer */ | 518 | /* commit gr ctx buffer */ |
571 | err = g->ops.gr.commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); | 519 | err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va); |
572 | if (err) { | 520 | if (err) { |
573 | nvgpu_err(g, "fail to commit gr ctx buffer"); | 521 | nvgpu_err(g, "fail to commit gr ctx buffer"); |
574 | goto out; | 522 | goto out; |
575 | } | 523 | } |
576 | 524 | ||
577 | /* allocate patch buffer */ | 525 | /* allocate patch buffer */ |
578 | if (ch_ctx->patch_ctx.mem.priv.pages == NULL) { | ||
579 | err = vgpu_gr_alloc_channel_patch_ctx(g, c); | 526 | err = vgpu_gr_alloc_channel_patch_ctx(g, c); |
580 | if (err) { | 527 | if (err) { |
581 | nvgpu_err(g, "fail to allocate patch buffer"); | 528 | nvgpu_err(g, "fail to allocate patch buffer"); |
582 | goto out; | 529 | goto out; |
583 | } | 530 | } |
584 | } | ||
585 | 531 | ||
586 | /* map global buffer to channel gpu_va and commit */ | 532 | /* map global buffer to channel gpu_va and commit */ |
587 | if (!ch_ctx->global_ctx_buffer_mapped) { | ||
588 | err = vgpu_gr_map_global_ctx_buffers(g, c); | 533 | err = vgpu_gr_map_global_ctx_buffers(g, c); |
589 | if (err) { | 534 | if (err) { |
590 | nvgpu_err(g, "fail to map global ctx buffer"); | 535 | nvgpu_err(g, "fail to map global ctx buffer"); |
591 | goto out; | 536 | goto out; |
592 | } | 537 | } |
593 | vgpu_gr_commit_global_ctx_buffers(g, c, true); | ||
594 | } | ||
595 | 538 | ||
596 | /* load golden image */ | 539 | err = vgpu_gr_commit_global_ctx_buffers(g, c, true); |
597 | if (!c->first_init) { | 540 | if (err) { |
541 | nvgpu_err(g, "fail to commit global ctx buffers"); | ||
542 | goto out; | ||
543 | } | ||
544 | |||
545 | /* load golden image */ | ||
598 | err = gr_gk20a_elpg_protected_call(g, | 546 | err = gr_gk20a_elpg_protected_call(g, |
599 | vgpu_gr_load_golden_ctx_image(g, c)); | 547 | vgpu_gr_load_golden_ctx_image(g, c)); |
600 | if (err) { | 548 | if (err) { |
601 | nvgpu_err(g, "fail to load golden ctx image"); | 549 | nvgpu_err(g, "fail to load golden ctx image"); |
602 | goto out; | 550 | goto out; |
603 | } | 551 | } |
604 | c->first_init = true; | 552 | } else { |
553 | err = vgpu_gr_ch_bind_gr_ctx(c); | ||
554 | if (err) { | ||
555 | nvgpu_err(g, "fail to bind gr ctx buffer"); | ||
556 | goto out; | ||
557 | } | ||
558 | |||
559 | /* commit gr ctx buffer */ | ||
560 | err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va); | ||
561 | if (err) { | ||
562 | nvgpu_err(g, "fail to commit gr ctx buffer"); | ||
563 | goto out; | ||
564 | } | ||
605 | } | 565 | } |
606 | 566 | ||
567 | /* PM ctxt switch is off by default */ | ||
568 | gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | ||
569 | |||
607 | gk20a_dbg_fn("done"); | 570 | gk20a_dbg_fn("done"); |
608 | return 0; | 571 | return 0; |
609 | out: | 572 | out: |
@@ -1055,15 +1018,30 @@ int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1055 | int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, | 1018 | int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, |
1056 | struct channel_gk20a *ch, bool enable) | 1019 | struct channel_gk20a *ch, bool enable) |
1057 | { | 1020 | { |
1058 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | 1021 | struct tsg_gk20a *tsg; |
1059 | struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; | 1022 | struct nvgpu_gr_ctx *ch_ctx; |
1023 | struct pm_ctx_desc *pm_ctx; | ||
1060 | struct tegra_vgpu_cmd_msg msg; | 1024 | struct tegra_vgpu_cmd_msg msg; |
1061 | struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode; | 1025 | struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode; |
1062 | int err; | 1026 | int err; |
1063 | 1027 | ||
1064 | gk20a_dbg_fn(""); | 1028 | gk20a_dbg_fn(""); |
1065 | 1029 | ||
1030 | tsg = tsg_gk20a_from_ch(ch); | ||
1031 | if (!tsg) | ||
1032 | return -EINVAL; | ||
1033 | |||
1034 | ch_ctx = &tsg->gr_ctx; | ||
1035 | pm_ctx = &ch_ctx->pm_ctx; | ||
1036 | |||
1066 | if (enable) { | 1037 | if (enable) { |
1038 | /* | ||
1039 | * send command to enable HWPM only once - otherwise server | ||
1040 | * will return an error due to using the same GPU VA twice. | ||
1041 | */ | ||
1042 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) | ||
1043 | return 0; | ||
1044 | |||
1067 | p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; | 1045 | p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; |
1068 | 1046 | ||
1069 | /* Allocate buffer if necessary */ | 1047 | /* Allocate buffer if necessary */ |
@@ -1076,8 +1054,12 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1076 | return -ENOMEM; | 1054 | return -ENOMEM; |
1077 | pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size; | 1055 | pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size; |
1078 | } | 1056 | } |
1079 | } else | 1057 | } else { |
1058 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) | ||
1059 | return 0; | ||
1060 | |||
1080 | p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW; | 1061 | p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW; |
1062 | } | ||
1081 | 1063 | ||
1082 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE; | 1064 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE; |
1083 | msg.handle = vgpu_get_handle(g); | 1065 | msg.handle = vgpu_get_handle(g); |
@@ -1086,8 +1068,13 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1086 | 1068 | ||
1087 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | 1069 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); |
1088 | WARN_ON(err || msg.ret); | 1070 | WARN_ON(err || msg.ret); |
1071 | err = err ? err : msg.ret; | ||
1072 | if (!err) | ||
1073 | pm_ctx->pm_mode = enable ? | ||
1074 | ctxsw_prog_main_image_pm_mode_ctxsw_f() : | ||
1075 | ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | ||
1089 | 1076 | ||
1090 | return err ? err : msg.ret; | 1077 | return err; |
1091 | } | 1078 | } |
1092 | 1079 | ||
1093 | int vgpu_gr_clear_sm_error_state(struct gk20a *g, | 1080 | int vgpu_gr_clear_sm_error_state(struct gk20a *g, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h index 16aa92a9..4b81da91 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h | |||
@@ -29,6 +29,7 @@ struct dbg_session_gk20a; | |||
29 | 29 | ||
30 | void vgpu_gr_detect_sm_arch(struct gk20a *g); | 30 | void vgpu_gr_detect_sm_arch(struct gk20a *g); |
31 | void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg); | 31 | void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg); |
32 | void vgpu_gr_free_tsg_ctx(struct tsg_gk20a *tsg); | ||
32 | int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); | 33 | int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); |
33 | int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, | 34 | int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, |
34 | struct channel_gk20a *c, u64 zcull_va, | 35 | struct channel_gk20a *c, u64 zcull_va, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c index 968eae10..132ce6e5 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c | |||
@@ -131,7 +131,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { | |||
131 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, | 131 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, |
132 | .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, | 132 | .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, |
133 | .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, | 133 | .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, |
134 | .free_channel_ctx = vgpu_gr_free_channel_ctx, | ||
135 | .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, | 134 | .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, |
136 | .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, | 135 | .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, |
137 | .get_zcull_info = vgpu_gr_get_zcull_info, | 136 | .get_zcull_info = vgpu_gr_get_zcull_info, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c index d59f0381..a0099f03 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c | |||
@@ -21,7 +21,7 @@ | |||
21 | 21 | ||
22 | int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c) | 22 | int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c) |
23 | { | 23 | { |
24 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | 24 | struct ctx_header_desc *ctx = &c->ctx_header; |
25 | struct tegra_vgpu_cmd_msg msg = {}; | 25 | struct tegra_vgpu_cmd_msg msg = {}; |
26 | struct tegra_vgpu_alloc_ctx_header_params *p = | 26 | struct tegra_vgpu_alloc_ctx_header_params *p = |
27 | &msg.params.alloc_ctx_header; | 27 | &msg.params.alloc_ctx_header; |
@@ -52,7 +52,7 @@ int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c) | |||
52 | 52 | ||
53 | void vgpu_gv11b_free_subctx_header(struct channel_gk20a *c) | 53 | void vgpu_gv11b_free_subctx_header(struct channel_gk20a *c) |
54 | { | 54 | { |
55 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | 55 | struct ctx_header_desc *ctx = &c->ctx_header; |
56 | struct tegra_vgpu_cmd_msg msg = {}; | 56 | struct tegra_vgpu_cmd_msg msg = {}; |
57 | struct tegra_vgpu_free_ctx_header_params *p = | 57 | struct tegra_vgpu_free_ctx_header_params *p = |
58 | &msg.params.free_ctx_header; | 58 | &msg.params.free_ctx_header; |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h index 8c306ea0..20624240 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h +++ b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h | |||
@@ -79,12 +79,12 @@ int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info); | |||
79 | int vgpu_gr_nonstall_isr(struct gk20a *g, | 79 | int vgpu_gr_nonstall_isr(struct gk20a *g, |
80 | struct tegra_vgpu_gr_nonstall_intr_info *info); | 80 | struct tegra_vgpu_gr_nonstall_intr_info *info); |
81 | int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | 81 | int vgpu_gr_alloc_gr_ctx(struct gk20a *g, |
82 | struct gr_ctx_desc **__gr_ctx, | 82 | struct nvgpu_gr_ctx *gr_ctx, |
83 | struct vm_gk20a *vm, | 83 | struct vm_gk20a *vm, |
84 | u32 class, | 84 | u32 class, |
85 | u32 flags); | 85 | u32 flags); |
86 | void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | 86 | void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, |
87 | struct gr_ctx_desc *gr_ctx); | 87 | struct nvgpu_gr_ctx *gr_ctx); |
88 | void vgpu_gr_handle_sm_esr_event(struct gk20a *g, | 88 | void vgpu_gr_handle_sm_esr_event(struct gk20a *g, |
89 | struct tegra_vgpu_sm_esr_info *info); | 89 | struct tegra_vgpu_sm_esr_info *info); |
90 | int vgpu_gr_init_ctx_state(struct gk20a *g); | 90 | int vgpu_gr_init_ctx_state(struct gk20a *g); |
@@ -141,7 +141,7 @@ static inline int vgpu_gr_isr(struct gk20a *g, | |||
141 | return 0; | 141 | return 0; |
142 | } | 142 | } |
143 | static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | 143 | static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g, |
144 | struct gr_ctx_desc **__gr_ctx, | 144 | struct nvgpu_gr_ctx *gr_ctx, |
145 | struct vm_gk20a *vm, | 145 | struct vm_gk20a *vm, |
146 | u32 class, | 146 | u32 class, |
147 | u32 flags) | 147 | u32 flags) |
@@ -149,7 +149,7 @@ static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | |||
149 | return -ENOSYS; | 149 | return -ENOSYS; |
150 | } | 150 | } |
151 | static inline void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | 151 | static inline void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, |
152 | struct gr_ctx_desc *gr_ctx) | 152 | struct nvgpu_gr_ctx *gr_ctx) |
153 | { | 153 | { |
154 | } | 154 | } |
155 | static inline int vgpu_gr_init_ctx_state(struct gk20a *g) | 155 | static inline int vgpu_gr_init_ctx_state(struct gk20a *g) |