diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h | 9 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 46 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/gr_gv100.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/gr_gv100.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/hal_gv100.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/ioctl_dbg.c | 24 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/gr_vgpu.c | 43 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/gr_vgpu.h | 2 |
12 files changed, 116 insertions, 29 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h index d9d07844..50002557 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h | |||
@@ -135,4 +135,13 @@ int gk20a_perfbuf_disable_locked(struct gk20a *g); | |||
135 | void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s); | 135 | void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s); |
136 | u32 nvgpu_set_powergate_locked(struct dbg_session_gk20a *dbg_s, | 136 | u32 nvgpu_set_powergate_locked(struct dbg_session_gk20a *dbg_s, |
137 | bool mode); | 137 | bool mode); |
138 | |||
139 | /* PM Context Switch Mode */ | ||
140 | /* This mode says that the pms are not to be context switched. */ | ||
141 | #define NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW (0x00000000) | ||
142 | /* This mode says that the pms in Mode-B are to be context switched */ | ||
143 | #define NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW (0x00000001) | ||
144 | /* This mode says that the pms in Mode-E (stream out) are to be context switched. */ | ||
145 | #define NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW (0x00000002) | ||
146 | |||
138 | #endif /* DBG_GPU_GK20A_H */ | 147 | #endif /* DBG_GPU_GK20A_H */ |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 23ed2f15..ce0a6563 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -307,10 +307,11 @@ struct gpu_ops { | |||
307 | int (*update_smpc_ctxsw_mode)(struct gk20a *g, | 307 | int (*update_smpc_ctxsw_mode)(struct gk20a *g, |
308 | struct channel_gk20a *c, | 308 | struct channel_gk20a *c, |
309 | bool enable); | 309 | bool enable); |
310 | u32 (*get_hw_accessor_stream_out_mode)(void); | ||
310 | int (*update_hwpm_ctxsw_mode)(struct gk20a *g, | 311 | int (*update_hwpm_ctxsw_mode)(struct gk20a *g, |
311 | struct channel_gk20a *c, | 312 | struct channel_gk20a *c, |
312 | u64 gpu_va, | 313 | u64 gpu_va, |
313 | bool enable); | 314 | u32 mode); |
314 | int (*dump_gr_regs)(struct gk20a *g, | 315 | int (*dump_gr_regs)(struct gk20a *g, |
315 | struct gk20a_debug_output *o); | 316 | struct gk20a_debug_output *o); |
316 | int (*update_pc_sampling)(struct channel_gk20a *ch, | 317 | int (*update_pc_sampling)(struct channel_gk20a *ch, |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 7a65f353..3f49fbf7 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -1684,14 +1684,14 @@ out: | |||
1684 | int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | 1684 | int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, |
1685 | struct channel_gk20a *c, | 1685 | struct channel_gk20a *c, |
1686 | u64 gpu_va, | 1686 | u64 gpu_va, |
1687 | bool enable_hwpm_ctxsw) | 1687 | u32 mode) |
1688 | { | 1688 | { |
1689 | struct tsg_gk20a *tsg; | 1689 | struct tsg_gk20a *tsg; |
1690 | struct nvgpu_mem *gr_mem = NULL; | 1690 | struct nvgpu_mem *gr_mem = NULL; |
1691 | struct nvgpu_gr_ctx *gr_ctx; | 1691 | struct nvgpu_gr_ctx *gr_ctx; |
1692 | struct pm_ctx_desc *pm_ctx; | 1692 | struct pm_ctx_desc *pm_ctx; |
1693 | u32 data; | 1693 | u32 data; |
1694 | u64 virt_addr; | 1694 | u64 virt_addr = 0; |
1695 | struct ctx_header_desc *ctx = &c->ctx_header; | 1695 | struct ctx_header_desc *ctx = &c->ctx_header; |
1696 | struct nvgpu_mem *ctxheader = &ctx->mem; | 1696 | struct nvgpu_mem *ctxheader = &ctx->mem; |
1697 | int ret; | 1697 | int ret; |
@@ -1710,12 +1710,31 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1710 | return -EFAULT; | 1710 | return -EFAULT; |
1711 | } | 1711 | } |
1712 | 1712 | ||
1713 | if (enable_hwpm_ctxsw) { | 1713 | if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) && |
1714 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) | 1714 | (!g->ops.gr.get_hw_accessor_stream_out_mode)) { |
1715 | nvgpu_err(g, "Mode-E hwpm context switch mode is not supported"); | ||
1716 | return -EINVAL; | ||
1717 | } | ||
1718 | |||
1719 | switch (mode) { | ||
1720 | case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW: | ||
1721 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) { | ||
1715 | return 0; | 1722 | return 0; |
1716 | } else { | 1723 | } |
1717 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) | 1724 | break; |
1725 | case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW: | ||
1726 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) { | ||
1727 | return 0; | ||
1728 | } | ||
1729 | break; | ||
1730 | case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW: | ||
1731 | if (pm_ctx->pm_mode == g->ops.gr.get_hw_accessor_stream_out_mode()) { | ||
1718 | return 0; | 1732 | return 0; |
1733 | } | ||
1734 | break; | ||
1735 | default: | ||
1736 | nvgpu_err(g, "invalid hwpm context switch mode"); | ||
1737 | return -EINVAL; | ||
1719 | } | 1738 | } |
1720 | 1739 | ||
1721 | ret = gk20a_disable_channel_tsg(g, c); | 1740 | ret = gk20a_disable_channel_tsg(g, c); |
@@ -1735,7 +1754,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1735 | Flush and invalidate before cpu update. */ | 1754 | Flush and invalidate before cpu update. */ |
1736 | g->ops.mm.l2_flush(g, true); | 1755 | g->ops.mm.l2_flush(g, true); |
1737 | 1756 | ||
1738 | if (enable_hwpm_ctxsw) { | 1757 | if (mode != NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) { |
1739 | /* Allocate buffer if necessary */ | 1758 | /* Allocate buffer if necessary */ |
1740 | if (pm_ctx->mem.gpu_va == 0) { | 1759 | if (pm_ctx->mem.gpu_va == 0) { |
1741 | ret = nvgpu_dma_alloc_sys(g, | 1760 | ret = nvgpu_dma_alloc_sys(g, |
@@ -1768,11 +1787,16 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1768 | data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o()); | 1787 | data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o()); |
1769 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); | 1788 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); |
1770 | 1789 | ||
1771 | if (enable_hwpm_ctxsw) { | 1790 | switch (mode) { |
1791 | case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW: | ||
1772 | pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f(); | 1792 | pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f(); |
1773 | |||
1774 | virt_addr = pm_ctx->mem.gpu_va; | 1793 | virt_addr = pm_ctx->mem.gpu_va; |
1775 | } else { | 1794 | break; |
1795 | case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW: | ||
1796 | pm_ctx->pm_mode = g->ops.gr.get_hw_accessor_stream_out_mode(); | ||
1797 | virt_addr = pm_ctx->mem.gpu_va; | ||
1798 | break; | ||
1799 | case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW: | ||
1776 | pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | 1800 | pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); |
1777 | virt_addr = 0; | 1801 | virt_addr = 0; |
1778 | } | 1802 | } |
@@ -1892,7 +1916,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1892 | * for PM context switching, including mode and possibly a pointer to | 1916 | * for PM context switching, including mode and possibly a pointer to |
1893 | * the PM backing store. | 1917 | * the PM backing store. |
1894 | */ | 1918 | */ |
1895 | if (gr_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) { | 1919 | if (gr_ctx->pm_ctx.pm_mode != ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) { |
1896 | if (gr_ctx->pm_ctx.mem.gpu_va == 0) { | 1920 | if (gr_ctx->pm_ctx.mem.gpu_va == 0) { |
1897 | nvgpu_err(g, | 1921 | nvgpu_err(g, |
1898 | "context switched pm with no pm buffer!"); | 1922 | "context switched pm with no pm buffer!"); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index a77136a6..92e1dff5 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -655,7 +655,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | |||
655 | int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | 655 | int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, |
656 | struct channel_gk20a *c, | 656 | struct channel_gk20a *c, |
657 | u64 gpu_va, | 657 | u64 gpu_va, |
658 | bool enable_hwpm_ctxsw); | 658 | u32 mode); |
659 | 659 | ||
660 | struct nvgpu_gr_ctx; | 660 | struct nvgpu_gr_ctx; |
661 | void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx, | 661 | void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx, |
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index 680bcba3..b1b1cee6 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <nvgpu/hw/gv100/hw_proj_gv100.h> | 38 | #include <nvgpu/hw/gv100/hw_proj_gv100.h> |
39 | #include <nvgpu/hw/gv100/hw_fuse_gv100.h> | 39 | #include <nvgpu/hw/gv100/hw_fuse_gv100.h> |
40 | #include <nvgpu/hw/gv100/hw_top_gv100.h> | 40 | #include <nvgpu/hw/gv100/hw_top_gv100.h> |
41 | #include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h> | ||
41 | 42 | ||
42 | 43 | ||
43 | /* | 44 | /* |
@@ -453,3 +454,8 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, | |||
453 | } | 454 | } |
454 | } | 455 | } |
455 | } | 456 | } |
457 | |||
458 | u32 gr_gv100_get_hw_accessor_stream_out_mode(void) | ||
459 | { | ||
460 | return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(); | ||
461 | } | ||
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h index 821659aa..457bd701 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.h +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h | |||
@@ -47,4 +47,5 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, | |||
47 | u32 num_fbpas, | 47 | u32 num_fbpas, |
48 | u32 *priv_addr_table, u32 *t); | 48 | u32 *priv_addr_table, u32 *t); |
49 | void gr_gv100_init_gpc_mmu(struct gk20a *g); | 49 | void gr_gv100_init_gpc_mmu(struct gk20a *g); |
50 | u32 gr_gv100_get_hw_accessor_stream_out_mode(void); | ||
50 | #endif | 51 | #endif |
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 07c71a04..c84778dd 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c | |||
@@ -358,6 +358,8 @@ static const struct gpu_ops gv100_ops = { | |||
358 | .enable_exceptions = gr_gv11b_enable_exceptions, | 358 | .enable_exceptions = gr_gv11b_enable_exceptions, |
359 | .get_lrf_tex_ltc_dram_override = get_ecc_override_val, | 359 | .get_lrf_tex_ltc_dram_override = get_ecc_override_val, |
360 | .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, | 360 | .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, |
361 | .get_hw_accessor_stream_out_mode = | ||
362 | gr_gv100_get_hw_accessor_stream_out_mode, | ||
361 | .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, | 363 | .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, |
362 | .record_sm_error_state = gv11b_gr_record_sm_error_state, | 364 | .record_sm_error_state = gv11b_gr_record_sm_error_state, |
363 | .update_sm_error_state = gv11b_gr_update_sm_error_state, | 365 | .update_sm_error_state = gv11b_gr_update_sm_error_state, |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h index cd792835..b7f3df24 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), | 5 | * copy of this software and associated documentation files (the "Software"), |
@@ -144,6 +144,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void) | |||
144 | { | 144 | { |
145 | return 0x0U; | 145 | return 0x0U; |
146 | } | 146 | } |
147 | static inline u32 ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(void) | ||
148 | { | ||
149 | return 0x2U; | ||
150 | } | ||
147 | static inline u32 ctxsw_prog_main_image_pm_smpc_mode_m(void) | 151 | static inline u32 ctxsw_prog_main_image_pm_smpc_mode_m(void) |
148 | { | 152 | { |
149 | return 0x7U << 3U; | 153 | return 0x7U << 3U; |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h b/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h index 6d05e44c..1eb26303 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h | |||
@@ -356,6 +356,7 @@ struct tegra_vgpu_fecs_trace_filter { | |||
356 | enum { | 356 | enum { |
357 | TEGRA_VGPU_CTXSW_MODE_NO_CTXSW = 0, | 357 | TEGRA_VGPU_CTXSW_MODE_NO_CTXSW = 0, |
358 | TEGRA_VGPU_CTXSW_MODE_CTXSW, | 358 | TEGRA_VGPU_CTXSW_MODE_CTXSW, |
359 | TEGRA_VGPU_CTXSW_MODE_STREAM_OUT_CTXSW, | ||
359 | }; | 360 | }; |
360 | 361 | ||
361 | enum { | 362 | enum { |
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c index 9c7608e9..d530492a 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c | |||
@@ -1039,12 +1039,33 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | |||
1039 | return err; | 1039 | return err; |
1040 | } | 1040 | } |
1041 | 1041 | ||
1042 | /* | ||
1043 | * Convert linux hwpm ctxsw mode type of the form of NVGPU_DBG_GPU_HWPM_CTXSW_MODE_* | ||
1044 | * into common hwpm ctxsw mode type of the form of NVGPU_DBG_HWPM_CTXSW_MODE_* | ||
1045 | */ | ||
1046 | |||
1047 | static u32 nvgpu_hwpm_ctxsw_mode_to_common_mode(u32 mode) | ||
1048 | { | ||
1049 | switch (mode) { | ||
1050 | case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW: | ||
1051 | return NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW; | ||
1052 | case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW: | ||
1053 | return NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW; | ||
1054 | case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW: | ||
1055 | return NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW; | ||
1056 | } | ||
1057 | |||
1058 | return mode; | ||
1059 | } | ||
1060 | |||
1061 | |||
1042 | static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | 1062 | static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, |
1043 | struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args) | 1063 | struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args) |
1044 | { | 1064 | { |
1045 | int err; | 1065 | int err; |
1046 | struct gk20a *g = dbg_s->g; | 1066 | struct gk20a *g = dbg_s->g; |
1047 | struct channel_gk20a *ch_gk20a; | 1067 | struct channel_gk20a *ch_gk20a; |
1068 | u32 mode = nvgpu_hwpm_ctxsw_mode_to_common_mode(args->mode); | ||
1048 | 1069 | ||
1049 | nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode); | 1070 | nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode); |
1050 | 1071 | ||
@@ -1080,7 +1101,8 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | |||
1080 | goto clean_up; | 1101 | goto clean_up; |
1081 | } | 1102 | } |
1082 | err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a, 0, | 1103 | err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a, 0, |
1083 | args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW); | 1104 | mode); |
1105 | |||
1084 | if (err) | 1106 | if (err) |
1085 | nvgpu_err(g, | 1107 | nvgpu_err(g, |
1086 | "error (%d) during pm ctxsw mode update", err); | 1108 | "error (%d) during pm ctxsw mode update", err); |
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c index 83d27f17..0077c537 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c | |||
@@ -1066,7 +1066,7 @@ int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1066 | } | 1066 | } |
1067 | 1067 | ||
1068 | int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, | 1068 | int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, |
1069 | struct channel_gk20a *ch, u64 gpu_va, bool enable) | 1069 | struct channel_gk20a *ch, u64 gpu_va, u32 mode) |
1070 | { | 1070 | { |
1071 | struct tsg_gk20a *tsg; | 1071 | struct tsg_gk20a *tsg; |
1072 | struct nvgpu_gr_ctx *ch_ctx; | 1072 | struct nvgpu_gr_ctx *ch_ctx; |
@@ -1089,16 +1089,33 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1089 | ch_ctx = &tsg->gr_ctx; | 1089 | ch_ctx = &tsg->gr_ctx; |
1090 | pm_ctx = &ch_ctx->pm_ctx; | 1090 | pm_ctx = &ch_ctx->pm_ctx; |
1091 | 1091 | ||
1092 | if (enable) { | 1092 | if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW) { |
1093 | /* | 1093 | /* |
1094 | * send command to enable HWPM only once - otherwise server | 1094 | * send command to enable HWPM only once - otherwise server |
1095 | * will return an error due to using the same GPU VA twice. | 1095 | * will return an error due to using the same GPU VA twice. |
1096 | */ | 1096 | */ |
1097 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) | ||
1098 | return 0; | ||
1099 | 1097 | ||
1098 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) { | ||
1099 | return 0; | ||
1100 | } | ||
1100 | p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; | 1101 | p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; |
1102 | } else if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) { | ||
1103 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) { | ||
1104 | return 0; | ||
1105 | } | ||
1106 | p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW; | ||
1107 | } else if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) && | ||
1108 | (g->ops.gr.get_hw_accessor_stream_out_mode)) { | ||
1109 | if (pm_ctx->pm_mode == g->ops.gr.get_hw_accessor_stream_out_mode()) { | ||
1110 | return 0; | ||
1111 | } | ||
1112 | p->mode = TEGRA_VGPU_CTXSW_MODE_STREAM_OUT_CTXSW; | ||
1113 | } else { | ||
1114 | nvgpu_err(g, "invalid hwpm context switch mode"); | ||
1115 | return -EINVAL; | ||
1116 | } | ||
1101 | 1117 | ||
1118 | if (mode != NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) { | ||
1102 | /* Allocate buffer if necessary */ | 1119 | /* Allocate buffer if necessary */ |
1103 | if (pm_ctx->mem.gpu_va == 0) { | 1120 | if (pm_ctx->mem.gpu_va == 0) { |
1104 | pm_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch->vm, | 1121 | pm_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch->vm, |
@@ -1109,11 +1126,6 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1109 | return -ENOMEM; | 1126 | return -ENOMEM; |
1110 | pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size; | 1127 | pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size; |
1111 | } | 1128 | } |
1112 | } else { | ||
1113 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) | ||
1114 | return 0; | ||
1115 | |||
1116 | p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW; | ||
1117 | } | 1129 | } |
1118 | 1130 | ||
1119 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE; | 1131 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE; |
@@ -1124,10 +1136,15 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1124 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | 1136 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); |
1125 | WARN_ON(err || msg.ret); | 1137 | WARN_ON(err || msg.ret); |
1126 | err = err ? err : msg.ret; | 1138 | err = err ? err : msg.ret; |
1127 | if (!err) | 1139 | if (!err) { |
1128 | pm_ctx->pm_mode = enable ? | 1140 | if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW) { |
1129 | ctxsw_prog_main_image_pm_mode_ctxsw_f() : | 1141 | pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f(); |
1130 | ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | 1142 | } else if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) { |
1143 | pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | ||
1144 | } else { | ||
1145 | pm_ctx->pm_mode = g->ops.gr.get_hw_accessor_stream_out_mode(); | ||
1146 | } | ||
1147 | } | ||
1131 | 1148 | ||
1132 | return err; | 1149 | return err; |
1133 | } | 1150 | } |
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.h b/drivers/gpu/nvgpu/vgpu/gr_vgpu.h index c4b3944e..149bd17a 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.h +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.h | |||
@@ -58,7 +58,7 @@ int vgpu_gr_set_sm_debug_mode(struct gk20a *g, | |||
58 | int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g, | 58 | int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g, |
59 | struct channel_gk20a *ch, bool enable); | 59 | struct channel_gk20a *ch, bool enable); |
60 | int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, | 60 | int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, |
61 | struct channel_gk20a *ch, u64 gpu_va, bool enable); | 61 | struct channel_gk20a *ch, u64 gpu_va, u32 mode); |
62 | int vgpu_gr_clear_sm_error_state(struct gk20a *g, | 62 | int vgpu_gr_clear_sm_error_state(struct gk20a *g, |
63 | struct channel_gk20a *ch, u32 sm_id); | 63 | struct channel_gk20a *ch, u32 sm_id); |
64 | int vgpu_gr_suspend_contexts(struct gk20a *g, | 64 | int vgpu_gr_suspend_contexts(struct gk20a *g, |