summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVaibhav Kachore <vkachore@nvidia.com>2018-07-06 05:40:03 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-07-10 21:13:43 -0400
commite14fdcd8f1f4125da697433b1744b1e4e4f15b09 (patch)
treef48ff794ef77e977ccba397f5abf14f5ae7b185b
parent4cd59404a2d4ab1c31605d96cff848dd4e93c3b4 (diff)
gpu: nvgpu: enable HWPM Mode-E context switch
- Write new pm mode to context buffer header. Ucode uses this mode to enable Mode-E context switch. This is a Mode-B context switch of PMs with Mode-E streamout on one context. If this mode is set, Ucode makes sure that the Mode-E pipe (perfmons, routers, pma) is idle before it context switches PMs. - This allows us to collect counters in a secure way (i.e. on a per-context basis) with stream out. Bug 2106999 Change-Id: I5a7435f09d1bf053ca428e538b0a57f3a175ac37 Signed-off-by: Vaibhav Kachore <vkachore@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1760366 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h9
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h3
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c46
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.h2
-rw-r--r--drivers/gpu/nvgpu/gv100/gr_gv100.c6
-rw-r--r--drivers/gpu/nvgpu/gv100/gr_gv100.h1
-rw-r--r--drivers/gpu/nvgpu/gv100/hal_gv100.c2
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h6
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h1
-rw-r--r--drivers/gpu/nvgpu/os/linux/ioctl_dbg.c24
-rw-r--r--drivers/gpu/nvgpu/vgpu/gr_vgpu.c43
-rw-r--r--drivers/gpu/nvgpu/vgpu/gr_vgpu.h2
-rw-r--r--include/uapi/linux/nvgpu.h4
13 files changed, 120 insertions, 29 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
index d9d07844..50002557 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
@@ -135,4 +135,13 @@ int gk20a_perfbuf_disable_locked(struct gk20a *g);
135void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s); 135void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s);
136u32 nvgpu_set_powergate_locked(struct dbg_session_gk20a *dbg_s, 136u32 nvgpu_set_powergate_locked(struct dbg_session_gk20a *dbg_s,
137 bool mode); 137 bool mode);
138
139 /* PM Context Switch Mode */
140/*This mode says that the pms are not to be context switched. */
141#define NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW (0x00000000)
142/* This mode says that the pms in Mode-B are to be context switched */
143#define NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW (0x00000001)
144/* This mode says that the pms in Mode-E (stream out) are to be context switched. */
145#define NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW (0x00000002)
146
138#endif /* DBG_GPU_GK20A_H */ 147#endif /* DBG_GPU_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 23ed2f15..ce0a6563 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -307,10 +307,11 @@ struct gpu_ops {
307 int (*update_smpc_ctxsw_mode)(struct gk20a *g, 307 int (*update_smpc_ctxsw_mode)(struct gk20a *g,
308 struct channel_gk20a *c, 308 struct channel_gk20a *c,
309 bool enable); 309 bool enable);
310 u32 (*get_hw_accessor_stream_out_mode)(void);
310 int (*update_hwpm_ctxsw_mode)(struct gk20a *g, 311 int (*update_hwpm_ctxsw_mode)(struct gk20a *g,
311 struct channel_gk20a *c, 312 struct channel_gk20a *c,
312 u64 gpu_va, 313 u64 gpu_va,
313 bool enable); 314 u32 mode);
314 int (*dump_gr_regs)(struct gk20a *g, 315 int (*dump_gr_regs)(struct gk20a *g,
315 struct gk20a_debug_output *o); 316 struct gk20a_debug_output *o);
316 int (*update_pc_sampling)(struct channel_gk20a *ch, 317 int (*update_pc_sampling)(struct channel_gk20a *ch,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 7a65f353..3f49fbf7 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1684,14 +1684,14 @@ out:
1684int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, 1684int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1685 struct channel_gk20a *c, 1685 struct channel_gk20a *c,
1686 u64 gpu_va, 1686 u64 gpu_va,
1687 bool enable_hwpm_ctxsw) 1687 u32 mode)
1688{ 1688{
1689 struct tsg_gk20a *tsg; 1689 struct tsg_gk20a *tsg;
1690 struct nvgpu_mem *gr_mem = NULL; 1690 struct nvgpu_mem *gr_mem = NULL;
1691 struct nvgpu_gr_ctx *gr_ctx; 1691 struct nvgpu_gr_ctx *gr_ctx;
1692 struct pm_ctx_desc *pm_ctx; 1692 struct pm_ctx_desc *pm_ctx;
1693 u32 data; 1693 u32 data;
1694 u64 virt_addr; 1694 u64 virt_addr = 0;
1695 struct ctx_header_desc *ctx = &c->ctx_header; 1695 struct ctx_header_desc *ctx = &c->ctx_header;
1696 struct nvgpu_mem *ctxheader = &ctx->mem; 1696 struct nvgpu_mem *ctxheader = &ctx->mem;
1697 int ret; 1697 int ret;
@@ -1710,12 +1710,31 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1710 return -EFAULT; 1710 return -EFAULT;
1711 } 1711 }
1712 1712
1713 if (enable_hwpm_ctxsw) { 1713 if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) &&
1714 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) 1714 (!g->ops.gr.get_hw_accessor_stream_out_mode)) {
1715 nvgpu_err(g, "Mode-E hwpm context switch mode is not supported");
1716 return -EINVAL;
1717 }
1718
1719 switch (mode) {
1720 case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW:
1721 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
1715 return 0; 1722 return 0;
1716 } else { 1723 }
1717 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) 1724 break;
1725 case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW:
1726 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
1727 return 0;
1728 }
1729 break;
1730 case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
1731 if (pm_ctx->pm_mode == g->ops.gr.get_hw_accessor_stream_out_mode()) {
1718 return 0; 1732 return 0;
1733 }
1734 break;
1735 default:
1736 nvgpu_err(g, "invalid hwpm context switch mode");
1737 return -EINVAL;
1719 } 1738 }
1720 1739
1721 ret = gk20a_disable_channel_tsg(g, c); 1740 ret = gk20a_disable_channel_tsg(g, c);
@@ -1735,7 +1754,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1735 Flush and invalidate before cpu update. */ 1754 Flush and invalidate before cpu update. */
1736 g->ops.mm.l2_flush(g, true); 1755 g->ops.mm.l2_flush(g, true);
1737 1756
1738 if (enable_hwpm_ctxsw) { 1757 if (mode != NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
1739 /* Allocate buffer if necessary */ 1758 /* Allocate buffer if necessary */
1740 if (pm_ctx->mem.gpu_va == 0) { 1759 if (pm_ctx->mem.gpu_va == 0) {
1741 ret = nvgpu_dma_alloc_sys(g, 1760 ret = nvgpu_dma_alloc_sys(g,
@@ -1768,11 +1787,16 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1768 data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o()); 1787 data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
1769 data = data & ~ctxsw_prog_main_image_pm_mode_m(); 1788 data = data & ~ctxsw_prog_main_image_pm_mode_m();
1770 1789
1771 if (enable_hwpm_ctxsw) { 1790 switch (mode) {
1791 case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW:
1772 pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f(); 1792 pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();
1773
1774 virt_addr = pm_ctx->mem.gpu_va; 1793 virt_addr = pm_ctx->mem.gpu_va;
1775 } else { 1794 break;
1795 case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
1796 pm_ctx->pm_mode = g->ops.gr.get_hw_accessor_stream_out_mode();
1797 virt_addr = pm_ctx->mem.gpu_va;
1798 break;
1799 case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW:
1776 pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); 1800 pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
1777 virt_addr = 0; 1801 virt_addr = 0;
1778 } 1802 }
@@ -1892,7 +1916,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1892 * for PM context switching, including mode and possibly a pointer to 1916 * for PM context switching, including mode and possibly a pointer to
1893 * the PM backing store. 1917 * the PM backing store.
1894 */ 1918 */
1895 if (gr_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) { 1919 if (gr_ctx->pm_ctx.pm_mode != ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
1896 if (gr_ctx->pm_ctx.mem.gpu_va == 0) { 1920 if (gr_ctx->pm_ctx.mem.gpu_va == 0) {
1897 nvgpu_err(g, 1921 nvgpu_err(g,
1898 "context switched pm with no pm buffer!"); 1922 "context switched pm with no pm buffer!");
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index a77136a6..92e1dff5 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -655,7 +655,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
655int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, 655int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
656 struct channel_gk20a *c, 656 struct channel_gk20a *c,
657 u64 gpu_va, 657 u64 gpu_va,
658 bool enable_hwpm_ctxsw); 658 u32 mode);
659 659
660struct nvgpu_gr_ctx; 660struct nvgpu_gr_ctx;
661void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx, 661void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx,
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c
index 680bcba3..b1b1cee6 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c
@@ -38,6 +38,7 @@
38#include <nvgpu/hw/gv100/hw_proj_gv100.h> 38#include <nvgpu/hw/gv100/hw_proj_gv100.h>
39#include <nvgpu/hw/gv100/hw_fuse_gv100.h> 39#include <nvgpu/hw/gv100/hw_fuse_gv100.h>
40#include <nvgpu/hw/gv100/hw_top_gv100.h> 40#include <nvgpu/hw/gv100/hw_top_gv100.h>
41#include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h>
41 42
42 43
43/* 44/*
@@ -453,3 +454,8 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
453 } 454 }
454 } 455 }
455} 456}
457
458u32 gr_gv100_get_hw_accessor_stream_out_mode()
459{
460 return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f();
461}
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h
index 821659aa..457bd701 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.h
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h
@@ -47,4 +47,5 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
47 u32 num_fbpas, 47 u32 num_fbpas,
48 u32 *priv_addr_table, u32 *t); 48 u32 *priv_addr_table, u32 *t);
49void gr_gv100_init_gpc_mmu(struct gk20a *g); 49void gr_gv100_init_gpc_mmu(struct gk20a *g);
50u32 gr_gv100_get_hw_accessor_stream_out_mode(void);
50#endif 51#endif
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 07c71a04..c84778dd 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -358,6 +358,8 @@ static const struct gpu_ops gv100_ops = {
358 .enable_exceptions = gr_gv11b_enable_exceptions, 358 .enable_exceptions = gr_gv11b_enable_exceptions,
359 .get_lrf_tex_ltc_dram_override = get_ecc_override_val, 359 .get_lrf_tex_ltc_dram_override = get_ecc_override_val,
360 .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, 360 .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
361 .get_hw_accessor_stream_out_mode =
362 gr_gv100_get_hw_accessor_stream_out_mode,
361 .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, 363 .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
362 .record_sm_error_state = gv11b_gr_record_sm_error_state, 364 .record_sm_error_state = gv11b_gr_record_sm_error_state,
363 .update_sm_error_state = gv11b_gr_update_sm_error_state, 365 .update_sm_error_state = gv11b_gr_update_sm_error_state,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h
index cd792835..b7f3df24 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -144,6 +144,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
144{ 144{
145 return 0x0U; 145 return 0x0U;
146} 146}
147static inline u32 ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(void)
148{
149 return 0x2U;
150}
147static inline u32 ctxsw_prog_main_image_pm_smpc_mode_m(void) 151static inline u32 ctxsw_prog_main_image_pm_smpc_mode_m(void)
148{ 152{
149 return 0x7U << 3U; 153 return 0x7U << 3U;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h b/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h
index 6d05e44c..1eb26303 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h
@@ -356,6 +356,7 @@ struct tegra_vgpu_fecs_trace_filter {
356enum { 356enum {
357 TEGRA_VGPU_CTXSW_MODE_NO_CTXSW = 0, 357 TEGRA_VGPU_CTXSW_MODE_NO_CTXSW = 0,
358 TEGRA_VGPU_CTXSW_MODE_CTXSW, 358 TEGRA_VGPU_CTXSW_MODE_CTXSW,
359 TEGRA_VGPU_CTXSW_MODE_STREAM_OUT_CTXSW,
359}; 360};
360 361
361enum { 362enum {
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index 9c7608e9..d530492a 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -1039,12 +1039,33 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
1039 return err; 1039 return err;
1040} 1040}
1041 1041
1042/*
1043 * Convert linux hwpm ctxsw mode type of the form of NVGPU_DBG_GPU_HWPM_CTXSW_MODE_*
1044 * into common hwpm ctxsw mode type of the form of NVGPU_DBG_HWPM_CTXSW_MODE_*
1045 */
1046
1047static u32 nvgpu_hwpm_ctxsw_mode_to_common_mode(u32 mode)
1048{
1049 switch (mode){
1050 case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW:
1051 return NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW;
1052 case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW:
1053 return NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW;
1054 case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
1055 return NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW;
1056 }
1057
1058 return mode;
1059}
1060
1061
1042static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, 1062static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
1043 struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args) 1063 struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args)
1044{ 1064{
1045 int err; 1065 int err;
1046 struct gk20a *g = dbg_s->g; 1066 struct gk20a *g = dbg_s->g;
1047 struct channel_gk20a *ch_gk20a; 1067 struct channel_gk20a *ch_gk20a;
1068 u32 mode = nvgpu_hwpm_ctxsw_mode_to_common_mode(args->mode);
1048 1069
1049 nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode); 1070 nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode);
1050 1071
@@ -1080,7 +1101,8 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
1080 goto clean_up; 1101 goto clean_up;
1081 } 1102 }
1082 err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a, 0, 1103 err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a, 0,
1083 args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW); 1104 mode);
1105
1084 if (err) 1106 if (err)
1085 nvgpu_err(g, 1107 nvgpu_err(g,
1086 "error (%d) during pm ctxsw mode update", err); 1108 "error (%d) during pm ctxsw mode update", err);
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index 83d27f17..0077c537 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -1066,7 +1066,7 @@ int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
1066} 1066}
1067 1067
1068int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, 1068int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
1069 struct channel_gk20a *ch, u64 gpu_va, bool enable) 1069 struct channel_gk20a *ch, u64 gpu_va, u32 mode)
1070{ 1070{
1071 struct tsg_gk20a *tsg; 1071 struct tsg_gk20a *tsg;
1072 struct nvgpu_gr_ctx *ch_ctx; 1072 struct nvgpu_gr_ctx *ch_ctx;
@@ -1089,16 +1089,33 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
1089 ch_ctx = &tsg->gr_ctx; 1089 ch_ctx = &tsg->gr_ctx;
1090 pm_ctx = &ch_ctx->pm_ctx; 1090 pm_ctx = &ch_ctx->pm_ctx;
1091 1091
1092 if (enable) { 1092 if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW) {
1093 /* 1093 /*
1094 * send command to enable HWPM only once - otherwise server 1094 * send command to enable HWPM only once - otherwise server
1095 * will return an error due to using the same GPU VA twice. 1095 * will return an error due to using the same GPU VA twice.
1096 */ 1096 */
1097 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
1098 return 0;
1099 1097
1098 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
1099 return 0;
1100 }
1100 p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; 1101 p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
1102 } else if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
1103 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
1104 return 0;
1105 }
1106 p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
1107 } else if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) &&
1108 (g->ops.gr.get_hw_accessor_stream_out_mode)){
1109 if (pm_ctx->pm_mode == g->ops.gr.get_hw_accessor_stream_out_mode()) {
1110 return 0;
1111 }
1112 p->mode = TEGRA_VGPU_CTXSW_MODE_STREAM_OUT_CTXSW;
1113 } else {
1114 nvgpu_err(g, "invalid hwpm context switch mode");
1115 return -EINVAL;
1116 }
1101 1117
1118 if (mode != NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
1102 /* Allocate buffer if necessary */ 1119 /* Allocate buffer if necessary */
1103 if (pm_ctx->mem.gpu_va == 0) { 1120 if (pm_ctx->mem.gpu_va == 0) {
1104 pm_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch->vm, 1121 pm_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch->vm,
@@ -1109,11 +1126,6 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
1109 return -ENOMEM; 1126 return -ENOMEM;
1110 pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size; 1127 pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size;
1111 } 1128 }
1112 } else {
1113 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f())
1114 return 0;
1115
1116 p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
1117 } 1129 }
1118 1130
1119 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE; 1131 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE;
@@ -1124,10 +1136,15 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
1124 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); 1136 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
1125 WARN_ON(err || msg.ret); 1137 WARN_ON(err || msg.ret);
1126 err = err ? err : msg.ret; 1138 err = err ? err : msg.ret;
1127 if (!err) 1139 if (!err) {
1128 pm_ctx->pm_mode = enable ? 1140 if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW) {
1129 ctxsw_prog_main_image_pm_mode_ctxsw_f() : 1141 pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();
1130 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); 1142 } else if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
1143 pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
1144 } else {
1145 pm_ctx->pm_mode = g->ops.gr.get_hw_accessor_stream_out_mode();
1146 }
1147 }
1131 1148
1132 return err; 1149 return err;
1133} 1150}
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.h b/drivers/gpu/nvgpu/vgpu/gr_vgpu.h
index c4b3944e..149bd17a 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.h
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.h
@@ -58,7 +58,7 @@ int vgpu_gr_set_sm_debug_mode(struct gk20a *g,
58int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g, 58int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
59 struct channel_gk20a *ch, bool enable); 59 struct channel_gk20a *ch, bool enable);
60int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, 60int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
61 struct channel_gk20a *ch, u64 gpu_va, bool enable); 61 struct channel_gk20a *ch, u64 gpu_va, u32 mode);
62int vgpu_gr_clear_sm_error_state(struct gk20a *g, 62int vgpu_gr_clear_sm_error_state(struct gk20a *g,
63 struct channel_gk20a *ch, u32 sm_id); 63 struct channel_gk20a *ch, u32 sm_id);
64int vgpu_gr_suspend_contexts(struct gk20a *g, 64int vgpu_gr_suspend_contexts(struct gk20a *g,
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index b85ca7b6..446f5bd3 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -1286,8 +1286,12 @@ struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args {
1286 1286
1287 1287
1288/* PM Context Switch Mode */ 1288/* PM Context Switch Mode */
1289/*This mode says that the pms are not to be context switched. */
1289#define NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW (0x00000000) 1290#define NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW (0x00000000)
1291/* This mode says that the pms in Mode-B are to be context switched */
1290#define NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW (0x00000001) 1292#define NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW (0x00000001)
1293/* This mode says that the pms in Mode-E (stream out) are to be context switched. */
1294#define NVGPU_DBG_GPU_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW (0x00000002)
1291 1295
1292struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args { 1296struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args {
1293 __u32 mode; 1297 __u32 mode;