gpu: nvgpu: Allow enabling PC sampling

Allow enabling of PC sampling hardware workaround. It is only applicable to gm20b.

Bug 1517458
Bug 1573150

Change-Id: Iad6a3ae556489fb7ab9628637d291849d2cd98ea
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/710421
author: Terje Bergstrom <tbergstrom@nvidia.com> 2015-02-20 21:15:04 -0500
committer: Dan Willemsen <dwillemsen@nvidia.com> 2015-04-04 21:56:54 -0400
commit: 325e0587d9180b05d59869679fc06b0ba979d973 (patch)
tree: dc7e743440c5e86d72c1a2850b8b93a0cc9d0296 /drivers/gpu/nvgpu/gm20b/gr_gm20b.c
parent: 1b6372ec6b8704d6bacc6b8abf7e599f1cbc15ca (diff)
1 files changed, 33 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index da1c1ab0..19340643 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -17,6 +17,7 @@
 #include <linux/delay.h>        /* for mdelay */
 #include <linux/io.h>
 #include <linux/tegra-fuse.h>
+#include <linux/vmalloc.h>
 #include "gk20a/gk20a.h"
 #include "gk20a/gr_gk20a.h"
@@ -946,6 +947,37 @@ static int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
                gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
        gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
                gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));
+        return 0;
+}
+/*
+ * Enable or disable PC sampling for a channel by patching the PM word of
+ * its graphics context image.
+ *
+ * The context pages are temporarily mapped write-combined into the kernel,
+ * the PC sampling field is rewritten from @enable, and the mapping is
+ * released again.
+ *
+ * Returns 0 on success, -EINVAL if the channel has no graphics context or
+ * c->vpr is set, and -ENOMEM if the context pages cannot be mapped.
+ */
+static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
+                                       bool enable)
+{
+        struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+        void *ctx_ptr = NULL;
+        u32 v;
+        gk20a_dbg_fn("");
+        /* ch_ctx is the address of an embedded member, so it is never NULL. */
+        if (!ch_ctx->gr_ctx || c->vpr)
+                return -EINVAL;
+        ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+                        PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
+                        0, pgprot_writecombine(PAGE_KERNEL));
+        if (!ctx_ptr)
+                return -ENOMEM;
+        /*
+         * Read-modify-write of the PM word. The read must address the word
+         * exactly like the write below (byte offset added to the mapping,
+         * index 0); otherwise the value is fetched from one location and
+         * stored to another.
+         * NOTE(review): assumes the second argument of gk20a_mem_rd32() is
+         * an index relative to the pointer, as the gk20a_mem_wr32() call
+         * suggests - confirm against the helper definitions.
+         */
+        v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
+        v &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
+        v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);
+        gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, v);
+        vunmap(ctx_ptr);
+        gk20a_dbg_fn("done");
        return 0;
 }
@@ -993,4 +1025,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
        gops->gr.update_ctxsw_preemption_mode =
                gr_gm20b_update_ctxsw_preemption_mode;
        gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs;
+        gops->gr.update_pc_sampling = gr_gm20b_update_pc_sampling;
 }
author	Terje Bergstrom <tbergstrom@nvidia.com>	2015-02-20 21:15:04 -0500
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-04-04 21:56:54 -0400
commit	325e0587d9180b05d59869679fc06b0ba979d973 (patch)
tree	dc7e743440c5e86d72c1a2850b8b93a0cc9d0296 /drivers/gpu/nvgpu/gm20b/gr_gm20b.c
parent	1b6372ec6b8704d6bacc6b8abf7e599f1cbc15ca (diff)

diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index da1c1ab0..19340643 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -17,6 +17,7 @@
17	#include <linux/delay.h> /* for mdelay */	17	#include <linux/delay.h> /* for mdelay */
18	#include <linux/io.h>	18	#include <linux/io.h>
19	#include <linux/tegra-fuse.h>	19	#include <linux/tegra-fuse.h>
		20	#include <linux/vmalloc.h>
20		21
21	#include "gk20a/gk20a.h"	22	#include "gk20a/gk20a.h"
22	#include "gk20a/gr_gk20a.h"	23	#include "gk20a/gr_gk20a.h"
@@ -946,6 +947,37 @@ static int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
946	gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));	947	gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
947	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",	948	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
948	gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));	949	gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));
		950
		951	return 0;
		952	}
		953
		954	static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
		955	bool enable)
		956	{
		957	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
		958	void *ctx_ptr = NULL;
		959	u32 v;
		960
		961	gk20a_dbg_fn("");
		962
		963	if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr)
		964	return -EINVAL;
		965
		966	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
		967	PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
		968	0, pgprot_writecombine(PAGE_KERNEL));
		969	if (!ctx_ptr)
		970	return -ENOMEM;
		971
		972	v = gk20a_mem_rd32(ctx_ptr, ctxsw_prog_main_image_pm_o());
		973	v &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
		974	v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);
		975	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, v);
		976
		977	vunmap(ctx_ptr);
		978
		979	gk20a_dbg_fn("done");
		980
949	return 0;	981	return 0;
950	}	982	}
951		983
@@ -993,4 +1025,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
993	gops->gr.update_ctxsw_preemption_mode =	1025	gops->gr.update_ctxsw_preemption_mode =
994	gr_gm20b_update_ctxsw_preemption_mode;	1026	gr_gm20b_update_ctxsw_preemption_mode;
995	gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs;	1027	gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs;
		1028	gops->gr.update_pc_sampling = gr_gm20b_update_pc_sampling;
996	}	1029	}