From 8d1ab756ed8a7f4d3138dc5da9d2de9f52915261 Mon Sep 17 00:00:00 2001
From: sujeet baranwal <sbaranwal@nvidia.com>
Date: Thu, 19 Feb 2015 10:34:51 -0800
Subject: gpu: nvgpu: ioctl for flushing GPU L2

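CUDA devtools need to be able to flush the GPU's cache
in a sideband fashion and so cannot use methods. This
change implements an nvgpu_gpu_ioctl to flush and
optionally invalidate the GPU's L2 cache, and to flush fb.

While here, route the L2 flushes in gr_gk20a.c through
g->ops.mm.l2_flush, log the dbg_gpu ioctl return value once
after the switch instead of per case, add a debug print to
the SM suspend/resume ioctl, and drop the trailing newline
from the regops ctx_wrs/ctx_rds debug message.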

Change-Id: Ib06a0bc8d8880ffbfe4b056518cc3c3df0cc4988
Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com>
Signed-off-by: Mayank Kaushik <mkaushik@nvidia.com>
Reviewed-on: http://git-master/r/671809
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c    | 18 ++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c |  7 ++++---
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c      |  8 ++++----
 drivers/gpu/nvgpu/gk20a/regops_gk20a.c  |  2 +-
 4 files changed, 27 insertions(+), 8 deletions(-)

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 4dee3faf..7b617a03 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -243,6 +243,20 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
 	return err;
 }
 
+/* Flush (optionally invalidating) the GPU L2 and/or flush fb, as requested. */
+static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
+		struct nvgpu_gpu_l2_fb_args *args)
+{
+	/* Each operation is independent and runs only when its flag is set. */
+	if (args->l2_flush)
+		g->ops.mm.l2_flush(g, args->l2_invalidate != 0);
+
+	if (args->fb_flush)
+		g->ops.mm.fb_flush(g);
+
+	return 0;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct platform_device *dev = filp->private_data;
@@ -423,6 +437,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		err = gk20a_channel_open_ioctl(g,
 			(struct nvgpu_channel_open_args *)buf);
 		break;
+	case NVGPU_GPU_IOCTL_FLUSH_L2:
+		err = nvgpu_gpu_ioctl_l2_fb_ops(g,
+			   (struct nvgpu_gpu_l2_fb_args *)buf);
+		break;
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 5bee34fc..ffb52549 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -412,19 +412,16 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 	case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL:
 		err = dbg_bind_channel_gk20a(dbg_s,
 			     (struct nvgpu_dbg_gpu_bind_channel_args *)buf);
-		gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
 		break;
 
 	case NVGPU_DBG_GPU_IOCTL_REG_OPS:
 		err = nvgpu_ioctl_channel_reg_ops(dbg_s,
 			   (struct nvgpu_dbg_gpu_exec_reg_ops_args *)buf);
-		gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
 		break;
 
 	case NVGPU_DBG_GPU_IOCTL_POWERGATE:
 		err = nvgpu_ioctl_powergate_gk20a(dbg_s,
 			   (struct nvgpu_dbg_gpu_powergate_args *)buf);
-		gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
 		break;
 
 	case NVGPU_DBG_GPU_IOCTL_EVENTS_CTRL:
@@ -460,6 +457,8 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 		break;
 	}
 
+	gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
+
 	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
 		err = copy_to_user((void __user *)arg,
 				   buf, _IOC_SIZE(cmd));
@@ -741,6 +740,8 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
 	bool ch_is_curr_ctx;
 	int err = 0, action = args->mode;
 
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "action: %d", args->mode);
+
 	mutex_lock(&g->dbg_sessions_lock);
 
 	/* Suspend GPU context switching */
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 3115b5c3..ab3f18ba 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1502,7 +1502,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 	ctx_header_words =  roundup(ctx_header_bytes, sizeof(u32));
 	ctx_header_words >>= 2;
 
-	gk20a_mm_l2_flush(g, true);
+	g->ops.mm.l2_flush(g, true);
 
 	for (i = 0; i < ctx_header_words; i++) {
 		data = gk20a_mem_rd32(ctx_ptr, i);
@@ -1565,7 +1565,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
-	gk20a_mm_l2_flush(g, true);
+	g->ops.mm.l2_flush(g, true);
 
 	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
 			PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
@@ -1605,7 +1605,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
-	gk20a_mm_l2_flush(g, true);
+	g->ops.mm.l2_flush(g, true);
 
 	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
 			PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
@@ -7003,7 +7003,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 		goto cleanup;
 	}
 
-	gk20a_mm_l2_flush(g, true);
+	g->ops.mm.l2_flush(g, true);
 
 	/* write to appropriate place in context image,
 	 * first have to figure out where that really is */
diff --git a/drivers/gpu/nvgpu/gk20a/regops_gk20a.c b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c
index ceda48b3..b1c25ac9 100644
--- a/drivers/gpu/nvgpu/gk20a/regops_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c
@@ -697,7 +697,7 @@ static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
 		ok &= !err;
 	}
 
-	gk20a_dbg(gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d\n",
+	gk20a_dbg(gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d",
 		   *ctx_wr_count, *ctx_rd_count);
 
 	return ok;
-- 
cgit v1.2.2