From 8d1ab756ed8a7f4d3138dc5da9d2de9f52915261 Mon Sep 17 00:00:00 2001 From: sujeet baranwal Date: Thu, 19 Feb 2015 10:34:51 -0800 Subject: gpu: nvgpu: ioctl for flushing GPU L2 CUDA devtools need to be able to flush the GPU's cache in a sideband fashion and so cannot use methods. This change implements an nvgpu_gpu_ioctl to flush and optionally invalidate the GPU's L2 cache and flush fb. Change-Id: Ib06a0bc8d8880ffbfe4b056518cc3c3df0cc4988 Signed-off-by: sujeet baranwal Signed-off-by: Mayank Kaushik Reviewed-on: http://git-master/r/671809 Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 18 ++++++++++++++++++ drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 7 ++++--- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 8 ++++---- drivers/gpu/nvgpu/gk20a/regops_gk20a.c | 2 +- 4 files changed, 27 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/nvgpu') diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index 4dee3faf..7b617a03 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c @@ -243,6 +243,20 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g, return err; } +static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g, + struct nvgpu_gpu_l2_fb_args *args) +{ + int err = 0; + + if (args->l2_flush) + g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false); + + if (args->fb_flush) + g->ops.mm.fb_flush(g); + + return err; +} + long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct platform_device *dev = filp->private_data; @@ -423,6 +437,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg err = gk20a_channel_open_ioctl(g, (struct nvgpu_channel_open_args *)buf); break; + case NVGPU_GPU_IOCTL_FLUSH_L2: + err = nvgpu_gpu_ioctl_l2_fb_ops(g, + (struct nvgpu_gpu_l2_fb_args *)buf); + break; default: dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); err = -ENOTTY; diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 5bee34fc..ffb52549 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -412,19 +412,16 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL: err = dbg_bind_channel_gk20a(dbg_s, (struct nvgpu_dbg_gpu_bind_channel_args *)buf); - gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err); break; case NVGPU_DBG_GPU_IOCTL_REG_OPS: err = nvgpu_ioctl_channel_reg_ops(dbg_s, (struct nvgpu_dbg_gpu_exec_reg_ops_args *)buf); - gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err); break; case NVGPU_DBG_GPU_IOCTL_POWERGATE: err = nvgpu_ioctl_powergate_gk20a(dbg_s, (struct nvgpu_dbg_gpu_powergate_args *)buf); - gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err); break; case NVGPU_DBG_GPU_IOCTL_EVENTS_CTRL: @@ -460,6 +457,8 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, break; } + gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err); + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); @@ -741,6 +740,8 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( bool ch_is_curr_ctx; int err = 0, action = args->mode; + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "action: %d", args->mode); + mutex_lock(&g->dbg_sessions_lock); /* Suspend GPU context switching */ diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 3115b5c3..ab3f18ba 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -1502,7 +1502,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); ctx_header_words >>= 2; - gk20a_mm_l2_flush(g, true); + g->ops.mm.l2_flush(g, true); for (i = 0; i < ctx_header_words; i++) { data = gk20a_mem_rd32(ctx_ptr, i); @@ -1565,7 +1565,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, /* Channel gr_ctx buffer is gpu cacheable. Flush and invalidate before cpu update. */ - gk20a_mm_l2_flush(g, true); + g->ops.mm.l2_flush(g, true); ctx_ptr = vmap(ch_ctx->gr_ctx->pages, PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT, @@ -1605,7 +1605,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, /* Channel gr_ctx buffer is gpu cacheable. Flush and invalidate before cpu update. */ - gk20a_mm_l2_flush(g, true); + g->ops.mm.l2_flush(g, true); ctx_ptr = vmap(ch_ctx->gr_ctx->pages, PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT, @@ -7003,7 +7003,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, goto cleanup; } - gk20a_mm_l2_flush(g, true); + g->ops.mm.l2_flush(g, true); /* write to appropriate place in context image, * first have to figure out where that really is */ diff --git a/drivers/gpu/nvgpu/gk20a/regops_gk20a.c b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c index ceda48b3..b1c25ac9 100644 --- a/drivers/gpu/nvgpu/gk20a/regops_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c @@ -697,7 +697,7 @@ static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s, ok &= !err; } - gk20a_dbg(gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d\n", + gk20a_dbg(gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d", *ctx_wr_count, *ctx_rd_count); return ok; -- cgit v1.2.2