summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsujeet baranwal <sbaranwal@nvidia.com>2015-02-19 13:34:51 -0500
committerDan Willemsen <dwillemsen@nvidia.com>2015-04-04 21:08:16 -0400
commit8d1ab756ed8a7f4d3138dc5da9d2de9f52915261 (patch)
tree969a5a5aead991570d8c8c56acd41adb2103b8f5
parentac205be1d31b00c5641df81d53f2da5f143d3354 (diff)
gpu: nvgpu: ioctl for flushing GPU L2
CUDA devtools need to be able to flush the GPU's cache in a sideband fashion and so cannot use methods. This change implements an nvgpu_gpu_ioctl to flush and optionally invalidate the GPU's L2 cache and flush fb.

Change-Id: Ib06a0bc8d8880ffbfe4b056518cc3c3df0cc4988
Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com>
Signed-off-by: Mayank Kaushik <mkaushik@nvidia.com>
Reviewed-on: http://git-master/r/671809
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c18
-rw-r--r--drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c7
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c8
-rw-r--r--drivers/gpu/nvgpu/gk20a/regops_gk20a.c2
-rw-r--r--include/uapi/linux/nvgpu.h13
5 files changed, 38 insertions, 10 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 4dee3faf..7b617a03 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -243,6 +243,20 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
243 return err; 243 return err;
244} 244}
245 245
246static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
247 struct nvgpu_gpu_l2_fb_args *args)
248{
249 int err = 0;
250
251 if (args->l2_flush)
252 g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false);
253
254 if (args->fb_flush)
255 g->ops.mm.fb_flush(g);
256
257 return err;
258}
259
246long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 260long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
247{ 261{
248 struct platform_device *dev = filp->private_data; 262 struct platform_device *dev = filp->private_data;
@@ -423,6 +437,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
423 err = gk20a_channel_open_ioctl(g, 437 err = gk20a_channel_open_ioctl(g,
424 (struct nvgpu_channel_open_args *)buf); 438 (struct nvgpu_channel_open_args *)buf);
425 break; 439 break;
440 case NVGPU_GPU_IOCTL_FLUSH_L2:
441 err = nvgpu_gpu_ioctl_l2_fb_ops(g,
442 (struct nvgpu_gpu_l2_fb_args *)buf);
443 break;
426 default: 444 default:
427 dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); 445 dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
428 err = -ENOTTY; 446 err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 5bee34fc..ffb52549 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -412,19 +412,16 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
412 case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL: 412 case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL:
413 err = dbg_bind_channel_gk20a(dbg_s, 413 err = dbg_bind_channel_gk20a(dbg_s,
414 (struct nvgpu_dbg_gpu_bind_channel_args *)buf); 414 (struct nvgpu_dbg_gpu_bind_channel_args *)buf);
415 gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
416 break; 415 break;
417 416
418 case NVGPU_DBG_GPU_IOCTL_REG_OPS: 417 case NVGPU_DBG_GPU_IOCTL_REG_OPS:
419 err = nvgpu_ioctl_channel_reg_ops(dbg_s, 418 err = nvgpu_ioctl_channel_reg_ops(dbg_s,
420 (struct nvgpu_dbg_gpu_exec_reg_ops_args *)buf); 419 (struct nvgpu_dbg_gpu_exec_reg_ops_args *)buf);
421 gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
422 break; 420 break;
423 421
424 case NVGPU_DBG_GPU_IOCTL_POWERGATE: 422 case NVGPU_DBG_GPU_IOCTL_POWERGATE:
425 err = nvgpu_ioctl_powergate_gk20a(dbg_s, 423 err = nvgpu_ioctl_powergate_gk20a(dbg_s,
426 (struct nvgpu_dbg_gpu_powergate_args *)buf); 424 (struct nvgpu_dbg_gpu_powergate_args *)buf);
427 gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
428 break; 425 break;
429 426
430 case NVGPU_DBG_GPU_IOCTL_EVENTS_CTRL: 427 case NVGPU_DBG_GPU_IOCTL_EVENTS_CTRL:
@@ -460,6 +457,8 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
460 break; 457 break;
461 } 458 }
462 459
460 gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
461
463 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) 462 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
464 err = copy_to_user((void __user *)arg, 463 err = copy_to_user((void __user *)arg,
465 buf, _IOC_SIZE(cmd)); 464 buf, _IOC_SIZE(cmd));
@@ -741,6 +740,8 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
741 bool ch_is_curr_ctx; 740 bool ch_is_curr_ctx;
742 int err = 0, action = args->mode; 741 int err = 0, action = args->mode;
743 742
743 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "action: %d", args->mode);
744
744 mutex_lock(&g->dbg_sessions_lock); 745 mutex_lock(&g->dbg_sessions_lock);
745 746
746 /* Suspend GPU context switching */ 747 /* Suspend GPU context switching */
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 3115b5c3..ab3f18ba 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1502,7 +1502,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1502 ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); 1502 ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
1503 ctx_header_words >>= 2; 1503 ctx_header_words >>= 2;
1504 1504
1505 gk20a_mm_l2_flush(g, true); 1505 g->ops.mm.l2_flush(g, true);
1506 1506
1507 for (i = 0; i < ctx_header_words; i++) { 1507 for (i = 0; i < ctx_header_words; i++) {
1508 data = gk20a_mem_rd32(ctx_ptr, i); 1508 data = gk20a_mem_rd32(ctx_ptr, i);
@@ -1565,7 +1565,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
1565 1565
1566 /* Channel gr_ctx buffer is gpu cacheable. 1566 /* Channel gr_ctx buffer is gpu cacheable.
1567 Flush and invalidate before cpu update. */ 1567 Flush and invalidate before cpu update. */
1568 gk20a_mm_l2_flush(g, true); 1568 g->ops.mm.l2_flush(g, true);
1569 1569
1570 ctx_ptr = vmap(ch_ctx->gr_ctx->pages, 1570 ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
1571 PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT, 1571 PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
@@ -1605,7 +1605,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1605 1605
1606 /* Channel gr_ctx buffer is gpu cacheable. 1606 /* Channel gr_ctx buffer is gpu cacheable.
1607 Flush and invalidate before cpu update. */ 1607 Flush and invalidate before cpu update. */
1608 gk20a_mm_l2_flush(g, true); 1608 g->ops.mm.l2_flush(g, true);
1609 1609
1610 ctx_ptr = vmap(ch_ctx->gr_ctx->pages, 1610 ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
1611 PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT, 1611 PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
@@ -7003,7 +7003,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7003 goto cleanup; 7003 goto cleanup;
7004 } 7004 }
7005 7005
7006 gk20a_mm_l2_flush(g, true); 7006 g->ops.mm.l2_flush(g, true);
7007 7007
7008 /* write to appropriate place in context image, 7008 /* write to appropriate place in context image,
7009 * first have to figure out where that really is */ 7009 * first have to figure out where that really is */
diff --git a/drivers/gpu/nvgpu/gk20a/regops_gk20a.c b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c
index ceda48b3..b1c25ac9 100644
--- a/drivers/gpu/nvgpu/gk20a/regops_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c
@@ -697,7 +697,7 @@ static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
697 ok &= !err; 697 ok &= !err;
698 } 698 }
699 699
700 gk20a_dbg(gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d\n", 700 gk20a_dbg(gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d",
701 *ctx_wr_count, *ctx_rd_count); 701 *ctx_wr_count, *ctx_rd_count);
702 702
703 return ok; 703 return ok;
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index ebeacf9b..3c53ba94 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -242,6 +242,14 @@ struct nvgpu_gpu_open_channel_args {
242 __s32 channel_fd; 242 __s32 channel_fd;
243}; 243};
244 244
245/* L2 cache writeback, optionally invalidate clean lines and flush fb */
246struct nvgpu_gpu_l2_fb_args {
247 __u32 l2_flush:1;
248 __u32 l2_invalidate:1;
249 __u32 fb_flush:1;
250 __u32 reserved;
251} __packed;
252
245#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \ 253#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
246 _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args) 254 _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
247#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \ 255#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -264,13 +272,14 @@ struct nvgpu_gpu_open_channel_args {
264 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 10, struct nvgpu_gpu_get_tpc_masks_args) 272 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 10, struct nvgpu_gpu_get_tpc_masks_args)
265#define NVGPU_GPU_IOCTL_OPEN_CHANNEL \ 273#define NVGPU_GPU_IOCTL_OPEN_CHANNEL \
266 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 11, struct nvgpu_gpu_open_channel_args) 274 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 11, struct nvgpu_gpu_open_channel_args)
275#define NVGPU_GPU_IOCTL_FLUSH_L2 \
276	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 12, struct nvgpu_gpu_l2_fb_args)
267 277
268#define NVGPU_GPU_IOCTL_LAST \ 278#define NVGPU_GPU_IOCTL_LAST \
269 _IOC_NR(NVGPU_GPU_IOCTL_OPEN_CHANNEL) 279 _IOC_NR(NVGPU_GPU_IOCTL_FLUSH_L2)
270#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \ 280#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
271 sizeof(struct nvgpu_gpu_prepare_compressible_read_args) 281 sizeof(struct nvgpu_gpu_prepare_compressible_read_args)
272 282
273
274/* 283/*
275 * /dev/nvhost-tsg-gpu device 284 * /dev/nvhost-tsg-gpu device
276 * 285 *