diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-02-20 21:15:04 -0500 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-04-04 21:56:54 -0400 |
commit | 325e0587d9180b05d59869679fc06b0ba979d973 (patch) | |
tree | dc7e743440c5e86d72c1a2850b8b93a0cc9d0296 | |
parent | 1b6372ec6b8704d6bacc6b8abf7e599f1cbc15ca (diff) |
gpu: nvgpu: Allow enabling PC sampling
Allow enabling of PC sampling hardware workaround. It is only
applicable to gm20b.
Bug 1517458
Bug 1573150
Change-Id: Iad6a3ae556489fb7ab9628637d291849d2cd98ea
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/710421
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 16 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 33 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h | 8 | ||||
-rw-r--r-- | include/uapi/linux/nvgpu.h | 14 |
6 files changed, 73 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index ffb52549..7cda9949 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | |||
@@ -377,6 +377,17 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | |||
377 | static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, | 377 | static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, |
378 | struct nvgpu_dbg_gpu_perfbuf_unmap_args *args); | 378 | struct nvgpu_dbg_gpu_perfbuf_unmap_args *args); |
379 | 379 | ||
380 | static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s, | ||
381 | struct nvgpu_dbg_gpu_pc_sampling_args *args) | ||
382 | { | ||
383 | struct channel_gk20a *ch = dbg_s->ch; | ||
384 | struct gk20a *g = ch->g; | ||
385 | |||
386 | gk20a_dbg_fn(""); | ||
387 | |||
388 | return g->ops.gr.update_pc_sampling ? | ||
389 | g->ops.gr.update_pc_sampling(ch, args->enable) : -EINVAL; | ||
390 | } | ||
380 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, | 391 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, |
381 | unsigned long arg) | 392 | unsigned long arg) |
382 | { | 393 | { |
@@ -449,6 +460,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, | |||
449 | (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf); | 460 | (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf); |
450 | break; | 461 | break; |
451 | 462 | ||
463 | case NVGPU_DBG_GPU_IOCTL_PC_SAMPLING: | ||
464 | err = gk20a_dbg_pc_sampling(dbg_s, | ||
465 | (struct nvgpu_dbg_gpu_pc_sampling_args *)buf); | ||
466 | break; | ||
467 | |||
452 | default: | 468 | default: |
453 | gk20a_err(dev_from_gk20a(g), | 469 | gk20a_err(dev_from_gk20a(g), |
454 | "unrecognized dbg gpu ioctl cmd: 0x%x", | 470 | "unrecognized dbg gpu ioctl cmd: 0x%x", |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index acee2e48..441ea7cc 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -165,6 +165,8 @@ struct gpu_ops { | |||
165 | void *ctx_ptr); | 165 | void *ctx_ptr); |
166 | int (*dump_gr_regs)(struct gk20a *g, | 166 | int (*dump_gr_regs)(struct gk20a *g, |
167 | struct gk20a_debug_output *o); | 167 | struct gk20a_debug_output *o); |
168 | int (*update_pc_sampling)(struct channel_gk20a *ch, | ||
169 | bool enable); | ||
168 | } gr; | 170 | } gr; |
169 | const char *name; | 171 | const char *name; |
170 | struct { | 172 | struct { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 4b94f863..deafc438 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -500,4 +500,5 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g, | |||
500 | void gr_gk20a_free_gr_ctx(struct gk20a *g, | 500 | void gr_gk20a_free_gr_ctx(struct gk20a *g, |
501 | struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx); | 501 | struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx); |
502 | int gr_gk20a_halt_pipe(struct gk20a *g); | 502 | int gr_gk20a_halt_pipe(struct gk20a *g); |
503 | |||
503 | #endif /*__GR_GK20A_H__*/ | 504 | #endif /*__GR_GK20A_H__*/ |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index da1c1ab0..19340643 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/delay.h> /* for mdelay */ | 17 | #include <linux/delay.h> /* for mdelay */ |
18 | #include <linux/io.h> | 18 | #include <linux/io.h> |
19 | #include <linux/tegra-fuse.h> | 19 | #include <linux/tegra-fuse.h> |
20 | #include <linux/vmalloc.h> | ||
20 | 21 | ||
21 | #include "gk20a/gk20a.h" | 22 | #include "gk20a/gk20a.h" |
22 | #include "gk20a/gr_gk20a.h" | 23 | #include "gk20a/gr_gk20a.h" |
@@ -946,6 +947,37 @@ static int gr_gm20b_dump_gr_status_regs(struct gk20a *g, | |||
946 | gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r())); | 947 | gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r())); |
947 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n", | 948 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n", |
948 | gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r())); | 949 | gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r())); |
950 | |||
951 | return 0; | ||
952 | } | ||
953 | |||
954 | static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, | ||
955 | bool enable) | ||
956 | { | ||
957 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | ||
958 | void *ctx_ptr = NULL; | ||
959 | u32 v; | ||
960 | |||
961 | gk20a_dbg_fn(""); | ||
962 | |||
963 | if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr) | ||
964 | return -EINVAL; | ||
965 | |||
966 | ctx_ptr = vmap(ch_ctx->gr_ctx->pages, | ||
967 | PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT, | ||
968 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
969 | if (!ctx_ptr) | ||
970 | return -ENOMEM; | ||
971 | |||
972 | v = gk20a_mem_rd32(ctx_ptr, ctxsw_prog_main_image_pm_o()); | ||
973 | v &= ~ctxsw_prog_main_image_pm_pc_sampling_m(); | ||
974 | v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable); | ||
975 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, v); | ||
976 | |||
977 | vunmap(ctx_ptr); | ||
978 | |||
979 | gk20a_dbg_fn("done"); | ||
980 | |||
949 | return 0; | 981 | return 0; |
950 | } | 982 | } |
951 | 983 | ||
@@ -993,4 +1025,5 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
993 | gops->gr.update_ctxsw_preemption_mode = | 1025 | gops->gr.update_ctxsw_preemption_mode = |
994 | gr_gm20b_update_ctxsw_preemption_mode; | 1026 | gr_gm20b_update_ctxsw_preemption_mode; |
995 | gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs; | 1027 | gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs; |
1028 | gops->gr.update_pc_sampling = gr_gm20b_update_pc_sampling; | ||
996 | } | 1029 | } |
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h index ec44e9fb..cefd91e1 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h | |||
@@ -110,6 +110,14 @@ static inline u32 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(void) | |||
110 | { | 110 | { |
111 | return 0x0; | 111 | return 0x0; |
112 | } | 112 | } |
113 | static inline u32 ctxsw_prog_main_image_pm_pc_sampling_f(u32 v) | ||
114 | { | ||
115 | return (v & 0x1) << 6; | ||
116 | } | ||
117 | static inline u32 ctxsw_prog_main_image_pm_pc_sampling_m(void) | ||
118 | { | ||
119 | return 0x1 << 6; | ||
120 | } | ||
113 | static inline u32 ctxsw_prog_main_image_pm_ptr_o(void) | 121 | static inline u32 ctxsw_prog_main_image_pm_ptr_o(void) |
114 | { | 122 | { |
115 | return 0x0000002c; | 123 | return 0x0000002c; |
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 3c53ba94..9e3a362f 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h | |||
@@ -457,8 +457,20 @@ struct nvgpu_dbg_gpu_perfbuf_unmap_args { | |||
457 | #define NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP \ | 457 | #define NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP \ |
458 | _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 8, struct nvgpu_dbg_gpu_perfbuf_unmap_args) | 458 | _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 8, struct nvgpu_dbg_gpu_perfbuf_unmap_args) |
459 | 459 | ||
460 | /* Enable/disable PC Sampling */ | ||
461 | struct nvgpu_dbg_gpu_pc_sampling_args { | ||
462 | __u32 enable; | ||
463 | __u32 _pad0[1]; | ||
464 | }; | ||
465 | |||
466 | #define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_DISABLE 0 | ||
467 | #define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_ENABLE 1 | ||
468 | |||
469 | #define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING \ | ||
470 | _IOW(NVGPU_DBG_GPU_IOCTL_MAGIC, 9, struct nvgpu_dbg_gpu_pc_sampling_args) | ||
471 | |||
460 | #define NVGPU_DBG_GPU_IOCTL_LAST \ | 472 | #define NVGPU_DBG_GPU_IOCTL_LAST \ |
461 | _IOC_NR(NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP) | 473 | _IOC_NR(NVGPU_DBG_GPU_IOCTL_PC_SAMPLING) |
462 | #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \ | 474 | #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \ |
463 | sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args) | 475 | sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args) |
464 | 476 | ||