gpu: nvgpu: Add support for FECS ctxsw tracing

bug 1648908

This commit adds support for FECS ctxsw tracing. Code is compiled conditionally under CONFIG_GK20_CTXSW_TRACE. This feature requires an updated FECS ucode that writes one record to a ring buffer on each context switch. On RM/Kernel side, the GPU driver reads records from the master ring buffer and generates trace entries into a user-facing VM ring buffer. For each record in the master ring buffer, RM/Kernel has to retrieve the vmid+pid of the user process that submitted related work.

Features currently implemented:
- master ring buffer allocation
- debugfs to dump master ring buffer
- FECS record per context switch (with both current and new contexts)
- dedicated device for ctxsw tracing (access to VM ring buffer)
- SOF generation (and access to PTIMER)
- VM ring buffer allocation, and reconfiguration
- enable/disable tracing at user level
- event-based trace filtering
- context_ptr to vmid+pid mapping
- read system call for ctxsw dev
- mmap system call for ctxsw dev (direct access to VM ring buffer)
- poll system call for ctxsw dev
- save/restore register on ELPG/CG6
- separate user ring from FECS ring handling

Features requiring ucode changes:
- enable/disable tracing at FECS level
- actual busy time on engine (bug 1642354)
- master ring buffer threshold interrupt (P1)
- API for GPU to CPU timestamp conversion (P1)
- vmid/pid/uid based filtering (P1)

Change-Id: I8e39c648221ee0fa09d5df8524b03dca83fe24f3
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1022737
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
author: Anton Vorontsov <avorontsov@nvidia.com> 2015-08-19 17:27:51 -0400
committer: Terje Bergstrom <tbergstrom@nvidia.com> 2016-03-23 10:48:47 -0400
commit: 1c40d09c4c9c011c1318c328c0b4b6b17d1f537e (patch)
tree: 8b93fcd00739f9ada9302f06175278c9cb1d6785 /drivers/gpu/nvgpu/gk20a/gk20a.h
parent: 82da6ed595a87c8a3038eecd75880ab21dd4c5de (diff)
1 files changed, 26 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 8b87c7aa..541e7b50 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -25,6 +25,8 @@ struct channel_gk20a;
 struct gr_gk20a;
 struct sim_gk20a;
 struct gk20a_ctxsw_ucode_segments;
+struct gk20a_fecs_trace;
+struct gk20a_ctxsw_trace;
 struct acr_gm20b;
 #include <linux/sched.h>
@@ -373,6 +375,19 @@ struct gpu_ops {
                bool use_dma_for_fw_bootstrap;
        } gr_ctx;
        struct {
+                int (*init)(struct gk20a *g);
+                int (*max_entries)(struct gk20a *,
+                        struct nvgpu_ctxsw_trace_filter *);
+                int (*flush)(struct gk20a *g);
+                int (*poll)(struct gk20a *g);
+                int (*enable)(struct gk20a *g);
+                int (*disable)(struct gk20a *g);
+                int (*reset)(struct gk20a *g);
+                int (*bind_channel)(struct gk20a *, struct channel_gk20a *);
+                int (*unbind_channel)(struct gk20a *, struct channel_gk20a *);
+                int (*deinit)(struct gk20a *g);
+        } fecs_trace;
+        struct {
                bool (*support_sparse)(struct gk20a *g);
                bool (*is_debug_mode_enabled)(struct gk20a *g);
                void (*set_debug_mode)(struct gk20a *g, bool enable);
@@ -613,6 +628,11 @@ struct gk20a {
                struct device *node;
        } tsg;
+        struct {
+                struct cdev cdev;
+                struct device *node;
+        } ctxsw;
        struct mutex client_lock;
        int client_refcount; /* open channels and ctrl nodes */
@@ -639,6 +659,9 @@ struct gk20a {
        struct gk20a_scale_profile *scale_profile;
+        struct gk20a_ctxsw_trace *ctxsw_trace;
+        struct gk20a_fecs_trace *fecs_trace;
        struct device_dma_parameters dma_parms;
        struct gk20a_cde_app cde_app;
@@ -716,6 +739,7 @@ enum gk20a_dbg_categories {
        gpu_dbg_gpu_dbg = BIT(9),  /* gpu debugger/profiler */
        gpu_dbg_cde     = BIT(10), /* cde info messages */
        gpu_dbg_cde_ctx = BIT(11), /* cde context usage messages */
+        gpu_dbg_ctxsw   = BIT(12), /* ctxsw tracing */
        gpu_dbg_mem     = BIT(31), /* memory accesses, very verbose */
 };
@@ -962,4 +986,6 @@ static inline u32 scale_ptimer(u32 timeout , u32 scale10x)
        else
                return (timeout * 10) / scale10x;
 }
+u64 gk20a_read_ptimer(struct gk20a *g);
 #endif /* GK20A_H */
author	Anton Vorontsov <avorontsov@nvidia.com>	2015-08-19 17:27:51 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2016-03-23 10:48:47 -0400
commit	1c40d09c4c9c011c1318c328c0b4b6b17d1f537e (patch)
tree	8b93fcd00739f9ada9302f06175278c9cb1d6785 /drivers/gpu/nvgpu/gk20a/gk20a.h
parent	82da6ed595a87c8a3038eecd75880ab21dd4c5de (diff)