From 1c40d09c4c9c011c1318c328c0b4b6b17d1f537e Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 19 Aug 2015 14:27:51 -0700 Subject: gpu: nvgpu: Add support for FECS ctxsw tracing bug 1648908 This commit adds support for FECS ctxsw tracing. Code is compiled conditionally under CONFIG_GK20_CTXSW_TRACE. This feature requires an updated FECS ucode that writes one record to a ring buffer on each context switch. On RM/Kernel side, the GPU driver reads records from the master ring buffer and generates trace entries into a user-facing VM ring buffer. For each record in the master ring buffer, RM/Kernel has to retrieve the vmid+pid of the user process that submitted related work. Features currently implemented: - master ring buffer allocation - debugfs to dump master ring buffer - FECS record per context switch (with both current and new contexts) - dedicated device for ctxsw tracing (access to VM ring buffer) - SOF generation (and access to PTIMER) - VM ring buffer allocation, and reconfiguration - enable/disable tracing at user level - event-based trace filtering - context_ptr to vmid+pid mapping - read system call for ctxsw dev - mmap system call for ctxsw dev (direct access to VM ring buffer) - poll system call for ctxsw dev - save/restore register on ELPG/CG6 - separate user ring from FECS ring handling Features requiring ucode changes: - enable/disable tracing at FECS level - actual busy time on engine (bug 1642354) - master ring buffer threshold interrupt (P1) - API for GPU to CPU timestamp conversion (P1) - vmid/pid/uid based filtering (P1) Change-Id: I8e39c648221ee0fa09d5df8524b03dca83fe24f3 Signed-off-by: Thomas Fleury Reviewed-on: http://git-master/r/1022737 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/gk20a.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'drivers/gpu/nvgpu/gk20a/gk20a.h') diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 
8b87c7aa..541e7b50 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -25,6 +25,8 @@ struct channel_gk20a; struct gr_gk20a; struct sim_gk20a; struct gk20a_ctxsw_ucode_segments; +struct gk20a_fecs_trace; +struct gk20a_ctxsw_trace; struct acr_gm20b; #include @@ -372,6 +374,19 @@ struct gpu_ops { bool (*is_fw_defined)(void); bool use_dma_for_fw_bootstrap; } gr_ctx; + struct { + int (*init)(struct gk20a *g); + int (*max_entries)(struct gk20a *, + struct nvgpu_ctxsw_trace_filter *); + int (*flush)(struct gk20a *g); + int (*poll)(struct gk20a *g); + int (*enable)(struct gk20a *g); + int (*disable)(struct gk20a *g); + int (*reset)(struct gk20a *g); + int (*bind_channel)(struct gk20a *, struct channel_gk20a *); + int (*unbind_channel)(struct gk20a *, struct channel_gk20a *); + int (*deinit)(struct gk20a *g); + } fecs_trace; struct { bool (*support_sparse)(struct gk20a *g); bool (*is_debug_mode_enabled)(struct gk20a *g); @@ -613,6 +628,11 @@ struct gk20a { struct device *node; } tsg; + struct { + struct cdev cdev; + struct device *node; + } ctxsw; + struct mutex client_lock; int client_refcount; /* open channels and ctrl nodes */ @@ -639,6 +659,9 @@ struct gk20a { struct gk20a_scale_profile *scale_profile; + struct gk20a_ctxsw_trace *ctxsw_trace; + struct gk20a_fecs_trace *fecs_trace; + struct device_dma_parameters dma_parms; struct gk20a_cde_app cde_app; @@ -716,6 +739,7 @@ enum gk20a_dbg_categories { gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */ gpu_dbg_cde = BIT(10), /* cde info messages */ gpu_dbg_cde_ctx = BIT(11), /* cde context usage messages */ + gpu_dbg_ctxsw = BIT(12), /* ctxsw tracing */ gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ }; @@ -962,4 +986,6 @@ static inline u32 scale_ptimer(u32 timeout , u32 scale10x) else return (timeout * 10) / scale10x; } + +u64 gk20a_read_ptimer(struct gk20a *g); #endif /* GK20A_H */ -- cgit v1.2.2